From: Brian Warner <warner@lothar.com>
Date: Thu, 19 Feb 2009 20:55:15 +0000 (-0700)
Subject: reliability.py: fix the numpy conversion, it was completely broken. Thanks to Terrell... 
X-Git-Tag: allmydata-tahoe-1.4.0~180
X-Git-Url: https://git.rkrishnan.org/pf/content//%22%22?a=commitdiff_plain;h=790a10d1b2a8bc78ea4c81e12ec330a3e2d9a27e;p=tahoe-lafs%2Ftahoe-lafs.git

reliability.py: fix the numpy conversion, it was completely broken. Thanks to Terrell Russell for the help.
---

diff --git a/src/allmydata/reliability.py b/src/allmydata/reliability.py
index 2729692a..76d930f3 100644
--- a/src/allmydata/reliability.py
+++ b/src/allmydata/reliability.py
@@ -2,7 +2,7 @@
 
 import math
 from allmydata.util import statistics
-from numpy import array
+from numpy import array, matrix, dot
 
 DAY=24*60*60
 MONTH=31*DAY
@@ -78,14 +78,14 @@ class ReliabilityModel:
         #print "REPAIR:", repair
         #print "DIFF:", (old_post_repair - decay * repair)
 
-        START = array([[0]*N + [1]])
-        ALIVE = array([[0]*k + [1]*(1+N-k)])
-        DEAD = array([[1]*k + [0]*(1+N-k)])
-        REPAIRp = array([[0]*k + [1]*(R-k) + [0]*(1+N-R)])
-        REPAIR_newshares = array([[0]*k +
-                                  [N-i for i in range(k, R)] +
-                                  [0]*(1+N-R)])
-        assert REPAIR_newshares.shape[1] == N+1
+        START = array([0]*N + [1])
+        ALIVE = array([0]*k + [1]*(1+N-k))
+        DEAD = array([1]*k + [0]*(1+N-k))
+        REPAIRp = array([0]*k + [1]*(R-k) + [0]*(1+N-R))
+        REPAIR_newshares = array([0]*k +
+                                 [N-i for i in range(k, R)] +
+                                 [0]*(1+N-R))
+        assert REPAIR_newshares.shape[0] == N+1
         #print "START", START
         #print "ALIVE", ALIVE
         #print "REPAIRp", REPAIRp
@@ -101,24 +101,25 @@ class ReliabilityModel:
         report = ReliabilityReport()
 
         for t in range(0, report_span+delta, delta):
-            unmaintained_state = unmaintained_state * decay
-            maintained_state = maintained_state * decay
+            # the .A[0] turns the one-row matrix back into an array
+            unmaintained_state = (unmaintained_state * decay).A[0]
+            maintained_state = (maintained_state * decay).A[0]
             if (t-last_check) > check_period:
                 last_check = t
                 # we do a check-and-repair this frequently
-                need_repair = (maintained_state * REPAIRp).sum()
+                need_repair = dot(maintained_state, REPAIRp)
 
                 P_repaired_last_check_period = need_repair
-                new_shares = (maintained_state * REPAIR_newshares).sum()
+                new_shares = dot(maintained_state, REPAIR_newshares)
                 needed_repairs.append(need_repair)
                 needed_new_shares.append(new_shares)
 
-                maintained_state = maintained_state * repair
+                maintained_state = (maintained_state * repair).A[0]
 
             if (t-last_report) > report_period:
                 last_report = t
-                P_dead_unmaintained = (unmaintained_state * DEAD).sum()
-                P_dead_maintained = (maintained_state * DEAD).sum()
+                P_dead_unmaintained = dot(unmaintained_state, DEAD)
+                P_dead_maintained = dot(maintained_state, DEAD)
                 cumulative_number_of_repairs = sum(needed_repairs)
                 cumulative_number_of_new_shares = sum(needed_new_shares)
                 report.add_sample(t, unmaintained_state, maintained_state,
@@ -128,8 +129,8 @@ class ReliabilityModel:
                                   P_dead_unmaintained, P_dead_maintained)
 
         # record one more sample at the end of the run
-        P_dead_unmaintained = (unmaintained_state * DEAD).sum()
-        P_dead_maintained = (maintained_state * DEAD).sum()
+        P_dead_unmaintained = dot(unmaintained_state, DEAD)
+        P_dead_maintained = dot(maintained_state, DEAD)
         cumulative_number_of_repairs = sum(needed_repairs)
         cumulative_number_of_new_shares = sum(needed_new_shares)
         report.add_sample(t, unmaintained_state, maintained_state,
@@ -174,7 +175,7 @@ class ReliabilityModel:
             assert len(decay_row) == (N+1), len(decay_row)
             decay_rows.append(decay_row)
 
-        decay = array(decay_rows)
+        decay = matrix(decay_rows)
         return decay
 
     def build_decay_row(self, start_shares, P):
@@ -205,7 +206,7 @@ class ReliabilityModel:
                 new_repair_row[start_shares] = 1
             new_repair_rows.append(new_repair_row)
 
-        repair = array(new_repair_rows)
+        repair = matrix(new_repair_rows)
         return repair
 
 class ReliabilityReport:
diff --git a/src/allmydata/test/test_provisioning.py b/src/allmydata/test/test_provisioning.py
index 74c57c4a..7e1f9a2f 100644
--- a/src/allmydata/test/test_provisioning.py
+++ b/src/allmydata/test/test_provisioning.py
@@ -71,7 +71,41 @@ class Reliability(unittest.TestCase):
     def test_basic(self):
         if ReliabilityModel is None:
             raise unittest.SkipTest("reliability model requires NumPy")
+
+        # test that numpy math works the way I think it does
+        import numpy
+        decay = numpy.matrix([[1,0,0],
+                             [.1,.9,0],
+                             [.01,.09,.9],
+                             ])
+        start = numpy.array([0,0,1])
+        g2 = (start * decay).A[0]
+        self.failUnlessEqual(repr(g2), repr(numpy.array([.01,.09,.9])))
+        g3 = (g2 * decay).A[0]
+        self.failUnlessEqual(repr(g3), repr(numpy.array([.028,.162,.81])))
+
+        # and the dot product
+        recoverable = numpy.array([0,1,1])
+        P_recoverable_g2 = numpy.dot(g2, recoverable)
+        self.failUnlessAlmostEqual(P_recoverable_g2, .9 + .09)
+        P_recoverable_g3 = numpy.dot(g3, recoverable)
+        self.failUnlessAlmostEqual(P_recoverable_g3, .81 + .162)
+
         r = ReliabilityModel.run(delta=100000,
                                  report_period=3*MONTH,
                                  report_span=5*YEAR)
         self.failUnlessEqual(len(r.samples), 20)
+
+        last_row = r.samples[-1]
+        #print last_row
+        (when, unmaintained_shareprobs, maintained_shareprobs,
+         P_repaired_last_check_period,
+         cumulative_number_of_repairs,
+         cumulative_number_of_new_shares,
+         P_dead_unmaintained, P_dead_maintained) = last_row
+        self.failUnless(isinstance(P_repaired_last_check_period, float))
+        self.failUnless(isinstance(P_dead_unmaintained, float))
+        self.failUnless(isinstance(P_dead_maintained, float))
+        self.failUnlessAlmostEqual(P_dead_unmaintained, 0.033591004555395272)
+        self.failUnlessAlmostEqual(P_dead_maintained, 3.2983995819177542e-08)
+
diff --git a/src/allmydata/web/reliability.xhtml b/src/allmydata/web/reliability.xhtml
index d8502031..b01c7927 100644
--- a/src/allmydata/web/reliability.xhtml
+++ b/src/allmydata/web/reliability.xhtml
@@ -38,7 +38,7 @@ repair bandwidth to configure on a Tahoe grid.</p>
   check period.</li>
   <li>P_dead (unmaintained): the chance that the file will be unrecoverable
   without periodic check+repair</li>
-  <li>P_dead (maintained): the chance that the file will be recoverable even
+  <li>P_dead (maintained): the chance that the file will be unrecoverable even
   with periodic check+repair</li>
 </ul>