reliability.py: fix the numpy conversion, it was completely broken. Thanks to Terrell...

author Brian Warner <warner@lothar.com>

Thu, 19 Feb 2009 20:55:15 +0000 (13:55 -0700)

committer Brian Warner <warner@lothar.com>

Thu, 19 Feb 2009 20:55:15 +0000 (13:55 -0700)
author Brian Warner <warner@lothar.com>
Thu, 19 Feb 2009 20:55:15 +0000 (13:55 -0700)
committer Brian Warner <warner@lothar.com>
Thu, 19 Feb 2009 20:55:15 +0000 (13:55 -0700)
diff --git a/src/allmydata/reliability.py b/src/allmydata/reliability.py

index 2729692a7e1883ea975f6c5c7e7a33abf862ce59..76d930f356a67721f464edfba0a00a3e61596c95 100644 (file)
--- a/src/allmydata/reliability.py
+++ b/src/allmydata/reliability.py
@@ -2,7 +2,7 @@
  
  import math
  from allmydata.util import statistics
-from numpy import array
+from numpy import array, matrix, dot
  
  DAY=24*60*60
  MONTH=31*DAY
@@ -78,14 +78,14 @@ class ReliabilityModel:
          #print "REPAIR:", repair
          #print "DIFF:", (old_post_repair - decay * repair)
  
-        START = array([[0]*N + [1]])
-        ALIVE = array([[0]*k + [1]*(1+N-k)])
-        DEAD = array([[1]*k + [0]*(1+N-k)])
-        REPAIRp = array([[0]*k + [1]*(R-k) + [0]*(1+N-R)])
-        REPAIR_newshares = array([[0]*k +
-                                  [N-i for i in range(k, R)] +
-                                  [0]*(1+N-R)])
-        assert REPAIR_newshares.shape[1] == N+1
+        START = array([0]*N + [1])
+        ALIVE = array([0]*k + [1]*(1+N-k))
+        DEAD = array([1]*k + [0]*(1+N-k))
+        REPAIRp = array([0]*k + [1]*(R-k) + [0]*(1+N-R))
+        REPAIR_newshares = array([0]*k +
+                                 [N-i for i in range(k, R)] +
+                                 [0]*(1+N-R))
+        assert REPAIR_newshares.shape[0] == N+1
          #print "START", START
          #print "ALIVE", ALIVE
          #print "REPAIRp", REPAIRp
@@ -101,24 +101,25 @@ class ReliabilityModel:
          report = ReliabilityReport()
  
          for t in range(0, report_span+delta, delta):
-            unmaintained_state = unmaintained_state * decay
-            maintained_state = maintained_state * decay
+            # the .A[0] turns the one-row matrix back into an array
+            unmaintained_state = (unmaintained_state * decay).A[0]
+            maintained_state = (maintained_state * decay).A[0]
              if (t-last_check) > check_period:
                  last_check = t
                  # we do a check-and-repair this frequently
-                need_repair = (maintained_state * REPAIRp).sum()
+                need_repair = dot(maintained_state, REPAIRp)
  
                  P_repaired_last_check_period = need_repair
-                new_shares = (maintained_state * REPAIR_newshares).sum()
+                new_shares = dot(maintained_state, REPAIR_newshares)
                  needed_repairs.append(need_repair)
                  needed_new_shares.append(new_shares)
  
-                maintained_state = maintained_state * repair
+                maintained_state = (maintained_state * repair).A[0]
  
              if (t-last_report) > report_period:
                  last_report = t
-                P_dead_unmaintained = (unmaintained_state * DEAD).sum()
-                P_dead_maintained = (maintained_state * DEAD).sum()
+                P_dead_unmaintained = dot(unmaintained_state, DEAD)
+                P_dead_maintained = dot(maintained_state, DEAD)
                  cumulative_number_of_repairs = sum(needed_repairs)
                  cumulative_number_of_new_shares = sum(needed_new_shares)
                  report.add_sample(t, unmaintained_state, maintained_state,
@@ -128,8 +129,8 @@ class ReliabilityModel:
                                    P_dead_unmaintained, P_dead_maintained)
  
          # record one more sample at the end of the run
-        P_dead_unmaintained = (unmaintained_state * DEAD).sum()
-        P_dead_maintained = (maintained_state * DEAD).sum()
+        P_dead_unmaintained = dot(unmaintained_state, DEAD)
+        P_dead_maintained = dot(maintained_state, DEAD)
          cumulative_number_of_repairs = sum(needed_repairs)
          cumulative_number_of_new_shares = sum(needed_new_shares)
          report.add_sample(t, unmaintained_state, maintained_state,
@@ -174,7 +175,7 @@ class ReliabilityModel:
              assert len(decay_row) == (N+1), len(decay_row)
              decay_rows.append(decay_row)
  
-        decay = array(decay_rows)
+        decay = matrix(decay_rows)
          return decay
  
      def build_decay_row(self, start_shares, P):
@@ -205,7 +206,7 @@ class ReliabilityModel:
                  new_repair_row[start_shares] = 1
              new_repair_rows.append(new_repair_row)
  
-        repair = array(new_repair_rows)
+        repair = matrix(new_repair_rows)
          return repair
  
  class ReliabilityReport:
diff --git a/src/allmydata/test/test_provisioning.py b/src/allmydata/test/test_provisioning.py

index 74c57c4ad9b726ad100af2204d0ec7418884da47..7e1f9a2f5771a03b35bfaf53b93983fbcb45f030 100644 (file)
--- a/src/allmydata/test/test_provisioning.py
+++ b/src/allmydata/test/test_provisioning.py
@@ -71,7 +71,41 @@ class Reliability(unittest.TestCase):
      def test_basic(self):
          if ReliabilityModel is None:
              raise unittest.SkipTest("reliability model requires NumPy")
+
+        # test that numpy math works the way I think it does
+        import numpy
+        decay = numpy.matrix([[1,0,0],
+                             [.1,.9,0],
+                             [.01,.09,.9],
+                             ])
+        start = numpy.array([0,0,1])
+        g2 = (start * decay).A[0]
+        self.failUnlessEqual(repr(g2), repr(numpy.array([.01,.09,.9])))
+        g3 = (g2 * decay).A[0]
+        self.failUnlessEqual(repr(g3), repr(numpy.array([.028,.162,.81])))
+
+        # and the dot product
+        recoverable = numpy.array([0,1,1])
+        P_recoverable_g2 = numpy.dot(g2, recoverable)
+        self.failUnlessAlmostEqual(P_recoverable_g2, .9 + .09)
+        P_recoverable_g3 = numpy.dot(g3, recoverable)
+        self.failUnlessAlmostEqual(P_recoverable_g3, .81 + .162)
+
          r = ReliabilityModel.run(delta=100000,
                                   report_period=3*MONTH,
                                   report_span=5*YEAR)
          self.failUnlessEqual(len(r.samples), 20)
+
+        last_row = r.samples[-1]
+        #print last_row
+        (when, unmaintained_shareprobs, maintained_shareprobs,
+         P_repaired_last_check_period,
+         cumulative_number_of_repairs,
+         cumulative_number_of_new_shares,
+         P_dead_unmaintained, P_dead_maintained) = last_row
+        self.failUnless(isinstance(P_repaired_last_check_period, float))
+        self.failUnless(isinstance(P_dead_unmaintained, float))
+        self.failUnless(isinstance(P_dead_maintained, float))
+        self.failUnlessAlmostEqual(P_dead_unmaintained, 0.033591004555395272)
+        self.failUnlessAlmostEqual(P_dead_maintained, 3.2983995819177542e-08)
+
diff --git a/src/allmydata/web/reliability.xhtml b/src/allmydata/web/reliability.xhtml

index d8502031bee097a2e6f8f724486a84670cf40b83..b01c79273f71d983a49841d281d91e08dd7d5c4c 100644 (file)
--- a/src/allmydata/web/reliability.xhtml
+++ b/src/allmydata/web/reliability.xhtml
@@ -38,7 +38,7 @@ repair bandwidth to configure on a Tahoe grid.</p>
    check period.</li>
    <li>P_dead (unmaintained): the chance that the file will be unrecoverable
    without periodic check+repair</li>
-  <li>P_dead (maintained): the chance that the file will be recoverable even
+  <li>P_dead (maintained): the chance that the file will be unrecoverable even
    with periodic check+repair</li>
  </ul>
author	Brian Warner <warner@lothar.com>
	Thu, 19 Feb 2009 20:55:15 +0000 (13:55 -0700)
committer	Brian Warner <warner@lothar.com>
	Thu, 19 Feb 2009 20:55:15 +0000 (13:55 -0700)
src/allmydata/reliability.py		patch \| blob \| history
src/allmydata/test/test_provisioning.py		patch \| blob \| history
src/allmydata/web/reliability.xhtml		patch \| blob \| history