From: Brian Warner Date: Thu, 19 Feb 2009 20:55:15 +0000 (-0700) Subject: reliability.py: fix the numpy conversion, it was completely broken. Thanks to Terrell... X-Git-Tag: allmydata-tahoe-1.4.0~180 X-Git-Url: https://git.rkrishnan.org/status?a=commitdiff_plain;h=790a10d1b2a8bc78ea4c81e12ec330a3e2d9a27e;p=tahoe-lafs%2Ftahoe-lafs.git reliability.py: fix the numpy conversion, it was completely broken. Thanks to Terrell Russell for the help. --- diff --git a/src/allmydata/reliability.py b/src/allmydata/reliability.py index 2729692a..76d930f3 100644 --- a/src/allmydata/reliability.py +++ b/src/allmydata/reliability.py @@ -2,7 +2,7 @@ import math from allmydata.util import statistics -from numpy import array +from numpy import array, matrix, dot DAY=24*60*60 MONTH=31*DAY @@ -78,14 +78,14 @@ class ReliabilityModel: #print "REPAIR:", repair #print "DIFF:", (old_post_repair - decay * repair) - START = array([[0]*N + [1]]) - ALIVE = array([[0]*k + [1]*(1+N-k)]) - DEAD = array([[1]*k + [0]*(1+N-k)]) - REPAIRp = array([[0]*k + [1]*(R-k) + [0]*(1+N-R)]) - REPAIR_newshares = array([[0]*k + - [N-i for i in range(k, R)] + - [0]*(1+N-R)]) - assert REPAIR_newshares.shape[1] == N+1 + START = array([0]*N + [1]) + ALIVE = array([0]*k + [1]*(1+N-k)) + DEAD = array([1]*k + [0]*(1+N-k)) + REPAIRp = array([0]*k + [1]*(R-k) + [0]*(1+N-R)) + REPAIR_newshares = array([0]*k + + [N-i for i in range(k, R)] + + [0]*(1+N-R)) + assert REPAIR_newshares.shape[0] == N+1 #print "START", START #print "ALIVE", ALIVE #print "REPAIRp", REPAIRp @@ -101,24 +101,25 @@ class ReliabilityModel: report = ReliabilityReport() for t in range(0, report_span+delta, delta): - unmaintained_state = unmaintained_state * decay - maintained_state = maintained_state * decay + # the .A[0] turns the one-row matrix back into an array + unmaintained_state = (unmaintained_state * decay).A[0] + maintained_state = (maintained_state * decay).A[0] if (t-last_check) > check_period: last_check = t # we do a check-and-repair this frequently - need_repair = (maintained_state * REPAIRp).sum() + need_repair = dot(maintained_state, REPAIRp) P_repaired_last_check_period = need_repair - new_shares = (maintained_state * REPAIR_newshares).sum() + new_shares = dot(maintained_state, REPAIR_newshares) needed_repairs.append(need_repair) needed_new_shares.append(new_shares) - maintained_state = maintained_state * repair + maintained_state = (maintained_state * repair).A[0] if (t-last_report) > report_period: last_report = t - P_dead_unmaintained = (unmaintained_state * DEAD).sum() - P_dead_maintained = (maintained_state * DEAD).sum() + P_dead_unmaintained = dot(unmaintained_state, DEAD) + P_dead_maintained = dot(maintained_state, DEAD) cumulative_number_of_repairs = sum(needed_repairs) cumulative_number_of_new_shares = sum(needed_new_shares) report.add_sample(t, unmaintained_state, maintained_state, @@ -128,8 +129,8 @@ class ReliabilityModel: P_dead_unmaintained, P_dead_maintained) # record one more sample at the end of the run - P_dead_unmaintained = (unmaintained_state * DEAD).sum() - P_dead_maintained = (maintained_state * DEAD).sum() + P_dead_unmaintained = dot(unmaintained_state, DEAD) + P_dead_maintained = dot(maintained_state, DEAD) cumulative_number_of_repairs = sum(needed_repairs) cumulative_number_of_new_shares = sum(needed_new_shares) report.add_sample(t, unmaintained_state, maintained_state, @@ -174,7 +175,7 @@ class ReliabilityModel: assert len(decay_row) == (N+1), len(decay_row) decay_rows.append(decay_row) - decay = array(decay_rows) + decay = matrix(decay_rows) return decay def build_decay_row(self, start_shares, P): @@ -205,7 +206,7 @@ class ReliabilityModel: new_repair_row[start_shares] = 1 new_repair_rows.append(new_repair_row) - repair = array(new_repair_rows) + repair = matrix(new_repair_rows) return repair class ReliabilityReport: diff --git a/src/allmydata/test/test_provisioning.py b/src/allmydata/test/test_provisioning.py index 74c57c4a..7e1f9a2f 100644 --- a/src/allmydata/test/test_provisioning.py +++ b/src/allmydata/test/test_provisioning.py @@ -71,7 +71,41 @@ class Reliability(unittest.TestCase): def test_basic(self): if ReliabilityModel is None: raise unittest.SkipTest("reliability model requires NumPy") + + # test that numpy math works the way I think it does + import numpy + decay = numpy.matrix([[1,0,0], + [.1,.9,0], + [.01,.09,.9], + ]) + start = numpy.array([0,0,1]) + g2 = (start * decay).A[0] + self.failUnlessEqual(repr(g2), repr(numpy.array([.01,.09,.9]))) + g3 = (g2 * decay).A[0] + self.failUnlessEqual(repr(g3), repr(numpy.array([.028,.162,.81]))) + + # and the dot product + recoverable = numpy.array([0,1,1]) + P_recoverable_g2 = numpy.dot(g2, recoverable) + self.failUnlessAlmostEqual(P_recoverable_g2, .9 + .09) + P_recoverable_g3 = numpy.dot(g3, recoverable) + self.failUnlessAlmostEqual(P_recoverable_g3, .81 + .162) + r = ReliabilityModel.run(delta=100000, report_period=3*MONTH, report_span=5*YEAR) self.failUnlessEqual(len(r.samples), 20) + + last_row = r.samples[-1] + #print last_row + (when, unmaintained_shareprobs, maintained_shareprobs, + P_repaired_last_check_period, + cumulative_number_of_repairs, + cumulative_number_of_new_shares, + P_dead_unmaintained, P_dead_maintained) = last_row + self.failUnless(isinstance(P_repaired_last_check_period, float)) + self.failUnless(isinstance(P_dead_unmaintained, float)) + self.failUnless(isinstance(P_dead_maintained, float)) + self.failUnlessAlmostEqual(P_dead_unmaintained, 0.033591004555395272) + self.failUnlessAlmostEqual(P_dead_maintained, 3.2983995819177542e-08) + diff --git a/src/allmydata/web/reliability.xhtml b/src/allmydata/web/reliability.xhtml index d8502031..b01c7927 100644 --- a/src/allmydata/web/reliability.xhtml +++ b/src/allmydata/web/reliability.xhtml @@ -38,7 +38,7 @@ repair bandwidth to configure on a Tahoe grid.

check period.
  • P_dead (unmaintained): the chance that the file will be unrecoverable without periodic check+repair
  • -
  • P_dead (maintained): the chance that the file will be recoverable even +
  • P_dead (maintained): the chance that the file will be unrecoverable even with periodic check+repair