From: Brian Warner
Date: Thu, 19 Feb 2009 20:55:15 +0000 (-0700)
Subject: reliability.py: fix the numpy conversion, it was completely broken. Thanks to Terrell Russell for the help.
X-Git-Tag: allmydata-tahoe-1.4.0~180
X-Git-Url: https://git.rkrishnan.org/pf/content//%22%22?a=commitdiff_plain;h=790a10d1b2a8bc78ea4c81e12ec330a3e2d9a27e;p=tahoe-lafs%2Ftahoe-lafs.git
reliability.py: fix the numpy conversion, it was completely broken. Thanks to Terrell Russell for the help.
---
diff --git a/src/allmydata/reliability.py b/src/allmydata/reliability.py
index 2729692a..76d930f3 100644
--- a/src/allmydata/reliability.py
+++ b/src/allmydata/reliability.py
@@ -2,7 +2,7 @@
import math
from allmydata.util import statistics
-from numpy import array
+from numpy import array, matrix, dot
DAY=24*60*60
MONTH=31*DAY
@@ -78,14 +78,14 @@ class ReliabilityModel:
#print "REPAIR:", repair
#print "DIFF:", (old_post_repair - decay * repair)
- START = array([[0]*N + [1]])
- ALIVE = array([[0]*k + [1]*(1+N-k)])
- DEAD = array([[1]*k + [0]*(1+N-k)])
- REPAIRp = array([[0]*k + [1]*(R-k) + [0]*(1+N-R)])
- REPAIR_newshares = array([[0]*k +
- [N-i for i in range(k, R)] +
- [0]*(1+N-R)])
- assert REPAIR_newshares.shape[1] == N+1
+ START = array([0]*N + [1])
+ ALIVE = array([0]*k + [1]*(1+N-k))
+ DEAD = array([1]*k + [0]*(1+N-k))
+ REPAIRp = array([0]*k + [1]*(R-k) + [0]*(1+N-R))
+ REPAIR_newshares = array([0]*k +
+ [N-i for i in range(k, R)] +
+ [0]*(1+N-R))
+ assert REPAIR_newshares.shape[0] == N+1
#print "START", START
#print "ALIVE", ALIVE
#print "REPAIRp", REPAIRp
@@ -101,24 +101,25 @@ class ReliabilityModel:
report = ReliabilityReport()
for t in range(0, report_span+delta, delta):
- unmaintained_state = unmaintained_state * decay
- maintained_state = maintained_state * decay
+ # the .A[0] turns the one-row matrix back into an array
+ unmaintained_state = (unmaintained_state * decay).A[0]
+ maintained_state = (maintained_state * decay).A[0]
if (t-last_check) > check_period:
last_check = t
# we do a check-and-repair this frequently
- need_repair = (maintained_state * REPAIRp).sum()
+ need_repair = dot(maintained_state, REPAIRp)
P_repaired_last_check_period = need_repair
- new_shares = (maintained_state * REPAIR_newshares).sum()
+ new_shares = dot(maintained_state, REPAIR_newshares)
needed_repairs.append(need_repair)
needed_new_shares.append(new_shares)
- maintained_state = maintained_state * repair
+ maintained_state = (maintained_state * repair).A[0]
if (t-last_report) > report_period:
last_report = t
- P_dead_unmaintained = (unmaintained_state * DEAD).sum()
- P_dead_maintained = (maintained_state * DEAD).sum()
+ P_dead_unmaintained = dot(unmaintained_state, DEAD)
+ P_dead_maintained = dot(maintained_state, DEAD)
cumulative_number_of_repairs = sum(needed_repairs)
cumulative_number_of_new_shares = sum(needed_new_shares)
report.add_sample(t, unmaintained_state, maintained_state,
@@ -128,8 +129,8 @@ class ReliabilityModel:
P_dead_unmaintained, P_dead_maintained)
# record one more sample at the end of the run
- P_dead_unmaintained = (unmaintained_state * DEAD).sum()
- P_dead_maintained = (maintained_state * DEAD).sum()
+ P_dead_unmaintained = dot(unmaintained_state, DEAD)
+ P_dead_maintained = dot(maintained_state, DEAD)
cumulative_number_of_repairs = sum(needed_repairs)
cumulative_number_of_new_shares = sum(needed_new_shares)
report.add_sample(t, unmaintained_state, maintained_state,
@@ -174,7 +175,7 @@ class ReliabilityModel:
assert len(decay_row) == (N+1), len(decay_row)
decay_rows.append(decay_row)
- decay = array(decay_rows)
+ decay = matrix(decay_rows)
return decay
def build_decay_row(self, start_shares, P):
@@ -205,7 +206,7 @@ class ReliabilityModel:
new_repair_row[start_shares] = 1
new_repair_rows.append(new_repair_row)
- repair = array(new_repair_rows)
+ repair = matrix(new_repair_rows)
return repair
class ReliabilityReport:
diff --git a/src/allmydata/test/test_provisioning.py b/src/allmydata/test/test_provisioning.py
index 74c57c4a..7e1f9a2f 100644
--- a/src/allmydata/test/test_provisioning.py
+++ b/src/allmydata/test/test_provisioning.py
@@ -71,7 +71,41 @@ class Reliability(unittest.TestCase):
def test_basic(self):
if ReliabilityModel is None:
raise unittest.SkipTest("reliability model requires NumPy")
+
+ # test that numpy math works the way I think it does
+ import numpy
+ decay = numpy.matrix([[1,0,0],
+ [.1,.9,0],
+ [.01,.09,.9],
+ ])
+ start = numpy.array([0,0,1])
+ g2 = (start * decay).A[0]
+ self.failUnlessEqual(repr(g2), repr(numpy.array([.01,.09,.9])))
+ g3 = (g2 * decay).A[0]
+ self.failUnlessEqual(repr(g3), repr(numpy.array([.028,.162,.81])))
+
+ # and the dot product
+ recoverable = numpy.array([0,1,1])
+ P_recoverable_g2 = numpy.dot(g2, recoverable)
+ self.failUnlessAlmostEqual(P_recoverable_g2, .9 + .09)
+ P_recoverable_g3 = numpy.dot(g3, recoverable)
+ self.failUnlessAlmostEqual(P_recoverable_g3, .81 + .162)
+
r = ReliabilityModel.run(delta=100000,
report_period=3*MONTH,
report_span=5*YEAR)
self.failUnlessEqual(len(r.samples), 20)
+
+ last_row = r.samples[-1]
+ #print last_row
+ (when, unmaintained_shareprobs, maintained_shareprobs,
+ P_repaired_last_check_period,
+ cumulative_number_of_repairs,
+ cumulative_number_of_new_shares,
+ P_dead_unmaintained, P_dead_maintained) = last_row
+ self.failUnless(isinstance(P_repaired_last_check_period, float))
+ self.failUnless(isinstance(P_dead_unmaintained, float))
+ self.failUnless(isinstance(P_dead_maintained, float))
+ self.failUnlessAlmostEqual(P_dead_unmaintained, 0.033591004555395272)
+ self.failUnlessAlmostEqual(P_dead_maintained, 3.2983995819177542e-08)
+
diff --git a/src/allmydata/web/reliability.xhtml b/src/allmydata/web/reliability.xhtml
index d8502031..b01c7927 100644
--- a/src/allmydata/web/reliability.xhtml
+++ b/src/allmydata/web/reliability.xhtml
@@ -38,7 +38,7 @@ repair bandwidth to configure on a Tahoe grid.
check period.
P_dead (unmaintained): the chance that the file will be unrecoverable
without periodic check+repair
- P_dead (maintained): the chance that the file will be recoverable even
+ P_dead (maintained): the chance that the file will be unrecoverable even
with periodic check+repair