def foo(): return None # no-op placeholder; leave it on one line to keep the line number constant
-import os, time, random
+import os, time
+from StringIO import StringIO
from twisted.trial import unittest
from twisted.internet import defer, reactor
from twisted.python import failure
self.should_assert("Should assert if not 0<=p<=1", f, 1, -1)
self.should_assert("Should assert if n < 1", f, 0, .1)
+ out = StringIO()
+ statistics.print_pmf(pmf_comp, out=out)
+ lines = out.getvalue().splitlines()
+ self.failUnlessEqual(lines[0], "i=0: 0.81")
+ self.failUnlessEqual(lines[1], "i=1: 0.18")
+ self.failUnlessEqual(lines[2], "i=2: 0.01")
+
def test_survival_pmf(self):
f = statistics.survival_pmf
# Cross-check binomial-distribution method against convolution
self.failUnlessTrue(statistics.valid_pmf(pmf1))
self.should_assert("Should assert if p_i > 1", f, [1.1]);
self.should_assert("Should assert if p_i < 0", f, [-.1]);
-
+
+    def test_repair_count_pmf(self):
+        # binomial PMF with n=5, p=0.9; repair_count_pmf is given k=3
+        survival_pmf = statistics.binomial_distribution_pmf(5, .9)
+        repair_pmf = statistics.repair_count_pmf(survival_pmf, 3)
+        # repair_pmf[0] == sum(survival_pmf[0,1,2,5])
+        # repair_pmf[1] == survival_pmf[4]
+        # repair_pmf[2] == survival_pmf[3]
+        no_repair = 0.00001 + 0.00045 + 0.0081 + 0.59049
+        expected_pmf = [no_repair, .32805, .0729, 0, 0, 0]
+        self.failUnlessListAlmostEqual(repair_pmf, expected_pmf)
+
+    def test_repair_cost(self):
+        survival_pmf = statistics.binomial_distribution_pmf(5, .9)
+        cost_fn = statistics.bandwidth_cost_function
+        file_size = 1000
+        k = 3
+
+        # Mean cost of one repair run, for two upload/download cost ratios.
+        mean_cost = statistics.mean_repair_cost(cost_fn, file_size,
+                                                survival_pmf, k,
+                                                ul_dl_ratio=1.0)
+        self.failUnlessAlmostEqual(mean_cost, 558.90)
+        mean_cost = statistics.mean_repair_cost(cost_fn, file_size,
+                                                survival_pmf, k,
+                                                ul_dl_ratio=8.0)
+        self.failUnlessAlmostEqual(mean_cost, 1664.55)
+
+        # I haven't manually checked the math beyond here -warner
+        eternal_cost = statistics.eternal_repair_cost(cost_fn, file_size,
+                                                      survival_pmf, k,
+                                                      discount_rate=0,
+                                                      ul_dl_ratio=1.0)
+        self.failUnlessAlmostEqual(eternal_cost, 65292.056074766246)
+        eternal_cost = statistics.eternal_repair_cost(cost_fn, file_size,
+                                                      survival_pmf, k,
+                                                      discount_rate=0.05,
+                                                      ul_dl_ratio=1.0)
+        self.failUnlessAlmostEqual(eternal_cost, 9133.6097158191551)
def test_convolve(self):
f = statistics.convolve
def test_find_k(self):
f = statistics.find_k
g = statistics.pr_file_loss
- plist = [.9] * 10 + [.8] * 10
+ plist = [.9] * 10 + [.8] * 10 # N=20
t = .0001
k = f(plist, t)
self.failUnlessEqual(k, 10)
from __future__ import division
from mathutil import round_sigfigs
import math
-import array
+import sys
def pr_file_loss(p_list, k):
"""
pmf_list = [ [1 - p, p] for p in p_list ];
return reduce(convolve, pmf_list)
-def print_pmf(pmf, n=4):
+def print_pmf(pmf, n=4, out=None):
    """
    Print a PMF in a readable form, with values rounded to n
    significant digits.
+
+    One line of the form "i=<index>: <rounded value>" is written per
+    entry of pmf. Output goes to the file-like object 'out'; when 'out'
+    is None (the default) it goes to whatever sys.stdout is at call time.
    """
+    # Resolve the stream here rather than in the signature: a default of
+    # out=sys.stdout is evaluated once, when the function is defined, and
+    # would keep writing to the old stream after sys.stdout is rebound.
+    if out is None:
+        out = sys.stdout
    for k, p in enumerate(pmf):
-        print "i=" + str(k) + ":", round_sigfigs(p, n)
+        print >>out, "i=" + str(k) + ":", round_sigfigs(p, n)
def pr_backup_file_loss(p_list, backup_p, k):
"""
if loss_prob > target_loss_prob:
return k
+ # we shouldn't be able to get here, since sum(pmf)==1.0
k = len(pmf) - 1
return k
def bandwidth_cost_function(file_size, shares, k, ul_dl_ratio):
+    """
+    Return file_size + (file_size / k) * shares * ul_dl_ratio.
+
+    The division is always done in floating point. Presumably the first
+    term is the cost of downloading the file and the second the cost of
+    uploading 'shares' new shares of size file_size/k, weighted by the
+    upload/download cost ratio -- confirm against the callers.
+
+    Raises ZeroDivisionError if k is 0.
+    """
    return file_size + float(file_size) / k * shares * ul_dl_ratio
-def mean_repair_cost(cost_function, file_size, survival_pmf, k):
+def mean_repair_cost(cost_function, file_size, survival_pmf, k, ul_dl_ratio):
    """
    Return the expected cost for a repair run on a file with the given
-    survival_pmf and requiring k shares.
+    survival_pmf and requiring k shares, in which upload cost is
+    'ul_dl_ratio' times download cost.
+
+    cost_function is called as
+    cost_function(file_size, new_shares, k, ul_dl_ratio) for every
+    new_shares >= 1 that repair_count_pmf() has an entry for, and each
+    result is weighted by that entry. The new_shares == 0 entry adds
+    nothing to the total.
    """
    repair_pmf = repair_count_pmf(survival_pmf, k)
-    exp_cnt = sum([d * repair_pmf[d] for d in range(1, len(repair_pmf))])
-    return cost_function(file_size, exp_cnt, k)
+    expected_cost = 0
+    for new_shares in range(1, len(repair_pmf)):
+        # cost of replacing this many shares, weighted by its probability
+        outcome_cost = cost_function(file_size, new_shares, k, ul_dl_ratio)
+        expected_cost += outcome_cost * repair_pmf[new_shares]
+    return expected_cost
-def eternal_repair_cost(cost_function, file_size, survival_pmf, k, discount_rate=0):
+def eternal_repair_cost(cost_function, file_size, survival_pmf, k,
+                        discount_rate=0, ul_dl_ratio=1.0):
    """
    Calculate the eternal repair cost for a file that is aggressively
-    repaired.
+    repaired, i.e. the sum of repair costs until the file is dead.
+
+    With c = mean_repair_cost(...), r = discount_rate and
+    f = 1 - sum(survival_pmf[0:k]), the value returned is
+    c*(1-r) / (1 - (1-r)*f). For abs((1-r)*f) < 1 this is the closed form
+    of the geometric series c*(1-r) * ((1-r)*f)**i summed over i >= 0.
+
+    Raises ZeroDivisionError when (1-r)*f == 1, e.g. discount_rate == 0
+    together with sum(survival_pmf[0:k]) == 0.
    """
-    c = mean_repair_cost(cost_function, file_size, survival_pmf, k)
+    # expected cost of a single repair run
+    c = mean_repair_cost(cost_function, file_size, survival_pmf, k, ul_dl_ratio)
+    # one minus the total weight of survival_pmf[0..k-1]; presumably the
+    # probability that at least k shares survive -- confirm with callers
    f = 1 - sum(survival_pmf[0:k])
-    r = discount_rate
+    # coerce so that a non-float rate (such as the int default 0) is used
+    # as a float in the arithmetic below
+    r = float(discount_rate)
    return (c * (1-r)) / (1 - (1-r) * f)
"""
assert n >= k
- if k > n:
- return 0
-
if k > n/2:
k = n - k