bench/bench_zfec.py

   1 from zfec import easyfec, Encoder, filefec
   2 from pyutil import mathutil
   3
   4 import os, sys
   5
   6 from pyutil import benchutil
   7
   8 FNAME="benchrandom.data"
   9
  10 def _make_new_rand_file(size):
  11     open(FNAME, "wb").write(os.urandom(size))
  12
  13 def donothing(results, reslenthing):
  14     pass
  15
  16 K=3
  17 M=10
  18
  19 d = ""
  20 ds = []
  21 easyfecenc = None
  22 fecenc = None
  23 def _make_new_rand_data(size, k, m):
  24     global d, easyfecenc, fecenc, K, M
  25     K = k
  26     M = m
  27     d = os.urandom(size)
  28     del ds[:]
  29     ds.extend([None]*k)
  30     blocksize = mathutil.div_ceil(size, k)
  31     for i in range(k):
  32         ds[i] = d[i*blocksize:(i+1)*blocksize]
  33     ds[-1] = ds[-1] + "\x00" * (len(ds[-2]) - len(ds[-1]))
  34     easyfecenc = easyfec.Encoder(k, m)
  35     fecenc = Encoder(k, m)
  36
  37 import sha
  38 hashers = [ sha.new() for i in range(M) ]
  39 def hashem(results, reslenthing):
  40     for i, result in enumerate(results):
  41         hashers[i].update(result)
  42
  43 def _encode_file(N):
  44     filefec.encode_file(open(FNAME, "rb"), donothing, K, M)
  45
  46 def _encode_file_stringy(N):
  47     filefec.encode_file_stringy(open(FNAME, "rb"), donothing, K, M)
  48
  49 def _encode_file_stringy_easyfec(N):
  50     filefec.encode_file_stringy_easyfec(open(FNAME, "rb"), donothing, K, M)
  51
  52 def _encode_file_not_really(N):
  53     filefec.encode_file_not_really(open(FNAME, "rb"), donothing, K, M)
  54
  55 def _encode_file_not_really_and_hash(N):
  56     filefec.encode_file_not_really_and_hash(open(FNAME, "rb"), donothing, K, M)
  57
  58 def _encode_file_and_hash(N):
  59     filefec.encode_file(open(FNAME, "rb"), hashem, K, M)
  60
  61 def _encode_data_not_really(N):
  62     # This function is to see how long it takes to run the Python code
  63     # that does this benchmarking and accounting and so on but not
  64     # actually do any erasure-coding, in order to get an idea of how
  65     # much overhead there is in using Python.  This exercises the
  66     # basic behavior of allocating buffers to hold the secondary
  67     # shares.
  68     sz = N // K
  69     for i in range(M-K):
  70         x = '\x00' * sz
  71
  72 def _encode_data_easyfec(N):
  73     easyfecenc.encode(d)
  74
  75 def _encode_data_fec(N):
  76     fecenc.encode(ds)
  77
  78 def bench(k, m):
  79     SIZE = 10**6
  80     MAXREPS = 64
  81     # for f in [_encode_file_stringy_easyfec, _encode_file_stringy, _encode_file, _encode_file_not_really,]:
  82     # for f in [_encode_file,]:
  83     # for f in [_encode_file_not_really, _encode_file_not_really_and_hash, _encode_file, _encode_file_and_hash,]:
  84     # for f in [_encode_data_not_really, _encode_data_easyfec, _encode_data_fec,]:
  85     print "measuring encoding of data with K=%d, M=%d, reporting results in nanoseconds per byte after encoding %d bytes %d times in a row..." % (k, m, SIZE, MAXREPS)
  86     # for f in [_encode_data_fec, _encode_data_not_really]:
  87     for f in [_encode_data_fec]:
  88         def _init_func(size):
  89             return _make_new_rand_data(size, k, m)
  90         for BSIZE in [SIZE]:
  91             results = benchutil.rep_bench(f, n=BSIZE, initfunc=_init_func, MAXREPS=MAXREPS, MAXTIME=None, UNITS_PER_SECOND=1000000000)
  92             print "and now represented in MB/s..."
  93             print
  94             best = results['best']
  95             mean = results['mean']
  96             worst = results['worst']
  97             print "best:  % 4.3f MB/sec" % (10**3 / best)
  98             print "mean:  % 4.3f MB/sec" % (10**3 / mean)
  99             print "worst: % 4.3f MB/sec" % (10**3 / worst)
 100
 101 k = K
 102 m = M
 103 for arg in sys.argv:
 104     if arg.startswith('--k='):
 105         k = int(arg[len('--k='):])
 106     if arg.startswith('--m='):
 107         m = int(arg[len('--m='):])
 108
 109 bench(k, m)