trivial: hush pyflakes

[tahoe-lafs/zfec.git] / zfec / zfec / filefec.py
diff --git a/zfec/zfec/filefec.py b/zfec/zfec/filefec.py

index 47e4e0fa1d8202191b3d4d254889e97f8c5a1e0e..682c387a951d822bda8d24bd17a3ad37184c82fe 100644 (file)
--- a/zfec/zfec/filefec.py
+++ b/zfec/zfec/filefec.py
@@ -1,11 +1,22 @@
  import easyfec, zfec
-from util import fileutil
-from util.mathutil import log_ceil
+from pyutil import fileutil
+from pyutil.mathutil import pad_size, log_ceil
  
-import array, os, re, struct, traceback
+import array, os, struct
  
  CHUNKSIZE = 4096
  
+from base64 import b32encode
+def ab(x): # debuggery
+    """
+    Abbreviate x for debug output: "<len(x)>:<base32 of x's last 3 bytes>"
+    (of all of x if it is shorter), or "<len(x)>:--empty--" if x is empty.
+    """
+    size = len(x)
+    if size == 0:
+        return "%s:%s" % (size, "--empty--",)
+    return "%s:%s" % (size, b32encode(x[-3:]),)
+
  class InsufficientShareFilesError(zfec.Error):
      def __init__(self, k, kb, *args, **kwargs):
          zfec.Error.__init__(self, *args, **kwargs)
@@ -23,17 +34,17 @@ class CorruptedShareFilesError(zfec.Error):
  
  def _build_header(m, k, pad, sh):
      """
-    @param m: the total number of shares; 3 <= m <= 256
-    @param k: the number of shares required to reconstruct; 2 <= k < m
+    @param m: the total number of shares; 1 <= m <= 256
+    @param k: the number of shares required to reconstruct; 1 <= k <= m
      @param pad: the number of bytes of padding added to the file before encoding; 0 <= pad < k
      @param sh: the shnum of this share; 0 <= sh < m
  
-    @return: a string (which is hopefully short) encoding m, k, sh, and pad
+    @return: a compressed string encoding m, k, pad, and sh
      """
-    assert m >= 3
+    assert m >= 1
      assert m <= 2**8
-    assert k >= 2
-    assert k < m
+    assert k >= 1
+    assert k <= m
      assert pad >= 0
      assert pad < k
  
@@ -43,14 +54,14 @@ def _build_header(m, k, pad, sh):
      bitsused = 0
      val = 0
  
-    val |= (m - 3)
+    val |= (m - 1)
      bitsused += 8 # the first 8 bits always encode m
  
-    kbits = log_ceil(m-2, 2) # num bits needed to store all possible values of k
+    kbits = log_ceil(m, 2) # num bits needed to store all possible values of k
      val <<= kbits
      bitsused += kbits
  
-    val |= (k - 2)
+    val |= (k - 1)
  
      padbits = log_ceil(k, 2) # num bits needed to store all possible values of pad
      val <<= padbits
@@ -64,8 +75,8 @@ def _build_header(m, k, pad, sh):
  
      val |= sh
  
-    assert bitsused >= 11
-    assert bitsused <= 32
+    assert bitsused >= 8, bitsused
+    assert bitsused <= 32, bitsused
  
      if bitsused <= 16:
          val <<= (16-bitsused)
@@ -98,17 +109,17 @@ def _parse_header(inf):
      if not ch:
          raise CorruptedShareFilesError("Share files were corrupted -- share file %r didn't have a complete metadata header at the front.  Perhaps the file was truncated." % (inf.name,))
      byte = ord(ch)
-    m = byte + 3
+    m = byte + 1
  
      # The next few bits encode k.
-    kbits = log_ceil(m-2, 2) # num bits needed to store all possible values of k
+    kbits = log_ceil(m, 2) # num bits needed to store all possible values of k
      b2_bits_left = 8-kbits
      kbitmask = MASK(kbits) << b2_bits_left
      ch = inf.read(1)
      if not ch:
          raise CorruptedShareFilesError("Share files were corrupted -- share file %r didn't have a complete metadata header at the front.  Perhaps the file was truncated." % (inf.name,))
      byte = ord(ch)
-    k = ((byte & kbitmask) >> b2_bits_left) + 2
+    k = ((byte & kbitmask) >> b2_bits_left) + 1
  
      shbits = log_ceil(m, 2) # num bits needed to store all possible values of shnum
      padbits = log_ceil(k, 2) # num bits needed to store all possible values of pad
@@ -160,7 +171,7 @@ def encode_to_files(inf, fsize, dirname, prefix, k, m, suffix=".fec", overwrite=
      mlen = len(str(m))
      format = FORMAT_FORMAT % (mlen, mlen,)
  
-    padbytes = zfec.util.mathutil.pad_size(fsize, k)
+    padbytes = pad_size(fsize, k)
  
      fns = []
      fs = []
@@ -216,9 +227,9 @@ def encode_to_files(inf, fsize, dirname, prefix, k, m, suffix=".fec", overwrite=
      return 0
  
  # Note: if you really prefer base-2 and you change this code, then please
-# denote 2^20 as "MiB" instead of "MB" in order to avoid ambiguity.
-# Thanks.
+# denote 2^20 as "MiB" instead of "MB" in order to avoid ambiguity.  See:
  # http://en.wikipedia.org/wiki/Megabyte
+# Thanks.
  MILLION_BYTES=10**6
  
  def decode_from_files(outf, infiles, verbose=False):
@@ -336,6 +347,79 @@ def encode_file(inf, cb, k, m, chunksize=4096):
          res = enc.encode(l)
          cb(res, indatasize)
  
+try:
+    from hashlib import sha1 # on this path sha1 is hashlib's constructor function
+    sha1 = sha1 # hush pyflakes
+except ImportError:
+    # hashlib was added in Python 2.5.0.
+    import sha
+    sha1 = sha # NOTE(review): on this path sha1 is the sha *module* (hash via sha1.new())
+
+def encode_file_not_really(inf, cb, k, m, chunksize=4096):
+    """
+    Read inf in segments of k chunks of chunksize bytes, zero-padding the
+    last segment, and call cb(None, None) once per segment.  The chunks are
+    read and padded but never encoded (the enc.encode() call below is
+    commented out).
+    """
+    enc = zfec.Encoder(k, m) # constructed, but never used below
+    bufs = tuple([ array.array('c') for unused in range(k) ])
+    indatasize = k*chunksize # shortened at EOF to the count of real bytes
+    zerochunk = array.array('c', ['\x00'])*chunksize
+    done = False
+    while not done:
+        # One pass of this loop per segment of k chunks.
+        for (num, buf) in enumerate(bufs):
+            del buf[:]
+            if done:
+                # Chunks after the end of the file are all padding.
+                buf[:] = zerochunk
+                continue
+            try:
+                buf.fromfile(inf, chunksize)
+            except EOFError:
+                # Short read: buf holds whatever bytes were left in the file.
+                done = True
+                indatasize = num*chunksize + len(buf)
+                buf.fromstring("\x00" * (chunksize-len(buf)))
+
+        # res = enc.encode(bufs)
+        cb(None, None)
+
+def encode_file_not_really_and_hash(inf, cb, k, m, chunksize=4096):
+    """
+    Read inf in segments of k chunks of chunksize bytes (zero-padding the
+    last segment), feed every chunk to a SHA-1 hasher, and call
+    cb(None, None) once per segment.  Nothing is encoded.
+    """
+    # sha1 is hashlib's constructor (which has no .new attribute) when hashlib
+    # is importable, and the old "sha" module (which has .new) otherwise.
+    hasher = getattr(sha1, 'new', sha1)()
+    enc = zfec.Encoder(k, m) # constructed but unused: encoding is skipped
+    l = tuple([ array.array('c') for i in range(k) ])
+    indatasize = k*chunksize # will be reset to shorter upon EOF
+    eof = False
+    ZEROES=array.array('c', ['\x00'])*chunksize
+    while not eof:
+        # This loop body executes once per segment.
+        for (i, a) in enumerate(l):
+            # This loop body executes once per chunk.
+            del a[:]
+            if eof:
+                a[:] = ZEROES # chunks after the end of the file are padding
+                continue
+            try:
+                a.fromfile(inf, chunksize)
+            except EOFError:
+                eof = True
+                indatasize = i*chunksize + len(a)
+                a.fromstring("\x00" * (chunksize-len(a))) # padding
+
+        # res = enc.encode(l)
+        for thing in l:
+            hasher.update(thing)
+        cb(None, None)
+
  def encode_file_stringy(inf, cb, k, m, chunksize=4096):
      """
      Read in the contents of inf, encode, and call cb with the results.
@@ -416,20 +500,5 @@ def encode_file_stringy_easyfec(inf, cb, k, m, chunksize=4096):
  # Author: Zooko Wilcox-O'Hearn
  # 
  # This file is part of zfec.
-# 
-# This program is free software; you can redistribute it and/or modify it
-# under the terms of the GNU General Public License as published by the Free
-# Software Foundation; either version 2 of the License, or (at your option)
-# any later version, with the added permission that, if you become obligated
-# to release a derived work under this licence (as per section 2.b), you may
-# delay the fulfillment of this obligation for up to 12 months.  See the file
-# COPYING for details.
  #
-# If you would like to inquire about a commercial relationship with Allmydata,
-# Inc., please contact partnerships@allmydata.com and visit
-# http://allmydata.com/.
-# 
-# This program is distributed in the hope that it will be useful, but WITHOUT
-# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-# more details.
+# See README.txt for licensing information.