pyfec: fix preconditions, tighten internal C types, fix bugs in the file-encoding...

author Zooko O'Whielacronx <zooko@zooko.com>

Tue, 30 Jan 2007 17:37:35 +0000 (10:37 -0700)

committer Zooko O'Whielacronx <zooko@zooko.com>

Tue, 30 Jan 2007 17:37:35 +0000 (10:37 -0700)
author Zooko O'Whielacronx <zooko@zooko.com>
Tue, 30 Jan 2007 17:37:35 +0000 (10:37 -0700)
committer Zooko O'Whielacronx <zooko@zooko.com>
Tue, 30 Jan 2007 17:37:35 +0000 (10:37 -0700)
diff --git a/pyfec/fec/_fecmodule.c b/pyfec/fec/_fecmodule.c

index 49627f261167d00642162d43defb26de079812ee..6bd5dc6d6b1d24ba90ad7e8eb47f6f1c8296e3a4 100644 (file)
--- a/pyfec/fec/_fecmodule.c
+++ b/pyfec/fec/_fecmodule.c
@@ -135,14 +135,14 @@ Encoder_encode(Encoder *self, PyObject *args) {
  
      gf* check_shares_produced[self->mm - self->kk]; /* This is an upper bound -- we will actually use only num_check_shares_produced of these elements (see below). */
      PyObject* pystrs_produced[self->mm - self->kk]; /* This is an upper bound -- we will actually use only num_check_shares_produced of these elements (see below). */
-    unsigned num_check_shares_produced = 0; /* The first num_check_shares_produced elements of the check_shares_produced array and of the pystrs_produced array will be used. */
+    unsigned char num_check_shares_produced = 0; /* The first num_check_shares_produced elements of the check_shares_produced array and of the pystrs_produced array will be used. */
      const gf* incshares[self->kk];
      unsigned char num_desired_shares;
      PyObject* fast_desired_shares_ids = NULL;
      PyObject** fast_desired_shares_ids_items;
      unsigned char c_desired_shares_ids[self->mm];
-    unsigned i;
-    unsigned prev_desired_id = 256; /* impossible value */
+    unsigned char c_desired_checkshares_ids[self->mm - self->kk];
+    unsigned char i;
      if (desired_shares_ids) {
          fast_desired_shares_ids = PySequence_Fast(desired_shares_ids, "Second argument (optional) was not a sequence.");
          num_desired_shares = PySequence_Fast_GET_SIZE(fast_desired_shares_ids);
@@ -151,11 +151,6 @@ Encoder_encode(Encoder *self, PyObject *args) {
              if (!PyInt_Check(fast_desired_shares_ids_items[i]))
                  goto err;
              c_desired_shares_ids[i] = PyInt_AsLong(fast_desired_shares_ids_items[i]);
-            if (prev_desired_id != 256 && prev_desired_id >= c_desired_shares_ids[i]) {
-                py_raise_fec_error("Precondition violation: first argument is required to be in order -- each requested shareid in the sequence must be a higher number than the previous one.  current requested shareid: %u, previous requested shareid: %u\n", c_desired_shares_ids[i], prev_desired_id);
-                goto err;
-            }
-            prev_desired_id = c_desired_shares_ids[i];
              if (c_desired_shares_ids[i] >= self->kk)
                  num_check_shares_produced++;
          }
@@ -181,7 +176,7 @@ Encoder_encode(Encoder *self, PyObject *args) {
      PyObject** fastinsharesitems = PySequence_Fast_ITEMS(fastinshares);
      if (!fastinsharesitems)
          goto err;
-    int sz, oldsz = 0;
+    Py_ssize_t sz, oldsz = 0;
      for (i=0; i<self->kk; i++) {
          if (!PyObject_CheckReadBuffer(fastinsharesitems[i])) {
              py_raise_fec_error("Precondition violation: %u'th item is required to offer the single-segment read character buffer protocol, but it does not.\n", i);
@@ -200,6 +195,7 @@ Encoder_encode(Encoder *self, PyObject *args) {
      unsigned check_share_index = 0; /* index into the check_shares_produced and (parallel) pystrs_produced arrays */
      for (i=0; i<num_desired_shares; i++) {
          if (c_desired_shares_ids[i] >= self->kk) {
+            c_desired_checkshares_ids[check_share_index] = c_desired_shares_ids[i];
              pystrs_produced[check_share_index] = PyString_FromStringAndSize(NULL, sz);
              if (pystrs_produced[check_share_index] == NULL)
                  goto err;
@@ -212,7 +208,7 @@ Encoder_encode(Encoder *self, PyObject *args) {
      assert (check_share_index == num_check_shares_produced);
  
      /* Encode any check shares that are needed. */
-    fec_encode(self->fec_matrix, incshares, check_shares_produced, c_desired_shares_ids, num_desired_shares, sz);
+    fec_encode(self->fec_matrix, incshares, check_shares_produced, c_desired_checkshares_ids, num_check_shares_produced, sz);
  
      /* Wrap all requested shares up into a Python list of Python strings. */
      result = PyList_New(num_desired_shares);
@@ -386,14 +382,14 @@ Decoder_decode(Decoder *self, PyObject *args) {
      PyObject*restrict shareids;
      PyObject* result = NULL;
  
-    if (!PyArg_ParseTuple(args, "O!O!", &PyList_Type, &shares, &PyList_Type, &shareids))
+    if (!PyArg_ParseTuple(args, "OO", &shares, &shareids))
          return NULL;
  
      const gf*restrict cshares[self->kk];
      unsigned char cshareids[self->kk];
      gf*restrict recoveredcstrs[self->kk]; /* self->kk is actually an upper bound -- we probably won't need all of this space. */
      PyObject*restrict recoveredpystrs[self->kk]; /* self->kk is actually an upper bound -- we probably won't need all of this space. */
-    unsigned i;
+    unsigned char i;
      for (i=0; i<self->kk; i++)
          recoveredpystrs[i] = NULL;
      PyObject*restrict fastshares = PySequence_Fast(shares, "First argument was not a sequence.");
@@ -413,14 +409,14 @@ Decoder_decode(Decoder *self, PyObject *args) {
      }
  
      /* Construct a C array of gf*'s of the data and another of C ints of the shareids. */
-    unsigned needtorecover=0;
+    unsigned char needtorecover=0;
      PyObject** fastshareidsitems = PySequence_Fast_ITEMS(fastshareids);
      if (!fastshareidsitems)
          goto err;
      PyObject** fastsharesitems = PySequence_Fast_ITEMS(fastshares);
      if (!fastsharesitems)
          goto err;
-    int sz, oldsz = 0;
+    Py_ssize_t sz, oldsz = 0;
      for (i=0; i<self->kk; i++) {
          if (!PyInt_Check(fastshareidsitems[i]))
              goto err;
@@ -437,7 +433,7 @@ Decoder_decode(Decoder *self, PyObject *args) {
              py_raise_fec_error("Precondition violation: %u'th item is required to offer the single-segment read character buffer protocol, but it does not.\n", i);
              goto err;
          }
-        if (PyObject_AsReadBuffer(fastsharesitems[i], &(cshares[i]),  &sz))
+        if (PyObject_AsReadBuffer(fastsharesitems[i], &(cshares[i]), &sz))
              goto err;
          if (oldsz != 0 && oldsz != sz) {
              py_raise_fec_error("Precondition violation: Input shares are required to be all the same length.  oldsz: %Zu, sz: %Zu\n", oldsz, sz);
@@ -474,7 +470,7 @@ Decoder_decode(Decoder *self, PyObject *args) {
      fec_decode(self->fec_matrix, cshares, recoveredcstrs, cshareids, sz);
  
      /* Wrap up both original primary shares and decoded shares into a Python list of Python strings. */
-    unsigned nextrecoveredix=0;
+    unsigned char nextrecoveredix=0;
      result = PyList_New(self->kk);
      if (result == NULL)
          goto err;
diff --git a/pyfec/fec/filefec.py b/pyfec/fec/filefec.py

index 8acc0c9bf8901c61faa7b0e4eadbfbac4c97f5fa..4899eded3749690061c0cbbc65fb72ba1960bdc7 100644 (file)
--- a/pyfec/fec/filefec.py
+++ b/pyfec/fec/filefec.py
@@ -22,21 +22,21 @@
  
  import fec
  
-import array
+import array, random
  
  def encode_to_files(inf, prefix, k, m):
      """
      Encode inf, writing the shares to named $prefix+$shareid.
      """
      l = [ open(prefix+str(shareid), "wb") for shareid in range(m) ]
-    def cb(shares, len):
+    def cb(shares, length):
          assert len(shares) == len(l)
          for i in range(len(shares)):
-            l.write(share)
+            l[i].write(shares[i])
  
      encode_file(inf, cb, k, m, chunksize=4096)
   
-def decode_from_files(outf, prefix, k, m):
+def decode_from_files(outf, filesize, prefix, k, m):
      """
      Decode from the first k files in the current directory whose names begin 
      with prefix, writing the results to outf.
@@ -44,7 +44,9 @@ def decode_from_files(outf, prefix, k, m):
      import os
      infs = []
      shareids = []
-    for f in os.listdir("."):
+    listd = os.listdir(".")
+    random.shuffle(listd)
+    for f in listd:
          if f.startswith(prefix):
              infs.append(open(f, "rb"))
              shareids.append(int(f[len(prefix):]))
@@ -57,9 +59,12 @@ def decode_from_files(outf, prefix, k, m):
          x = [ inf.read(CHUNKSIZE) for inf in infs ]
          decshares = dec.decode(x, shareids)
          for decshare in decshares:
-            outf.write(decshare)
-        if len(x[-1]) != CHUNKSIZE:
-            break
+            if filesize >= len(decshare):
+                outf.write(decshare)
+                filesize -= len(decshare)
+            else: 
+                outf.write(decshare[:filesize])
+                return
  
  def encode_file(inf, cb, k, m, chunksize=4096):
      """
@@ -110,6 +115,7 @@ def encode_file(inf, cb, k, m, chunksize=4096):
                  # padding
                  a.fromstring("\x00" * (chunksize-len(a)))
                  while (i<len(l)):
+                    a = l[i]
                      a[:] = ZEROES
                      i += 1
  
@@ -144,7 +150,7 @@ def encode_file_stringy(inf, cb, k, m, chunksize=4096):
      while indatasize == k*chunksize:
          # This loop body executes once per segment.
          i = 0
-       l = []
+        l = []
          ZEROES = '\x00'*chunksize
          while i<k:
              # This loop body executes once per chunk.
diff --git a/pyfec/fec/test/bench_pyfec.py b/pyfec/fec/test/bench_pyfec.py

index 03311704f83f121d68f15179ebd6bd00772cf3f9..613c67ff57d144de59c59c25066e5786a758ff8d 100644 (file)
--- a/pyfec/fec/test/bench_pyfec.py
+++ b/pyfec/fec/test/bench_pyfec.py
@@ -22,7 +22,7 @@
  
  import fec
  
-import array
+import array, random
  
  def bench_encode_to_files_shuffle_decode_from_files():
      FILESIZE=1000000
@@ -47,15 +47,15 @@ def bench_encode_to_files_shuffle_decode_from_files():
          print "Encoded %s byte file into %d share files in %0.2f seconds, or %0.2f million bytes per second" % (FILESIZE, M, so-st, FILESIZE/((so-st)*1000000),)
          enctime = so-st
          # Now delete m-k of the tempfiles at random.
-        tempfs = [ f for f in os.listdir(".") if f.startwith(PREFIX) ]
-        tempfs.shuffle()
+        tempfs = [ f for f in os.listdir(".") if f.startswith(PREFIX) ]
+        random.shuffle(tempfs)
          for victimtempf in tempfs[:M-K]:
              os.remove(victimtempf)
          recoveredfile = open("tmpranddata-recovered", "wb")
          st = time.time()
-        fec.filefec.decode_from_files(recoveredfile, PREFIX, K, M)
+        fec.filefec.decode_from_files(recoveredfile, 1000000, PREFIX, K, M)
          so = time.time()
-        print "Encoded %s byte file from %d share files in %0.2f seconds, or %0.2f million bytes per second" % (FILESIZE, K, so-st, FILESIZE/((so-st)*1000000),)
+        print "Decoded %s byte file from %d share files in %0.2f seconds, or %0.2f million bytes per second" % (FILESIZE, K, so-st, FILESIZE/((so-st)*1000000),)
          return enctime + (so-st)
      finally:
          # os.remove("tmpranddata")
author	Zooko O'Whielacronx <zooko@zooko.com>
	Tue, 30 Jan 2007 17:37:35 +0000 (10:37 -0700)
committer	Zooko O'Whielacronx <zooko@zooko.com>
	Tue, 30 Jan 2007 17:37:35 +0000 (10:37 -0700)
pyfec/fec/_fecmodule.c		patch \| blob \| history
pyfec/fec/filefec.py		patch \| blob \| history
pyfec/fec/test/bench_pyfec.py		patch \| blob \| history