2 * zfec -- fast forward error correction library with Python interface
13 // actually, some of the flavors (i.e. Enterprise) do support restrict
14 //#define restrict __restrict
16 #define inline __inline
17 #define alloca _alloca
20 #define alloca(x) __builtin_alloca(x)
28 * Primitive polynomials - see Lin & Costello, Appendix A,
29 * and Lee & Messerschmitt, p. 453.
/* Bit i of the string is the coefficient of x^i: 1 + x^2 + x^3 + x^4 + x^8. */
static const char*const Pp="101110001";
35 * To speed up computations, we have tables for logarithm, exponent and
36 * inverse of a number. We use a table for multiplication as well (it takes
37 * 64K, no big deal even on a PDA, especially because it can be
 * pre-initialized and put into a ROM!), otherwise we use a table of
39 * logarithms. In any case the macro gf_mul(x,y) takes care of
43 static gf gf_exp[510]; /* index->poly form conversion table */
44 static int gf_log[256]; /* Poly->index form conversion table */
45 static gf inverse[256]; /* inverse of field elem. */
46 /* inv[\alpha**i]=\alpha**(GF_SIZE-i-1) */
49 * modnn(x) computes x % GF_SIZE, where GF_SIZE is 2**GF_BITS - 1,
50 * without a slow divide.
56 x = (x >> 8) + (x & 255);
/* Exchange a and b (both of type t).  Wrapped in do/while(0) so the macro is
 * a single statement and stays safe under an unbraced if/else; tmp is scoped
 * to the block. */
#define SWAP(a,b,t) do { t tmp; tmp=a; a=b; b=tmp; } while (0)
64 * gf_mul(x,y) multiplies two numbers. It is much faster to use a
65 * multiplication table.
67 * USE_GF_MULC, GF_MULC0(c) and GF_ADDMULC(x) can be used when multiplying
68 * many numbers by the same constant. In this case the first call sets the
69 * constant, and others perform the multiplications. A value related to the
70 * multiplication is held in a local variable declared with USE_GF_MULC . See
71 * usage in _addmul1().
73 static gf gf_mul_table[256][256];
75 #define gf_mul(x,y) gf_mul_table[x][y]
77 #define USE_GF_MULC register gf * __gf_mulc_
79 #define GF_MULC0(c) __gf_mulc_ = gf_mul_table[c]
80 #define GF_ADDMULC(dst, x) dst ^= __gf_mulc_[x]
83 * Generate GF(2**m) from the irreducible polynomial p(X) in p[0]..p[m]
85 * index->polynomial form gf_exp[] contains j= \alpha^i;
86 * polynomial form -> index form gf_log[ j = \alpha^i ] = i
87 * \alpha=x is the primitive element of GF(2^m)
89 * For efficiency, gf_exp[] has size 2*GF_SIZE, so that a simple
90 * multiplication of two numbers can be resolved without calling modnn
93 _init_mul_table(void) {
95 for (i = 0; i < 256; i++)
96 for (j = 0; j < 256; j++)
97 gf_mul_table[i][j] = gf_exp[modnn (gf_log[i] + gf_log[j])];
99 for (j = 0; j < 256; j++)
100 gf_mul_table[0][j] = gf_mul_table[j][0] = 0;
/* Allocate an uninitialized rows x cols matrix of gf.  Arguments are
 * parenthesized so expressions like (n - k) expand correctly.
 * NOTE(review): omits sizeof(gf) — assumes gf is one byte; confirm against
 * the gf typedef. */
#define NEW_GF_MATRIX(rows, cols) \
    (gf*)malloc((rows) * (cols))
107 * initialize the data structures used for computations in GF.
114 mask = 1; /* x ** 0 = 1 */
115 gf_exp[8] = 0; /* will be updated at the end of the 1st loop */
117 * first, generate the (polynomial representation of) powers of \alpha,
118 * which are stored in gf_exp[i] = \alpha ** i .
119 * At the same time build gf_log[gf_exp[i]] = i .
120 * The first 8 powers are simply bits shifted to the left.
122 for (i = 0; i < 8; i++, mask <<= 1) {
124 gf_log[gf_exp[i]] = i;
126 * If Pp[i] == 1 then \alpha ** i occurs in poly-repr
127 * gf_exp[8] = \alpha ** 8
133 * now gf_exp[8] = \alpha ** 8 is complete, so can also
134 * compute its inverse.
136 gf_log[gf_exp[8]] = 8;
138 * Poly-repr of \alpha ** (i+1) is given by poly-repr of
139 * \alpha ** i shifted left one-bit and accounting for any
140 * \alpha ** 8 term that may occur when poly-repr of
141 * \alpha ** i is shifted.
144 for (i = 9; i < 255; i++) {
145 if (gf_exp[i - 1] >= mask)
146 gf_exp[i] = gf_exp[8] ^ ((gf_exp[i - 1] ^ mask) << 1);
148 gf_exp[i] = gf_exp[i - 1] << 1;
149 gf_log[gf_exp[i]] = i;
152 * log(0) is not defined, so use a special value
155 /* set the extended gf_exp values for fast multiply */
156 for (i = 0; i < 255; i++)
157 gf_exp[i + 255] = gf_exp[i];
160 * again special cases. 0 has no inverse. This used to
161 * be initialized to 255, but it should make no difference
162 * since noone is supposed to read from here.
166 for (i = 2; i <= 255; i++)
167 inverse[i] = gf_exp[255 - gf_log[i]];
171 * Various linear algebra operations that i use often.
175 * addmul() computes dst[] = dst[] + c * src[]
176 * This is used often, so better optimize it! Currently the loop is
177 * unrolled 16 times, a good value for 486 and pentium-class machines.
178 * The case c=0 is also optimized, whereas c=1 is not. These
 * calls are infrequent in my typical apps so I did not bother.
/* dst[] ^= c * src[] over sz bytes; skips the whole call when c == 0.
 * do/while(0) makes the macro a single statement, so it cannot capture a
 * following else-clause the way the bare if form could. */
#define addmul(dst, src, c, sz) \
    do { if ((c) != 0) _addmul1(dst, src, c, sz); } while (0)

#define UNROLL 16               /* loop unrolling factor: 1, 4, 8 or 16 */
186 _addmul1(register gf*restrict dst, const register gf*restrict src, gf c, size_t sz) {
188 const gf* lim = &dst[sz - UNROLL + 1];
192 #if (UNROLL > 1) /* unrolling by 8/16 is quite effective on the pentium */
193 for (; dst < lim; dst += UNROLL, src += UNROLL) {
194 GF_ADDMULC (dst[0], src[0]);
195 GF_ADDMULC (dst[1], src[1]);
196 GF_ADDMULC (dst[2], src[2]);
197 GF_ADDMULC (dst[3], src[3]);
199 GF_ADDMULC (dst[4], src[4]);
200 GF_ADDMULC (dst[5], src[5]);
201 GF_ADDMULC (dst[6], src[6]);
202 GF_ADDMULC (dst[7], src[7]);
205 GF_ADDMULC (dst[8], src[8]);
206 GF_ADDMULC (dst[9], src[9]);
207 GF_ADDMULC (dst[10], src[10]);
208 GF_ADDMULC (dst[11], src[11]);
209 GF_ADDMULC (dst[12], src[12]);
210 GF_ADDMULC (dst[13], src[13]);
211 GF_ADDMULC (dst[14], src[14]);
212 GF_ADDMULC (dst[15], src[15]);
217 for (; dst < lim; dst++, src++) /* final components */
218 GF_ADDMULC (*dst, *src);
222 * computes C = AB where A is n*k, B is k*m, C is n*m
225 _matmul(gf * a, gf * b, gf * c, unsigned n, unsigned k, unsigned m) {
226 unsigned row, col, i;
228 for (row = 0; row < n; row++) {
229 for (col = 0; col < m; col++) {
230 gf *pa = &a[row * k];
233 for (i = 0; i < k; i++, pa++, pb += m)
234 acc ^= gf_mul (*pa, *pb);
235 c[row * m + col] = acc;
241 * _invert_mat() takes a matrix and produces its inverse
242 * k is the size of the matrix.
243 * (Gauss-Jordan, adapted from Numerical Recipes in C)
244 * Return non-zero if singular.
247 _invert_mat(gf* src, unsigned k) {
251 unsigned row, col, i, ix;
253 unsigned* indxc = (unsigned*) malloc (k * sizeof(unsigned));
254 unsigned* indxr = (unsigned*) malloc (k * sizeof(unsigned));
255 unsigned* ipiv = (unsigned*) malloc (k * sizeof(unsigned));
256 gf *id_row = NEW_GF_MATRIX (1, k);
258 memset (id_row, '\0', k * sizeof (gf));
260 * ipiv marks elements already used as pivots.
262 for (i = 0; i < k; i++)
265 for (col = 0; col < k; col++) {
268 * Zeroing column 'col', look for a non-zero element.
269 * First try on the diagonal, if it fails, look elsewhere.
271 if (ipiv[col] != 1 && src[col * k + col] != 0) {
276 for (row = 0; row < k; row++) {
277 if (ipiv[row] != 1) {
278 for (ix = 0; ix < k; ix++) {
280 if (src[row * k + ix] != 0) {
286 assert (ipiv[ix] <= 1);
293 * swap rows irow and icol, so afterwards the diagonal
294 * element will be correct. Rarely done, not worth
298 for (ix = 0; ix < k; ix++)
299 SWAP (src[irow * k + ix], src[icol * k + ix], gf);
302 pivot_row = &src[icol * k];
305 if (c != 1) { /* otherwhise this is a NOP */
307 * this is done often , but optimizing is not so
308 * fruitful, at least in the obvious ways (unrolling)
312 for (ix = 0; ix < k; ix++)
313 pivot_row[ix] = gf_mul (c, pivot_row[ix]);
316 * from all rows, remove multiples of the selected row
317 * to zero the relevant entry (in fact, the entry is not zero
318 * because we know it must be zero).
319 * (Here, if we know that the pivot_row is the identity,
320 * we can optimize the addmul).
323 if (memcmp (pivot_row, id_row, k * sizeof (gf)) != 0) {
324 for (p = src, ix = 0; ix < k; ix++, p += k) {
328 addmul (p, pivot_row, c, k);
333 } /* done all columns */
334 for (col = k; col > 0; col--)
335 if (indxr[col-1] != indxc[col-1])
336 for (row = 0; row < k; row++)
337 SWAP (src[row * k + indxr[col-1]], src[row * k + indxc[col-1]], gf);
341 * fast code for inverting a vandermonde matrix.
343 * NOTE: It assumes that the matrix is not singular and _IS_ a vandermonde
344 * matrix. Only uses the second column of the matrix, containing the p_i's.
346 * Algorithm borrowed from "Numerical recipes in C" -- sec.2.8, but largely
347 * revised for my purposes.
348 * p = coefficients of the matrix (p_i)
349 * q = values of the polynomial (known)
352 _invert_vdm (gf* src, unsigned k) {
353 unsigned i, j, row, col;
357 if (k == 1) /* degenerate case, matrix must be p^0 = 1 */
360 * c holds the coefficient of P(x) = Prod (x - p_i), i=0..k-1
361 * b holds the coefficient for the matrix inversion
363 c = NEW_GF_MATRIX (1, k);
364 b = NEW_GF_MATRIX (1, k);
366 p = NEW_GF_MATRIX (1, k);
368 for (j = 1, i = 0; i < k; i++, j += k) {
370 p[i] = src[j]; /* p[i] */
373 * construct coeffs. recursively. We know c[k] = 1 (implicit)
374 * and start P_0 = x - p_0, then at each stage multiply by
375 * x - p_i generating P_i = x P_{i-1} - p_i P_{i-1}
376 * After k steps we are done.
378 c[k - 1] = p[0]; /* really -p(0), but x = -x in GF(2^m) */
379 for (i = 1; i < k; i++) {
380 gf p_i = p[i]; /* see above comment */
381 for (j = k - 1 - (i - 1); j < k - 1; j++)
382 c[j] ^= gf_mul (p_i, c[j + 1]);
386 for (row = 0; row < k; row++) {
388 * synthetic division etc.
392 b[k - 1] = 1; /* this is in fact c[k] */
393 for (i = k - 1; i > 0; i--) {
394 b[i-1] = c[i] ^ gf_mul (xx, b[i]);
395 t = gf_mul (xx, t) ^ b[i-1];
397 for (col = 0; col < k; col++)
398 src[col * k + row] = gf_mul (inverse[t], b[col]);
/* Non-zero once the GF lookup tables have been generated (checked in fec_new). */
static int fec_initialized = 0;
415 * This section contains the proper FEC encoding/decoding routines.
416 * The encoding matrix is computed starting with a Vandermonde matrix,
417 * and then transforming it into a systematic matrix.
/* Arbitrary tag mixed into fec_t.magic (with k, n and the matrix pointer)
 * to detect corrupt or stale handles in fec_free. */
#define FEC_MAGIC 0xFECC0DEC
423 fec_free (fec_t *p) {
424 assert (p != NULL && p->magic == (((FEC_MAGIC ^ p->k) ^ p->n) ^ (unsigned long) (p->enc_matrix)));
425 free (p->enc_matrix);
430 fec_new(unsigned k, unsigned n) {
436 if (fec_initialized == 0)
439 retval = (fec_t *) malloc (sizeof (fec_t));
442 retval->enc_matrix = NEW_GF_MATRIX (n, k);
443 retval->magic = ((FEC_MAGIC ^ k) ^ n) ^ (unsigned long) (retval->enc_matrix);
444 tmp_m = NEW_GF_MATRIX (n, k);
446 * fill the matrix with powers of field elements, starting from 0.
447 * The first row is special, cannot be computed with exp. table.
450 for (col = 1; col < k; col++)
452 for (p = tmp_m + k, row = 0; row < n - 1; row++, p += k)
453 for (col = 0; col < k; col++)
454 p[col] = gf_exp[modnn (row * col)];
457 * quick code to build systematic matrix: invert the top
458 * k*k vandermonde matrix, multiply right the bottom n-k rows
459 * by the inverse, and construct the identity matrix at the top.
461 _invert_vdm (tmp_m, k); /* much faster than _invert_mat */
462 _matmul(tmp_m + k * k, tmp_m, retval->enc_matrix + k * k, n - k, k, k);
464 * the upper matrix is I so do not bother with a slow multiply
466 memset (retval->enc_matrix, '\0', k * k * sizeof (gf));
467 for (p = retval->enc_matrix, col = 0; col < k; col++, p += k + 1)
474 /* To make sure that we stay within cache in the inner loops of fec_encode()
479 fec_encode(const fec_t* code, const gf*restrict const*restrict const src, gf*restrict const*restrict const fecs, const unsigned*restrict const block_nums, size_t num_block_nums, size_t sz) {
485 for (k = 0; k < sz; k += STRIDE) {
486 size_t stride = ((sz-k) < STRIDE)?(sz-k):STRIDE;
487 for (i=0; i<num_block_nums; i++) {
488 fecnum=block_nums[i];
489 assert (fecnum >= code->k);
490 memset(fecs[i]+k, 0, stride);
491 p = &(code->enc_matrix[fecnum * code->k]);
492 for (j = 0; j < code->k; j++)
493 addmul(fecs[i]+k, src[j]+k, p[j], stride);
499 * Build decode matrix into some memory space.
501 * @param matrix a space allocated for a k by k matrix
504 build_decode_matrix_into_space(const fec_t*restrict const code, const unsigned*const restrict index, const unsigned k, gf*restrict const matrix) {
507 for (i=0, p=matrix; i < k; i++, p += k) {
512 memcpy(p, &(code->enc_matrix[index[i] * code->k]), k);
515 _invert_mat (matrix, k);
519 fec_decode(const fec_t* code, const gf*restrict const*restrict const inpkts, gf*restrict const*restrict const outpkts, const unsigned*restrict const index, size_t sz) {
520 gf* m_dec = (gf*)alloca(code->k * code->k);
521 unsigned char outix=0;
524 build_decode_matrix_into_space(code, index, code->k, m_dec);
526 for (row=0; row<code->k; row++) {
527 if (index[row] >= code->k) {
528 memset(outpkts[outix], 0, sz);
529 for (col=0; col < code->k; col++)
530 addmul(outpkts[outix], inpkts[col], m_dec[row * code->k + col], sz);
537 * zfec -- fast forward error correction library with Python interface
539 * Copyright (C) 2007 Allmydata, Inc.
540 * Author: Zooko Wilcox-O'Hearn
542 * This file is part of zfec.
544 * See README.txt for licensing information.
548 * This work is derived from the "fec" software by Luigi Rizzo, et al., the
549 * copyright notice and licence terms of which are included below for reference.
550 * fec.c -- forward error correction based on Vandermonde matrices 980624 (C)
551 * 1997-98 Luigi Rizzo (luigi@iet.unipi.it)
553 * Portions derived from code by Phil Karn (karn@ka9q.ampr.org),
554 * Robert Morelos-Zaragoza (robert@spectra.eng.hawaii.edu) and Hari
555 * Thirumoorthy (harit@spectra.eng.hawaii.edu), Aug 1995
557 * Modifications by Dan Rubenstein (see Modifications.txt for
559 * Modifications (C) 1998 Dan Rubenstein (drubenst@cs.umass.edu)
561 * Redistribution and use in source and binary forms, with or without
562 * modification, are permitted provided that the following conditions
565 * 1. Redistributions of source code must retain the above copyright
566 * notice, this list of conditions and the following disclaimer.
567 * 2. Redistributions in binary form must reproduce the above
568 * copyright notice, this list of conditions and the following
569 * disclaimer in the documentation and/or other materials
570 * provided with the distribution.
572 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND
573 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
574 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
575 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS
576 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
577 * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
578 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
579 * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
580 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
581 * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
582 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY