zfec/fec.c

   1 /**
   2  * zfec -- fast forward error correction library with Python interface
   3  */
   4
   5 #include "fec.h"
   6
   7 #include <stdio.h>
   8 #include <stdlib.h>
   9 #include <string.h>
  10 #include <assert.h>
  11
  12 /*
  13  * Primitive polynomials - see Lin & Costello, Appendix A,
  14  * and  Lee & Messerschmitt, p. 453.
      *
      * Pp lists the coefficients of the polynomial as characters, lowest
      * degree first: generate_gf() tests Pp[i] == '1' for i = 0..7 to decide
      * whether the x^i term is part of \alpha ** 8.  Read that way the string
      * below is 1 + x^2 + x^3 + x^4 + x^8.
  15  */
  16 static const char*const Pp="101110001";
  17
  18
  19 /*
  20  * To speed up computations, we have tables for logarithm, exponent and
  21  * inverse of a number.  We use a table for multiplication as well (it takes
  22  * 64K, no big deal even on a PDA, especially because it can be
  23  * pre-initialized and put into a ROM!), otherwise we use a table of
  24  * logarithms. In any case the macro gf_mul(x,y) takes care of
  25  * multiplications.
  26  */
  27
  28 static gf gf_exp[510];  /* index->poly form conversion table: gf_exp[i] = \alpha**i; generate_gf() copies entries 0..254 into 255..509 */
  29 static int gf_log[256]; /* Poly->index form conversion table: gf_log[gf_exp[i]] = i; gf_log[0] is set to 255 because log(0) is undefined */
  30 static gf inverse[256]; /* inverse of field elem.; inverse[0] is set to 0 as a placeholder */
  31                                 /* generate_gf() stores inverse[\alpha**i] = \alpha**(255-i) */
  32
  33 /*
  34  * modnn(x) computes x % GF_SIZE, where GF_SIZE is 2**GF_BITS - 1,
  35  * without a slow divide.
  36  */
  37 static gf
  38 modnn(int x) {
  39     while (x >= 255) {
  40         x -= 255;
  41         x = (x >> 8) + (x & 255);
  42     }
  43     return x;
  44 }
  45
  46 #define SWAP(a,b,t) {t tmp; tmp=a; a=b; b=tmp;}
  47
  48 /*
  49  * gf_mul(x,y) multiplies two numbers.  It is much faster to use a
  50  * multiplication table.
  51  *
  52  * USE_GF_MULC, GF_MULC0(c) and GF_ADDMULC(dst, x) can be used when multiplying
  53  * many numbers by the same constant. In this case the first call sets the
  54  * constant, and others perform the multiplications.  A value related to the
  55  * multiplication is held in a local variable declared with USE_GF_MULC . See
  56  * usage in _addmul1().
  57  */
  58 static gf gf_mul_table[256][256]; /* gf_mul_table[x][y] = x*y; filled by _init_mul_table() */
  59
  60 #define gf_mul(x,y) gf_mul_table[x][y] /* plain table lookup; x and y are used as array indices */
  61
  62 #define USE_GF_MULC register gf * __gf_mulc_ /* declares the local row pointer used by the two macros below */
  63
  64 #define GF_MULC0(c) __gf_mulc_ = gf_mul_table[c] /* select row c of the table, i.e. fix the constant c */
  65 #define GF_ADDMULC(dst, x) dst ^= __gf_mulc_[x] /* dst += c*x (addition in this file is xor) */
  66
  67 /*
  68  * Generate GF(2**m) from the irreducible polynomial p(X) in p[0]..p[m]
  69  * Lookup tables:
  70  *     index->polynomial form           gf_exp[] contains j= \alpha^i;
  71  *     polynomial form -> index form    gf_log[ j = \alpha^i ] = i
  72  * \alpha=x is the primitive element of GF(2^m)
  73  *
  74  * For efficiency, gf_exp[] has size 2*GF_SIZE, so that a simple
  75  * multiplication of two numbers can be resolved without calling modnn
  76  */
  77 static void
  78 _init_mul_table(void) {
  79   int i, j;
  80   for (i = 0; i < 256; i++)
  81       for (j = 0; j < 256; j++)
  82           gf_mul_table[i][j] = gf_exp[modnn (gf_log[i] + gf_log[j])];
  83
  84   for (j = 0; j < 256; j++)
  85       gf_mul_table[0][j] = gf_mul_table[j][0] = 0;
  86 }
  87
  88 #define NEW_GF_MATRIX(rows, cols) \
  89     (gf*)malloc(rows * cols)
  90
  91 /*
  92  * initialize the data structures used for computations in GF:
      * fills the file-level tables gf_exp[], gf_log[] and inverse[] from the
      * polynomial string Pp.  Takes no arguments and returns nothing.
  93  */
  94 static void
  95 generate_gf (void) {
  96     int i;
  97     gf mask;
  98
  99     mask = 1;                     /* x ** 0 = 1 */
 100     gf_exp[8] = 0;          /* will be updated at the end of the 1st loop */
 101     /*
 102      * first, generate the (polynomial representation of) powers of \alpha,
 103      * which are stored in gf_exp[i] = \alpha ** i .
 104      * At the same time build gf_log[gf_exp[i]] = i .
 105      * The first 8 powers are simply bits shifted to the left.
 106      */
 107     for (i = 0; i < 8; i++, mask <<= 1) {
 108         gf_exp[i] = mask;
 109         gf_log[gf_exp[i]] = i;
 110         /*
 111          * If Pp[i] == 1 then \alpha ** i occurs in poly-repr
 112          * gf_exp[8] = \alpha ** 8
 113          */
 114         if (Pp[i] == '1')
 115             gf_exp[8] ^= mask;
 116     }
 117     /*
 118      * now gf_exp[8] = \alpha ** 8 is complete, so its logarithm
 119      * can be recorded as well.
 120      */
 121     gf_log[gf_exp[8]] = 8;
 122     /*
 123      * Poly-repr of \alpha ** (i+1) is given by poly-repr of
 124      * \alpha ** i shifted left one-bit and accounting for any
 125      * \alpha ** 8 term that may occur when poly-repr of
 126      * \alpha ** i is shifted.
 127      */
 128     mask = 1 << 7;                /* top bit: if set, the shift below produces an \alpha ** 8 term */
 129     for (i = 9; i < 255; i++) {
 130         if (gf_exp[i - 1] >= mask)
 131             gf_exp[i] = gf_exp[8] ^ ((gf_exp[i - 1] ^ mask) << 1);
 132         else
 133             gf_exp[i] = gf_exp[i - 1] << 1;
 134         gf_log[gf_exp[i]] = i;
 135     }
 136     /*
 137      * log(0) is not defined, so use a special value
 138      */
 139     gf_log[0] = 255;
 140     /* set the extended gf_exp values for fast multiply */
 141     for (i = 0; i < 255; i++)
 142         gf_exp[i + 255] = gf_exp[i];
 143
 144     /*
 145      * again special cases. 0 has no inverse. This used to
 146      * be initialized to 255, but it should make no difference
 147      * since no one is supposed to read from here.
 148      */
 149     inverse[0] = 0;
 150     inverse[1] = 1;
          /* for the rest: inverse of \alpha ** e is \alpha ** (255 - e), with e = gf_log[i] */
 151     for (i = 2; i <= 255; i++)
 152         inverse[i] = gf_exp[255 - gf_log[i]];
 153 }
 154
 155 /*
 156  * Various linear algebra operations that i use often.
 157  */
 158
 159 /*
 160  * addmul() computes dst[] = dst[] + c * src[]
 161  * This is used often, so better optimize it! Currently the loop is
 162  * unrolled 16 times, a good value for 486 and pentium-class machines.
 163  * The case c=0 is also optimized, whereas c=1 is not. These
 164  * calls are unfrequent in my typical apps so I did not bother.
 165  */
 166 #define addmul(dst, src, c, sz)                 \
 167     if (c != 0) _addmul1(dst, src, c, sz)
 168
 169 #define UNROLL 16               /* 1, 4, 8, 16 */
 170 static void
 171 _addmul1(register gf*restrict dst, const register gf*restrict src, gf c, size_t sz) {
 172     USE_GF_MULC;
 173     const gf* lim = &dst[sz - UNROLL + 1];
 174
 175     GF_MULC0 (c);
 176
 177 #if (UNROLL > 1)                /* unrolling by 8/16 is quite effective on the pentium */
 178     for (; dst < lim; dst += UNROLL, src += UNROLL) {
 179         GF_ADDMULC (dst[0], src[0]);
 180         GF_ADDMULC (dst[1], src[1]);
 181         GF_ADDMULC (dst[2], src[2]);
 182         GF_ADDMULC (dst[3], src[3]);
 183 #if (UNROLL > 4)
 184         GF_ADDMULC (dst[4], src[4]);
 185         GF_ADDMULC (dst[5], src[5]);
 186         GF_ADDMULC (dst[6], src[6]);
 187         GF_ADDMULC (dst[7], src[7]);
 188 #endif
 189 #if (UNROLL > 8)
 190         GF_ADDMULC (dst[8], src[8]);
 191         GF_ADDMULC (dst[9], src[9]);
 192         GF_ADDMULC (dst[10], src[10]);
 193         GF_ADDMULC (dst[11], src[11]);
 194         GF_ADDMULC (dst[12], src[12]);
 195         GF_ADDMULC (dst[13], src[13]);
 196         GF_ADDMULC (dst[14], src[14]);
 197         GF_ADDMULC (dst[15], src[15]);
 198 #endif
 199     }
 200 #endif
 201     lim += UNROLL - 1;
 202     for (; dst < lim; dst++, src++)       /* final components */
 203         GF_ADDMULC (*dst, *src);
 204 }
 205
 206 /*
 207  * computes C = AB where A is n*k, B is k*m, C is n*m
 208  */
 209 static void
 210 _matmul(gf * a, gf * b, gf * c, unsigned n, unsigned k, unsigned m) {
 211     unsigned row, col, i;
 212
 213     for (row = 0; row < n; row++) {
 214         for (col = 0; col < m; col++) {
 215             gf *pa = &a[row * k];
 216             gf *pb = &b[col];
 217             gf acc = 0;
 218             for (i = 0; i < k; i++, pa++, pb += m)
 219                 acc ^= gf_mul (*pa, *pb);
 220             c[row * m + col] = acc;
 221         }
 222     }
 223 }
 224
 225 /*
 226  * _invert_mat() takes a matrix and produces its inverse
 227  * k is the size of the matrix.
 228  * (Gauss-Jordan, adapted from Numerical Recipes in C)
 229  * Return non-zero if singular.
 230  */
 231 static void
 232 _invert_mat(gf* src, size_t k) {
 233     gf c;
 234     size_t irow = 0;
 235     size_t icol = 0;
 236
 237     unsigned* indxc = (unsigned*) malloc (k * sizeof(unsigned));
 238     unsigned* indxr = (unsigned*) malloc (k * sizeof(unsigned));
 239     unsigned* ipiv = (unsigned*) malloc (k * sizeof(unsigned));
 240     gf *id_row = NEW_GF_MATRIX (1, k);
 241
 242     memset (id_row, '\0', k * sizeof (gf));
 243     /*
 244      * ipiv marks elements already used as pivots.
 245      */
 246     for (size_t i = 0; i < k; i++)
 247         ipiv[i] = 0;
 248
 249     for (size_t col = 0; col < k; col++) {
 250         gf *pivot_row;
 251         /*
 252          * Zeroing column 'col', look for a non-zero element.
 253          * First try on the diagonal, if it fails, look elsewhere.
 254          */
 255         if (ipiv[col] != 1 && src[col * k + col] != 0) {
 256             irow = col;
 257             icol = col;
 258             goto found_piv;
 259         }
 260         for (size_t row = 0; row < k; row++) {
 261             if (ipiv[row] != 1) {
 262                 for (size_t ix = 0; ix < k; ix++) {
 263                     if (ipiv[ix] == 0) {
 264                         if (src[row * k + ix] != 0) {
 265                             irow = row;
 266                             icol = ix;
 267                             goto found_piv;
 268                         }
 269                     } else
 270                         assert (ipiv[ix] <= 1);
 271                 }
 272             }
 273         }
 274       found_piv:
 275         ++(ipiv[icol]);
 276         /*
 277          * swap rows irow and icol, so afterwards the diagonal
 278          * element will be correct. Rarely done, not worth
 279          * optimizing.
 280          */
 281         if (irow != icol)
 282             for (size_t ix = 0; ix < k; ix++)
 283                 SWAP (src[irow * k + ix], src[icol * k + ix], gf);
 284         indxr[col] = irow;
 285         indxc[col] = icol;
 286         pivot_row = &src[icol * k];
 287         c = pivot_row[icol];
 288         assert (c != 0);
 289         if (c != 1) {                       /* otherwhise this is a NOP */
 290             /*
 291              * this is done often , but optimizing is not so
 292              * fruitful, at least in the obvious ways (unrolling)
 293              */
 294             c = inverse[c];
 295             pivot_row[icol] = 1;
 296             for (size_t ix = 0; ix < k; ix++)
 297                 pivot_row[ix] = gf_mul (c, pivot_row[ix]);
 298         }
 299         /*
 300          * from all rows, remove multiples of the selected row
 301          * to zero the relevant entry (in fact, the entry is not zero
 302          * because we know it must be zero).
 303          * (Here, if we know that the pivot_row is the identity,
 304          * we can optimize the addmul).
 305          */
 306         id_row[icol] = 1;
 307         if (memcmp (pivot_row, id_row, k * sizeof (gf)) != 0) {
 308             gf *p = src;
 309             for (size_t ix = 0; ix < k; ix++, p += k) {
 310                 if (ix != icol) {
 311                     c = p[icol];
 312                     p[icol] = 0;
 313                     addmul (p, pivot_row, c, k);
 314                 }
 315             }
 316         }
 317         id_row[icol] = 0;
 318     }                           /* done all columns */
 319     for (size_t col = k; col > 0; col--)
 320         if (indxr[col-1] != indxc[col-1])
 321             for (size_t row = 0; row < k; row++)
 322                 SWAP (src[row * k + indxr[col-1]], src[row * k + indxc[col-1]], gf);
 323 }
 324
 325 /*
 326  * fast code for inverting a vandermonde matrix.
 327  *
 328  * NOTE: It assumes that the matrix is not singular and _IS_ a vandermonde
 329  * matrix. Only uses the second column of the matrix, containing the p_i's.
 330  *
 331  * Algorithm borrowed from "Numerical recipes in C" -- sec.2.8, but largely
 332  * revised for my purposes.
 333  * p = coefficients of the matrix (p_i)
 334  * q = values of the polynomial (known)
      *
      * src is a k*k matrix stored row by row; the result is written back into
      * src.  For k == 1 the matrix is left untouched.
      *
      * NOTE(review): k == 0 is not handled (c[k - 1] would index out of range)
      * and the three allocations below are used without a NULL check.
 335  */
 336 void
 337 _invert_vdm (gf* src, unsigned k) {
 338     unsigned i, j, row, col;
 339     gf *b, *c, *p;
 340     gf t, xx;
 341
 342     if (k == 1)                   /* degenerate case, matrix must be p^0 = 1 */
 343         return;
 344     /*
 345      * c holds the coefficient of P(x) = Prod (x - p_i), i=0..k-1
 346      * b holds the coefficient for the matrix inversion
 347      */
 348     c = NEW_GF_MATRIX (1, k);
 349     b = NEW_GF_MATRIX (1, k);
 350
 351     p = NEW_GF_MATRIX (1, k);
 352
          /* j walks down column 1 of src (index 1, 1+k, 1+2k, ...) */
 353     for (j = 1, i = 0; i < k; i++, j += k) {
 354         c[i] = 0;
 355         p[i] = src[j];            /* p[i] */
 356     }
 357     /*
 358      * construct coeffs. recursively. We know c[k] = 1 (implicit)
 359      * and start P_0 = x - p_0, then at each stage multiply by
 360      * x - p_i generating P_i = x P_{i-1} - p_i P_{i-1}
 361      * After k steps we are done.
 362      */
 363     c[k - 1] = p[0];              /* really -p(0), but x = -x in GF(2^m) */
 364     for (i = 1; i < k; i++) {
 365         gf p_i = p[i];            /* see above comment */
 366         for (j = k - 1 - (i - 1); j < k - 1; j++)
 367             c[j] ^= gf_mul (p_i, c[j + 1]);
 368         c[k - 1] ^= p_i;
 369     }
 370
 371     for (row = 0; row < k; row++) {
 372         /*
 373          * synthetic division etc.
 374          */
 375         xx = p[row];
 376         t = 1;
 377         b[k - 1] = 1;             /* this is in fact c[k] */
 378         for (i = k - 1; i > 0; i--) {
 379             b[i-1] = c[i] ^ gf_mul (xx, b[i]);
 380             t = gf_mul (xx, t) ^ b[i-1];
 381         }
              /* column 'row' of the result is b[] scaled by 1/t */
 382         for (col = 0; col < k; col++)
 383             src[col * k + row] = gf_mul (inverse[t], b[col]);
 384     }
 385     free (c);
 386     free (b);
 387     free (p);
 388     return;
 389 }
 390
 391 static int fec_initialized = 0; /* set to 1 by init_fec(); fec_new() tests it before first use */
     /*
      * init_fec() builds the file-level lookup tables and then marks the
      * module as initialized.  No locking is done here.
      */
 392 static void
 393 init_fec (void) {
 394     generate_gf();              /* must run first: fills gf_exp[], gf_log[], inverse[] */
 395     _init_mul_table();          /* reads gf_exp[] and gf_log[] to fill gf_mul_table[][] */
 396     fec_initialized = 1;
 397 }
 398
 399 /*
 400  * This section contains the proper FEC encoding/decoding routines.
 401  * The encoding matrix is computed starting with a Vandermonde matrix,
 402  * and then transforming it into a systematic matrix.
 403  */
 404
 405 #define FEC_MAGIC       0xFECC0DEC /* base value xor-ed into fec_t.magic by fec_new() */
 406
     /*
      * fec_free() releases a codec created by fec_new(): it frees
      * p->enc_matrix and then p itself.
      *
      * The assert() checks that p is not NULL and that p->magic equals
      * FEC_MAGIC xor k xor n xor the enc_matrix pointer value, which is the
      * expression fec_new() stores.  When assertions are compiled out no
      * check is made, so p must then be a valid, non-NULL codec.
      */
 407 void
 408 fec_free (fec_t *p) {
 409     assert (p != NULL && p->magic == (((FEC_MAGIC ^ p->k) ^ p->n) ^ (unsigned long) (p->enc_matrix)));
 410     free (p->enc_matrix);
 411     free (p);
 412 }
 413
 414 fec_t *
 415 fec_new(unsigned short k, unsigned short n) {
 416     unsigned row, col;
 417     gf *p, *tmp_m;
 418
 419     fec_t *retval;
 420
 421     if (fec_initialized == 0)
 422         init_fec ();
 423
 424     retval = (fec_t *) malloc (sizeof (fec_t));
 425     retval->k = k;
 426     retval->n = n;
 427     retval->enc_matrix = NEW_GF_MATRIX (n, k);
 428     retval->magic = ((FEC_MAGIC ^ k) ^ n) ^ (unsigned long) (retval->enc_matrix);
 429     tmp_m = NEW_GF_MATRIX (n, k);
 430     /*
 431      * fill the matrix with powers of field elements, starting from 0.
 432      * The first row is special, cannot be computed with exp. table.
 433      */
 434     tmp_m[0] = 1;
 435     for (col = 1; col < k; col++)
 436         tmp_m[col] = 0;
 437     for (p = tmp_m + k, row = 0; row < n - 1; row++, p += k)
 438         for (col = 0; col < k; col++)
 439             p[col] = gf_exp[modnn (row * col)];
 440
 441     /*
 442      * quick code to build systematic matrix: invert the top
 443      * k*k vandermonde matrix, multiply right the bottom n-k rows
 444      * by the inverse, and construct the identity matrix at the top.
 445      */
 446     _invert_vdm (tmp_m, k);        /* much faster than _invert_mat */
 447     _matmul(tmp_m + k * k, tmp_m, retval->enc_matrix + k * k, n - k, k, k);
 448     /*
 449      * the upper matrix is I so do not bother with a slow multiply
 450      */
 451     memset (retval->enc_matrix, '\0', k * k * sizeof (gf));
 452     for (p = retval->enc_matrix, col = 0; col < k; col++, p += k + 1)
 453         *p = 1;
 454     free (tmp_m);
 455
 456     return retval;
 457 }
 458
 459 /* To make sure that we stay within cache in the inner loops of fec_encode().  (It would
 460    probably help to also do this for fec_decode().) */
 461 #ifndef STRIDE
 462 #define STRIDE 8192
 463 #endif
 464
 465 void
 466 fec_encode(const fec_t* code, const gf*restrict const*restrict const src, gf*restrict const*restrict const fecs, const unsigned*restrict const block_nums, size_t num_block_nums, size_t sz) {
 467     unsigned char i, j;
 468     size_t k;
 469     unsigned fecnum;
 470     const gf* p;
 471
 472     for (k = 0; k < sz; k += STRIDE) {
 473         size_t stride = ((sz-k) < STRIDE)?(sz-k):STRIDE;
 474         for (i=0; i<num_block_nums; i++) {
 475             fecnum=block_nums[i];
 476             assert (fecnum >= code->k);
 477             memset(fecs[i]+k, 0, stride);
 478             p = &(code->enc_matrix[fecnum * code->k]);
 479             for (j = 0; j < code->k; j++)
 480                 addmul(fecs[i]+k, src[j]+k, p[j], stride);
 481         }
 482     }
 483 }
 484
 485 /**
 486  * Build decode matrix into some memory space.
 487  *
 488  * @param matrix a space allocated for a k by k matrix
 489  */
 490 void
 491 build_decode_matrix_into_space(const fec_t*restrict const code, const unsigned*const restrict index, const unsigned k, gf*restrict const matrix) {
 492     unsigned char i;
 493     gf* p;
 494     for (i=0, p=matrix; i < k; i++, p += k) {
 495         if (index[i] < k) {
 496             memset(p, 0, k);
 497             p[i] = 1;
 498         } else {
 499             memcpy(p, &(code->enc_matrix[index[i] * code->k]), k);
 500         }
 501     }
 502     _invert_mat (matrix, k);
 503 }
 504
 505 void
 506 fec_decode(const fec_t* code, const gf*restrict const*restrict const inpkts, gf*restrict const*restrict const outpkts, const unsigned*restrict const index, size_t sz) {
 507     gf* m_dec = (gf*)alloca(code->k * code->k);
 508     unsigned char outix=0;
 509     unsigned char row=0;
 510     unsigned char col=0;
 511     build_decode_matrix_into_space(code, index, code->k, m_dec);
 512
 513     for (row=0; row<code->k; row++) {
 514         assert ((index[row] >= code->k) || (index[row] == row)); /* If the block whose number is i is present, then it is required to be in the i'th element. */
 515         if (index[row] >= code->k) {
 516             memset(outpkts[outix], 0, sz);
 517             for (col=0; col < code->k; col++)
 518                 addmul(outpkts[outix], inpkts[col], m_dec[row * code->k + col], sz);
 519             outix++;
 520         }
 521     }
 522 }
 523
 524 /**
 525  * zfec -- fast forward error correction library with Python interface
 526  *
 527  * Copyright (C) 2007-2010 Zooko Wilcox-O'Hearn
 528  * Author: Zooko Wilcox-O'Hearn
 529  *
 530  * This file is part of zfec.
 531  *
 532  * See README.rst for licensing information.
 533  */
 534
 535 /*
 536  * This work is derived from the "fec" software by Luigi Rizzo, et al., the
 537  * copyright notice and licence terms of which are included below for reference.
 538  * fec.c -- forward error correction based on Vandermonde matrices 980624 (C)
 539  * 1997-98 Luigi Rizzo (luigi@iet.unipi.it)
 540  *
 541  * Portions derived from code by Phil Karn (karn@ka9q.ampr.org),
 542  * Robert Morelos-Zaragoza (robert@spectra.eng.hawaii.edu) and Hari
 543  * Thirumoorthy (harit@spectra.eng.hawaii.edu), Aug 1995
 544  *
 545  * Modifications by Dan Rubenstein (see Modifications.txt for
 546  * their description.
 547  * Modifications (C) 1998 Dan Rubenstein (drubenst@cs.umass.edu)
 548  *
 549  * Redistribution and use in source and binary forms, with or without
 550  * modification, are permitted provided that the following conditions
 551  * are met:
 552  *
 553  * 1. Redistributions of source code must retain the above copyright
 554  *    notice, this list of conditions and the following disclaimer.
 555  * 2. Redistributions in binary form must reproduce the above
 556  *    copyright notice, this list of conditions and the following
 557  *    disclaimer in the documentation and/or other materials
 558  *    provided with the distribution.
 559  *
 560  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND
 561  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 562  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
 563  * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS
 564  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
 565  * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 566  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
 567  * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 568  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
 569  * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 570  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
 571  * OF SUCH DAMAGE.
 572  */