zfec/zfec/fec.c

   1 /**
   2  * zfec -- fast forward error correction library with Python interface
   3  */
   4
   5 #include "fec.h"
   6
   7 #include <stdio.h>
   8 #include <stdlib.h>
   9 #include <string.h>
  10 #include <assert.h>
  11
  12 #if defined(_MSC_VER)
  13 // actually, some of the flavors (i.e. Enterprise) do support restrict
  14 //#define restrict __restrict
  15 #define restrict
  16 #define inline __inline
  17 #define alloca _alloca
  18 #else
  19 #ifdef __GNUC__
  20 #define alloca(x) __builtin_alloca(x)
  21 #else
  22 #include <alloca.h>
  23 #endif
  24 #endif
  25
  26
  27 /*
  28  * Primitive polynomials - see Lin & Costello, Appendix A,
  29  * and  Lee & Messerschmitt, p. 453.
  30  */
  31 static const char*const Pp="101110001";
  32
  33
  34 /*
  35  * To speed up computations, we have tables for logarithm, exponent and
  36  * inverse of a number.  We use a table for multiplication as well (it takes
  37  * 64K, no big deal even on a PDA, especially because it can be
  38  * pre-initialized an put into a ROM!), otherwhise we use a table of
  39  * logarithms. In any case the macro gf_mul(x,y) takes care of
  40  * multiplications.
  41  */
  42
  43 static gf gf_exp[510];  /* index->poly form conversion table    */
  44 static int gf_log[256]; /* Poly->index form conversion table    */
  45 static gf inverse[256]; /* inverse of field elem.               */
  46                                 /* inv[\alpha**i]=\alpha**(GF_SIZE-i-1) */
  47
  48 /*
  49  * modnn(x) computes x % GF_SIZE, where GF_SIZE is 2**GF_BITS - 1,
  50  * without a slow divide.
  51  */
  52 static inline gf
  53 modnn(int x) {
  54     while (x >= 255) {
  55         x -= 255;
  56         x = (x >> 8) + (x & 255);
  57     }
  58     return x;
  59 }
  60
  61 #define SWAP(a,b,t) {t tmp; tmp=a; a=b; b=tmp;}
  62
  63 /*
  64  * gf_mul(x,y) multiplies two numbers.  It is much faster to use a
  65  * multiplication table.
  66  *
  67  * USE_GF_MULC, GF_MULC0(c) and GF_ADDMULC(x) can be used when multiplying
  68  * many numbers by the same constant. In this case the first call sets the
  69  * constant, and others perform the multiplications.  A value related to the
  70  * multiplication is held in a local variable declared with USE_GF_MULC . See
  71  * usage in _addmul1().
  72  */
  73 static gf gf_mul_table[256][256];
  74
  75 #define gf_mul(x,y) gf_mul_table[x][y]
  76
  77 #define USE_GF_MULC register gf * __gf_mulc_
  78
  79 #define GF_MULC0(c) __gf_mulc_ = gf_mul_table[c]
  80 #define GF_ADDMULC(dst, x) dst ^= __gf_mulc_[x]
  81
  82 /*
  83  * Generate GF(2**m) from the irreducible polynomial p(X) in p[0]..p[m]
  84  * Lookup tables:
  85  *     index->polynomial form           gf_exp[] contains j= \alpha^i;
  86  *     polynomial form -> index form    gf_log[ j = \alpha^i ] = i
  87  * \alpha=x is the primitive element of GF(2^m)
  88  *
  89  * For efficiency, gf_exp[] has size 2*GF_SIZE, so that a simple
  90  * multiplication of two numbers can be resolved without calling modnn
  91  */
  92 static void
  93 _init_mul_table(void) {
  94   int i, j;
  95   for (i = 0; i < 256; i++)
  96       for (j = 0; j < 256; j++)
  97           gf_mul_table[i][j] = gf_exp[modnn (gf_log[i] + gf_log[j])];
  98
  99   for (j = 0; j < 256; j++)
 100       gf_mul_table[0][j] = gf_mul_table[j][0] = 0;
 101 }
 102
 103 #define NEW_GF_MATRIX(rows, cols) \
 104     (gf*)malloc(rows * cols)
 105
 106 /*
 107  * initialize the data structures used for computations in GF.
 108  */
 109 static void
 110 generate_gf (void) {
 111     int i;
 112     gf mask;
 113
 114     mask = 1;                     /* x ** 0 = 1 */
 115     gf_exp[8] = 0;          /* will be updated at the end of the 1st loop */
 116     /*
 117      * first, generate the (polynomial representation of) powers of \alpha,
 118      * which are stored in gf_exp[i] = \alpha ** i .
 119      * At the same time build gf_log[gf_exp[i]] = i .
 120      * The first 8 powers are simply bits shifted to the left.
 121      */
 122     for (i = 0; i < 8; i++, mask <<= 1) {
 123         gf_exp[i] = mask;
 124         gf_log[gf_exp[i]] = i;
 125         /*
 126          * If Pp[i] == 1 then \alpha ** i occurs in poly-repr
 127          * gf_exp[8] = \alpha ** 8
 128          */
 129         if (Pp[i] == '1')
 130             gf_exp[8] ^= mask;
 131     }
 132     /*
 133      * now gf_exp[8] = \alpha ** 8 is complete, so can also
 134      * compute its inverse.
 135      */
 136     gf_log[gf_exp[8]] = 8;
 137     /*
 138      * Poly-repr of \alpha ** (i+1) is given by poly-repr of
 139      * \alpha ** i shifted left one-bit and accounting for any
 140      * \alpha ** 8 term that may occur when poly-repr of
 141      * \alpha ** i is shifted.
 142      */
 143     mask = 1 << 7;
 144     for (i = 9; i < 255; i++) {
 145         if (gf_exp[i - 1] >= mask)
 146             gf_exp[i] = gf_exp[8] ^ ((gf_exp[i - 1] ^ mask) << 1);
 147         else
 148             gf_exp[i] = gf_exp[i - 1] << 1;
 149         gf_log[gf_exp[i]] = i;
 150     }
 151     /*
 152      * log(0) is not defined, so use a special value
 153      */
 154     gf_log[0] = 255;
 155     /* set the extended gf_exp values for fast multiply */
 156     for (i = 0; i < 255; i++)
 157         gf_exp[i + 255] = gf_exp[i];
 158
 159     /*
 160      * again special cases. 0 has no inverse. This used to
 161      * be initialized to 255, but it should make no difference
 162      * since noone is supposed to read from here.
 163      */
 164     inverse[0] = 0;
 165     inverse[1] = 1;
 166     for (i = 2; i <= 255; i++)
 167         inverse[i] = gf_exp[255 - gf_log[i]];
 168 }
 169
 170 /*
 171  * Various linear algebra operations that i use often.
 172  */
 173
 174 /*
 175  * addmul() computes dst[] = dst[] + c * src[]
 176  * This is used often, so better optimize it! Currently the loop is
 177  * unrolled 16 times, a good value for 486 and pentium-class machines.
 178  * The case c=0 is also optimized, whereas c=1 is not. These
 179  * calls are unfrequent in my typical apps so I did not bother.
 180  */
 181 #define addmul(dst, src, c, sz)                 \
 182     if (c != 0) _addmul1(dst, src, c, sz)
 183
 184 #define UNROLL 16               /* 1, 4, 8, 16 */
 185 static void
 186 _addmul1(register gf*restrict dst, const register gf*restrict src, gf c, size_t sz) {
 187     USE_GF_MULC;
 188     const gf* lim = &dst[sz - UNROLL + 1];
 189
 190     GF_MULC0 (c);
 191
 192 #if (UNROLL > 1)                /* unrolling by 8/16 is quite effective on the pentium */
 193     for (; dst < lim; dst += UNROLL, src += UNROLL) {
 194         GF_ADDMULC (dst[0], src[0]);
 195         GF_ADDMULC (dst[1], src[1]);
 196         GF_ADDMULC (dst[2], src[2]);
 197         GF_ADDMULC (dst[3], src[3]);
 198 #if (UNROLL > 4)
 199         GF_ADDMULC (dst[4], src[4]);
 200         GF_ADDMULC (dst[5], src[5]);
 201         GF_ADDMULC (dst[6], src[6]);
 202         GF_ADDMULC (dst[7], src[7]);
 203 #endif
 204 #if (UNROLL > 8)
 205         GF_ADDMULC (dst[8], src[8]);
 206         GF_ADDMULC (dst[9], src[9]);
 207         GF_ADDMULC (dst[10], src[10]);
 208         GF_ADDMULC (dst[11], src[11]);
 209         GF_ADDMULC (dst[12], src[12]);
 210         GF_ADDMULC (dst[13], src[13]);
 211         GF_ADDMULC (dst[14], src[14]);
 212         GF_ADDMULC (dst[15], src[15]);
 213 #endif
 214     }
 215 #endif
 216     lim += UNROLL - 1;
 217     for (; dst < lim; dst++, src++)       /* final components */
 218         GF_ADDMULC (*dst, *src);
 219 }
 220
 221 /*
 222  * computes C = AB where A is n*k, B is k*m, C is n*m
 223  */
 224 static void
 225 _matmul(gf * a, gf * b, gf * c, unsigned n, unsigned k, unsigned m) {
 226     unsigned row, col, i;
 227
 228     for (row = 0; row < n; row++) {
 229         for (col = 0; col < m; col++) {
 230             gf *pa = &a[row * k];
 231             gf *pb = &b[col];
 232             gf acc = 0;
 233             for (i = 0; i < k; i++, pa++, pb += m)
 234                 acc ^= gf_mul (*pa, *pb);
 235             c[row * m + col] = acc;
 236         }
 237     }
 238 }
 239
 240 /*
 241  * _invert_mat() takes a matrix and produces its inverse
 242  * k is the size of the matrix.
 243  * (Gauss-Jordan, adapted from Numerical Recipes in C)
 244  * Return non-zero if singular.
 245  */
 246 static void
 247 _invert_mat(gf* src, unsigned k) {
 248     gf c, *p;
 249     unsigned irow = 0;
 250     unsigned icol = 0;
 251     unsigned row, col, i, ix;
 252
 253     unsigned* indxc = (unsigned*) malloc (k * sizeof(unsigned));
 254     unsigned* indxr = (unsigned*) malloc (k * sizeof(unsigned));
 255     unsigned* ipiv = (unsigned*) malloc (k * sizeof(unsigned));
 256     gf *id_row = NEW_GF_MATRIX (1, k);
 257
 258     memset (id_row, '\0', k * sizeof (gf));
 259     /*
 260      * ipiv marks elements already used as pivots.
 261      */
 262     for (i = 0; i < k; i++)
 263         ipiv[i] = 0;
 264
 265     for (col = 0; col < k; col++) {
 266         gf *pivot_row;
 267         /*
 268          * Zeroing column 'col', look for a non-zero element.
 269          * First try on the diagonal, if it fails, look elsewhere.
 270          */
 271         if (ipiv[col] != 1 && src[col * k + col] != 0) {
 272             irow = col;
 273             icol = col;
 274             goto found_piv;
 275         }
 276         for (row = 0; row < k; row++) {
 277             if (ipiv[row] != 1) {
 278                 for (ix = 0; ix < k; ix++) {
 279                     if (ipiv[ix] == 0) {
 280                         if (src[row * k + ix] != 0) {
 281                             irow = row;
 282                             icol = ix;
 283                             goto found_piv;
 284                         }
 285                     } else
 286                         assert (ipiv[ix] <= 1);
 287                 }
 288             }
 289         }
 290       found_piv:
 291         ++(ipiv[icol]);
 292         /*
 293          * swap rows irow and icol, so afterwards the diagonal
 294          * element will be correct. Rarely done, not worth
 295          * optimizing.
 296          */
 297         if (irow != icol)
 298             for (ix = 0; ix < k; ix++)
 299                 SWAP (src[irow * k + ix], src[icol * k + ix], gf);
 300         indxr[col] = irow;
 301         indxc[col] = icol;
 302         pivot_row = &src[icol * k];
 303         c = pivot_row[icol];
 304         assert (c != 0);
 305         if (c != 1) {                       /* otherwhise this is a NOP */
 306             /*
 307              * this is done often , but optimizing is not so
 308              * fruitful, at least in the obvious ways (unrolling)
 309              */
 310             c = inverse[c];
 311             pivot_row[icol] = 1;
 312             for (ix = 0; ix < k; ix++)
 313                 pivot_row[ix] = gf_mul (c, pivot_row[ix]);
 314         }
 315         /*
 316          * from all rows, remove multiples of the selected row
 317          * to zero the relevant entry (in fact, the entry is not zero
 318          * because we know it must be zero).
 319          * (Here, if we know that the pivot_row is the identity,
 320          * we can optimize the addmul).
 321          */
 322         id_row[icol] = 1;
 323         if (memcmp (pivot_row, id_row, k * sizeof (gf)) != 0) {
 324             for (p = src, ix = 0; ix < k; ix++, p += k) {
 325                 if (ix != icol) {
 326                     c = p[icol];
 327                     p[icol] = 0;
 328                     addmul (p, pivot_row, c, k);
 329                 }
 330             }
 331         }
 332         id_row[icol] = 0;
 333     }                           /* done all columns */
 334     for (col = k; col > 0; col--)
 335         if (indxr[col-1] != indxc[col-1])
 336             for (row = 0; row < k; row++)
 337                 SWAP (src[row * k + indxr[col-1]], src[row * k + indxc[col-1]], gf);
 338 }
 339
 340 /*
 341  * fast code for inverting a vandermonde matrix.
 342  *
 343  * NOTE: It assumes that the matrix is not singular and _IS_ a vandermonde
 344  * matrix. Only uses the second column of the matrix, containing the p_i's.
 345  *
 346  * Algorithm borrowed from "Numerical recipes in C" -- sec.2.8, but largely
 347  * revised for my purposes.
 348  * p = coefficients of the matrix (p_i)
 349  * q = values of the polynomial (known)
 350  */
 351 void
 352 _invert_vdm (gf* src, unsigned k) {
 353     unsigned i, j, row, col;
 354     gf *b, *c, *p;
 355     gf t, xx;
 356
 357     if (k == 1)                   /* degenerate case, matrix must be p^0 = 1 */
 358         return;
 359     /*
 360      * c holds the coefficient of P(x) = Prod (x - p_i), i=0..k-1
 361      * b holds the coefficient for the matrix inversion
 362      */
 363     c = NEW_GF_MATRIX (1, k);
 364     b = NEW_GF_MATRIX (1, k);
 365
 366     p = NEW_GF_MATRIX (1, k);
 367
 368     for (j = 1, i = 0; i < k; i++, j += k) {
 369         c[i] = 0;
 370         p[i] = src[j];            /* p[i] */
 371     }
 372     /*
 373      * construct coeffs. recursively. We know c[k] = 1 (implicit)
 374      * and start P_0 = x - p_0, then at each stage multiply by
 375      * x - p_i generating P_i = x P_{i-1} - p_i P_{i-1}
 376      * After k steps we are done.
 377      */
 378     c[k - 1] = p[0];              /* really -p(0), but x = -x in GF(2^m) */
 379     for (i = 1; i < k; i++) {
 380         gf p_i = p[i];            /* see above comment */
 381         for (j = k - 1 - (i - 1); j < k - 1; j++)
 382             c[j] ^= gf_mul (p_i, c[j + 1]);
 383         c[k - 1] ^= p_i;
 384     }
 385
 386     for (row = 0; row < k; row++) {
 387         /*
 388          * synthetic division etc.
 389          */
 390         xx = p[row];
 391         t = 1;
 392         b[k - 1] = 1;             /* this is in fact c[k] */
 393         for (i = k - 1; i > 0; i--) {
 394             b[i-1] = c[i] ^ gf_mul (xx, b[i]);
 395             t = gf_mul (xx, t) ^ b[i-1];
 396         }
 397         for (col = 0; col < k; col++)
 398             src[col * k + row] = gf_mul (inverse[t], b[col]);
 399     }
 400     free (c);
 401     free (b);
 402     free (p);
 403     return;
 404 }
 405
 406 static int fec_initialized = 0;
 407 static void
 408 init_fec (void) {
 409     generate_gf();
 410     _init_mul_table();
 411     fec_initialized = 1;
 412 }
 413
 414 /*
 415  * This section contains the proper FEC encoding/decoding routines.
 416  * The encoding matrix is computed starting with a Vandermonde matrix,
 417  * and then transforming it into a systematic matrix.
 418  */
 419
 420 #define FEC_MAGIC       0xFECC0DEC
 421
 422 void
 423 fec_free (fec_t *p) {
 424     assert (p != NULL && p->magic == (((FEC_MAGIC ^ p->k) ^ p->n) ^ (unsigned long) (p->enc_matrix)));
 425     free (p->enc_matrix);
 426     free (p);
 427 }
 428
 429 fec_t *
 430 fec_new(unsigned k, unsigned n) {
 431     unsigned row, col;
 432     gf *p, *tmp_m;
 433
 434     fec_t *retval;
 435
 436     if (fec_initialized == 0)
 437         init_fec ();
 438
 439     retval = (fec_t *) malloc (sizeof (fec_t));
 440     retval->k = k;
 441     retval->n = n;
 442     retval->enc_matrix = NEW_GF_MATRIX (n, k);
 443     retval->magic = ((FEC_MAGIC ^ k) ^ n) ^ (unsigned long) (retval->enc_matrix);
 444     tmp_m = NEW_GF_MATRIX (n, k);
 445     /*
 446      * fill the matrix with powers of field elements, starting from 0.
 447      * The first row is special, cannot be computed with exp. table.
 448      */
 449     tmp_m[0] = 1;
 450     for (col = 1; col < k; col++)
 451         tmp_m[col] = 0;
 452     for (p = tmp_m + k, row = 0; row < n - 1; row++, p += k)
 453         for (col = 0; col < k; col++)
 454             p[col] = gf_exp[modnn (row * col)];
 455
 456     /*
 457      * quick code to build systematic matrix: invert the top
 458      * k*k vandermonde matrix, multiply right the bottom n-k rows
 459      * by the inverse, and construct the identity matrix at the top.
 460      */
 461     _invert_vdm (tmp_m, k);        /* much faster than _invert_mat */
 462     _matmul(tmp_m + k * k, tmp_m, retval->enc_matrix + k * k, n - k, k, k);
 463     /*
 464      * the upper matrix is I so do not bother with a slow multiply
 465      */
 466     memset (retval->enc_matrix, '\0', k * k * sizeof (gf));
 467     for (p = retval->enc_matrix, col = 0; col < k; col++, p += k + 1)
 468         *p = 1;
 469     free (tmp_m);
 470
 471     return retval;
 472 }
 473
 474 /* To make sure that we stay within cache in the inner loops of fec_encode()
 475    and fec_decode(). */
 476 #define STRIDE 8192
 477
 478 void
 479 fec_encode(const fec_t* code, const gf*restrict const*restrict const src, gf*restrict const*restrict const fecs, const unsigned*restrict const block_nums, size_t num_block_nums, size_t sz) {
 480     unsigned char i, j;
 481     size_t k;
 482     unsigned fecnum;
 483     const gf* p;
 484
 485     for (k = 0; k < sz; k += STRIDE) {
 486         size_t stride = ((sz-k) < STRIDE)?(sz-k):STRIDE;
 487         for (i=0; i<num_block_nums; i++) {
 488             fecnum=block_nums[i];
 489             assert (fecnum >= code->k);
 490             memset(fecs[i]+k, 0, stride);
 491             p = &(code->enc_matrix[fecnum * code->k]);
 492             for (j = 0; j < code->k; j++)
 493                 addmul(fecs[i]+k, src[j]+k, p[j], stride);
 494         }
 495     }
 496 }
 497
 498 /**
 499  * Build decode matrix into some memory space.
 500  *
 501  * @param matrix a space allocated for a k by k matrix
 502  */
 503 void
 504 build_decode_matrix_into_space(const fec_t*restrict const code, const unsigned*const restrict index, const unsigned k, gf*restrict const matrix) {
 505     unsigned char i;
 506     gf* p;
 507     for (i=0, p=matrix; i < k; i++, p += k) {
 508         if (index[i] < k) {
 509             memset(p, 0, k);
 510             p[i] = 1;
 511         } else {
 512             memcpy(p, &(code->enc_matrix[index[i] * code->k]), k);
 513         }
 514     }
 515     _invert_mat (matrix, k);
 516 }
 517
 518 void
 519 fec_decode(const fec_t* code, const gf*restrict const*restrict const inpkts, gf*restrict const*restrict const outpkts, const unsigned*restrict const index, size_t sz) {
 520     gf* m_dec = (gf*)alloca(code->k * code->k);
 521     unsigned char outix=0;
 522     unsigned char row=0;
 523     unsigned char col=0;
 524     build_decode_matrix_into_space(code, index, code->k, m_dec);
 525
 526     for (row=0; row<code->k; row++) {
 527         if (index[row] >= code->k) {
 528             memset(outpkts[outix], 0, sz);
 529             for (col=0; col < code->k; col++)
 530                 addmul(outpkts[outix], inpkts[col], m_dec[row * code->k + col], sz);
 531             outix++;
 532         }
 533     }
 534 }
 535
 536 /**
 537  * zfec -- fast forward error correction library with Python interface
 538  *
 539  * Copyright (C) 2007 Allmydata, Inc.
 540  * Author: Zooko Wilcox-O'Hearn
 541  *
 542  * This file is part of zfec.
 543  *
 544  * This program is free software; you can redistribute it and/or modify it
 545  * under the terms of the GNU General Public License as published by the Free
 546  * Software Foundation; either version 2 of the License, or (at your option)
 547  * any later version, with the added permission that, if you become obligated
 548  * to release a derived work under this licence (as per section 2.b), you may
 549  * delay the fulfillment of this obligation for up to 12 months.  See the file
 550  * COPYING for details.
 551  *
 552  * If you would like to inquire about a commercial relationship with Allmydata,
 553  * Inc., please contact partnerships@allmydata.com and visit
 554  * http://allmydata.com/.
 555  */
 556
 557 /*
 558  * Much of this work is derived from the "fec" software by Luigi Rizzo, et
 559  * al., the copyright notice and licence terms of which are included below
 560  * for reference.
 561  * fec.c -- forward error correction based on Vandermonde matrices
 562  * 980624
 563  * (C) 1997-98 Luigi Rizzo (luigi@iet.unipi.it)
 564  *
 565  * Portions derived from code by Phil Karn (karn@ka9q.ampr.org),
 566  * Robert Morelos-Zaragoza (robert@spectra.eng.hawaii.edu) and Hari
 567  * Thirumoorthy (harit@spectra.eng.hawaii.edu), Aug 1995
 568  *
 569  * Modifications by Dan Rubenstein (see Modifications.txt for
 570  * their description.
 571  * Modifications (C) 1998 Dan Rubenstein (drubenst@cs.umass.edu)
 572  *
 573  * Redistribution and use in source and binary forms, with or without
 574  * modification, are permitted provided that the following conditions
 575  * are met:
 576  *
 577  * 1. Redistributions of source code must retain the above copyright
 578  *    notice, this list of conditions and the following disclaimer.
 579  * 2. Redistributions in binary form must reproduce the above
 580  *    copyright notice, this list of conditions and the following
 581  *    disclaimer in the documentation and/or other materials
 582  *    provided with the distribution.
 583  *
 584  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND
 585  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 586  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
 587  * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS
 588  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
 589  * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 590  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
 591  * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 592  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
 593  * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 594  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
 595  * OF SUCH DAMAGE.
 596  */