From: zooko
Date: Mon, 12 Nov 2007 14:58:19 +0000 (+0530)
Subject: zfec: reorder the inner loop to be more cache-friendly
X-Git-Url: https://git.rkrishnan.org/vdrive/%5B/%5D%20/uri/flags/architecture.txt?a=commitdiff_plain;h=6c68dd897ab354bb141102dd15acc9fde21b3372;p=tahoe-lafs%2Fzfec.git

zfec: reorder the inner loop to be more cache-friendly

Loop over this stride of each input block before looping over all strides of this input block. In theory, this should allow the strides of the input blocks to remain in cache while we produce all of the output blocks.

darcs-hash:8f0ac74d2150507519463d2d711607f467f18ea6
---

diff --git a/zfec/zfec/fec.c b/zfec/zfec/fec.c
index 2054697..84f4645 100644
--- a/zfec/zfec/fec.c
+++ b/zfec/zfec/fec.c
@@ -482,15 +482,16 @@ fec_encode(const fec_t* code, const gf*restrict const*restrict const src, gf*res
     unsigned fecnum;
     const gf* p;
 
-    for (i=0; i<num_block_nums; i++) {
-        fecnum=block_nums[i];
-        assert (fecnum >= code->k);
-        memset(fecs[i], 0, sz);
-        p = &(code->enc_matrix[fecnum * code->k]);
-// DUFF ME
-        for (k = 0; k < sz; k += STRIDE)
+    for (k = 0; k < sz; k += STRIDE) {
+        size_t stride = ((sz-k) < STRIDE)?(sz-k):STRIDE;
+        for (i=0; i<num_block_nums; i++) {
+            fecnum=block_nums[i];
+            assert (fecnum >= code->k);
+            memset(fecs[i]+k, 0, stride);
+            p = &(code->enc_matrix[fecnum * code->k]);
             for (j = 0; j < code->k; j++)
-                addmul(fecs[i]+k, src[j]+k, p[j], ((sz-k) < STRIDE)?(sz-k):STRIDE);
+                addmul(fecs[i]+k, src[j]+k, p[j], stride);
+        }
     }
 }
 