From 6c68dd897ab354bb141102dd15acc9fde21b3372 Mon Sep 17 00:00:00 2001 From: zooko <zooko@zooko.com> Date: Mon, 12 Nov 2007 20:28:19 +0530 Subject: [PATCH] zfec: reorder the inner loop to be more cache-friendly For each stride, loop over that stride of every input block for every output block before moving on to the next stride, instead of looping over all strides of one output block at a time. In theory, this should allow the current stride of each input block to remain in cache while we produce the corresponding stride of all of the output blocks. darcs-hash:8f0ac74d2150507519463d2d711607f467f18ea6 --- zfec/zfec/fec.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/zfec/zfec/fec.c b/zfec/zfec/fec.c index 2054697..84f4645 100644 --- a/zfec/zfec/fec.c +++ b/zfec/zfec/fec.c @@ -482,15 +482,16 @@ fec_encode(const fec_t* code, const gf*restrict const*restrict const src, gf*res unsigned fecnum; const gf* p; - for (i=0; i<num_block_nums; i++) { - fecnum=block_nums[i]; - assert (fecnum >= code->k); - memset(fecs[i], 0, sz); - p = &(code->enc_matrix[fecnum * code->k]); -// DUFF ME - for (k = 0; k < sz; k += STRIDE) + for (k = 0; k < sz; k += STRIDE) { + size_t stride = ((sz-k) < STRIDE)?(sz-k):STRIDE; + for (i=0; i<num_block_nums; i++) { + fecnum=block_nums[i]; + assert (fecnum >= code->k); + memset(fecs[i]+k, 0, stride); + p = &(code->enc_matrix[fecnum * code->k]); for (j = 0; j < code->k; j++) - addmul(fecs[i]+k, src[j]+k, p[j], ((sz-k) < STRIDE)?(sz-k):STRIDE); + addmul(fecs[i]+k, src[j]+k, p[j], stride); + } } } -- 2.45.2