) where
import Control.Monad.State
-import Data.Bits (shiftL, (.&.), (.|.))
+import Data.Bits (shiftR, (.&.))
import qualified Data.ByteString.Char8 as BS
import qualified Data.ByteString.Lazy as BL
-import Data.Char (ord)
-import Data.Digest.Adler32 (adler32)
+import Data.Digest.Adler32 (adler32, adler32Update)
import qualified Data.Map as M
import Data.Word (Word8, Word32)
import qualified Crypto.Hash.MD4 as MD4
genInstructions :: [Signature] -> Integer -> BL.ByteString -> [Instruction]
genInstructions f0sigs blockSize fnew =
- -- create two hash tables one with adler32 as key and list of block numbers as values
- -- another with md4sum as key and block numbers as values.
- evalState (go fnew) 0
+ evalState (go 0 fnew) sig0
where
- go :: BL.ByteString -> State Word32 [Instruction]
- go fnew | fnew == BL.empty = return []
- | otherwise =
- let (blk, blks) = BL.splitAt (fromIntegral blockSize) fnew
- adlerSum = weakSig blk
- in
- case M.lookup adlerSum f0AdlerTable of
- Nothing -> do
- put adlerSum
- is <- go (BL.tail (blk `mappend` blks))
- return $ RChar (BL.head blk) : is
- Just _ ->
- let md4sum = blockSig blk
- in
- case M.lookup md4sum f0MD4Table of
- Just i -> do
- is <- go blks
- return $ RBlk (head i) : is
- Nothing -> do
- put adlerSum
- is <- go (BL.tail (blk `mappend` blks))
- return $ RChar (BL.head blk) : is
+ sig0 = weakSig $ BL.take (fromIntegral blockSize) fnew
+ go :: Integer -> BL.ByteString -> State Adler32checksum [Instruction]
+ go startIdx fnew | fnew == BL.empty = return []
+ | otherwise = do
+ let (blk, blks) = BL.splitAt (fromIntegral blockSize) fnew
+ endIdx = startIdx + fromIntegral (BL.length blk) - 1
+ adlerSum <- get
+ let matches = M.lookup adlerSum f0AdlerTable >>
+ M.lookup (blockSig blk) f0MD4Table
+ case matches of
+ Just idxs -> do
+ modify (`adler32Update` blk)
+ is <- go (endIdx + 1) blks
+ return $ RBlk (head idxs) : is
+ Nothing -> do
+ let c = BL.head blk
+ modify (`adler32Update` BL.singleton c)
+ is <- go (startIdx + 1) (BL.tail (blk `mappend` blks))
+ return $ RChar c : is
f0AdlerTable = toAdlerMap f0sigs
f0MD4Table = toMD4Map f0sigs
RBlk i -> (f0blocks !! i) `mappend` go f0blocks insts
RChar w -> BL.singleton w `mappend` go f0blocks insts
+rollingChecksum :: Int -> Int -> BL.ByteString -> Word32
+rollingChecksum strtIdx lenBS bs = a `mod` m + ((fromIntegral b) `mod` m) * m
+ where a = BL.foldl (\acc x -> acc + (fromIntegral x)) 0 bs'
+ b = BL.foldl (\acc x -> acc + x) 0 (BL.pack wbs')
+ bs' = BL.take (fromIntegral lenBS) $ BL.drop (fromIntegral strtIdx) bs
+ m = 2^16
+ wbs' = BL.zipWith (*) (BL.pack (reverse (map fromIntegral [1..(lenBS - strtIdx + 1)]))) bs'
+
+-- given the checksum of bytes from index: startIdx to endIdx, find
+-- the checksum for the block from (startIdx + 1 .. endIdx + 1)
+rollingChecksumUpdate :: Word32 -> Word8 -> Word8 -> Integer -> Integer -> Word32
+rollingChecksumUpdate oldChecksum old new strtIdx endIdx =
+ let b_Old = (oldChecksum `shiftR` 16) .&. 0xff
+ a_Old = (oldChecksum .&. 0xff)
+ a_New = (a_Old - (fromIntegral old) + (fromIntegral new)) `mod` m
+ b_New = (b_Old - ((fromIntegral endIdx) - (fromIntegral strtIdx) + 1) * (fromIntegral old) + a_New) `mod` m
+ m = 2^16
+ in
+ a_New `mod` m + (b_New `mod` m) * m