1 {-# LANGUAGE OverloadedStrings #-}
8 import Control.Monad.State
9 import qualified Data.ByteString.Char8 as BS
10 import qualified Data.ByteString.Lazy as BL
11 import Data.Digest.Adler32 (adler32, adler32Update)
12 import qualified Data.Map as M
13 import Data.Word (Word8, Word32)
14 import qualified Crypto.Hash.MD4 as MD4
16 type Md4digest = BS.ByteString
17 type Adler32checksum = Word32
19 type Signature = (Md4digest, Adler32checksum, Int)
21 fileSignatures :: BL.ByteString -> Integer -> [Signature]
22 fileSignatures bs blockSize = zip3 strongsigs weaksigs [0..]
23 where strongsigs = map blockSig (splitBS bs blockSize)
24 weaksigs = map adler32 (splitBS bs blockSize)
26 splitBS :: BL.ByteString -> Integer -> [BL.ByteString]
27 splitBS bs blockSize | fromIntegral (BL.length bs) < blockSize = [bs]
28 splitBS bs blockSize = BL.take (fromIntegral blockSize) bs :
29 splitBS (BL.drop (fromIntegral blockSize) bs) blockSize
31 -- compute md4 digest (128 bits)
32 blockSig :: BL.ByteString -> BS.ByteString
33 blockSig = MD4.hash . BL.toStrict
35 weakSig :: BL.ByteString -> Adler32checksum
38 data Instruction = RChar Word8
42 genInstructions :: [Signature] -> Integer -> BL.ByteString -> [Instruction]
43 genInstructions f0sigs blockSize fnew =
44 evalState (go fnew) sig0
46 sig0 = weakSig $ BL.take (fromIntegral blockSize) fnew
47 go :: BL.ByteString -> State Adler32checksum [Instruction]
48 go fnew | fnew == BL.empty = return []
50 let (blk, blks) = BL.splitAt (fromIntegral blockSize) fnew
52 let matches = M.lookup adlerSum f0AdlerTable >>
53 M.lookup (blockSig blk) f0MD4Table
56 modify (`adler32Update` blk)
58 return $ RBlk (head idxs) : is
61 modify (`adler32Update` BL.singleton c)
62 is <- go (BL.tail (blk `mappend` blks))
64 f0AdlerTable = toAdlerMap f0sigs
65 f0MD4Table = toMD4Map f0sigs
67 toAdlerMap :: [Signature] -> M.Map Adler32checksum [Int]
68 toAdlerMap = foldr f M.empty
69 where f sig m = let (_, aSig, idx) = sig in M.insertWith (++) aSig [idx] m
71 toMD4Map :: [Signature] -> M.Map Md4digest [Int]
72 toMD4Map = foldr f M.empty
73 where f sig m = let (mSig, _, idx) = sig in M.insertWith (++) mSig [idx] m
75 recreate :: BL.ByteString -> Integer -> [Instruction] -> BL.ByteString
76 recreate f0 blockSize ins =
77 let f0blocks = splitBS f0 blockSize
80 where go f0blocks [] = mempty
81 go f0blocks (inst:insts) =
83 RBlk i -> (f0blocks !! i) `mappend` go f0blocks insts
84 RChar w -> BL.singleton w `mappend` go f0blocks insts
86 rollingChecksum :: Int -> Int -> BL.ByteString -> Word32
87 rollingChecksum strtIdx lenBS bs = a `mod` m + ((fromIntegral b) `mod` m) * m
88 where a = BL.foldl (\acc x -> acc + (fromIntegral x)) 0 bs'
89 b = BL.foldl (\acc x -> acc + x) 0 (BL.pack wbs')
90 bs' = BL.take (fromIntegral lenBS) $ BL.drop (fromIntegral strtIdx) bs
92 wbs' = BL.zipWith (*) (BL.pack (reverse (map fromIntegral [1..(lenBS - strtIdx + 1)]))) bs'