1 {-# LANGUAGE OverloadedStrings #-}
8 import Control.Monad.State
9 import Data.Bits (shiftL, (.&.), (.|.))
10 import qualified Data.ByteString.Char8 as BS
11 import qualified Data.ByteString.Lazy as BL
12 import Data.Char (ord)
13 import Data.Digest.Adler32 (adler32)
14 import qualified Data.Map as M
15 import Data.Word (Word8, Word32)
16 import qualified Crypto.Hash.MD4 as MD4
18 type Md4digest = BS.ByteString
19 type Adler32checksum = Word32
21 type Signature = (Md4digest, Adler32checksum, Int)
23 fileSignatures :: BL.ByteString -> Integer -> [Signature]
24 fileSignatures bs blockSize = zip3 strongsigs weaksigs [0..]
25 where strongsigs = map blockSig (splitBS bs blockSize)
26 weaksigs = map adler32 (splitBS bs blockSize)
28 splitBS :: BL.ByteString -> Integer -> [BL.ByteString]
29 splitBS bs blockSize | fromIntegral (BL.length bs) < blockSize = [bs]
30 splitBS bs blockSize | otherwise =
31 (BL.take (fromIntegral blockSize) bs) :
32 splitBS (BL.drop (fromIntegral blockSize) bs) blockSize
34 -- compute md4 digest (128 bits)
35 blockSig :: BL.ByteString -> BS.ByteString
36 blockSig = MD4.hash . BL.toStrict
38 weakSig :: BL.ByteString -> Adler32checksum
41 data Instruction = RChar Word8
45 genInstructions :: [Signature] -> Integer -> BL.ByteString -> [Instruction]
46 genInstructions f0sigs blockSize fnew =
47 -- create two hash tables one with adler32 as key and list of block numbers as values
48 -- another with md4sum as key and block numbers as values.
51 go :: BL.ByteString -> State Word32 [Instruction]
52 go fnew | (fnew == BL.empty) = return []
54 let (blk, blks) = BL.splitAt (fromIntegral blockSize) fnew
55 adlerSum = weakSig blk
57 case M.lookup adlerSum f0AdlerTable of
60 is <- go (BL.tail (blk `mappend` blks))
61 return $ RChar (BL.head blk) : is
63 let md4sum = blockSig blk
65 case M.lookup md4sum f0MD4Table of
68 return $ RBlk (head i) : is
70 return [RChar (BL.head blk)]
71 f0AdlerTable = toAdlerMap f0sigs
72 f0MD4Table = toMD4Map f0sigs
74 toAdlerMap :: [Signature] -> M.Map Adler32checksum [Int]
75 toAdlerMap = foldr f M.empty
76 where f sig m = let (_, aSig, idx) = sig in M.insertWith (++) aSig [idx] m
78 toMD4Map :: [Signature] -> M.Map Md4digest [Int]
79 toMD4Map = foldr f M.empty
80 where f sig m = let (mSig, _, idx) = sig in M.insertWith (++) mSig [idx] m
82 recreate :: BL.ByteString -> Integer -> [Instruction] -> BL.ByteString
83 recreate f0 blockSize ins =
84 let f0blocks = splitBS f0 blockSize
87 where go f0blocks [] = mempty
88 go f0blocks (inst:insts) =
90 RBlk i -> (f0blocks !! i) `mappend` go f0blocks insts
91 RChar w -> (BL.singleton w) `mappend` go f0blocks insts