X-Git-Url: https://git.rkrishnan.org/?a=blobdiff_plain;f=src%2FLib.hs;fp=src%2FLib.hs;h=693cc19a26acd7b68a971297301059a578629a91;hb=0c12b50c31b904bb3ff6509940ef9f9aef4ea7c8;hp=8cfb325b04ab1ddfc20471571a97922ac22224d0;hpb=470fc5956c702816c23d6768d9952aee2423fd99;p=hs-rsync.git diff --git a/src/Lib.hs b/src/Lib.hs index 8cfb325..693cc19 100644 --- a/src/Lib.hs +++ b/src/Lib.hs @@ -5,15 +5,25 @@ module Lib , recreate ) where +import Control.Monad.State +import Data.Bits (shiftL, (.&.), (.|.)) import qualified Data.ByteString.Char8 as BS import qualified Data.ByteString.Lazy as BL -import Data.Word (Word8) -import qualified Crypto.Hash.SHA1 as SHA1 +import Data.Char (ord) +import Data.Digest.Adler32 (adler32) +import qualified Data.Map as M +import Data.Word (Word8, Word32) +import qualified Crypto.Hash.MD4 as MD4 -type Signature = (BS.ByteString, Int) +type Md4digest = BS.ByteString +type Adler32checksum = Word32 + +type Signature = (Md4digest, Adler32checksum, Int) fileSignatures :: BL.ByteString -> Integer -> [Signature] -fileSignatures bs blockSize = zip (map blockSig (splitBS bs blockSize)) [0..] +fileSignatures bs blockSize = zip3 strongsigs weaksigs [0..] + where strongsigs = map blockSig (splitBS bs blockSize) + weaksigs = map adler32 (splitBS bs blockSize) splitBS :: BL.ByteString -> Integer -> [BL.ByteString] splitBS bs blockSize | fromIntegral (BL.length bs) < blockSize = [bs] @@ -21,8 +31,12 @@ splitBS bs blockSize | otherwise = (BL.take (fromIntegral blockSize) bs) : splitBS (BL.drop (fromIntegral blockSize) bs) blockSize +-- compute md4 digest (128 bits) blockSig :: BL.ByteString -> BS.ByteString -blockSig = SHA1.hash . BL.toStrict +blockSig = MD4.hash . BL.toStrict + +weakSig :: BL.ByteString -> Adler32checksum +weakSig = adler32 data Instruction = RChar Word8 | RBlk Int @@ -30,16 +44,40 @@ data Instruction = RChar Word8 genInstructions :: [Signature] -> Integer -> BL.ByteString -> [Instruction] genInstructions f0sigs blockSize fnew = - if (fnew == BL.empty) - then [] - else - let (blk, blks) = BL.splitAt (fromIntegral blockSize) fnew - sig = blockSig blk - in - case (lookup sig f0sigs) of - Just (idx) -> RBlk (fromIntegral idx) : genInstructions f0sigs blockSize blks - Nothing -> RChar (BL.head blk) : - genInstructions f0sigs blockSize (BL.tail (blk `mappend` blks)) + -- create two hash tables one with adler32 as key and list of block numbers as values + -- another with md4sum as key and block numbers as values. + evalState (go fnew) 0 + where + go :: BL.ByteString -> State Word32 [Instruction] + go fnew | (fnew == BL.empty) = return [] + | otherwise = + let (blk, blks) = BL.splitAt (fromIntegral blockSize) fnew + adlerSum = weakSig blk + in + case M.lookup adlerSum f0AdlerTable of + Nothing -> do + put adlerSum + is <- go (BL.tail (blk `mappend` blks)) + return $ RChar (BL.head blk) : is + Just _ -> + let md4sum = blockSig blk + in + case M.lookup md4sum f0MD4Table of + Just i -> do + is <- go blks + return $ RBlk (head i) : is + Nothing -> + return [RChar (BL.head blk)] + f0AdlerTable = toAdlerMap f0sigs + f0MD4Table = toMD4Map f0sigs + +toAdlerMap :: [Signature] -> M.Map Adler32checksum [Int] +toAdlerMap = foldr f M.empty + where f sig m = let (_, aSig, idx) = sig in M.insertWith (++) aSig [idx] m + +toMD4Map :: [Signature] -> M.Map Md4digest [Int] +toMD4Map = foldr f M.empty + where f sig m = let (mSig, _, idx) = sig in M.insertWith (++) mSig [idx] m recreate :: BL.ByteString -> Integer -> [Instruction] -> BL.ByteString recreate f0 blockSize ins =