From: agl Date: Sun, 20 Jan 2008 02:19:14 +0000 (+0530) Subject: Add Haskell bindings to zfec X-Git-Url: https://git.rkrishnan.org/components/com_hotproperty/flags/simplejson/statistics?a=commitdiff_plain;h=544923c67f94beb1610527f3c4a764c043d847a2;p=tahoe-lafs%2Fzfec.git Add Haskell bindings to zfec darcs-hash:81f1aa5ca90679271b145c9d261e894bc8528e79 --- diff --git a/zfec/Setup.lhs b/zfec/Setup.lhs new file mode 100755 index 0000000..5bde0de --- /dev/null +++ b/zfec/Setup.lhs @@ -0,0 +1,3 @@ +#!/usr/bin/env runhaskell +> import Distribution.Simple +> main = defaultMain diff --git a/zfec/fec.cabal b/zfec/fec.cabal new file mode 100644 index 0000000..6144c7e --- /dev/null +++ b/zfec/fec.cabal @@ -0,0 +1,30 @@ +name: fec +version: 0.1 +license: GPL +license-file: README.txt +author: Adam Langley +maintainer: Adam Langley +description: This code, based on zfec by Zooko, based on code by Luigi + Rizzo implements an erasure code, or forward error + correction code. The most widely known example of an erasure + code is the RAID-5 algorithm which makes it so that in the + event of the loss of any one hard drive, the stored data can + be completely recovered. The algorithm in the zfec package + has a similar effect, but instead of recovering from the loss + of only a single element, it can be parameterized to choose in + advance the number of elements whose loss it can tolerate. +build-type: Simple +homepage: http://allmydata.org/source/zfec +synopsis: Forward error correction of ByteStrings +category: Codec +build-depends: base, bytestring>=0.9 +stability: provisional +tested-with: GHC == 6.8.2 +exposed-modules: Codec.FEC +extensions: ForeignFunctionInterface +hs-source-dirs: haskell +ghc-options: -Wall +c-sources: zfec/fec.c +cc-options: -std=c99 +include-dirs: zfec +extra-source-files: zfec/fec.h diff --git a/zfec/haskell/Codec/FEC.hs b/zfec/haskell/Codec/FEC.hs new file mode 100644 index 0000000..434c7d8 --- /dev/null +++ b/zfec/haskell/Codec/FEC.hs @@ -0,0 +1,246 @@ +{-# LANGUAGE ForeignFunctionInterface, EmptyDataDecls #-} +-- | +-- Module: Codec.FEC +-- Copyright: Adam Langley +-- License: BSD3 +-- +-- Stability: experimental +-- +-- The module provides k of n encoding - a way to generate (n - k) secondary +-- blocks of data from k primary blocks such that any k blocks (primary or +-- secondary) are sufficient to regenerate all blocks. +-- +-- All blocks must be the same length and you need to keep track of which +-- blocks you have in order to tell decode. By convention, the blocks are +-- numbered 0..(n - 1) and blocks numbered < k are the primary blocks. + +module Codec.FEC ( + FECParams + , fec + , encode + , decode + + -- * Utility functions + , secureDivide + , secureCombine + , enFEC + , deFEC + ) where + +import qualified Data.ByteString as B +import qualified Data.ByteString.Unsafe as BU +import qualified Data.ByteString.Internal as BI +import Data.Word (Word8) +import Data.Bits (xor) +import Data.List (sortBy, partition, (\\), nub) +import Foreign.Ptr +import Foreign.Storable (sizeOf, poke) +import Foreign.ForeignPtr +import Foreign.C.Types +import Foreign.Marshal.Alloc +import Foreign.Marshal.Array (withArray, advancePtr) +import System.IO (withFile, IOMode(..)) +import System.IO.Unsafe (unsafePerformIO) + +data CFEC +data FECParams = FECParams (ForeignPtr CFEC) Int Int + +instance Show FECParams where + show (FECParams _ k n) = "FEC (" ++ show k ++ ", " ++ show n ++ ")" + +foreign import ccall unsafe "fec_new" _new :: CUInt -- ^ k + -> CUInt -- ^ n + -> IO (Ptr CFEC) +foreign import ccall unsafe "&fec_free" _free :: FunPtr (Ptr CFEC -> IO ()) +foreign import ccall unsafe "fec_encode" _encode :: Ptr CFEC + -> Ptr (Ptr Word8) -- ^ primary blocks + -> Ptr (Ptr Word8) -- ^ (output) secondary blocks + -> Ptr CUInt -- ^ array of secondary block ids + -> CSize -- ^ length of previous + -> CSize -- ^ block length + -> IO () +foreign import ccall unsafe "fec_decode" _decode :: Ptr CFEC + -> Ptr (Ptr Word8) -- ^ input blocks + -> Ptr (Ptr Word8) -- ^ output blocks + -> Ptr CUInt -- ^ array of input indexes + -> CSize -- ^ block length + -> IO () + +-- | Return true if the given @k@ and @n@ values are valid +isValidConfig :: Int -> Int -> Bool +isValidConfig k n + | k >= n = False + | k < 1 = False + | n < 1 = False + | n > 255 = False + | otherwise = True + +-- | Return a FEC with the given parameters. +fec :: Int -- ^ the number of primary blocks + -> Int -- ^ the total number blocks, must be < 256 + -> FECParams +fec k n = + if not (isValidConfig k n) + then error $ "Invalid FEC parameters: " ++ show k ++ " " ++ show n + else unsafePerformIO (do + cfec <- _new (fromIntegral k) (fromIntegral n) + params <- newForeignPtr _free cfec + return $ FECParams params k n) + +-- | Create a C array of unsigned from an input array +uintCArray :: [Int] -> ((Ptr CUInt) -> IO a) -> IO a +uintCArray xs f = withArray (map fromIntegral xs) f + +-- | Convert a list of ByteStrings to an array of pointers to their data +byteStringsToArray :: [B.ByteString] -> ((Ptr (Ptr Word8)) -> IO a) -> IO a +byteStringsToArray inputs f = do + let l = length inputs + allocaBytes (l * sizeOf (undefined :: Ptr Word8)) (\array -> do + let inner _ [] = f array + inner array' (bs : bss) = BU.unsafeUseAsCString bs (\ptr -> do + poke array' $ castPtr ptr + inner (advancePtr array' 1) bss) + inner array inputs) + +-- | Return True iff all the given ByteStrings are the same length +allByteStringsSameLength :: [B.ByteString] -> Bool +allByteStringsSameLength [] = True +allByteStringsSameLength (bs : bss) = all ((==) (B.length bs)) $ map B.length bss + +-- | Run the given function with a pointer to an array of @n@ pointers to +-- buffers of size @size@. Return these buffers as a list of ByteStrings +createByteStringArray :: Int -- ^ the number of buffers requested + -> Int -- ^ the size of each buffer + -> ((Ptr (Ptr Word8)) -> IO ()) + -> IO [B.ByteString] +createByteStringArray n size f = do + allocaBytes (n * sizeOf (undefined :: Ptr Word8)) (\array -> do + allocaBytes (n * size) (\ptr -> do + mapM_ (\i -> poke (advancePtr array i) (advancePtr ptr (size * i))) [0..(n - 1)] + f array + mapM (\i -> B.packCStringLen (castPtr $ advancePtr ptr (i * size), size)) [0..(n - 1)])) + +-- | Generate the secondary blocks from a list of the primary blocks. The +-- primary blocks must be in order and all of the same size. There must be +-- @k@ primary blocks. +encode :: FECParams + -> [B.ByteString] -- ^ a list of @k@ input blocks + -> [B.ByteString] -- ^ (n - k) output blocks +encode (FECParams params k n) inblocks + | length inblocks /= k = error "Wrong number of blocks to FEC encode" + | not (allByteStringsSameLength inblocks) = error "Not all inputs to FEC encode are the same length" + | otherwise = unsafePerformIO (do + let sz = B.length $ head inblocks + withForeignPtr params (\cfec -> do + byteStringsToArray inblocks (\src -> do + createByteStringArray (n - k) sz (\fecs -> do + uintCArray [k..(n - 1)] (\block_nums -> do + _encode cfec src fecs block_nums (fromIntegral (n - k)) $ fromIntegral sz))))) + +-- | A sort function for tagged assoc lists +sortTagged :: [(Int, a)] -> [(Int, a)] +sortTagged = sortBy (\a b -> compare (fst a) (fst b)) + +-- | Reorder the given list so that elements with tag numbers < the first +-- argument have an index equal to their tag number (if possible) +reorderPrimaryBlocks :: Int -> [(Int, a)] -> [(Int, a)] +reorderPrimaryBlocks n blocks = inner (sortTagged pBlocks) sBlocks [] where + (pBlocks, sBlocks) = partition (\(tag, _) -> tag < n) blocks + inner [] sBlocks acc = acc ++ sBlocks + inner pBlocks [] acc = acc ++ pBlocks + inner pBlocks@((tag, a) : ps) sBlocks@(s : ss) acc = + if length acc == tag + then inner ps sBlocks (acc ++ [(tag, a)]) + else inner pBlocks ss (acc ++ [s]) + +-- | Recover the primary blocks from a list of @k@ blocks. Each block must be +-- tagged with its number (see the module comments about block numbering) +decode :: FECParams + -> [(Int, B.ByteString)] -- ^ a list of @k@ blocks and their index + -> [B.ByteString] -- ^ a list the @k@ primary blocks +decode (FECParams params k n) inblocks + | length (nub $ map fst inblocks) /= length (inblocks) = error "Duplicate input blocks in FEC decode" + | any (\f -> f < 0 || f >= n) $ map fst inblocks = error "Invalid block numbers in FEC decode" + | length inblocks /= k = error "Wrong number of blocks to FEC decode" + | not (allByteStringsSameLength $ map snd inblocks) = error "Not all inputs to FEC decode are same length" + | otherwise = unsafePerformIO (do + let sz = B.length $ snd $ head inblocks + inblocks' = reorderPrimaryBlocks k inblocks + presentBlocks = map fst inblocks' + withForeignPtr params (\cfec -> do + byteStringsToArray (map snd inblocks') (\src -> do + b <- createByteStringArray (n - k) sz (\out -> do + uintCArray presentBlocks (\block_nums -> do + _decode cfec src out block_nums $ fromIntegral sz)) + let blocks = [0..(n - 1)] \\ presentBlocks + tagged = zip blocks b + allBlocks = sortTagged $ tagged ++ inblocks' + return $ take k $ map snd allBlocks))) + +-- | Break a ByteString into @n@ parts, equal in length to the original, such +-- that all @n@ are required to reconstruct the original, but having less +-- than @n@ parts reveals no information about the orginal. +-- +-- This code works in IO monad because it needs a source of random bytes, +-- which it gets from /dev/urandom. If this file doesn't exist an +-- exception results +-- +-- Not terribly fast - probably best to do it with short inputs (e.g. an +-- encryption key) +secureDivide :: Int -- ^ the number of parts requested + -> B.ByteString -- ^ the data to be split + -> IO [B.ByteString] +secureDivide n input + | n < 0 = error "secureDivide called with negative number of parts" + | otherwise = withFile "/dev/urandom" ReadMode (\handle -> do + let inner 1 bs = return [bs] + inner n bs = do + mask <- B.hGet handle (B.length bs) + let masked = B.pack $ B.zipWith xor bs mask + rest <- inner (n - 1) masked + return (mask : rest) + inner n input) + +-- | Reverse the operation of secureDivide. The order of the inputs doesn't +-- matter, but they must all be the same length +secureCombine :: [B.ByteString] -> B.ByteString +secureCombine [] = error "Passed empty list of inputs to secureCombine" +secureCombine [a] = a +secureCombine [a, b] = B.pack $ B.zipWith xor a b +secureCombine (a : rest) = B.pack $ B.zipWith xor a $ secureCombine rest + +-- | A utility function which takes an arbitary input and FEC encodes it into a +-- number of blocks. The order the resulting blocks doesn't matter so long +-- as you have enough to present to @deFEC@. +enFEC :: Int -- ^ the number of blocks required to reconstruct + -> Int -- ^ the total number of blocks + -> B.ByteString -- ^ the data to divide + -> [B.ByteString] -- ^ the resulting blocks +enFEC k n input = taggedPrimaryBlocks ++ taggedSecondaryBlocks where + taggedPrimaryBlocks = map (uncurry B.cons) $ zip [0..] primaryBlocks + taggedSecondaryBlocks = map (uncurry B.cons) $ zip [(fromIntegral k)..] secondaryBlocks + remainder = B.length input `mod` k + paddingLength = if remainder >= 1 then (k - remainder) else k + paddingBytes = (B.replicate (paddingLength - 1) 0) `B.append` (B.singleton $ fromIntegral paddingLength) + divide a bs + | B.null bs = [] + | otherwise = (B.take a bs) : (divide a $ B.drop a bs) + input' = input `B.append` paddingBytes + blockSize = B.length input' `div` k + primaryBlocks = divide blockSize input' + secondaryBlocks = encode params primaryBlocks + params = fec k n + +-- | Reverses the operation of @enFEC@. +deFEC :: Int -- ^ the number of blocks required (matches call to @enFEC@) + -> Int -- ^ the total number of blocks (matches call to @enFEC@) + -> [B.ByteString] -- ^ a list of k, or more, blocks from @enFEC@ + -> B.ByteString +deFEC k n inputs + | length inputs < k = error "Too few inputs to deFEC" + | otherwise = B.take (B.length fecOutput - paddingLength) fecOutput where + paddingLength = fromIntegral $ B.last fecOutput + inputs' = take k inputs + taggedInputs = map (\bs -> (fromIntegral $ B.head bs, B.tail bs)) inputs' + fecOutput = B.concat $ decode params taggedInputs + params = fec k n diff --git a/zfec/haskell/FECTest.hs b/zfec/haskell/FECTest.hs new file mode 100644 index 0000000..0f5a353 --- /dev/null +++ b/zfec/haskell/FECTest.hs @@ -0,0 +1,57 @@ +module Main where + +import qualified Data.ByteString as B +import qualified Codec.FEC as FEC +import System.IO (withFile, IOMode(..)) +import System.Random +import Data.List (sortBy) + +import Test.QuickCheck + +-- | Return true if the given @k@ and @n@ values are valid +isValidConfig :: Int -> Int -> Bool +isValidConfig k n + | k >= n = False + | k < 1 = False + | n < 1 = False + | otherwise = True + +randomTake :: Int -> Int -> [a] -> [a] +randomTake seed n values = map snd $ take n sortedValues where + sortedValues = sortBy (\a b -> compare (fst a) (fst b)) taggedValues + taggedValues = zip rnds values + rnds :: [Float] + rnds = randoms gen + gen = mkStdGen seed + +testFEC k n len seed = FEC.decode fec someTaggedBlocks == origBlocks where + origBlocks = map (\i -> B.replicate len $ fromIntegral i) [0..(k - 1)] + fec = FEC.fec k n + secondaryBlocks = FEC.encode fec origBlocks + taggedBlocks = zip [0..] (origBlocks ++ secondaryBlocks) + someTaggedBlocks = randomTake seed k taggedBlocks + +prop_FEC :: Int -> Int -> Int -> Int -> Property +prop_FEC k n len seed = + isValidConfig k n && n < 256 && len < 1024 ==> testFEC k n len seed + +checkDivide :: Int -> IO () +checkDivide n = do + let input = B.replicate 1024 65 + parts <- FEC.secureDivide n input + if FEC.secureCombine parts == input + then return () + else fail "checkDivide failed" + +checkEnFEC :: Int -> IO () +checkEnFEC len = do + testdata <- withFile "/dev/urandom" ReadMode (\handle -> B.hGet handle len) + let [a, b, c, d, e] = FEC.enFEC 3 5 testdata + if FEC.deFEC 3 5 [b, e, d] == testdata + then return () + else fail "deFEC failure" + +main = do + mapM_ (check (defaultConfig { configMaxTest = 1000, configMaxFail = 10000 })) [prop_FEC] + mapM_ checkDivide [1, 2, 3, 4, 10] + mapM_ checkEnFEC [1, 2, 3, 4, 5, 1024 * 1024]