hashed-storage-0.5.10/LICENSE

Copyright Petr Rockai 2009

All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

hashed-storage-0.5.10/testdata.zip

(binary zip archive used by the test suite: a small sample _darcs repository
-- pristine.hashed, patches, inventories, prefs -- plus a few working files
such as foo_a, foo_dir/ and "foo space/"; raw binary contents elided)

hashed-storage-0.5.10/hashed-storage.cabal

name:          hashed-storage
version:       0.5.10
synopsis:      Hashed file storage support code.
description:   Support code for reading and manipulating hashed file storage
               (where each file and directory is associated with a
               cryptographic hash, for corruption-resistant storage and fast
               comparisons).
               .
               The supported storage formats include darcs hashed pristine, a
               plain filesystem tree and an indexed plain tree (where the
               index maintains hashes of the plain files and directories).
license:       BSD3
license-file:  LICENSE
copyright:     2009 Petr Rockai
author:        Petr Rockai
maintainer:    Petr Rockai
category:      System
build-type:    Custom
cabal-version: >= 1.6
extra-source-files: Bundled/sha2.h, NEWS, testdata.zip

flag test
  default: False

flag diff
  default: False

flag hpc
  default: False

library
  if impl(ghc >= 6.8)
    ghc-options: -fwarn-tabs
  ghc-options: -Wall -O2
  ghc-prof-options: -prof -auto-all -O2
  if flag(hpc)
    ghc-prof-options: -fhpc
  exposed-modules: Storage.Hashed
                   Storage.Hashed.AnchoredPath
                   Storage.Hashed.Index
                   Storage.Hashed.Monad
                   Storage.Hashed.Tree
                   Storage.Hashed.Hash
                   Storage.Hashed.Packed
                   Storage.Hashed.Plain
                   Storage.Hashed.Darcs
  if flag(diff)
    exposed-modules: Storage.Hashed.Diff
    build-depends: lcs
  other-modules: Bundled.Posix
                 Bundled.SHA256
                 Storage.Hashed.Utils
  build-depends: base >= 4 && < 5, containers, mtl, directory, filepath,
                 bytestring, extensible-exceptions, dataenc, binary, zlib,
                 mmap >= 0.5 && < 0.6
  c-sources: Bundled/sha2.c
  extensions: PatternSignatures, NoMonomorphismRestriction

executable hashed-storage-test
  if impl(ghc >= 6.8)
    ghc-options: -fwarn-tabs
  ghc-options: -Wall -O2
  ghc-prof-options: -prof -auto-all -O2
  if flag(hpc)
    ghc-prof-options: -fhpc
  main-is: test.hs
  other-modules: Bundled.Posix
                 Storage.Hashed.Test
  c-sources: Bundled/sha2.c
  if flag(test)
    build-depends: test-framework, test-framework-hunit,
                   test-framework-quickcheck2, QuickCheck >= 2.3, HUnit,
                   process >= 1.0.1, zip-archive
  else
    buildable: False

source-repository head
  type:     darcs
  location: http://repos.mornfall.net/hashed-storage/

hashed-storage-0.5.10/NEWS

hashed-storage 0.4.0
====================

- Index now uses a 32-byte representation for sha256 hashes, instead of a
  64-byte ascii-hex (base16) encoding.

- New module Storage.Hashed.Hash that exports a new incarnation of Hash data
  type and a number of utilities.

- The Tree type is overloaded over a monad type and is not bound to IO monad
  anymore.

- Index has been streamlined, the API has improved safety and the code has
  been extensively optimised.

- Support for creating new hashed trees with the format used in darcs 2.0.2
  and newer has been dropped.

hashed-storage-0.5.10/Setup.hs

import Distribution.Simple ( defaultMainWithHooks, UserHooks(..), simpleUserHooks )
import Distribution.Simple.LocalBuildInfo( LocalBuildInfo(..)
                                         )
import Distribution.PackageDescription
    ( PackageDescription(executables), Executable(buildInfo, exeName)
    , BuildInfo(customFieldsBI), emptyBuildInfo
    , updatePackageDescription, cppOptions, ccOptions )
import Distribution.Simple.Setup
    ( buildVerbosity, copyDest, copyVerbosity, fromFlag
    , haddockVerbosity, installVerbosity, sDistVerbosity )
import Distribution.Verbosity ( Verbosity )
import System.Exit( exitWith )
import System.Cmd( system )
import System.FilePath( (</>) )

-- for endianness check
import Foreign.Marshal.Utils ( with )
import Data.Word ( Word8, Word32 )
import Foreign.Storable ( peek )
import Foreign.Ptr ( castPtr )

hst = "hashed-storage-test"

main :: IO ()
main = defaultMainWithHooks simpleUserHooks {
         runTests = \ _ _ _ lbi -> do
           exitWith =<< system (buildDir lbi </> hst </> hst),
         buildHook = \ pkg lbi hooks flags ->
           let verb = fromFlag $ buildVerbosity flags
           in commonBuildHook buildHook pkg lbi hooks verb >>= ($ flags),
         haddockHook = \ pkg lbi hooks flags ->
           let verb = fromFlag $ haddockVerbosity flags
           in commonBuildHook haddockHook pkg lbi hooks verb >>= ($ flags) }

commonBuildHook :: (UserHooks -> PackageDescription -> LocalBuildInfo -> t -> a)
                -> PackageDescription -> LocalBuildInfo -> t -> Verbosity -> IO a
commonBuildHook runHook pkg lbi hooks verbosity = do
  -- Add custom -DFOO[=BAR] flags to the cpp (for .hs) and cc (for .c)
  -- invocations, doing a dance to make the base hook aware of them.
  littleEndian <- testEndianness
  let args = if littleEndian then [ "-DLITTLEENDIAN" ] else [ "-DBIGENDIAN" ]
      bi = emptyBuildInfo { cppOptions = args, ccOptions = args }
      hbi = (Just bi, [(exeName exe, bi) | exe <- executables pkg])
      pkg' = updatePackageDescription hbi pkg
      lbi' = lbi { localPkgDescr = pkg' }
  return $ runHook simpleUserHooks pkg' lbi' hooks
  where testEndianness :: IO Bool
        testEndianness = with (1 :: Word32) $ \p -> do
          o <- peek $ castPtr p
          return $ o == (1 :: Word8)

hashed-storage-0.5.10/test.hs

{-# LANGUAGE ScopedTypeVariables #-}
import Storage.Hashed.Test( tests )
import Prelude hiding( catch )
import Test.Framework( defaultMain )
import System.Directory( createDirectory, removeDirectoryRecursive
                       , setCurrentDirectory )
import Codec.Archive.Zip( extractFilesFromArchive, toArchive )
import qualified Data.ByteString.Lazy as BL
import Control.Exception( catch, IOException )

main :: IO ()
main = do zip <- toArchive `fmap` BL.readFile "testdata.zip"
          removeDirectoryRecursive "_test_playground"
              `catch` \(_ :: IOException) -> return ()
          createDirectory "_test_playground"
          setCurrentDirectory "_test_playground"
          extractFilesFromArchive [] zip
          defaultMain tests

hashed-storage-0.5.10/Storage/Hashed.hs

module Storage.Hashed
    ( -- * Obtaining Trees.
      --
      -- | Please note that Trees obtained this way will contain Stub
      -- items. These need to be executed (they are IO actions) in order to be
      -- accessed. Use 'expand' to do this. However, many operations are
      -- perfectly fine to be used on a stubbed Tree (and it is often more
      -- efficient to do everything that can be done before expanding a Tree).
      readPlainTree, readDarcsHashed
      -- * Blob access.
    , readBlob
      -- * Writing trees.
    , writePlainTree, writeDarcsHashed
      -- * Unsafe functions for the curious explorer.
      --
      -- | These are more useful for playing within ghci than for real, serious
      -- programs.
They generally trade safety for conciseness. Please use -- responsibly. Don't kill innocent kittens. , floatPath, printPath ) where import Storage.Hashed.AnchoredPath import qualified Data.ByteString.Char8 as BS import qualified Data.ByteString.Lazy.Char8 as BL import Storage.Hashed.Tree ( Tree, TreeItem(..), listImmediate, find, readBlob ) -- For re-exports. import Storage.Hashed.Darcs( readDarcsHashed, writeDarcsHashed ) import Storage.Hashed.Plain( readPlainTree, writePlainTree ) ------------------------ -- For explorers -- -- | Take a relative FilePath within a Tree and print the contents of the -- object there. Useful for exploration, less so for serious programming. printPath :: Tree IO -> FilePath -> IO () printPath t p = print' $ find t (floatPath p) where print' Nothing = putStrLn $ "ERROR: No object at " ++ p print' (Just (File b)) = do putStrLn $ "== Contents of file " ++ p ++ ":" BL.unpack `fmap` readBlob b >>= putStr print' (Just (SubTree t')) = do putStrLn $ "== Listing Tree " ++ p ++ " (immediates only):" putStr $ unlines $ map BS.unpack $ listNames t' print' (Just (Stub _ _)) = putStrLn $ "== (not listing stub at " ++ p ++ ")" listNames t' = [ n | (Name n, _) <- listImmediate t' ] hashed-storage-0.5.10/Storage/Hashed/0000755000000000000000000000000012025701732015460 5ustar0000000000000000hashed-storage-0.5.10/Storage/Hashed/Test.hs0000644000000000000000000007036712025701732016750 0ustar0000000000000000{-# LANGUAGE ScopedTypeVariables, FlexibleInstances #-} module Storage.Hashed.Test( tests ) where import Prelude hiding ( filter, readFile, writeFile ) import qualified Prelude import qualified Data.ByteString.Lazy.Char8 as BL import qualified Data.ByteString.Char8 as BS import Control.Exception( finally ) import System.Process import System.Directory( doesFileExist, removeFile, doesDirectoryExist ) import System.FilePath( () ) import Control.Monad( forM_, when ) import Control.Monad.Identity import Control.Monad.Trans( lift ) import Control.Applicative( (<$>) ) import Data.Maybe import Data.Word import Data.Int import Data.Bits import Data.List( (\\), sort, intercalate, nub, intersperse ) import Storage.Hashed import Storage.Hashed.AnchoredPath import Storage.Hashed.Tree import Storage.Hashed.Index import Storage.Hashed.Utils import Storage.Hashed.Darcs import Storage.Hashed.Packed import Storage.Hashed.Hash import Storage.Hashed.Monad hiding ( tree ) import System.IO.Unsafe( unsafePerformIO ) import System.Mem( performGC ) import qualified Data.Set as S import qualified Data.Map as M import qualified Bundled.Posix as Posix ( getFileStatus, getSymbolicLinkStatus, fileSize, fileExists ) import Test.HUnit import Test.Framework( testGroup ) import Test.QuickCheck import Test.Framework.Providers.HUnit import Test.Framework.Providers.QuickCheck2 ------------------------ -- Test Data -- blobs = [ (floatPath "foo_a", BL.pack "a\n") , (floatPath "foo_dir/foo_a", BL.pack "a\n") , (floatPath "foo_dir/foo_b", BL.pack "b\n") , (floatPath "foo_dir/foo_subdir/foo_a", BL.pack "a\n") , (floatPath "foo space/foo\nnewline", BL.pack "newline\n") , (floatPath "foo space/foo\\backslash", BL.pack "backslash\n") , (floatPath "foo space/foo_a", BL.pack "a\n") ] files = map fst blobs dirs = [ floatPath "foo_dir" , floatPath "foo_dir/foo_subdir" , floatPath "foo space" ] emptyStub = Stub (return emptyTree) NoHash testTree = makeTree [ (makeName "foo", emptyStub) , (makeName "subtree", SubTree sub) , (makeName "substub", Stub getsub NoHash) ] where sub = makeTree [ (makeName "stub", emptyStub) , 
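{- An illustrative aside: a Stub pairs a monadic action with a hash, so a tree
   like 'testTree' above stays cheap to build and only materialises subtrees
   when 'expand' (or 'expandPath') forces it. For instance ('lazyDir' is a
   hypothetical helper, assuming a directory "big" exists on disk):

     lazyDir :: Tree IO
     lazyDir = makeTree [ (makeName "big", Stub (readPlainTree "big") NoHash) ]
-}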
(makeName "substub", Stub getsub2 NoHash) , (makeName "x", SubTree emptyTree) ] getsub = return sub getsub2 = return $ makeTree [ (makeName "file", File emptyBlob) , (makeName "file2", File $ Blob (return $ BL.pack "foo") NoHash) ] equals_testdata t = sequence_ [ do isJust (findFile t p) @? show p ++ " in tree" ours <- readBlob (fromJust $ findFile t p) ours @?= stored | (p, stored) <- blobs ] >> sequence_ [ isJust (Prelude.lookup p blobs) @? show p ++ " extra in tree" | (p, File _) <- list t ] --------------------------- -- Test list -- tests = [ testGroup "Bundled.Posix" posix , testGroup "Storage.Hashed.Utils" utils , testGroup "Storage.Hashed.Hash" hash , testGroup "Storage.Hashed.Tree" tree , testGroup "Storage.Hashed.Index" index , testGroup "Storage.Hashed.Packed" packed , testGroup "Storage.Hashed.Monad" monad , testGroup "Storage.Hashed" hashed ] -------------------------- -- Tests -- hashed = [ testCase "plain has all files" have_files , testCase "pristine has all files" have_pristine_files , testCase "pristine has no extras" pristine_no_extra , testCase "pristine file contents match" pristine_contents , testCase "plain file contents match" plain_contents , testCase "writePlainTree works" write_plain ] where check_file t f = assertBool ("path " ++ show f ++ " is missing in tree " ++ show t) (isJust $ find t f) check_files = forM_ files . check_file pristine_no_extra = do t <- readDarcsPristine "." >>= expand forM_ (list t) $ \(path,_) -> assertBool (show path ++ " is extraneous in tree") (path `elem` (dirs ++ files)) have_files = readPlainTree "." >>= expand >>= check_files have_pristine_files = readDarcsPristine "." >>= expand >>= check_files pristine_contents = do t <- readDarcsPristine "." >>= expand equals_testdata t plain_contents = do t <- expand =<< filter nondarcs `fmap` readPlainTree "." equals_testdata t write_plain = do orig <- readDarcsPristine "." >>= expand writePlainTree orig "_darcs/plain" t <- expand =<< readPlainTree "_darcs/plain" equals_testdata t index = [ testCase "index versioning" check_index_versions , testCase "index listing" check_index , testCase "index content" check_index_content ] where pristine = readDarcsPristine "." >>= expand build_index = do x <- pristine exist <- doesFileExist "_darcs/index" performGC -- required in win32 to trigger file close when exist $ removeFile "_darcs/index" idx <- updateIndex =<< updateIndexFrom "_darcs/index" darcsTreeHash x return (x, idx) check_index = do (pris, idx) <- build_index (sort $ map fst $ list idx) @?= (sort $ map fst $ list pris) check_blob_pair p x y = do a <- readBlob x b <- readBlob y assertEqual ("content match on " ++ show p) a b check_index_content = do (_, idx) <- build_index plain <- readPlainTree "." x <- sequence $ zipCommonFiles check_blob_pair plain idx assertBool "files match" (length x > 0) check_index_versions = do performGC -- required in win32 to trigger file close Prelude.writeFile "_darcs/index" "nonsense index... do not crash!" 
valid <- indexFormatValid "_darcs/index" assertBool "index format invalid" $ not valid tree = [ testCase "modifyTree" check_modify , testCase "complex modifyTree" check_modify_complex , testCase "modifyTree removal" check_modify_remove , testCase "expand" check_expand , testCase "expandPath" check_expand_path , testCase "expandPath of sub" check_expand_path_sub , testCase "diffTrees" check_diffTrees , testCase "diffTrees identical" check_diffTrees_ident , testProperty "expandPath" prop_expandPath , testProperty "shapeEq" prop_shape_eq , testProperty "expandedShapeEq" prop_expanded_shape_eq , testProperty "expand is identity" prop_expand_id , testProperty "filter True is identity" prop_filter_id , testProperty "filter False is empty" prop_filter_empty , testProperty "restrict both ways keeps shape" prop_restrict_shape_commutative , testProperty "restrict is a subtree of both" prop_restrict_subtree , testProperty "overlay keeps shape" prop_overlay_shape , testProperty "overlay is superset of over" prop_overlay_super ] where blob x = File $ Blob (return (BL.pack x)) (sha256 $ BL.pack x) name = Name . BS.pack check_modify = let t = makeTree [(name "foo", blob "bar")] modify = modifyTree t (floatPath "foo") (Just $ blob "bla") in do x <- readBlob $ fromJust $ findFile t (floatPath "foo") y <- readBlob $ fromJust $ findFile modify (floatPath "foo") assertEqual "old version" x (BL.pack "bar") assertEqual "new version" y (BL.pack "bla") assertBool "list has foo" $ isJust (Prelude.lookup (floatPath "foo") $ list modify) length (list modify) @?= 1 check_modify_complex = let t = makeTree [ (name "foo", blob "bar") , (name "bar", SubTree t1) ] t1 = makeTree [ (name "foo", blob "bar") ] modify = modifyTree t (floatPath "bar/foo") (Just $ blob "bla") in do foo <- readBlob $ fromJust $ findFile t (floatPath "foo") foo' <- readBlob $ fromJust $ findFile modify (floatPath "foo") bar_foo <- readBlob $ fromJust $ findFile t (floatPath "bar/foo") bar_foo' <- readBlob $ fromJust $ findFile modify (floatPath "bar/foo") assertEqual "old foo" foo (BL.pack "bar") assertEqual "old bar/foo" bar_foo (BL.pack "bar") assertEqual "new foo" foo' (BL.pack "bar") assertEqual "new bar/foo" bar_foo' (BL.pack "bla") assertBool "list has bar/foo" $ isJust (Prelude.lookup (floatPath "bar/foo") $ list modify) assertBool "list has foo" $ isJust (Prelude.lookup (floatPath "foo") $ list modify) length (list modify) @?= length (list t) check_modify_remove = let t1 = makeTree [(name "foo", blob "bar")] t2 :: Tree Identity = makeTree [ (name "foo", blob "bar") , (name "bar", SubTree t1) ] modify1 = modifyTree t1 (floatPath "foo") Nothing modify2 = modifyTree t2 (floatPath "bar") Nothing file = findFile modify1 (floatPath "foo") subtree = findTree modify2 (floatPath "bar") in do assertBool "file is gone" (isNothing file) assertBool "subtree is gone" (isNothing subtree) no_stubs t = null [ () | (_, Stub _ _) <- list t ] path = floatPath "substub/substub/file" badpath = floatPath "substub/substub/foo" check_expand = do x <- expand testTree assertBool "no stubs in testTree" $ not (no_stubs testTree) assertBool "stubs in expanded tree" $ no_stubs x assertBool "path reachable" $ path `elem` (map fst $ list x) assertBool "badpath not reachable" $ badpath `notElem` (map fst $ list x) check_expand_path = do test_exp <- expand testTree t <- expandPath testTree path t' <- expandPath test_exp path t'' <- expandPath testTree $ floatPath "substub/x" assertBool "path not reachable in testTree" $ path `notElem` (map fst $ list testTree) assertBool 
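{- An illustrative aside (not one of the checks): 'modifyTree' is the pure
   update primitive the checks above exercise -- 'Just item' inserts or
   replaces, 'Nothing' removes; 'without' is a hypothetical helper name.

     without :: Tree Identity -> Tree Identity
     without t = modifyTree t (floatPath "foo") Nothing
-}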
"path reachable in t" $ path `elem` (map fst $ list t) assertBool "path reachable in t'" $ path `elem` (map fst $ list t') assertBool "path reachable in t (with findFile)" $ isJust $ findFile t path assertBool "path reachable in t' (with findFile)" $ isJust $ findFile t' path assertBool "path not reachable in t''" $ path `notElem` (map fst $ list t'') assertBool "badpath not reachable in t" $ badpath `notElem` (map fst $ list t) assertBool "badpath not reachable in t'" $ badpath `notElem` (map fst $ list t') check_expand_path_sub = do t <- expandPath testTree $ floatPath "substub" t' <- expandPath testTree $ floatPath "substub/stub" t'' <- expandPath testTree $ floatPath "subtree/stub" assertBool "leaf is not a Stub" $ isNothing (findTree testTree $ floatPath "substub") assertBool "leaf is not a Stub" $ isJust (findTree t $ floatPath "substub") assertBool "leaf is not a Stub (2)" $ isJust (findTree t' $ floatPath "substub/stub") assertBool "leaf is not a Stub (3)" $ isJust (findTree t'' $ floatPath "subtree/stub") check_diffTrees = flip finally (Prelude.writeFile "foo_dir/foo_a" "a\n") $ do Prelude.writeFile "foo_dir/foo_a" "b\n" working_plain <- filter nondarcs `fmap` readPlainTree "." working <- updateIndex =<< updateIndexFrom "_darcs/index" darcsTreeHash working_plain pristine <- readDarcsPristine "." (working', pristine') <- diffTrees working pristine let foo_work = findFile working' (floatPath "foo_dir/foo_a") foo_pris = findFile pristine' (floatPath "foo_dir/foo_a") working' `shapeEq` pristine' @? show working' ++ " `shapeEq` " ++ show pristine' assertBool "foo_dir/foo_a is in working'" $ isJust foo_work assertBool "foo_dir/foo_a is in pristine'" $ isJust foo_pris foo_work_c <- readBlob (fromJust foo_work) foo_pris_c <- readBlob (fromJust foo_pris) BL.unpack foo_work_c @?= "b\n" BL.unpack foo_pris_c @?= "a\n" assertEqual "working' tree is minimal" 2 (length $ list working') assertEqual "pristine' tree is minimal" 2 (length $ list pristine') check_diffTrees_ident = do pristine <- readDarcsPristine "." 
(t1, t2) <- diffTrees pristine pristine assertBool "t1 is empty" $ null (list t1) assertBool "t2 is empty" $ null (list t2) prop_shape_eq x = no_stubs x ==> x `shapeEq` x where types = x :: Tree Identity prop_expanded_shape_eq x = runIdentity $ expandedShapeEq x x where types = x :: Tree Identity prop_expand_id x = no_stubs x ==> runIdentity (expand x) `shapeEq` x where types = x :: Tree Identity prop_filter_id x = runIdentity $ expandedShapeEq x $ filter (\_ _ -> True) x where types = x :: Tree Identity prop_filter_empty x = runIdentity $ expandedShapeEq emptyTree $ filter (\_ _ -> False) x where types = x :: Tree Identity prop_restrict_shape_commutative (t1, t2) = no_stubs t1 && no_stubs t2 && not (restrict t1 t2 `shapeEq` emptyTree) ==> restrict t1 t2 `shapeEq` restrict t2 t1 where types = (t1 :: Tree Identity, t2 :: Tree Identity) prop_restrict_subtree (t1, t2) = no_stubs t1 && not (restrict t1 t2 `shapeEq` emptyTree) ==> let restricted = S.fromList (map fst $ list $ restrict t1 t2) orig1 = S.fromList (map fst $ list t1) orig2 = S.fromList (map fst $ list t2) in and [restricted `S.isSubsetOf` orig1, restricted `S.isSubsetOf` orig2] where types = (t1 :: Tree Identity, t2 :: Tree Identity) prop_overlay_shape (t1 :: Tree Identity, t2) = (Just LT == runIdentity (t2 `cmpExpandedShape` t1)) ==> runIdentity $ (t1 `overlay` t2) `expandedShapeEq` t1 prop_overlay_super (t1 :: Tree Identity, t2) = (Just LT == runIdentity (t2 `cmpExpandedShape` t1)) ==> Just EQ == (runIdentity $ restrict t2 (t1 `overlay` t2) `cmpTree` t2) prop_expandPath (TreeWithPath t p) = notStub $ find (runIdentity $ expandPath t p) p where notStub (Just (Stub _ _)) = False notStub Nothing = error "Did not exist." notStub _ = True packed = [ testCase "loose pristine tree" check_loose , testCase "load" check_load , testCase "live" check_live , testCase "compact" check_compact ] where root_hash = treeHash <$> get_pristine get_pristine = darcsUpdateDirHashes <$> (expand =<< readDarcsPristine ".") check_loose = do x <- readDarcsPristine "." os <- create "_darcs/loose" Loose (os', root) <- writePackedDarcsPristine x os y <- expand =<< readPackedDarcsPristine os' root equals_testdata y check_load = do os <- load "_darcs/loose" format (hatchery os) @?= Loose root <- root_hash y <- expand =<< readPackedDarcsPristine os root equals_testdata y check_live = do os <- load "_darcs/loose" x <- get_pristine root <- root_hash alive <- live (os { roots = [ root ] , references = darcsPristineRefs }) [hatchery os] sequence_ [ assertBool (show hash ++ " is alive") $ hash `S.member` M.keysSet alive | hash <- map (itemHash . 
snd) $ list x ] length (M.toList alive) @?= 1 + length (nub $ map snd blobs) + length dirs check_compact = do os <- load "_darcs/loose" x <- darcsUpdateDirHashes `fmap` (expand =<< readDarcsPristine ".") (os', root) <- storePackedDarcsPristine x os hatch_root_old <- blockLookup (hatchery os') root assertBool "bits in the old hatchery" $ isJust hatch_root_old os'' <- compact os' length (mature os'') @?= 1 hatch_root <- blockLookup (hatchery os'') root mature_root <- blockLookup (head $ mature os'') root assertBool "bits no longer in hatchery" $ isNothing hatch_root assertBool "bits now in the mature space" $ isJust mature_root mature_root_con <- readSegment (fromJust mature_root) Just mature_root_con @?= darcsFormatDir x y <- expand =<< readPackedDarcsPristine os'' root equals_testdata y utils = [ testProperty "xlate32" prop_xlate32 , testProperty "xlate64" prop_xlate64 , testProperty "align bounded" prop_align_bounded , testProperty "align aligned" prop_align_aligned , testProperty "reachable is a subset" prop_reach_subset , testProperty "roots are reachable" prop_reach_roots , testProperty "nonexistent roots are not reachable" prop_reach_nonroots , testCase "an example for reachable" check_reachable , testCase "fixFrom" check_fixFrom , testCase "mmap empty file" check_mmapEmpty ] where prop_xlate32 x = (xlate32 . xlate32) x == x where types = x :: Word32 prop_xlate64 x = (xlate64 . xlate64) x == x where types = x :: Word64 prop_align_bounded (bound, x) = bound > 0 && bound < 1024 && x >= 0 ==> align bound x >= x && align bound x < x + bound where types = (bound, x) :: (Int, Int) prop_align_aligned (bound, x) = bound > 0 && bound < 1024 && x >= 0 ==> align bound x `rem` bound == 0 where types = (bound, x) :: (Int, Int) check_fixFrom = let f 0 = 0 f n = f (n - 1) in fixFrom f 5 @?= 0 check_mmapEmpty = flip finally (removeFile "test_empty") $ do Prelude.writeFile "test_empty" "" x <- readSegment ("test_empty", Nothing) x @?= BL.empty reachable' ref look roots = runIdentity $ reachable ref look roots check_reachable = let refs 0 = [1, 2] refs 1 = [2] refs 2 = [0, 4] refs 3 = [4, 6, 7] refs 4 = [0, 1] set = S.fromList [1, 2] map = M.fromList [ (n, refs n) | n <- [0..10] ] reach = reachable' return (lookup map) set in do M.keysSet reach @?= S.fromList [0, 1, 2, 4] prop_reach_subset (set :: S.Set Int, map :: M.Map Int [Int]) = M.keysSet (reachable' return (lookup map) set) `S.isSubsetOf` M.keysSet map prop_reach_roots (set :: S.Set Int, map :: M.Map Int [Int]) = set `S.isSubsetOf` M.keysSet map ==> set `S.isSubsetOf` M.keysSet (reachable' return (lookup map) set) prop_reach_nonroots (set :: S.Set Int, map :: M.Map Int [Int]) = set `S.intersection` M.keysSet map == M.keysSet (reachable' (return . const []) (lookup map) set) lookup :: (Ord a) => M.Map a [a] -> a -> Identity (Maybe (a, [a])) lookup m k = return $ case M.lookupIndex k m of Nothing -> Nothing Just i -> Just $ M.elemAt i m hash = [ testProperty "decodeBase16 . encodeBase16 == id" prop_base16 , testProperty "decodeBase64u . encodeBase64u == id" prop_base64u ] where prop_base16 x = (decodeBase16 . encodeBase16) x == x prop_base64u x = (decodeBase64u . 
encodeBase64u) x == x monad = [ testCase "path expansion" check_virtual , testCase "rename" check_rename ] where check_virtual = virtualTreeMonad run testTree >> return () where run = do file <- readFile (floatPath "substub/substub/file") file2 <- readFile (floatPath "substub/substub/file2") lift $ BL.unpack file @?= "" lift $ BL.unpack file2 @?= "foo" check_rename = do (_, t) <- virtualTreeMonad run testTree t' <- darcsAddMissingHashes =<< expand t forM_ [ (p, i) | (p, i) <- list t' ] $ \(p,i) -> assertBool ("have hash: " ++ show p) $ itemHash i /= NoHash where run = do rename (floatPath "substub/substub/file") (floatPath "substub/file2") posix = [ testCase "getFileStatus" $ check_stat Posix.getFileStatus , testCase "getSymbolicLinkStatus" $ check_stat Posix.getSymbolicLinkStatus ] where check_stat fun = flip finally (removeFile "test_empty") $ do x <- Posix.fileSize `fmap` fun "foo_a" Prelude.writeFile "test_empty" "" y <- Posix.fileSize `fmap` fun "test_empty" exist_nonexistent <- Posix.fileExists `fmap` fun "test_does_not_exist" exist_existent <- Posix.fileExists `fmap` fun "test_empty" assertEqual "file size" x 2 assertEqual "file size" y 0 assertBool "existence check" $ not exist_nonexistent assertBool "existence check" exist_existent ---------------------------------- -- Arbitrary instances -- instance (Arbitrary a, Ord a) => Arbitrary (S.Set a) where arbitrary = S.fromList `fmap` arbitrary instance (Arbitrary k, Arbitrary v, Ord k) => Arbitrary (M.Map k v) where arbitrary = M.fromList `fmap` arbitrary instance Arbitrary BL.ByteString where arbitrary = BL.pack `fmap` arbitrary instance Arbitrary Hash where arbitrary = sized hash' where hash' 0 = return NoHash hash' _ = do tag <- oneof [return 0, return 1] case tag of 0 -> SHA256 . BS.pack <$> sequence [ arbitrary | _ <- [1..32] ] 1 -> SHA1 . BS.pack <$> sequence [ arbitrary | _ <- [1..20] ] instance (Monad m) => Arbitrary (TreeItem m) where arbitrary = sized tree' where tree' 0 = oneof [ return (File emptyBlob), return (SubTree emptyTree) ] tree' n = oneof [ file n, subtree n ] file 0 = return (File emptyBlob) file _ = do content <- arbitrary return (File $ Blob (return content) NoHash) subtree n = do branches <- choose (1, n) let sub name = do t <- tree' ((n - 1) `div` branches) return (makeName $ show name, t) sublist <- mapM sub [0..branches] oneof [ tree' 0 , return (SubTree $ makeTree sublist) , return $ (Stub $ return (makeTree sublist)) NoHash ] instance (Monad m) => Arbitrary (Tree m) where arbitrary = do item <- arbitrary case item of File _ -> arbitrary Stub _ _ -> arbitrary SubTree t -> return t data TreeWithPath = TreeWithPath (Tree Identity) AnchoredPath deriving (Show) instance Arbitrary TreeWithPath where arbitrary = do t <- arbitrary p <- oneof $ return (AnchoredPath []) : (map (return . fst) $ list (runIdentity $ expand t)) return $ TreeWithPath t p --------------------------- -- Other instances -- instance Show (Blob m) where show (Blob _ h) = "Blob " ++ show h instance Show (TreeItem m) where show (File f) = "File (" ++ show f ++ ")" show (Stub _ h) = "Stub _ " ++ show h show (SubTree s) = "SubTree (" ++ show s ++ ")" instance Show (Tree m) where show t = "Tree " ++ show (treeHash t) ++ " { " ++ (concat . 
intersperse ", " $ itemstrs) ++ " }" where itemstrs = map show $ listImmediate t instance Show (Int -> Int) where show f = "[" ++ intercalate ", " (map val [1..20]) ++ " ...]" where val x = show x ++ " -> " ++ show (f x) ----------------------- -- Test utilities -- shapeEq a b = Just EQ == cmpShape a b expandedShapeEq a b = (Just EQ ==) <$> cmpExpandedShape a b cmpcat (x:y:rest) | x == y = cmpcat (x:rest) | x == Just EQ = cmpcat (y:rest) | y == Just EQ = cmpcat (x:rest) | otherwise = Nothing cmpcat [x] = x cmpcat [] = Just EQ -- empty things are equal cmpTree a b = do a' <- expand a b' <- expand b con <- contentsEq a' b' return $ cmpcat [cmpShape a' b', con] where contentsEq a b = cmpcat <$> sequence (zipTrees cmp a b) cmp _ (Just (File a)) (Just (File b)) = do a' <- readBlob a b' <- readBlob b return $ Just (compare a' b') cmp _ _ _ = return (Just EQ) -- neutral cmpShape t r = cmpcat $ zipTrees cmp t r where cmp _ (Just a) (Just b) = a `item` b cmp _ Nothing (Just _) = Just LT cmp _ (Just _) Nothing = Just GT item (File _) (File _) = Just EQ item (SubTree s) (SubTree p) = s `cmpShape` p item _ _ = Nothing cmpExpandedShape :: (Monad m) => Tree m -> Tree m -> m (Maybe Ordering) cmpExpandedShape a b = do x <- expand a y <- expand b return $ x `cmpShape` y nondarcs (AnchoredPath (Name x:_)) _ | x == BS.pack "_darcs" = False | otherwise = True readDarcsPristine :: FilePath -> IO (Tree IO) readDarcsPristine dir = do let darcs = dir "_darcs" h_inventory = darcs "hashed_inventory" repo <- doesDirectoryExist darcs unless repo $ fail $ "Not a darcs repository: " ++ dir hashed <- doesFileExist h_inventory if hashed then do inv <- BS.readFile h_inventory let lines = BS.split '\n' inv case lines of [] -> return emptyTree (pris_line:_) -> do let hash = decodeDarcsHash $ BS.drop 9 pris_line size = decodeDarcsSize $ BS.drop 9 pris_line when (hash == NoHash) $ fail $ "Bad pristine root: " ++ show pris_line readDarcsHashed (darcs "pristine.hashed") (size, hash) else do have_pristine <- doesDirectoryExist $ darcs "pristine" have_current <- doesDirectoryExist $ darcs "current" case (have_pristine, have_current) of (True, _) -> readPlainTree $ darcs "pristine" (False, True) -> readPlainTree $ darcs "current" (_, _) -> fail "No pristine tree is available!" hashed-storage-0.5.10/Storage/Hashed/Packed.hs0000644000000000000000000002354712025701732017216 0ustar0000000000000000{-# LANGUAGE ParallelListComp #-} -- | This module implements an "object storage". This is a directory on disk -- containing a content-addressed storage. This is useful for storing all kinds -- of things, particularly filesystem trees, or darcs pristine caches and patch -- objects. However, this is an abstract, flat storage: no tree semantics are -- provided. You just need to provide a reference-collecting functionality, -- computing a list of references for any given object. The system provides -- transparent garbage collection and packing. module Storage.Hashed.Packed ( Format(..), Block, OS -- * Basic operations. , hatch, compact, repack, lookup -- * Creating and loading. , create, load -- * Low-level. 
, format, blockLookup, live, hatchery, mature, roots, references, rootdir ) where import Prelude hiding ( lookup, read ) import Storage.Hashed.AnchoredPath( ) import Storage.Hashed.Tree ( ) import Storage.Hashed.Utils import Storage.Hashed.Hash import Control.Monad( forM, forM_, unless ) import Control.Applicative( (<$>) ) import System.FilePath( (), (<.>) ) import System.Directory( createDirectoryIfMissing, removeFile , getDirectoryContents ) import Bundled.Posix( fileExists, isDirectory, getFileStatus ) import qualified Data.ByteString.Lazy.Char8 as BL import qualified Data.ByteString.Char8 as BS import Data.Maybe( listToMaybe, catMaybes, isNothing ) import Data.Binary( encode, decode ) import qualified Data.Set as S import qualified Data.Map as M import Data.List( sort ) import Data.Int( Int64 ) -- | On-disk format for object storage: we implement a completely loose format -- (one file per object), a compact format stored in a single append-only file -- and an immutable \"pack\" format. data Format = Loose | Compact | Pack deriving (Show, Eq) loose_dirs :: [[Char]] loose_dirs = let chars = ['0'..'9'] ++ ['a'..'f'] in [ [a,b] | a <- chars, b <- chars ] loosePath :: OS -> Hash -> FilePath loosePath _ NoHash = error "No path for NoHash!" loosePath os hash = let hash' = BS.unpack (encodeBase16 hash) in rootdir os "hatchery" take 2 hash' drop 2 hash' looseLookup :: OS -> Hash -> IO (Maybe FileSegment) looseLookup _ NoHash = return Nothing looseLookup os hash = do let path = loosePath os hash exist <- fileExists <$> getFileStatus path return $ if exist then Just (path, Nothing) else Nothing -- | Object storage block. When used as a hatchery, the loose or compact format -- are preferable, while for mature space, the pack format is more useful. data Block = Block { blockLookup :: Hash -> IO (Maybe FileSegment) , size :: Int64 , format :: Format } -- | Object storage. Contains a single \"hatchery\" and possibly a number of -- mature space blocks, usually in form of packs. It also keeps a list of root -- pointers and has a way to extract pointers from objects (externally -- supplied). These last two things are used to implement a simple GC. data OS = OS { hatchery :: Block , mature :: [Block] , roots :: [Hash] , references :: FileSegment -> IO [Hash] , rootdir :: FilePath } -- | Reduce number of packs in the object storage. This may both recombine -- packs to eliminate dead objects and join some packs to form bigger packs. repack :: OS -> IO OS repack _ = error "repack undefined" -- | Add new objects to the object storage (i.e. put them into hatchery). It is -- safe to call this even on objects that are already present in the storage: -- such objects will be skipped. hatch :: OS -> [BL.ByteString] -> IO OS hatch os blobs = do processed <- mapM sieve blobs write [ (h, b) | (True, h, b) <- processed ] where write bits = case format (hatchery os) of Loose -> do forM bits $ \(hash, blob) -> do BL.writeFile (loosePath os hash) blob return os Compact -> error "hatch/compact undefined" _ -> fail "Hatchery must be either Loose or Compact." sieve blob = do let hash = sha256 blob absent <- isNothing <$> lookup os hash return (absent, hash, blob) -- | Move things from hatchery into a (new) pack. compact :: OS -> IO OS compact os = do objects <- live os [hatchery os] block <- createPack os (M.toList objects) cleanup return $ os { mature = block:mature os } where cleanup = case format (hatchery os) of Loose -> forM_ loose_dirs $ nuke . 
((rootdir os "hatchery") ) Compact -> removeFile (rootdir os "hatchery") >> return () _ -> fail "Hatchery must be either Loose or Compact." nuke dir = mapM (removeFile . (dir )) =<< (Prelude.filter (`notElem` [".", ".."]) `fmap` getDirectoryContents dir) blocksLookup :: [Block] -> Hash -> IO (Maybe (Hash, FileSegment)) blocksLookup blocks hash = do segment <- cat `fmap` mapM (flip blockLookup hash) blocks return $ case segment of Nothing -> Nothing Just seg -> Just (hash, seg) where cat = listToMaybe . catMaybes lookup :: OS -> Hash -> IO (Maybe FileSegment) lookup os hash = do res <- blocksLookup (hatchery os : mature os) hash return $ case res of Nothing -> Nothing Just (_, seg) -> Just seg -- | Create an empty object storage in given directory, with a hatchery of -- given format. The directory is created if needed, but is assumed to be -- empty. create :: FilePath -> Format -> IO OS create path fmt = do createDirectoryIfMissing True path initHatchery load path where initHatchery | fmt == Loose = do mkdir hatchpath forM loose_dirs $ mkdir . (hatchpath ) | fmt == Compact = error "create/mkHatchery Compact undefined" mkdir = createDirectoryIfMissing False hatchpath = path "hatchery" load :: FilePath -> IO OS load path = do hatch_stat <- getFileStatus $ path "hatchery" let is_os = fileExists hatch_stat is_dir = isDirectory hatch_stat unless is_os $ fail $ path ++ " is not an object storage!" let _hatchery = Block { blockLookup = look os , format = if is_dir then Loose else Compact , size = undefined } os = OS { hatchery = _hatchery , rootdir = path , mature = packs , roots = _roots , references = undefined } look | format _hatchery == Loose = looseLookup | otherwise = undefined packs = [] -- FIXME read packs _roots = [] -- FIXME read root pointers return os readPack :: FilePath -> IO Block readPack file = do bits <- readSegment (file, Nothing) let count = decode (BL.take 8 $ bits) _lookup NoHash _ _ = return Nothing _lookup hash@(SHA256 rawhash) first final = do let middle = first + ((final - first) `div` 2) res <- case ( compare rawhash (hashof first) , compare rawhash (hashof middle) , compare rawhash (hashof final) ) of (LT, _, _) -> return Nothing ( _, _, GT) -> return Nothing (EQ, _, _) -> return $ Just (segof first) ( _, _, EQ) -> return $ Just (segof final) (GT, EQ, LT) -> return $ Just (segof middle) (GT, GT, LT) | middle /= final -> _lookup hash middle final (GT, LT, LT) | first /= middle -> _lookup hash first middle ( _, _, _) -> return Nothing return res headerof i = BL.take 51 $ BL.drop (8 + i * 51) bits hashof i = BS.concat $ BL.toChunks $ BL.take 32 $ headerof i segof i = (file, Just (count * 51 + 8 + from, sz)) where from = decode (BL.take 8 $ BL.drop 33 $ headerof i) sz = decode (BL.take 8 $ BL.drop 42 $ headerof i) return $ Block { size = BL.length bits , format = Pack , blockLookup = \h -> _lookup h 0 (count - 1) } createPack :: OS -> [(Hash, FileSegment)] -> IO Block createPack os bits = do contents <- mapM readSegment (map snd bits) let offsets = scanl (+) 0 $ map BL.length contents headerbits = [ BL.concat [ BL.fromChunks [rawhash] , BL.pack "@" , encode offset , BL.pack "!" , encode $ BL.length string , BL.pack "\n" ] | (SHA256 rawhash, _) <- bits | string <- contents | offset <- offsets ] header = BL.concat $ (encode $ length bits) : sort headerbits blob = BL.concat $ header:contents hash = sha256 blob path = rootdir os BS.unpack (encodeBase16 hash) <.> "bin" BL.writeFile path blob readPack path -- | Build a map of live objects (i.e. 
those reachable from the given roots) in -- a given list of Blocks. live :: OS -> [Block] -> IO (M.Map Hash FileSegment) live os blocks = reachable (references os) (blocksLookup blocks) (S.fromList $ roots os) hashed-storage-0.5.10/Storage/Hashed/Plain.hs0000644000000000000000000000742012025701732017062 0ustar0000000000000000-- | The plain format implementation resides in this module. The plain format -- does not use any hashing and basically just wraps a normal filesystem tree -- in the hashed-storage API. -- -- NB. The 'read' function on Blobs coming from a plain tree is susceptible to -- file content changes. Since we use mmap in 'read', this will break -- referential transparency and produce unexpected results. Please always make -- sure that all parallel access to the underlying filesystem tree never -- mutates files. Unlink + recreate is fine though (in other words, the -- 'writePlainTree' and 'plainTreeIO' implemented in this module are safe in -- this respect). module Storage.Hashed.Plain( readPlainTree, writePlainTree, plainTreeIO -- (obsolete? if so remove implementation!) ) where import Data.Maybe( catMaybes ) import qualified Data.ByteString as BS import qualified Data.ByteString.Char8 as BS8 import qualified Data.ByteString.Lazy as BL import System.FilePath( () ) import System.Directory( getDirectoryContents , createDirectoryIfMissing ) import Bundled.Posix( getFileStatus, isDirectory, isRegularFile, FileStatus ) import Control.Monad( forM_ ) import Storage.Hashed.AnchoredPath import Storage.Hashed.Utils import Storage.Hashed.Hash( Hash( NoHash) ) import Storage.Hashed.Tree( Tree(), TreeItem(..) , Blob(..), makeTree , list, readBlob, find, modifyTree, expand ) import Storage.Hashed.Monad( TreeIO, runTreeMonad, initialState, tree, replaceItem ) import qualified Data.Set as S import Control.Monad.State( liftIO, gets, modify ) readPlainDir :: FilePath -> IO [(FilePath, FileStatus)] readPlainDir dir = withCurrentDirectory dir $ do items <- getDirectoryContents "." sequence [ do st <- getFileStatus s return (s, st) | s <- items, s `notElem` [ ".", ".." ] ] readPlainTree :: FilePath -> IO (Tree IO) readPlainTree dir = do items <- readPlainDir dir let subs = catMaybes [ let name = Name (BS8.pack name') in case status of _ | isDirectory status -> Just (name, Stub (readPlainTree (dir name')) NoHash) _ | isRegularFile status -> Just (name, File $ Blob (readBlob' name) NoHash) _ -> Nothing | (name', status) <- items ] return $ makeTree subs where readBlob' (Name name) = readSegment (dir BS8.unpack name, Nothing) -- | Write out /full/ tree to a plain directory structure. If you instead want -- to make incremental updates, refer to "Storage.Hashed.Monad". writePlainTree :: Tree IO -> FilePath -> IO () writePlainTree t dir = do createDirectoryIfMissing True dir expand t >>= mapM_ write . list where write (p, File b) = write' p b write (p, SubTree _) = createDirectoryIfMissing True (anchorPath dir p) write _ = return () write' p b = readBlob b >>= BL.writeFile (anchorPath dir p) -- | Run a 'TreeIO' action in a plain tree setting. Writes out changes to the -- plain tree every now and then (after the action is finished, the last tree -- state is always flushed to disk). XXX Modify the tree with filesystem -- reading and put it back into st (ie. replace the in-memory Blobs with normal -- ones, so the memory can be GCd). 
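{- A hedged example of the incremental-update pattern this function enables,
   assuming a 'writeFile' in Storage.Hashed.Monad (the test suite hides the
   Prelude version in its favour); "work", "foo/bar" and 'touch' are
   placeholders. It records an empty file and flushes the change to disk:

     touch :: IO ()
     touch = do t <- readPlainTree "work"
                _ <- plainTreeIO (writeFile (floatPath "foo/bar") BL.empty)
                                 t "work"
                return ()
-}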
plainTreeIO :: TreeIO a -> Tree IO -> FilePath -> IO (a, Tree IO) plainTreeIO action t dir = runTreeMonad action $ initialState t (\_ -> return NoHash) updatePlain where updatePlain path (File b) = do liftIO $ createDirectoryIfMissing True (anchorPath "" $ parent path) liftIO $ readBlob b >>= BL.writeFile (anchorPath "" path) return $ File $ Blob (BL.readFile $ anchorPath "" path) NoHash updatePlain _ x = return x hashed-storage-0.5.10/Storage/Hashed/Index.hs0000644000000000000000000004404312025701732017070 0ustar0000000000000000{-# LANGUAGE CPP, ScopedTypeVariables, MultiParamTypeClasses #-} -- | This module contains plain tree indexing code. The index itself is a -- CACHE: you should only ever use it as an optimisation and never as a primary -- storage. In practice, this means that when we change index format, the -- application is expected to throw the old index away and build a fresh -- index. Please note that tracking index validity is out of scope for this -- library: this is responsibility of your application. It is advisable that in -- your validity tracking code, you also check for format validity (see -- 'indexFormatValid') and scrap and re-create index when needed. -- -- The index is a binary file that overlays a hashed tree over the working -- copy. This means that every working file and directory has an entry in the -- index, that contains its path and hash and validity data. The validity data -- is a timestamp plus the file size. The file hashes are sha256's of the -- file's content. -- -- There are two entry types, a file entry and a directory entry. Both have a -- common binary format (see 'Item'). The on-disk format is best described by -- the section /Index format/ below. -- -- For each file, the index has a copy of the file's last modification -- timestamp taken at the instant when the hash has been computed. This means -- that when file size and timestamp of a file in working copy matches those in -- the index, we assume that the hash stored in the index for given file is -- valid. These hashes are then exposed in the resulting 'Tree' object, and can -- be leveraged by eg. 'diffTrees' to compare many files quickly. -- -- You may have noticed that we also keep hashes of directories. These are -- assumed to be valid whenever the complete subtree has been valid. At any -- point, as soon as a size or timestamp mismatch is found, the working file in -- question is opened, its hash (and timestamp and size) is recomputed and -- updated in-place in the index file (everything lives at a fixed offset and -- is fixed size, so this isn't an issue). This is also true of directories: -- when a file in a directory changes hash, this triggers recomputation of all -- of its parent directory hashes; moreover this is done efficiently -- each -- directory is updated at most once during an update run. -- -- /Index format/ -- -- The Index is organised into \"lines\" where each line describes a single -- indexed item. Cf. 'Item'. -- -- The first word on the index \"line\" is the length of the file path (which is -- the only variable-length part of the line). Then comes the path itself, then -- fixed-length hash (sha256) of the file in question, then two words, one for -- size and one "aux", which is used differently for directories and for files. -- -- With directories, this aux holds the offset of the next sibling line in the -- index, so we can efficiently skip reading the whole subtree starting at a -- given directory (by just seeking aux bytes forward). 
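--
-- Concretely, each entry is laid out as follows; this summary is illustrative,
-- the @off_*@ and @size_*@ constants below are authoritative. The first entry
-- starts right after a 4-byte magic word at the beginning of the file.
--
-- > bytes  0..7   size (Int64, via xlate64): file size, 0 for directories
-- > bytes  8..15  aux (Int64): next-sibling offset for directories,
-- >               modification time for files
-- > bytes 16..19  length of the descriptor (Int32, via xlate32)
-- > bytes 20..51  sha256 hash of the item
-- > bytes 52..    descriptor: a 'D' or 'F' type tag, the flattened path and a
-- >               terminating NUL; each entry is padded to 4-byte alignment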
The lines are -- pre-ordered with respect to directory structure -- the directory comes first -- and after it come all its items. Cf. 'readIndex''. -- -- For files, the aux field holds a timestamp. module Storage.Hashed.Index( readIndex, updateIndexFrom, indexFormatValid , updateIndex , Index, filter ) where import Prelude hiding ( lookup, readFile, writeFile, catch, filter ) import Storage.Hashed.Utils import Storage.Hashed.Tree import Storage.Hashed.AnchoredPath import Data.Int( Int64, Int32 ) import Bundled.Posix( getFileStatusBS, modificationTime, getFileStatus, fileSize, fileExists ) import System.IO.MMap( mmapFileForeignPtr, mmapFileByteString, Mode(..) ) import System.IO( ) import System.Directory( doesFileExist, getCurrentDirectory ) #if mingw32_HOST_OS import System.Directory( renameFile ) import System.FilePath( (<.>) ) #else import System.Directory( removeFile ) #endif import System.FilePath( () ) import Control.Monad( when ) import Control.Exception.Extensible import Control.Applicative( (<$>) ) import qualified Data.ByteString.Lazy as BL import qualified Data.ByteString as BS import qualified Data.ByteString.Char8 as BSC import Data.ByteString.Unsafe( unsafeHead, unsafeDrop ) import Data.ByteString.Internal( toForeignPtr, fromForeignPtr, memcpy , nullForeignPtr, c2w ) import Data.IORef( ) import Data.Maybe( fromJust, isJust ) import Data.Bits( Bits ) import Foreign.Storable import Foreign.ForeignPtr import Foreign.Ptr import Storage.Hashed.Hash( sha256, rawHash ) -------------------------- -- Indexed trees -- -- | Description of a a single indexed item. The structure itself does not -- contain any data, just pointers to the underlying mmap (bytestring is a -- pointer + offset + length). -- -- The structure is recursive-ish (as opposed to flat-ish structure, which is -- used by git...) It turns out that it's hard to efficiently read a flat index -- with our internal data structures -- we need to turn the flat index into a -- recursive Tree object, which is rather expensive... As a bonus, we can also -- efficiently implement subtree queries this way (cf. 'readIndex'). data Item = Item { iBase :: !(Ptr ()) , iHashAndDescriptor :: !BS.ByteString } deriving Show size_magic :: Int size_magic = 4 -- the magic word, first 4 bytes of the index size_dsclen, size_hash, size_size, size_aux :: Int size_size = 8 -- file/directory size (Int64) size_aux = 8 -- aux (Int64) size_dsclen = 4 -- this many bytes store the length of the path size_hash = 32 -- hash representation off_size, off_aux, off_hash, off_dsc, off_dsclen :: Int off_size = 0 off_aux = off_size + size_size off_dsclen = off_aux + size_aux off_hash = off_dsclen + size_dsclen off_dsc = off_hash + size_hash itemAllocSize :: AnchoredPath -> Int itemAllocSize apath = align 4 $ size_hash + size_size + size_aux + size_dsclen + 2 + BS.length (flatten apath) itemSize, itemNext :: Item -> Int itemSize i = size_size + size_aux + size_dsclen + (BS.length $ iHashAndDescriptor i) itemNext i = align 4 (itemSize i + 1) iPath, iHash, iDescriptor :: Item -> BS.ByteString iDescriptor = unsafeDrop size_hash . iHashAndDescriptor iPath = unsafeDrop 1 . iDescriptor iHash = BS.take size_hash . iHashAndDescriptor iSize, iAux :: Item -> Ptr Int64 iSize i = plusPtr (iBase i) off_size iAux i = plusPtr (iBase i) off_aux itemIsDir :: Item -> Bool itemIsDir i = unsafeHead (iDescriptor i) == c2w 'D' -- xlatePeek32 = fmap xlate32 . peek xlatePeek64 :: (Storable a, Bits a) => Ptr a -> IO a xlatePeek64 = fmap xlate64 . 
peek -- xlatePoke32 ptr v = poke ptr (xlate32 v) xlatePoke64 :: (Storable a, Bits a) => Ptr a -> a -> IO () xlatePoke64 ptr v = poke ptr (xlate64 v) -- | Lay out the basic index item structure in memory. The memory location is -- given by a ForeignPointer () and an offset. The path and type given are -- written out, and a corresponding Item is given back. The remaining bits of -- the item can be filled out using 'update'. createItem :: ItemType -> AnchoredPath -> ForeignPtr () -> Int -> IO Item createItem typ apath fp off = do let dsc = BS.concat [ BSC.singleton $ if typ == TreeType then 'D' else 'F' , flatten apath , BS.singleton 0 ] (dsc_fp, dsc_start, dsc_len) = toForeignPtr dsc withForeignPtr fp $ \p -> withForeignPtr dsc_fp $ \dsc_p -> do pokeByteOff p (off + off_dsclen) (xlate32 $ fromIntegral dsc_len :: Int32) memcpy (plusPtr p $ off + off_dsc) (plusPtr dsc_p dsc_start) (fromIntegral dsc_len) peekItem fp off -- | Read the on-disk representation into internal data structure. -- -- See the module-level section /Index format/ for details on how the index -- is structured. peekItem :: ForeignPtr () -> Int -> IO Item peekItem fp off = withForeignPtr fp $ \p -> do nl' :: Int32 <- xlate32 `fmap` peekByteOff p (off + off_dsclen) when (nl' <= 2) $ fail "Descriptor too short in peekItem!" let nl = fromIntegral nl' dsc = fromForeignPtr (castForeignPtr fp) (off + off_hash) (size_hash + nl - 1) return $! Item { iBase = plusPtr p off , iHashAndDescriptor = dsc } -- | Update an existing item with new hash and optionally mtime (give Nothing -- when updating directory entries). updateItem :: Item -> Int64 -> Hash -> IO () updateItem item _ NoHash = fail $ "Index.update NoHash: " ++ BSC.unpack (iPath item) updateItem item size hash = do xlatePoke64 (iSize item) size unsafePokeBS (iHash item) (rawHash hash) updateAux :: Item -> Int64 -> IO () updateAux item aux = xlatePoke64 (iAux item) $ aux updateTime :: forall a.(Enum a) => Item -> a -> IO () updateTime item mtime = updateAux item (fromIntegral $ fromEnum mtime) iHash' :: Item -> Hash iHash' i = SHA256 (iHash i) -- | Gives a ForeignPtr to mmapped index, which can be used for reading and -- updates. The req_size parameter, if non-0, expresses the requested size of -- the index file. mmapIndex will grow the index if it is smaller than this. mmapIndex :: forall a. 
FilePath -> Int -> IO (ForeignPtr a, Int) mmapIndex indexpath req_size = do exist <- doesFileExist indexpath act_size <- fromIntegral `fmap` if exist then fileSize `fmap` getFileStatus indexpath else return 0 let size = case req_size > 0 of True -> req_size False | act_size >= size_magic -> act_size - size_magic | otherwise -> 0 case size of 0 -> return (castForeignPtr nullForeignPtr, size) _ -> do (x, _, _) <- mmapFileForeignPtr indexpath ReadWriteEx (Just (0, size + size_magic)) return (x, size) data IndexM m = Index { mmap :: (ForeignPtr ()) , basedir :: FilePath , hashtree :: Tree m -> Hash , predicate :: AnchoredPath -> TreeItem m -> Bool } | EmptyIndex type Index = IndexM IO data State = State { dirlength :: !Int , path :: !AnchoredPath , start :: !Int } data Result = Result { changed :: !Bool , next :: !Int , treeitem :: !(Maybe (TreeItem IO)) , resitem :: !Item } readItem :: Index -> State -> IO Result readItem index state = do item <- peekItem (mmap index) (start state) res' <- if itemIsDir item then readDir index state item else readFile index state item return res' readDir :: Index -> State -> Item -> IO Result readDir index state item = do following <- fromIntegral <$> xlatePeek64 (iAux item) exists <- fileExists <$> getFileStatusBS (iPath item) let name it dirlen = Name $ (BS.drop (dirlen + 1) $ iDescriptor it) -- FIXME MAGIC namelength = (BS.length $ iDescriptor item) - (dirlength state) myname = name item (dirlength state) substate = state { start = start state + itemNext item , path = path state `appendPath` myname , dirlength = if myname == Name (BSC.singleton '.') then dirlength state else dirlength state + namelength } want = exists && (predicate index) (path substate) (Stub undefined NoHash) oldhash = iHash' item subs off | off < following = do result <- readItem index $ substate { start = off } rest <- subs $ next result return $! 
(name (resitem result) $ dirlength substate, result) : rest
          subs coff | coff == following = return []
                    | otherwise = fail $ "Offset mismatch at " ++ show coff ++
                                         " (ends at " ++ show following ++ ")"
      inferiors <- if want then subs $ start substate
                           else return []
      let we_changed = or [ changed x | (_, x) <- inferiors ] || nullleaf
          nullleaf = null inferiors && oldhash == nullsha
          nullsha = SHA256 (BS.replicate 32 0)
          tree' = makeTree [ (n, fromJust $ treeitem s)
                           | (n, s) <- inferiors, isJust $ treeitem s ]
          treehash = if we_changed then hashtree index tree' else oldhash
          tree = tree' { treeHash = treehash }
      when we_changed $ updateItem item 0 treehash
      return $ Result { changed = not exists || we_changed
                      , next = following
                      , treeitem = if want then Just $ SubTree tree else Nothing
                      , resitem = item }

readFile :: Index -> State -> Item -> IO Result
readFile index state item =
    do st <- getFileStatusBS (iPath item)
       mtime <- fromIntegral <$> (xlatePeek64 $ iAux item)
       size <- xlatePeek64 $ iSize item
       let mtime' = modificationTime st
           size' = fromIntegral $ fileSize st
           readblob = readSegment (basedir index </> BSC.unpack (iPath item), Nothing)
           exists = fileExists st
           we_changed = mtime /= mtime' || size /= size'
           hash = iHash' item
       when we_changed $ do
         hash' <- sha256 `fmap` readblob
         updateItem item size' hash'
         updateTime item mtime'
       return $ Result { changed = not exists || we_changed
                       , next = start state + itemNext item
                       , treeitem = if exists then Just $ File $ Blob readblob hash
                                              else Nothing
                       , resitem = item }

updateIndex :: Index -> IO (Tree IO)
updateIndex EmptyIndex = return emptyTree
updateIndex index =
    do let initial = State { start = size_magic
                           , dirlength = 0
                           , path = AnchoredPath [] }
       res <- readItem index initial
       case treeitem res of
         Just (SubTree tree) -> return $ filter (predicate index) tree
         _ -> fail "Unexpected failure in updateIndex!"

-- | Read an index and build up a 'Tree' object from it, referring to the
-- current working directory. The initial Index object returned by readIndex
-- is not directly useful. However, you can use 'Tree.filter' on it. Either
-- way, to obtain the actual Tree object, call 'updateIndex'.
--
-- The usual use pattern is this:
--
-- > do idx <- readIndex indexpath hashFn
-- >    tree <- updateIndex (filter predicate idx)
--
-- The resulting tree will be fully expanded.
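--
-- A fuller sketch of maintaining an index file (illustrative only: the index
-- path and @referenceTree@ are made up, and 'darcsTreeHash' from
-- "Storage.Hashed.Darcs" merely stands in for a suitable tree hashing
-- function):
--
-- > do valid <- indexFormatValid "_darcs/index"
-- >    unless valid $ removeFile "_darcs/index" -- discard an unusable index
-- >    idx <- updateIndexFrom "_darcs/index" darcsTreeHash referenceTree
-- >    tree <- updateIndex idx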
readIndex :: FilePath -> (Tree IO -> Hash) -> IO Index readIndex indexpath ht = do (mmap_ptr, mmap_size) <- mmapIndex indexpath 0 base <- getCurrentDirectory return $ if mmap_size == 0 then EmptyIndex else Index { mmap = mmap_ptr , basedir = base , hashtree = ht , predicate = \_ _ -> True } formatIndex :: ForeignPtr () -> Tree IO -> Tree IO -> IO () formatIndex mmap_ptr old reference = do create (SubTree reference) (AnchoredPath []) size_magic unsafePokeBS magic (BSC.pack "HSI4") where magic = fromForeignPtr (castForeignPtr mmap_ptr) 0 4 create (File _) path' off = do i <- createItem BlobType path' mmap_ptr off let flatpath = BSC.unpack $ flatten path' case find old path' of Nothing -> return () -- TODO calling getFileStatus here is both slightly -- inefficient and slightly race-prone Just ti -> do st <- getFileStatus flatpath let hash = itemHash ti mtime = modificationTime st size = fileSize st updateItem i (fromIntegral size) hash updateTime i mtime return $ off + itemNext i create (SubTree s) path' off = do i <- createItem TreeType path' mmap_ptr off case find old path' of Nothing -> return () Just ti | itemHash ti == NoHash -> return () | otherwise -> updateItem i 0 $ itemHash ti let subs [] = return $ off + itemNext i subs ((name,x):xs) = do let path'' = path' `appendPath` name noff <- subs xs create x path'' noff lastOff <- subs (listImmediate s) xlatePoke64 (iAux i) (fromIntegral lastOff) return lastOff create (Stub _ _) path' _ = fail $ "Cannot create index from stubbed Tree at " ++ show path' -- | Will add and remove files in index to make it match the 'Tree' object -- given (it is an error for the 'Tree' to contain a file or directory that -- does not exist in a plain form in current working directory). updateIndexFrom :: FilePath -> (Tree IO -> Hash) -> Tree IO -> IO Index updateIndexFrom indexpath hashtree' ref = do old_idx <- updateIndex =<< readIndex indexpath hashtree' reference <- expand ref let len_root = itemAllocSize anchoredRoot len = len_root + sum [ itemAllocSize p | (p, _) <- list reference ] exist <- doesFileExist indexpath #if mingw32_HOST_OS when exist $ renameFile indexpath (indexpath <.> "old") #else when exist $ removeFile indexpath -- to avoid clobbering oldidx #endif (mmap_ptr, _) <- mmapIndex indexpath len formatIndex mmap_ptr old_idx reference readIndex indexpath hashtree' -- | Check that a given file is an index file with a format we can handle. You -- should remove and re-create the index whenever this is not true. indexFormatValid :: FilePath -> IO Bool indexFormatValid path' = do magic <- mmapFileByteString path' (Just (0, size_magic)) return $ case BSC.unpack magic of "HSI4" -> True _ -> False `catch` \(_::SomeException) -> return False instance FilterTree IndexM IO where filter _ EmptyIndex = EmptyIndex filter p index = index { predicate = \a b -> predicate index a b && p a b } hashed-storage-0.5.10/Storage/Hashed/Utils.hs0000644000000000000000000001304412025701732017116 0ustar0000000000000000{-# LANGUAGE CPP, ScopedTypeVariables #-} -- | Mostly internal utilities for use by the rest of the library. Subject to -- removal without further notice. 
module Storage.Hashed.Utils where

import Prelude hiding ( lookup, catch )

import System.Mem( performGC )
import System.IO.MMap( mmapFileByteString )
import Bundled.Posix( getFileStatus, fileSize )
import System.Directory( getCurrentDirectory, setCurrentDirectory )
import System.FilePath( (</>), isAbsolute )
import Data.Int( Int64 )
import Data.Maybe( catMaybes )
import Control.Exception.Extensible( catch, bracket, SomeException(..) )
import Control.Monad( when )
import Control.Monad.Identity( runIdentity )
import Control.Applicative( (<$>) )
import Foreign.ForeignPtr( withForeignPtr )
import Foreign.Ptr( plusPtr )
import Data.ByteString.Internal( toForeignPtr, memcpy )
import System.IO (withFile, IOMode(ReadMode), hSeek, SeekMode(AbsoluteSeek))
import Data.Bits( Bits )
#ifdef BIGENDIAN
import Data.Bits( (.&.), (.|.), shift, shiftL, rotateR )
#endif

import qualified Data.ByteString.Lazy.Char8 as BL
import qualified Data.ByteString.Char8 as BS8
import qualified Data.ByteString as BS
import qualified Data.Set as S
import qualified Data.Map as M

-- | Pointer to a file on the filesystem, possibly with start/end offsets.
-- Supposed to be fed to (uncurry mmapFileByteString) or similar.
type FileSegment = (FilePath, Maybe (Int64, Int))

-- | Read in a FileSegment into a Lazy ByteString. Implemented using mmap.
readSegment :: FileSegment -> IO BL.ByteString
readSegment (f,range) = do
  bs <- tryToRead `catch`
        (\(_::SomeException) -> do
           size <- fileSize `fmap` getFileStatus f
           if size == 0
              then return BS8.empty
              else performGC >> tryToRead)
  return $ BL.fromChunks [bs]
    where tryToRead =
              do case range of
                   Nothing -> BS.readFile f
                   Just (off, size) -> withFile f ReadMode $ \h -> do
                     hSeek h AbsoluteSeek $ fromIntegral off
                     BS.hGet h size
{-# INLINE readSegment #-}

-- | Run an IO action with @path@ as a working directory. Does the necessary
-- bracketing.
withCurrentDirectory :: FilePath -> IO a -> IO a
withCurrentDirectory name =
    bracket
        (do cwd <- getCurrentDirectory
            when (name /= "") (setCurrentDirectory name)
            return cwd)
        (\oldwd -> setCurrentDirectory oldwd `catch`
                     \(_::SomeException) -> return ())
        . const

makeAbsolute :: FilePath -> IO FilePath
makeAbsolute p = do
  cwd <- getCurrentDirectory
  return $! if isAbsolute p then p else cwd </> p

-- Wow, unsafe.
unsafePokeBS :: BS8.ByteString -> BS8.ByteString -> IO ()
unsafePokeBS to from = do
  let (fp_to, off_to, len_to) = toForeignPtr to
      (fp_from, off_from, len_from) = toForeignPtr from
  when (len_to /= len_from) $
       fail $ "Length mismatch in unsafePokeBS: from = " ++ show len_from
                ++ " /= to = " ++ show len_to
  withForeignPtr fp_from $ \p_from ->
    withForeignPtr fp_to $ \p_to ->
      memcpy (plusPtr p_to off_to)
             (plusPtr p_from off_from)
             (fromIntegral len_to)

align :: Integral a => a -> a -> a
align boundary i = case i `rem` boundary of
                     0 -> i
                     x -> i + boundary - x
{-# INLINE align #-}

xlate32 :: (Bits a) => a -> a
xlate64 :: (Bits a) => a -> a

#ifdef LITTLEENDIAN
xlate32 = id
xlate64 = id
#endif

#ifdef BIGENDIAN
bytemask :: (Bits a) => a
bytemask = 255

xlate32 a = ((a .&. (bytemask `shift`  0)) `shiftL` 24) .|.
            ((a .&. (bytemask `shift`  8)) `shiftL`  8) .|.
            ((a .&. (bytemask `shift` 16)) `rotateR`  8) .|.
            ((a .&. (bytemask `shift` 24)) `rotateR` 24)

xlate64 a = ((a .&. (bytemask `shift`  0)) `shiftL` 56) .|.
            ((a .&. (bytemask `shift`  8)) `shiftL` 40) .|.
            ((a .&. (bytemask `shift` 16)) `shiftL` 24) .|.
            ((a .&. (bytemask `shift` 24)) `shiftL`  8) .|.
            ((a .&. (bytemask `shift` 32)) `rotateR`  8) .|.
            ((a .&. (bytemask `shift` 40)) `rotateR` 24) .|.
            ((a .&. (bytemask `shift` 48)) `rotateR` 40) .|.
            ((a .&. (bytemask `shift` 56)) `rotateR` 56)
#endif
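-- An illustrative use of 'readSegment' and 'align' (the file name is made
-- up): read 16 bytes starting at offset 4 of a file, and round a length up
-- to a 4-byte boundary.
--
-- > do chunk <- readSegment ("some/data.bin", Just (4, 16))
-- >    print (BL.length chunk)
-- >    print (align 4 (9 :: Int)) -- 9 rounded up to a multiple of 4 is 12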
-- | Find a monadic fixed point of @f@ that is the least above @i@. (Will
-- happily diverge if there is none.)
mfixFrom :: (Eq a, Functor m, Monad m) => (a -> m a) -> a -> m a
mfixFrom f i = do
  x <- f i
  if x == i then return i
            else mfixFrom f x

-- | Find a fixed point of @f@ that is the least above @i@. (Will happily
-- diverge if there is none.)
fixFrom :: (Eq a) => (a -> a) -> a -> a
fixFrom f i = runIdentity $ mfixFrom (return . f) i

-- | For a @refs@ function, a @map@ (@key@ -> @value@) and a @rootSet@, find
-- the submap of @map@ whose items are reachable, through @refs@, from
-- @rootSet@.
reachable :: forall monad key value.
             (Functor monad, Monad monad, Ord key, Eq value)
          => (value -> monad [key])
          -> (key -> monad (Maybe (key, value)))
          -> S.Set key
          -> monad (M.Map key value)
reachable refs lookup rootSet = do lookupSet rootSet >>= mfixFrom expand
    where lookupSet :: S.Set key -> monad (M.Map key value)
          expand :: M.Map key value -> monad (M.Map key value)
          lookupSet s = do list <- mapM lookup (S.toAscList s)
                           return $ M.fromAscList (catMaybes list)
          expand from = do refd <- concat <$> mapM refs (M.elems from)
                           M.union from <$> lookupSet (S.fromList refd)
hashed-storage-0.5.10/Storage/Hashed/AnchoredPath.hs0000644000000000000000000001004012025701732020347 0ustar0000000000000000-- | This module implements relative paths within a Tree. All paths are
-- anchored at a certain root (this is usually the Tree root). They are
-- represented by a list of Names (these are just strict bytestrings).
module Storage.Hashed.AnchoredPath
    ( Name(..), AnchoredPath(..), anchoredRoot, appendPath, anchorPath
    , isPrefix, parent, parents, catPaths, flatten, makeName
      -- * Unsafe functions.
    , floatBS, floatPath ) where

import qualified Data.ByteString.Char8 as BS
import Data.List( isPrefixOf, inits )
import System.FilePath( (</>), splitDirectories, normalise,
                        dropTrailingPathSeparator )

-------------------------------
-- AnchoredPath utilities --

newtype Name = Name BS.ByteString deriving (Eq, Show, Ord)

-- | This is a type of "sane" file paths. These are always canonical in the
-- sense that there are no stray slashes, no ".." components and similar. They
-- are usually used to refer to a location within a Tree, but a relative
-- filesystem path works just as well. These are either constructed from
-- individual name components (using "appendPath", "catPaths" and "makeName"),
-- or converted from a FilePath ("floatPath" -- but take care when doing
-- that).
newtype AnchoredPath = AnchoredPath [Name] deriving (Eq, Show, Ord)

-- | Check whether a path is a prefix of another path.
isPrefix :: AnchoredPath -> AnchoredPath -> Bool
(AnchoredPath a) `isPrefix` (AnchoredPath b) = a `isPrefixOf` b

-- | Append an element to the end of a path.
appendPath :: AnchoredPath -> Name -> AnchoredPath
appendPath (AnchoredPath p) n =
    case n of
      (Name s) | s == BS.empty -> AnchoredPath p
               | s == BS.pack "." -> AnchoredPath p
               | otherwise -> AnchoredPath $ p ++ [n]

-- | Catenate two paths together. Not very safe, but sometimes useful
-- (e.g. when you are representing paths relative to a different point than a
-- Tree root).
catPaths :: AnchoredPath -> AnchoredPath -> AnchoredPath
catPaths (AnchoredPath p) (AnchoredPath n) = AnchoredPath $ p ++ n

-- | Get parent (path) of a given path. foo/bar/baz -> foo/bar
parent :: AnchoredPath -> AnchoredPath
parent (AnchoredPath x) = AnchoredPath (init x)

-- | List all parents of a given path.
-- foo/bar/baz -> [foo, foo/bar]
parents :: AnchoredPath -> [AnchoredPath]
parents (AnchoredPath x) = map AnchoredPath . init . inits $ x

-- | Take a "root" directory and an anchored path and produce a full
-- 'FilePath'. Moreover, you can use @anchorPath \"\"@ to get a relative
-- 'FilePath'.
anchorPath :: FilePath -> AnchoredPath -> FilePath
anchorPath dir p = dir </> BS.unpack (flatten p)
{-# INLINE anchorPath #-}

-- | Unsafe. Only ever use on bytestrings that came from flatten on a
-- pre-existing AnchoredPath.
floatBS :: BS.ByteString -> AnchoredPath
floatBS = AnchoredPath . map Name . takeWhile (not . BS.null) . BS.split '/'

flatten :: AnchoredPath -> BS.ByteString
flatten (AnchoredPath []) = BS.singleton '.'
flatten (AnchoredPath p) = BS.intercalate (BS.singleton '/')
                                          [ n | (Name n) <- p ]

makeName :: String -> Name
makeName ".." = error ".. is not a valid AnchoredPath component name"
makeName n | '/' `elem` n = error "/ may not occur in a valid AnchoredPath component name"
           | otherwise = Name $ BS.pack n

-- | Take a relative FilePath and turn it into an AnchoredPath. The operation
-- is (relatively) unsafe. Basically, by using floatPath, you are testifying
-- that the argument is a path relative to some common root -- i.e. the root
-- of the associated "Tree" object. Also, there are certain invariants about
-- AnchoredPath that this function tries hard to preserve, but probably cannot
-- guarantee (i.e. this is a best-effort thing). You should sanitize any
-- FilePaths before you declare them "good" by converting into AnchoredPath
-- (using this function).
floatPath :: FilePath -> AnchoredPath
floatPath = make . splitDirectories . normalise . dropTrailingPathSeparator
    where make ["."] = AnchoredPath []
          make x = AnchoredPath $ map (Name . BS.pack) x

anchoredRoot :: AnchoredPath
anchoredRoot = AnchoredPath []
hashed-storage-0.5.10/Storage/Hashed/Hash.hs0000644000000000000000000000575612025701732016714 0ustar0000000000000000{-# LANGUAGE DeriveDataTypeable #-}
module Storage.Hashed.Hash( Hash(..), encodeBase64u, decodeBase64u
                          , encodeBase16, decodeBase16, sha256, rawHash
                          , match ) where

import qualified Bundled.SHA256 as SHA
import qualified Data.ByteString as BS
import qualified Data.ByteString.Internal as BSI
import qualified Data.ByteString.Lazy as BL
import qualified Codec.Binary.Base64Url as B64U
import qualified Codec.Binary.Base16 as B16
import Data.Maybe( isJust, fromJust )
import Data.Char( toLower, toUpper )
import Data.Data( Data )
import Data.Typeable( Typeable )

data Hash = SHA256 !BS.ByteString
          | SHA1 !BS.ByteString
          | NoHash
            deriving (Show, Eq, Ord, Read, Typeable, Data)

base16 :: BS.ByteString -> BS.ByteString
debase16 :: BS.ByteString -> Maybe BS.ByteString
base64u :: BS.ByteString -> BS.ByteString
debase64u :: BS.ByteString -> Maybe BS.ByteString

base16 = BS.pack . map (BSI.c2w . toLower) . B16.encode . BS.unpack
base64u = BS.pack . map BSI.c2w . B64U.encode . BS.unpack

debase64u bs = case B64U.decode $ map BSI.w2c $ BS.unpack bs of
                 Just s -> Just $ BS.pack s
                 Nothing -> Nothing

debase16 bs = case B16.decode $ map (toUpper . BSI.w2c) $ BS.unpack bs of
                Just s -> Just $ BS.pack s
                Nothing -> Nothing

encodeBase64u :: Hash -> BS.ByteString
encodeBase64u (SHA256 bs) = base64u bs
encodeBase64u (SHA1 bs) = base64u bs
encodeBase64u NoHash = BS.empty

-- | Produce a base16 (ascii-hex) encoded string from a hash. This can be
-- turned back into a Hash (see "decodeBase16"). This is a loss-less process.
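--
-- For any hash @h@ produced by 'sha256' (illustrative):
--
-- > decodeBase16 (encodeBase16 h) == h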
encodeBase16 :: Hash -> BS.ByteString
encodeBase16 (SHA256 bs) = base16 bs
encodeBase16 (SHA1 bs) = base16 bs
encodeBase16 NoHash = BS.empty

-- | Take a base64/url-encoded string and decode it as a "Hash". If the string
-- is malformed, yields NoHash.
decodeBase64u :: BS.ByteString -> Hash
decodeBase64u bs
    | BS.length bs == 44 && isJust (debase64u bs) = SHA256 (fromJust $ debase64u bs)
    | BS.length bs == 28 && isJust (debase64u bs) = SHA1 (fromJust $ debase64u bs)
    | otherwise = NoHash

-- | Take a base16-encoded string and decode it as a "Hash". If the string is
-- malformed, yields NoHash.
decodeBase16 :: BS.ByteString -> Hash
decodeBase16 bs
    | BS.length bs == 64 && isJust (debase16 bs) = SHA256 (fromJust $ debase16 bs)
    | BS.length bs == 40 && isJust (debase16 bs) = SHA1 (fromJust $ debase16 bs)
    | otherwise = NoHash

-- | Compute a sha256 of a (lazy) ByteString. Although this works correctly
-- for any bytestring, it is only efficient if the bytestring only has a
-- single chunk.
sha256 :: BL.ByteString -> Hash
sha256 bits = SHA256 (SHA.sha256 $ BS.concat $ BL.toChunks bits)

rawHash :: Hash -> BS.ByteString
rawHash NoHash = error "Cannot obtain raw hash from NoHash."
rawHash (SHA1 s) = s
rawHash (SHA256 s) = s

match :: Hash -> Hash -> Bool
NoHash `match` _ = False
_ `match` NoHash = False
x `match` y = x == y
hashed-storage-0.5.10/Storage/Hashed/Diff.hs0000644000000000000000000001262312025701732016670 0ustar0000000000000000module Storage.Hashed.Diff where

import Prelude hiding ( lookup, filter )
import qualified Data.ByteString.Lazy.Char8 as BL
import Storage.Hashed.Tree
import Storage.Hashed.AnchoredPath
import Data.List.LCS
import Data.List ( groupBy )

unidiff :: Tree IO -> Tree IO -> IO BL.ByteString
unidiff l r = do (from, to) <- diffTrees l r
                 diffs <- sequence $ zipCommonFiles diff from to
                 return $ BL.concat diffs
    where diff p a b = do x <- readBlob a
                          y <- readBlob b
                          return $ diff' p x y
          diff' p x y = case unifiedDiff x y of
                          x' | BL.null x' -> BL.empty
                             | otherwise ->
                                 (BL.pack $ "--- " ++ anchorPath "old" p ++ "\n" ++
                                            "+++ " ++ anchorPath "new" p ++ "\n")
                                 `BL.append` x'

type Line = BL.ByteString
data WeaveLine = Common Line | Remove Line | Add Line
               | Replace Line Line | Skip Int deriving Show

-- | A weave -- two files woven together, with common and differing regions
-- marked up. Cf. 'WeaveLine'.
type Weave = [WeaveLine]

-- | Sort of a sub-weave.
type Hunk = [WeaveLine]

-- | Produce a unified diff (in string form, i.e. formatted) from a pair of
-- bytestrings.
unifiedDiff :: BL.ByteString -> BL.ByteString -> BL.ByteString
unifiedDiff a b = printUnified $ concat unifiedHunks
    where unifiedHunks = reduceContext 3 $ map unifyHunk $ hunks $ weave a b

-- | Weave two bytestrings. Intermediate data structure for the actual unidiff
-- implementation. No skips are produced.
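--
-- For example (illustrative; @BL@ is "Data.ByteString.Lazy.Char8"):
--
-- > weave (BL.pack "a\nb\n") (BL.pack "a\nc\n")
-- >   == [Common (BL.pack "a"), Replace (BL.pack "b") (BL.pack "c")]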
weave :: BL.ByteString -> BL.ByteString -> Weave weave a' b' = weave' left common right where left = init' (BL.split '\n' a') -- drop trailing newline right = init' (BL.split '\n' b') -- drop trailing newline init' [] = [] init' x = init x common = lcs left right weave' [] [] [] = [] weave' [] c [] = error $ "oops: Left & Right empty, Common: " ++ show c weave' [] [] (b:bs) = Add b : weave' [] [] bs weave' (a:as) [] [] = Remove a : weave' as [] [] weave' (a:as) [] (b:bs) = Replace a b : weave' as [] bs weave' (a:as) (c:cs) (b:bs) | a == c && b == c = Common a : weave' as cs bs | a == c && b /= c = Add b : weave' (a:as) (c:cs) bs | a /= c && b == c = Remove a : weave' as (c:cs) (b:bs) | a /= c && b /= c = Replace a b : weave' as (c:cs) bs | otherwise = error "oops!" weave' a c b = error $ "oops: \nLeft: " ++ show a ++ "\nCommon: " ++ show c ++ "\nRight: " ++ show b -- | Break up a 'Weave' into 'Hunk's. hunks :: Weave -> [Hunk] hunks = groupBy grp where grp (Common _) (Common _) = True grp (Common _) _ = False grp _ (Common _) = False grp _ _ = True -- | Reformat a 'Hunk' into a format suitable for unified diff. Replaces are -- turned into add/remove pairs, all removals in a hunk go before all -- adds. 'Hunk's of 'Common' lines are left intact. Produces input suitable for -- 'reduceContext'. unifyHunk :: Hunk -> Hunk unifyHunk h = case h of (Common _:_) -> h _ -> reorder $ concatMap breakup h where reorder h' = [ Remove a | Remove a <- h' ] ++ [ Add a | Add a <- h' ] breakup (Replace f t) = [Remove f, Add t] breakup x = [x] -- | Break up a 'Weave' into unified 'Hunk's, leaving @n@ lines of context around -- every hunk. Consecutive 'Common' lines not used as context are replaced with -- 'Skip's. reduceContext :: Int -> [Hunk] -> [Hunk] reduceContext n hs = case hs of [] -> [] [Common _:_] -> [] [x] -> [x] [h,t] -> [reduce 0 n h, reduce n 0 t] (h:rest) -> reduce 0 n h : map (reduce n n) (init rest) ++ [reduce n 0 $ last rest] where reduce s e h@(Common _:_) | length h <= s + e = h | otherwise = take s h ++ [Skip $ length h - e - s ] ++ drop (length h - e) h reduce _ _ h = h -- | Format a 'Weave' for printing. deweave :: Weave -> BL.ByteString deweave = BL.unlines . map disp where disp (Common l) = BL.cons ' ' l disp (Remove l) = BL.cons '-' l disp (Add l) = BL.cons '+' l disp (Replace _ t) = BL.cons '!' t disp (Skip n) = BL.pack $ "-- skip " ++ show n ++ " lines --" -- | Print a \"hunked\" weave in form of an unified diff. 'Hunk' boundaries are -- marked up as 'Skip' lines. Cf. 'reduceContext'. printUnified :: Weave -> BL.ByteString printUnified hunked = printHunks 1 1 $ groupBy splits hunked where splits (Skip _) _ = False splits _ (Skip _) = False splits _ _ = True printHunks _ _ [] = BL.empty printHunks l r ([Skip n]:rest) = printHunks (n+l) (n+r) rest printHunks l r (h:rest) = (BL.pack $ "@@ -" ++ show l ++ "," ++ show (removals h) ++ " +" ++ show r ++ "," ++ show (adds h) ++ " @@\n") `BL.append` deweave h `BL.append` printHunks (l + removals h) (r + adds h) rest commons h = length [ () | (Common _) <- h ] adds h = commons h + length [ () | (Add _) <- h ] removals h = commons h + length [ () | (Remove _) <- h ] hashed-storage-0.5.10/Storage/Hashed/Darcs.hs0000644000000000000000000003131212025701732017050 0ustar0000000000000000{-# LANGUAGE ScopedTypeVariables, BangPatterns #-} -- | A few darcs-specific utility functions. These are used for reading and -- writing darcs and darcs-compatible hashed trees. 
module Storage.Hashed.Darcs where

import Prelude hiding ( lookup, catch )

import System.FilePath ( (</>) )
import System.Directory( doesFileExist )
import Codec.Compression.GZip( decompress, compress )
import Control.Applicative( (<$>) )
import Control.Exception( catch, IOException )
import qualified Data.ByteString.Char8 as BS8
import qualified Data.ByteString.Lazy.Char8 as BL8
import qualified Data.ByteString.Lazy as BL
import qualified Data.ByteString as BS
import Data.List( sortBy )
import Data.Char( chr, ord, isSpace )
import Data.Maybe( fromJust, isJust )
import qualified Data.Set as S
import Control.Monad.State.Strict

import Storage.Hashed.Tree hiding ( lookup )
import qualified Storage.Hashed.Tree as Tree
import Storage.Hashed.AnchoredPath
import Storage.Hashed.Utils
import Storage.Hashed.Hash
import Storage.Hashed.Packed
import Storage.Hashed.Monad

---------------------------------------------------------------------
-- Utilities for coping with the darcs directory format. --

-- | 'darcsDecodeWhite' interprets the Darcs-specific \"encoded\" filenames
-- produced by 'darcsEncodeWhite'.
--
-- > darcsDecodeWhite "hello\32\there" == "hello there"
-- > darcsDecodeWhite "hello\92\there" == "hello\there"
-- > darcsDecodeWhite "hello\there" == error "malformed filename"
darcsDecodeWhite :: String -> FilePath
darcsDecodeWhite ('\\':cs) =
  case break (=='\\') cs of
    (theord, '\\':rest) -> chr (read theord) : darcsDecodeWhite rest
    _ -> error "malformed filename"
darcsDecodeWhite (c:cs) = c : darcsDecodeWhite cs
darcsDecodeWhite "" = ""

-- | 'darcsEncodeWhite' translates whitespace in filenames to a darcs-specific
-- format (backslash followed by numerical representation according to 'ord').
-- Note that backslashes are also escaped since they are used in the encoding.
--
-- > darcsEncodeWhite "hello there" == "hello\32\there"
-- > darcsEncodeWhite "hello\there" == "hello\92\there"
darcsEncodeWhite :: FilePath -> String
darcsEncodeWhite (c:cs) | isSpace c || c == '\\' =
    '\\' : (show $ ord c) ++ "\\" ++ darcsEncodeWhite cs
darcsEncodeWhite (c:cs) = c : darcsEncodeWhite cs
darcsEncodeWhite [] = []

darcsEncodeWhiteBS :: BS8.ByteString -> BS8.ByteString
darcsEncodeWhiteBS = BS8.pack . darcsEncodeWhite . BS8.unpack

decodeDarcsHash :: BS8.ByteString -> Hash
decodeDarcsHash bs = case BS8.split '-' bs of
                       [s, h] | BS8.length s == 10 -> decodeBase16 h
                       _ -> decodeBase16 bs

decodeDarcsSize :: BS8.ByteString -> Maybe Int
decodeDarcsSize bs = case BS8.split '-' bs of
                       [s, _] | BS8.length s == 10 ->
                         case reads (BS8.unpack s) of
                           [(x, _)] -> Just x
                           _ -> Nothing
                       _ -> Nothing

darcsLocation :: FilePath -> (Maybe Int, Hash) -> FileSegment
darcsLocation dir (s,h) = case hash of
                            "" -> error "darcsLocation: invalid hash"
                            _ -> (dir </> prefix s ++ hash, Nothing)
    where prefix Nothing = ""
          prefix (Just s') = formatSize s' ++ "-"
          formatSize s' = let n = show s' in replicate (10 - length n) '0' ++ n
          hash = BS8.unpack (encodeBase16 h)

----------------------------------------------
-- Darcs directory format. --
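-- A hashed directory listing, as produced by 'darcsFormatDir' below, is a
-- sequence of three-line entries: a type tag (@file:@ or @directory:@), the
-- white-encoded name, and the base16 hash. For illustration:
--
-- > file:
-- > foo_a
-- > <64 hex digits of the entry's sha256 hash>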
darcsFormatDir :: Tree m -> Maybe BL8.ByteString
darcsFormatDir t = BL8.fromChunks <$> concat <$>
                   mapM string (sortBy cmp $ listImmediate t)
    where cmp (Name a, _) (Name b, _) = compare a b
          string (Name name, item) =
              do header <- case item of
                             File _ -> Just $ BS8.pack "file:\n"
                             _ -> Just $ BS8.pack "directory:\n"
                 hash <- case itemHash item of
                           NoHash -> Nothing
                           x -> Just $ encodeBase16 x
                 return $ [ header
                          , darcsEncodeWhiteBS name
                          , BS8.singleton '\n'
                          , hash, BS8.singleton '\n' ]

darcsParseDir :: BL8.ByteString -> [(ItemType, Name, Maybe Int, Hash)]
darcsParseDir content = parse (BL8.split '\n' content)
    where parse (t:n:h':r) =
              (header t,
               Name $ BS8.pack $ darcsDecodeWhite (BL8.unpack n),
               decodeDarcsSize hash,
               decodeDarcsHash hash) : parse r
                  where hash = BS8.concat $ BL8.toChunks h'
          parse _ = []
          header x
              | x == BL8.pack "file:" = BlobType
              | x == BL8.pack "directory:" = TreeType
              | otherwise = error $ "Error parsing darcs hashed dir: " ++
                                    BL8.unpack x

----------------------------------------
-- Utilities. --

-- | Compute a darcs-compatible hash value for a tree-like structure.
darcsTreeHash :: Tree m -> Hash
darcsTreeHash t = case darcsFormatDir t of
                    Nothing -> NoHash
                    Just x -> sha256 x

-- The following two are mostly for experimental use in Packed.

darcsUpdateDirHashes :: Tree m -> Tree m
darcsUpdateDirHashes = updateSubtrees update
    where update t = t { treeHash = darcsTreeHash t }

darcsUpdateHashes :: (Monad m, Functor m) => Tree m -> m (Tree m)
darcsUpdateHashes = updateTree update
    where update (SubTree t) = return . SubTree $ t { treeHash = darcsTreeHash t }
          update (File blob@(Blob con _)) =
              do hash <- sha256 <$> readBlob blob
                 return $ File (Blob con hash)

darcsHash (SubTree t) = return $ darcsTreeHash t
darcsHash (File blob) = sha256 <$> readBlob blob
darcsHash _ = return NoHash

darcsAddMissingHashes :: (Monad m, Functor m) => Tree m -> m (Tree m)
darcsAddMissingHashes = addMissingHashes darcsHash

-------------------------------------------
-- Reading darcs pristine data --

-- | Read and parse a darcs-style hashed directory listing from a given @dir@
-- and with a given @hash@.
readDarcsHashedDir :: FilePath -> (Maybe Int, Hash)
                   -> IO [(ItemType, Name, Maybe Int, Hash)]
readDarcsHashedDir dir h = do
  exist <- doesFileExist $ fst (darcsLocation dir h)
  unless exist $ fail $ "error opening " ++ fst (darcsLocation dir h)
  compressed <- readSegment $ darcsLocation dir h
  let content = decompress compressed
  return $ if BL8.null compressed
              then []
              else darcsParseDir content

-- | Read in a darcs-style hashed tree. This is mainly useful for reading
-- \"pristine.hashed\". You need to provide the root hash you are interested
-- in (found in _darcs/hashed_inventory).
readDarcsHashed' :: Bool -> FilePath -> (Maybe Int, Hash) -> IO (Tree IO)
readDarcsHashed' _ _ (_, NoHash) = fail "Cannot readDarcsHashed NoHash"
readDarcsHashed' sizefail dir root@(_, hash) = do
  items' <- readDarcsHashedDir dir root
  subs <- sequence [
           do when (sizefail && isJust s) $
                fail ("Unexpectedly encountered size-prefixed hash in " ++ dir)
              case tp of
                BlobType -> return (d, File $ Blob (readBlob' (s, h)) h)
                TreeType ->
                    do let t = readDarcsHashed dir (s, h)
                       return (d, Stub t h)
           | (tp, d, s, h) <- items' ]
  return $ makeTreeWithHash subs hash
    where readBlob' = fmap decompress . readSegment . darcsLocation dir

readDarcsHashed = readDarcsHashed' False
readDarcsHashedNosize dir hash = readDarcsHashed' True dir (Nothing, hash)

----------------------------------------------------
-- Writing darcs-style hashed trees. --
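-- A sketch of the writing direction (illustrative: the target directory is
-- made up, and @tree@ is any fully readable 'Tree'):
--
-- > do root <- writeDarcsHashed tree "_darcs/pristine.hashed"
-- >    print (encodeBase16 root)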
-- | Write a Tree into a darcs-style hashed directory.
writeDarcsHashed :: Tree IO -> FilePath -> IO Hash
writeDarcsHashed tree' dir = do
  t <- darcsUpdateDirHashes <$> expand tree'
  sequence_ [ dump =<< readBlob b | (_, File b) <- list t ]
  let dirs = darcsFormatDir t : [ darcsFormatDir d | (_, SubTree d) <- list t ]
  os' <- mapM dump $ map fromJust dirs
  return $ darcsTreeHash t
    where dump bits =
              do let name = dir </> BS8.unpack (encodeBase16 $ sha256 bits)
                 exist <- doesFileExist name
                 unless exist $ BL.writeFile name (compress bits)

-- | Create a hashed file from a 'FilePath' and content. In case the file
-- exists it is kept untouched and is assumed to have the right content. XXX
-- Corrupt files should probably be renamed out of the way automatically or
-- something (probably when they are being read though).
fsCreateHashedFile :: FilePath -> BL8.ByteString -> TreeIO ()
fsCreateHashedFile fn content = liftIO $ do
  exist <- doesFileExist fn
  unless exist $ BL.writeFile fn content

-- | Run a 'TreeIO' @action@ in a hashed setting. The @initial@ tree is
-- assumed to be fully available from the @directory@, and any changes will be
-- written out to same. Please note that actual filesystem files are never
-- removed.
hashedTreeIO :: TreeIO a -- ^ action
             -> Tree IO -- ^ initial
             -> FilePath -- ^ directory
             -> IO (a, Tree IO)
hashedTreeIO action t dir =
    do runTreeMonad action $ initialState t darcsHash updateItem
    where updateItem path (File b) = File <$> updateFile path b
          updateItem path (SubTree s) = SubTree <$> updateSub path s
          updateItem _ x = return x

          updateFile path b@(Blob _ !h) =
              do content <- liftIO $ readBlob b
                 let fn = dir </> BS8.unpack (encodeBase16 h)
                     nblob = Blob (decompress <$> rblob) h
                     rblob = BL.fromChunks <$> return <$> BS.readFile fn
                     newcontent = compress content
                 fsCreateHashedFile fn newcontent
                 return nblob

          updateSub path s =
              do let !hash = treeHash s
                     Just dirdata = darcsFormatDir s
                     fn = dir </> BS8.unpack (encodeBase16 hash)
                 fsCreateHashedFile fn (compress dirdata)
                 return s

--------------------------------------------------------------
-- Reading and writing packed pristine. EXPERIMENTAL. ----

-- | Read a Tree in the darcs hashed format from an object storage. This is
-- basically the same as readDarcsHashed from Storage.Hashed, but uses an
-- object storage instead of the traditional darcs filesystem layout. Requires
-- the tree root hash as a starting point.
readPackedDarcsPristine :: OS -> Hash -> IO (Tree IO)
readPackedDarcsPristine os root = do
  items' <- darcsParseDir <$> grab root
  subs <- sequence [ case tp of
                       BlobType -> return (d, File $ file h)
                       TreeType ->
                           let t = readPackedDarcsPristine os h
                            in return (d, Stub t h)
                     | (tp, d, _, h) <- items' ]
  return $ makeTreeWithHash subs root
    where file h = Blob (grab h) h
          grab hash = do maybeseg <- lookup os hash
                         case maybeseg of
                           Nothing -> fail $ "hash " ++
                                             BS8.unpack (encodeBase16 hash) ++
                                             " not available"
                           Just seg -> readSegment seg

-- | Write a Tree into an object storage, using the darcs-style directory
-- formatting (and therefore darcs-style hashes). Gives back the object
-- storage and the root hash of the stored Tree. NB. The function expects that
-- the Tree comes equipped with darcs-style hashes already!
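--
-- A minimal round-trip sketch through an object storage @os@ (illustrative
-- only):
--
-- > do (os', root) <- writePackedDarcsPristine tree os
-- >    tree' <- readPackedDarcsPristine os' root >>= expand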
writePackedDarcsPristine :: Tree IO -> OS -> IO (OS, Hash) writePackedDarcsPristine tree' os = do t <- darcsUpdateDirHashes <$> expand tree' files <- sequence [ readBlob b | (_, File b) <- list t ] let dirs = darcsFormatDir t : [ darcsFormatDir d | (_, SubTree d) <- list t ] os' <- hatch os $ files ++ (map fromJust dirs) return (os', darcsTreeHash t) storePackedDarcsPristine :: Tree IO -> OS -> IO (OS, Hash) storePackedDarcsPristine tree' os = do (os', root) <- writePackedDarcsPristine tree' os return $ (os' { roots = root : roots os' -- FIXME we probably don't want to override the references -- thing completely here... , references = darcsPristineRefs }, root) darcsPristineRefs :: FileSegment -> IO [Hash] darcsPristineRefs fs = do con <- (darcsParseDir <$> readSegment fs) `catch` \(_ :: IOException) -> return [] return $! [ hash | (_, _, _, hash) <- con, valid hash ] where valid NoHash = False valid _ = True darcsCheckExpand :: Tree IO -> IO (Either [(FilePath, Hash, Maybe Hash)] (Tree IO)) darcsCheckExpand t = do problemsOrTree <- checkExpand darcsHash t case problemsOrTree of Left problems -> return . Left $ map render problems Right tree -> return . Right $ tree where render (path, h, h') = (anchorPath "." path, h, h')hashed-storage-0.5.10/Storage/Hashed/Monad.hs0000644000000000000000000002707112025701732017061 0ustar0000000000000000{-# LANGUAGE ScopedTypeVariables, BangPatterns, TypeSynonymInstances, UndecidableInstances, FlexibleInstances #-} -- | An experimental monadic interface to Tree mutation. The main idea is to -- simulate IO-ish manipulation of real filesystem (that's the state part of -- the monad), and to keep memory usage down by reasonably often dumping the -- intermediate data to disk and forgetting it. The monad interface itself is -- generic, and a number of actual implementations can be used. This module -- provides just 'virtualTreeIO' that never writes any changes, but may trigger -- filesystem reads as appropriate. module Storage.Hashed.Monad ( virtualTreeIO, virtualTreeMonad , readFile, writeFile, createDirectory, rename, copy, unlink , fileExists, directoryExists, exists, withDirectory , currentDirectory , tree, TreeState, TreeMonad, TreeIO, runTreeMonad , initialState, replaceItem , findM, findFileM, findTreeM , TreeRO, TreeRW ) where import Prelude hiding ( readFile, writeFile ) import Storage.Hashed.AnchoredPath import Storage.Hashed.Tree import Storage.Hashed.Hash import Control.Applicative( (<$>) ) import Data.List( sortBy ) import Data.Int( Int64 ) import Data.Maybe( isNothing, isJust ) import qualified Data.ByteString.Lazy.Char8 as BL import Control.Monad.RWS.Strict import qualified Data.Set as S import qualified Data.Map as M type Changed = M.Map AnchoredPath (Int64, Int64) -- size, age -- | Internal state of the 'TreeIO' monad. Keeps track of the current Tree -- content, unsync'd changes and a current working directory (of the monad). data TreeState m = TreeState { tree :: !(Tree m) , changed :: !Changed , changesize :: !Int64 , maxage :: !Int64 , updateHash :: TreeItem m -> m Hash , update :: AnchoredPath -> TreeItem m -> TreeMonad m (TreeItem m) } -- | A 'TreeIO' monad. A sort of like IO but it keeps a 'TreeState' around as well, -- which is a sort of virtual filesystem. Depending on how you obtained your -- 'TreeIO', the actions in your virtual filesystem get somehow reflected in the -- actual real filesystem. 
For 'virtualTreeIO', nothing happens in real -- filesystem, however with 'plainTreeIO', the plain tree will be updated every -- now and then, and with 'hashedTreeIO' a darcs-style hashed tree will get -- updated. type TreeMonad m = RWST AnchoredPath () (TreeState m) m type TreeIO = TreeMonad IO class (Functor m, Monad m) => TreeRO m where currentDirectory :: m AnchoredPath withDirectory :: AnchoredPath -> m a -> m a expandTo :: AnchoredPath -> m AnchoredPath -- | Grab content of a file in the current Tree at the given path. readFile :: AnchoredPath -> m BL.ByteString -- | Check for existence of a node (file or directory, doesn't matter). exists :: AnchoredPath -> m Bool -- | Check for existence of a directory. directoryExists ::AnchoredPath -> m Bool -- | Check for existence of a file. fileExists :: AnchoredPath -> m Bool class TreeRO m => TreeRW m where -- | Change content of a file at a given path. The change will be -- eventually flushed to disk, but might be buffered for some time. writeFile :: AnchoredPath -> BL.ByteString -> m () createDirectory :: AnchoredPath -> m () unlink :: AnchoredPath -> m () rename :: AnchoredPath -> AnchoredPath -> m () copy :: AnchoredPath -> AnchoredPath -> m () initialState :: Tree m -> (TreeItem m -> m Hash) -> (AnchoredPath -> TreeItem m -> TreeMonad m (TreeItem m)) -> TreeState m initialState t uh u = TreeState { tree = t , changed = M.empty , changesize = 0 , updateHash = uh , maxage = 0 , update = u } flush :: (Functor m, Monad m) => TreeMonad m () flush = do current <- get changed' <- map fst <$> M.toList <$> gets changed dirs' <- gets tree >>= \t -> return [ path | (path, SubTree s) <- list t ] modify $ \st -> st { changed = M.empty, changesize = 0 } forM_ (changed' ++ dirs' ++ [AnchoredPath []]) flushItem runTreeMonad' :: (Functor m, Monad m) => TreeMonad m a -> TreeState m -> m (a, Tree m) runTreeMonad' action initial = do (out, final, _) <- runRWST action (AnchoredPath []) initial return (out, tree final) runTreeMonad :: (Functor m, Monad m) => TreeMonad m a -> TreeState m -> m (a, Tree m) runTreeMonad action initial = do let action' = do x <- action flush return x runTreeMonad' action' initial -- | Run a TreeIO action without storing any changes. This is useful for -- running monadic tree mutations for obtaining the resulting Tree (as opposed -- to their effect of writing a modified tree to disk). The actions can do both -- read and write -- reads are passed through to the actual filesystem, but the -- writes are held in memory in a form of modified Tree. virtualTreeMonad :: (Functor m, Monad m) => TreeMonad m a -> Tree m -> m (a, Tree m) virtualTreeMonad action t = runTreeMonad' action $ initialState t (\_ -> return NoHash) (\_ x -> return x) virtualTreeIO :: TreeIO a -> Tree IO -> IO (a, Tree IO) virtualTreeIO = virtualTreeMonad -- | Modifies an item in the current Tree. This action keeps an account of the -- modified data, in changed and changesize, for subsequent flush -- operations. Any modifications (as in "modifyTree") are allowed. 
modifyItem :: (Functor m, Monad m) => AnchoredPath -> Maybe (TreeItem m) -> TreeMonad m () modifyItem path item = do path' <- (`catPaths` path) `fmap` currentDirectory age <- gets maxage changed' <- gets changed let getsize (Just (File b)) = lift (BL.length `fmap` readBlob b) getsize _ = return 0 size <- getsize item let change = case M.lookup path' changed' of Nothing -> size Just (oldsize, _) -> size - oldsize modify $ \st -> st { tree = modifyTree (tree st) path' item , changed = M.insert path' (size, age) (changed st) , maxage = age + 1 , changesize = (changesize st + change) } renameChanged from to = modify $ \st -> st { changed = rename' $ changed st } where rename' = M.fromList . map renameone . M.toList renameone (x, d) | from `isPrefix` x = (to `catPaths` relative from x, d) | otherwise = (x, d) relative (AnchoredPath from) (AnchoredPath x) = AnchoredPath $ drop (length from) x -- | Replace an item with a new version without modifying the content of the -- tree. This does not do any change tracking. Ought to be only used from a -- 'sync' implementation for a particular storage format. The presumed use-case -- is that an existing in-memory Blob is replaced with a one referring to an -- on-disk file. replaceItem :: (Functor m, Monad m) => AnchoredPath -> Maybe (TreeItem m) -> TreeMonad m () replaceItem path item = do path' <- (`catPaths` path) `fmap` currentDirectory modify $ \st -> st { tree = modifyTree (tree st) path' item } flushItem :: forall e m. (Monad m, Functor m) => AnchoredPath -> TreeMonad m () flushItem path = do current <- gets tree case find current path of Nothing -> return () -- vanished, do nothing Just x -> do y <- fixHash x new <- gets update >>= ($ y) . ($ path) replaceItem path (Just new) where fixHash :: TreeItem m -> TreeMonad m (TreeItem m) fixHash f@(File (Blob con NoHash)) = do hash <- gets updateHash >>= \x -> lift $ x f return $ File $ Blob con hash fixHash (SubTree s) | treeHash s == NoHash = gets updateHash >>= \f -> SubTree <$> lift (addMissingHashes f s) fixHash x = return x -- | If buffers are becoming large, sync, otherwise do nothing. flushSome :: (Monad m, Functor m) => TreeMonad m () flushSome = do x <- gets changesize when (x > megs 100) $ do remaining <- go =<< sortBy age <$> M.toList <$> gets changed modify $ \s -> s { changed = M.fromList remaining } where go [] = return [] go ((path, (size, age_)):chs) = do x <- (\s -> s - size) <$> gets changesize flushItem path modify $ \s -> s { changesize = x } if (x > megs 50) then go chs else return $ chs megs = (* (1024 * 1024)) age (_, (_, a)) (_, (_, b)) = compare a b instance (Functor m, Monad m) => TreeRO (TreeMonad m) where expandTo p = do t <- gets tree p' <- (`catPaths` p) `fmap` ask let amend = do t' <- lift $ expandPath t p' modify $ \st -> st { tree = t' } case find t p' of Nothing -> amend Just (Stub _ _) -> amend _ -> return () return p' fileExists p = do p' <- expandTo p (isJust . (flip findFile p')) `fmap` gets tree directoryExists p = do p' <- expandTo p (isJust . (flip findTree p')) `fmap` gets tree exists p = do p' <- expandTo p (isJust . 
(flip find p')) `fmap` gets tree readFile p = do p' <- expandTo p t <- gets tree let f = findFile t p' case f of Nothing -> fail $ "No such file " ++ show p' Just x -> lift (readBlob x) currentDirectory = ask withDirectory dir act = do dir' <- expandTo dir local (\old -> dir') act instance (Functor m, Monad m) => TreeRW (TreeMonad m) where writeFile p con = do expandTo p modifyItem p (Just blob) flushSome where blob = File $ Blob (return con) hash hash = NoHash -- we would like to say "sha256 con" here, but due -- to strictness of Hash in Blob, this would often -- lead to unnecessary computation which would then -- be discarded anyway; we rely on the sync -- implementation to fix up any NoHash occurrences createDirectory p = do expandTo p modifyItem p $ Just $ SubTree emptyTree unlink p = do expandTo p modifyItem p Nothing rename from to = do from' <- expandTo from to' <- expandTo to tr <- gets tree let item = find tr from' found_to = find tr to' unless (isNothing found_to) $ fail $ "Error renaming: destination " ++ show to ++ " exists." unless (isNothing item) $ do modifyItem from Nothing modifyItem to item renameChanged from to copy from to = do from' <- expandTo from to' <- expandTo to tr <- gets tree let item = find tr from' unless (isNothing item) $ modifyItem to item findM' :: forall m a e. (Monad m, Functor m) => (Tree m -> AnchoredPath -> a) -> Tree m -> AnchoredPath -> m a findM' what t path = fst <$> virtualTreeMonad (look path) t where look :: AnchoredPath -> TreeMonad m a look = expandTo >=> \p' -> flip what p' <$> gets tree findM :: (Monad m, Functor m) => Tree m -> AnchoredPath -> m (Maybe (TreeItem m)) findM = findM' find findTreeM :: (Monad m, Functor m) => Tree m -> AnchoredPath -> m (Maybe (Tree m)) findTreeM = findM' findTree findFileM :: (Monad m, Functor m) => Tree m -> AnchoredPath -> m (Maybe (Blob m)) findFileM = findM' findFile hashed-storage-0.5.10/Storage/Hashed/Tree.hs0000644000000000000000000005170612025701732016724 0ustar0000000000000000{-# LANGUAGE ScopedTypeVariables, MultiParamTypeClasses, FlexibleInstances, BangPatterns #-} -- | The abstract representation of a Tree and useful abstract utilities to -- handle those. module Storage.Hashed.Tree ( Tree, Blob(..), TreeItem(..), ItemType(..), Hash(..) , makeTree, makeTreeWithHash, emptyTree, emptyBlob, makeBlob, makeBlobBS -- * Unfolding stubbed (lazy) Trees. -- -- | By default, Tree obtained by a read function is stubbed: it will -- contain Stub items that need to be executed in order to access the -- respective subtrees. 'expand' will produce an unstubbed Tree. , expandUpdate, expand, expandPath, checkExpand -- * Tree access and lookup. , items, list, listImmediate, treeHash , lookup, find, findFile, findTree, itemHash, itemType , zipCommonFiles, zipFiles, zipTrees, diffTrees -- * Files (Blobs). , readBlob -- * Filtering trees. , FilterTree(..), restrict -- * Manipulating trees. 
, modifyTree, updateTree, partiallyUpdateTree, updateSubtrees, overlay , addMissingHashes ) where import Control.Exception( catch, IOException ) import Prelude hiding( lookup, filter, all, catch ) import Storage.Hashed.AnchoredPath import Storage.Hashed.Hash import qualified Data.ByteString.Lazy.Char8 as BL import qualified Data.ByteString.Char8 as BS import qualified Data.Map as M import Data.Maybe( catMaybes, isNothing ) import Data.Either( lefts, rights ) import Data.List( union, sort ) import Control.Monad( filterM, liftM ) import Control.Applicative( (<$>) ) -------------------------------- -- Tree, Blob and friends -- data Blob m = Blob !(m BL.ByteString) !Hash data TreeItem m = File !(Blob m) | SubTree !(Tree m) | Stub !(m (Tree m)) !Hash data ItemType = BlobType | TreeType deriving (Show, Eq) -- | Abstraction of a filesystem tree. -- Please note that the Tree returned by the respective read operations will -- have TreeStub items in it. To obtain a Tree without such stubs, call -- expand on it, eg.: -- -- > tree <- readDarcsPristine "." >>= expand -- -- When a Tree is expanded, it becomes \"final\". All stubs are forced and the -- Tree can be traversed purely. Access to actual file contents stays in IO -- though. -- -- A Tree may have a Hash associated with it. A pair of Tree's is identical -- whenever their hashes are (the reverse need not hold, since not all Trees -- come equipped with a hash). data Tree m = Tree { items :: (M.Map Name (TreeItem m)) -- | Get hash of a Tree. This is guaranteed to uniquely -- identify the Tree (including any blob content), as far as -- cryptographic hashes are concerned. Sha256 is recommended. , treeHash :: !Hash } listImmediate :: Tree m -> [(Name, TreeItem m)] listImmediate = M.toList . items -- | Get a hash of a TreeItem. May be Nothing. itemHash :: TreeItem m -> Hash itemHash (File (Blob _ h)) = h itemHash (SubTree t) = treeHash t itemHash (Stub _ h) = h itemType :: TreeItem m -> ItemType itemType (File _) = BlobType itemType (SubTree _) = TreeType itemType (Stub _ _) = TreeType emptyTree :: (Monad m) => Tree m emptyTree = Tree { items = M.empty , treeHash = NoHash } emptyBlob :: (Monad m) => Blob m emptyBlob = Blob (return BL.empty) NoHash makeBlob :: (Monad m) => BL.ByteString -> Blob m makeBlob str = Blob (return str) (sha256 str) makeBlobBS :: (Monad m) => BS.ByteString -> Blob m makeBlobBS s' = let s = BL.fromChunks [s'] in Blob (return s) (sha256 s) makeTree :: (Monad m) => [(Name,TreeItem m)] -> Tree m makeTree l = Tree { items = M.fromList l , treeHash = NoHash } makeTreeWithHash :: (Monad m) => [(Name,TreeItem m)] -> Hash -> Tree m makeTreeWithHash l h = Tree { items = M.fromList l , treeHash = h } ----------------------------------- -- Tree access and lookup -- -- | Look up a 'Tree' item (an immediate subtree or blob). lookup :: Tree m -> Name -> Maybe (TreeItem m) lookup t n = M.lookup n (items t) find' :: TreeItem m -> AnchoredPath -> Maybe (TreeItem m) find' t (AnchoredPath []) = Just t find' (SubTree t) (AnchoredPath (d : rest)) = case lookup t d of Just sub -> find' sub (AnchoredPath rest) Nothing -> Nothing find' _ _ = Nothing -- | Find a 'TreeItem' by its path. Gives 'Nothing' if the path is invalid. find :: Tree m -> AnchoredPath -> Maybe (TreeItem m) find = find' . SubTree -- | Find a 'Blob' by its path. Gives 'Nothing' if the path is invalid, or does -- not point to a Blob. findFile :: Tree m -> AnchoredPath -> Maybe (Blob m) findFile t p = case find t p of Just (File x) -> Just x _ -> Nothing -- | Find a 'Tree' by its path. 
Gives 'Nothing' if the path is invalid, or does -- not point to a Tree. findTree :: Tree m -> AnchoredPath -> Maybe (Tree m) findTree t p = case find t p of Just (SubTree x) -> Just x _ -> Nothing -- | List all contents of a 'Tree'. list :: Tree m -> [(AnchoredPath, TreeItem m)] list t_ = paths t_ (AnchoredPath []) where paths t p = [ (appendPath p n, i) | (n,i) <- listImmediate t ] ++ concat [ paths subt (appendPath p subn) | (subn, SubTree subt) <- listImmediate t ] expandUpdate :: (Monad m) => (AnchoredPath -> Tree m -> m (Tree m)) -> Tree m -> m (Tree m) expandUpdate update t_ = go (AnchoredPath []) t_ where go path t = do let subtree (name, sub) = do tree <- go (path `appendPath` name) =<< unstub sub return (name, SubTree tree) expanded <- mapM subtree [ x | x@(_, item) <- listImmediate t, isSub item ] let orig_map = M.filter (not . isSub) (items t) expanded_map = M.fromList expanded tree = t { items = M.union orig_map expanded_map } update path tree -- | Expand a stubbed Tree into a one with no stubs in it. You might want to -- filter the tree before expanding to save IO. This is the basic -- implementation, which may be overriden by some Tree instances (this is -- especially true of the Index case). expand :: (Monad m) => Tree m -> m (Tree m) expand = expandUpdate $ \_ -> return -- | Unfold a path in a (stubbed) Tree, such that the leaf node of the path is -- reachable without crossing any stubs. Moreover, the leaf ought not be a Stub -- in the resulting Tree. A non-existent path is expanded as far as it can be. expandPath :: (Monad m) => Tree m -> AnchoredPath -> m (Tree m) expandPath t_ path_ = expand' t_ path_ where expand' t (AnchoredPath []) = return t expand' t (AnchoredPath (n:rest)) = case lookup t n of (Just item) | isSub item -> amend t n rest =<< unstub item _ -> return t -- fail $ "Descent error in expandPath: " ++ show path_ amend t name rest sub = do sub' <- expand' sub (AnchoredPath rest) let tree = t { items = M.insert name (SubTree sub') (items t) } return tree -- | Check the disk version of a Tree: expands it, and checks each -- hash. Returns either the expanded tree or a list of AnchoredPaths -- where there are problems. The first argument is the hashing function -- used to create the tree. checkExpand :: (TreeItem IO -> IO Hash) -> Tree IO -> IO (Either [(AnchoredPath, Hash, Maybe Hash)] (Tree IO)) checkExpand hashFunc t = go (AnchoredPath []) t where go path t_ = do let subtree (name, sub) = do let here = path `appendPath` name sub' <- (Just <$> unstub sub) `catch` \(_ :: IOException) -> return Nothing case sub' of Nothing -> return $ Left [(here, treeHash t_, Nothing)] Just sub -> do treeOrTrouble <- go (path `appendPath` name) sub return $ case treeOrTrouble of Left problems -> Left problems Right tree -> Right (name, SubTree tree) badBlob (_, f@(File (Blob s h))) = fmap (/= h) (hashFunc f `catch` (\(_ :: IOException) -> return NoHash)) badBlob _ = return False render (name, f@(File (Blob _ h))) = do h' <- (Just <$> hashFunc f) `catch` \(_ :: IOException) -> return Nothing return (path `appendPath` name, h, h') subs <- mapM subtree [ x | x@(_, item) <- listImmediate t_, isSub item ] badBlobs <- filterM badBlob (listImmediate t) >>= mapM render let problems = badBlobs ++ (concat $ lefts subs) if null problems then do let orig_map = M.filter (not . 
isSub) (items t) expanded_map = M.fromList $ rights subs tree = t_ {items = orig_map `M.union` expanded_map} h' <- hashFunc (SubTree t_) if h' `match` treeHash t_ then return $ Right tree else return $ Left [(path, treeHash t_, Just h')] else return $ Left problems class (Monad m) => FilterTree a m where -- | Given @pred tree@, produce a 'Tree' that only has items for which -- @pred@ returns @True@. -- The tree might contain stubs. When expanded, these will be subject to -- filtering as well. filter :: (AnchoredPath -> TreeItem m -> Bool) -> a m -> a m instance (Monad m) => FilterTree Tree m where filter predicate t_ = filter' t_ (AnchoredPath []) where filter' t path = let subs = (catMaybes [ (,) name `fmap` wibble path name item | (name,item) <- listImmediate t ]) in t { items = M.mapMaybeWithKey (wibble path) $ items t } wibble path name item = let npath = path `appendPath` name in if predicate npath item then Just $ filterSub npath item else Nothing filterSub npath (SubTree t) = SubTree $ filter' t npath filterSub npath (Stub stub h) = Stub (do x <- stub return $ filter' x npath) h filterSub _ x = x -- | Given two Trees, a @guide@ and a @tree@, produces a new Tree that is a -- identical to @tree@, but only has those items that are present in both -- @tree@ and @guide@. The @guide@ Tree may not contain any stubs. restrict :: (FilterTree t m, Monad n) => Tree n -> t m -> t m restrict guide tree = filter accept tree where accept path item = case (find guide path, item) of (Just (SubTree _), SubTree _) -> True (Just (SubTree _), Stub _ _) -> True (Just (File _), File _) -> True (Just (Stub _ _), _) -> error "*sulk* Go away, you, you precondition violator!" (_, _) -> False -- | Read a Blob into a Lazy ByteString. Might be backed by an mmap, use with -- care. readBlob :: Blob m -> m BL.ByteString readBlob (Blob r _) = r -- | For every pair of corresponding blobs from the two supplied trees, -- evaluate the supplied function and accumulate the results in a list. Hint: -- to get IO actions through, just use sequence on the resulting list. -- NB. This won't expand any stubs. zipCommonFiles :: (AnchoredPath -> Blob m -> Blob m -> a) -> Tree m -> Tree m -> [a] zipCommonFiles f a b = catMaybes [ flip (f p) x `fmap` findFile a p | (p, File x) <- list b ] -- | For each file in each of the two supplied trees, evaluate the supplied -- function (supplying the corresponding file from the other tree, or Nothing) -- and accumulate the results in a list. Hint: to get IO actions through, just -- use sequence on the resulting list. NB. This won't expand any stubs. zipFiles :: (AnchoredPath -> Maybe (Blob m) -> Maybe (Blob m) -> a) -> Tree m -> Tree m -> [a] zipFiles f a b = [ f p (findFile a p) (findFile b p) | p <- paths a `sortedUnion` paths b ] where paths t = sort [ p | (p, File _) <- list t ] zipTrees :: (AnchoredPath -> Maybe (TreeItem m) -> Maybe (TreeItem m) -> a) -> Tree m -> Tree m -> [a] zipTrees f a b = [ f p (find a p) (find b p) | p <- reverse (paths a `sortedUnion` paths b) ] where paths t = sort [ p | (p, _) <- list t ] -- | Helper function for taking the union of AnchoredPath lists that -- are already sorted. This function does not check the precondition -- so use it carefully. sortedUnion :: [AnchoredPath] -> [AnchoredPath] -> [AnchoredPath] sortedUnion [] ys = ys sortedUnion xs [] = xs sortedUnion a@(x:xs) b@(y:ys) = case compare x y of LT -> x : sortedUnion xs b EQ -> x : sortedUnion xs ys GT -> y : sortedUnion a ys -- | Cautiously extracts differing subtrees from a pair of Trees. 
It will never -- do any unneccessary expanding. Tree hashes are used to cut the comparison as -- high up the Tree branches as possible. The result is a pair of trees that do -- not share any identical subtrees. They are derived from the first and second -- parameters respectively and they are always fully expanded. It might be -- advantageous to feed the result into 'zipFiles' or 'zipTrees'. diffTrees :: forall m. (Functor m, Monad m) => Tree m -> Tree m -> m (Tree m, Tree m) diffTrees left right = if treeHash left `match` treeHash right then return (emptyTree, emptyTree) else diff left right where isFile (File _) = True isFile _ = False notFile = not . isFile isEmpty = null . listImmediate subtree :: TreeItem m -> m (Tree m) subtree (Stub x _) = x subtree (SubTree x) = return x subtree (File _) = error "diffTrees tried to descend a File as a subtree" maybeUnfold (Stub x _) = SubTree `fmap` (x >>= expand) maybeUnfold (SubTree x) = SubTree `fmap` expand x maybeUnfold i = return i immediateN t = [ n | (n, _) <- listImmediate t ] diff left' right' = do is <- sequence [ case (lookup left' n, lookup right' n) of (Just l, Nothing) -> do l' <- maybeUnfold l return (n, Just l', Nothing) (Nothing, Just r) -> do r' <- maybeUnfold r return (n, Nothing, Just r') (Just l, Just r) | itemHash l `match` itemHash r -> return (n, Nothing, Nothing) | notFile l && notFile r -> do x <- subtree l y <- subtree r (x', y') <- diffTrees x y if isEmpty x' && isEmpty y' then return (n, Nothing, Nothing) else return (n, Just $ SubTree x', Just $ SubTree y') | isFile l && isFile r -> return (n, Just l, Just r) | otherwise -> do l' <- maybeUnfold l r' <- maybeUnfold r return (n, Just l', Just r') _ -> error "n lookups failed" | n <- immediateN left' `union` immediateN right' ] let is_l = [ (n, l) | (n, Just l, _) <- is ] is_r = [ (n, r) | (n, _, Just r) <- is ] return (makeTree is_l, makeTree is_r) -- | Modify a Tree (by replacing, or removing or adding items). modifyTree :: (Monad m) => Tree m -> AnchoredPath -> Maybe (TreeItem m) -> Tree m modifyTree t_ p_ i_ = snd $ go t_ p_ i_ where fix t unmod items' = (unmod, t { items = countmap items' `seq` items' , treeHash = if unmod then treeHash t else NoHash }) go t (AnchoredPath []) (Just (SubTree sub)) = (treeHash t `match` treeHash sub, sub) go t (AnchoredPath [n]) (Just item) = fix t unmod items' where !items' = M.insert n item (items t) !unmod = itemHash item `match` case lookup t n of Nothing -> NoHash Just i -> itemHash i go t (AnchoredPath [n]) Nothing = fix t unmod items' where !items' = M.delete n (items t) !unmod = isNothing $ lookup t n go t path@(AnchoredPath (n:r)) item = fix t unmod items' where subtree s = go s (AnchoredPath r) item !items' = M.insert n sub (items t) !sub = snd sub' !unmod = fst sub' !sub' = case lookup t n of Just (SubTree s) -> let (mod, sub) = subtree s in (mod, SubTree sub) Just (Stub s _) -> (False, Stub (do x <- s return $! snd $! subtree x) NoHash) Nothing -> (False, SubTree $! snd $! subtree emptyTree) _ -> error $ "Modify tree at " ++ show path go _ (AnchoredPath []) (Just (Stub _ _)) = error $ "BUG: Error descending in modifyTree, path = " ++ show p_ go _ (AnchoredPath []) (Just (File _)) = error $ "BUG: Error descending in modifyTree, path = " ++ show p_ go _ (AnchoredPath []) Nothing = error $ "BUG: Error descending in modifyTree, path = " ++ show p_ countmap = M.fold (\_ i -> i + 1) 0 updateSubtrees :: (Tree m -> Tree m) -> Tree m -> Tree m updateSubtrees fun t = fun $ t { items = M.mapWithKey (curry $ snd . 
updateSubtrees :: (Tree m -> Tree m) -> Tree m -> Tree m
updateSubtrees fun t =
    fun $ t { items = M.mapWithKey (curry $ snd . update) $ items t
            , treeHash = NoHash }
    where update (k, SubTree s) = (k, SubTree $ updateSubtrees fun s)
          update (k, File f) = (k, File f)
          update (_, Stub _ _) = error "Stubs not supported in updateTreePostorder"

-- | Does /not/ expand the tree.
updateTree :: (Functor m, Monad m)
           => (TreeItem m -> m (TreeItem m)) -> Tree m -> m (Tree m)
updateTree fun t = partiallyUpdateTree fun (\_ _ -> True) t

-- | Does /not/ expand the tree.
partiallyUpdateTree :: (Functor m, Monad m)
                    => (TreeItem m -> m (TreeItem m))
                    -> (AnchoredPath -> TreeItem m -> Bool)
                    -> Tree m -> m (Tree m)
partiallyUpdateTree fun pred t' = go (AnchoredPath []) t'
    where go path t = do
            items' <- M.fromList <$> mapM (maybeupdate path) (listImmediate t)
            SubTree t' <- fun . SubTree $ t { items = items'
                                            , treeHash = NoHash }
            return t'
          maybeupdate path (k, item) =
              case pred (path `appendPath` k) item of
                True -> update (path `appendPath` k) (k, item)
                False -> return (k, item)
          update path (k, SubTree tree) = (\new -> (k, SubTree new)) <$> go path tree
          update _ (k, item) = (\new -> (k, new)) <$> fun item

-- | Lay one tree over another. The resulting Tree will look like the base
-- (first parameter) Tree, except that any items also present in the overlay
-- Tree are taken from the overlay. It is not allowed to overlay a different
-- kind of object, nor is the overlay allowed to add new objects to the base.
-- This means that the overlay Tree should be a subset of the base Tree
-- (although any extraneous items will be ignored by the implementation).
overlay :: (Functor m, Monad m) => Tree m -> Tree m -> Tree m
overlay base over = Tree { items = M.fromList immediate
                         , treeHash = NoHash }
    where immediate = [ (n, get n) | (n, _) <- listImmediate base ]
          get n = case (M.lookup n $ items base, M.lookup n $ items over) of
                    (Just (File _), Just f@(File _)) -> f
                    (Just (SubTree b), Just (SubTree o)) -> SubTree $ overlay b o
                    (Just (Stub b _), Just (SubTree o)) ->
                        Stub (flip overlay o `fmap` b) NoHash
                    (Just (SubTree b), Just (Stub o _)) ->
                        Stub (overlay b `fmap` o) NoHash
                    (Just (Stub b _), Just (Stub o _)) ->
                        Stub (do o' <- o
                                 b' <- b
                                 return $ overlay b' o') NoHash
                    (Just x, _) -> x
                    (_, _) -> error $ "Unexpected case in overlay at get " ++ show n ++ "."

addMissingHashes :: (Monad m, Functor m)
                 => (TreeItem m -> m Hash) -> Tree m -> m (Tree m)
addMissingHashes make = updateTree update -- TODO: use partiallyUpdateTree here
    where update (SubTree t) = make (SubTree t) >>= \x ->
              return $ SubTree (t { treeHash = x })
          update (File blob@(Blob con NoHash)) = do
              hash <- make $ File blob
              return $ File (Blob con hash)
          update (Stub s NoHash) = update . SubTree =<< s
          update x = return x
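-- Editor's illustrative sketch (not part of the original API): combine
-- 'overlay' with 'restrict'. Given a stub-free 'guide', lay 'over' on top of
-- 'base' and keep only the guided part. The 'overlay' preconditions are
-- assumed to hold (the overlay is a subset of the base, with matching item
-- kinds).
restrictedOverlay :: (Functor m, Monad m)
                  => Tree m -> Tree m -> Tree m -> Tree m
restrictedOverlay guide base over = restrict guide (overlay base over)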
------ Private utilities shared among multiple functions. --------

-- Note: deliberately partial; a File has no subtree to unstub.
unstub :: (Monad m) => TreeItem m -> m (Tree m)
unstub (Stub s _) = s
unstub (SubTree s) = return s

isSub :: TreeItem m -> Bool
isSub (File _) = False
isSub _ = True
hashed-storage-0.5.10/Bundled/0000755000000000000000000000000012025701732014235 5ustar0000000000000000hashed-storage-0.5.10/Bundled/Posix.hsc0000644000000000000000000000567712025701732016045 0ustar0000000000000000{-# LANGUAGE CPP #-}
module Bundled.Posix( getFdStatus, getSymbolicLinkStatus, getFileStatus
                    , getFileStatusBS
                    , fileExists
                    , modificationTime, fileSize, FileStatus
                    , EpochTime, isDirectory, isRegularFile ) where

import qualified Data.ByteString.Char8 as BS
import Data.ByteString.Unsafe( unsafeUseAsCString )

import Foreign.Marshal.Alloc ( allocaBytes )
import Foreign.C.Error ( throwErrno, getErrno, eNOENT )
import Foreign.C.Types ( CTime, CInt )
import Foreign.Ptr ( Ptr )

import System.Posix.Internals ( CStat, c_fstat, c_stat, sizeof_stat
                              , st_mode, st_size, st_mtime, s_isdir, s_isreg )
import System.Posix.Types ( Fd(..), CMode, EpochTime )

#if mingw32_HOST_OS && __GLASGOW_HASKELL__ >= 612
import Foreign.C.String( withCWString )
#else
import Foreign.C.String ( withCString )
#endif

#if mingw32_HOST_OS
import Data.Int ( Int64 )
type FileOffset = Int64
lstat = c_stat
#else
import System.Posix.Types ( FileOffset )
import System.Posix.Internals( lstat )
#endif

#if mingw32_HOST_OS && __GLASGOW_HASKELL__ >= 612
bsToPath s f = withCWString (BS.unpack s) f
strToPath = withCWString
#else
bsToPath = unsafeUseAsCString
strToPath = withCString
#endif

data FileStatus = FileStatus
    { fst_exists :: !Bool
    , fst_mode :: !CMode
    , fst_mtime :: !CTime
    , fst_size :: !FileOffset }

getFdStatus :: Fd -> IO FileStatus
getFdStatus (Fd fd) = do_stat (c_fstat fd)

do_stat :: (Ptr CStat -> IO CInt) -> IO FileStatus
do_stat stat_func =
    allocaBytes sizeof_stat $! \p -> do
      ret <- stat_func p
      if ret == -1
         then do err <- getErrno
                 if err == eNOENT
                    then return $! FileStatus False 0 0 0
                    else throwErrno "do_stat"
         else do mode <- st_mode p
                 mtime <- st_mtime p
                 size <- st_size p
                 return $! FileStatus True mode mtime size
{-# INLINE do_stat #-}

isDirectory :: FileStatus -> Bool
isDirectory = s_isdir . fst_mode

isRegularFile :: FileStatus -> Bool
isRegularFile = s_isreg . fst_mode

modificationTime :: FileStatus -> EpochTime
modificationTime = fst_mtime

fileSize :: FileStatus -> FileOffset
fileSize = fst_size

fileExists :: FileStatus -> Bool
fileExists = fst_exists

#include

-- lstat is broken on win32 with at least GHC 6.10.3
getSymbolicLinkStatus :: FilePath -> IO FileStatus
##if mingw32_HOST_OS
getSymbolicLinkStatus = getFileStatus
##else
getSymbolicLinkStatus fp = do_stat (\p -> (fp `strToPath` (`lstat` p)))
##endif

getFileStatus :: FilePath -> IO FileStatus
getFileStatus fp = do_stat (\p -> (fp `strToPath` (`lstat` p)))

-- | Requires a NUL-terminated ByteString, hence unsafe! Use with care.
getFileStatusBS :: BS.ByteString -> IO FileStatus
getFileStatusBS fp = do_stat (\p -> (fp `bsToPath` (`lstat` p)))
{-# INLINE getFileStatusBS #-}
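-- Editor's illustrative sketch (not part of the original module): a safe
-- existence-plus-kind check built on 'getFileStatus'. Thanks to the eNOENT
-- handling in 'do_stat', a missing path yields a status whose 'fileExists'
-- is False instead of throwing an exception.
isRegularFileAt :: FilePath -> IO Bool
isRegularFileAt fp = do
    st <- getFileStatus fp
    return (fileExists st && isRegularFile st)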
hashed-storage-0.5.10/Bundled/sha2.h0000644000000000000000000000607012025701732015246 0ustar0000000000000000/*
 * FIPS 180-2 SHA-224/256/384/512 implementation
 * Last update: 02/02/2007
 * Issue date:  04/30/2005
 *
 * Copyright (C) 2005, 2007 Olivier Gay
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the project nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#ifndef SHA2_H
#define SHA2_H

#define SHA224_DIGEST_SIZE ( 224 / 8)
#define SHA256_DIGEST_SIZE ( 256 / 8)
#define SHA384_DIGEST_SIZE ( 384 / 8)
#define SHA512_DIGEST_SIZE ( 512 / 8)

#define SHA256_BLOCK_SIZE  ( 512 / 8)
#define SHA512_BLOCK_SIZE  (1024 / 8)
#define SHA384_BLOCK_SIZE  SHA512_BLOCK_SIZE
#define SHA224_BLOCK_SIZE  SHA256_BLOCK_SIZE

#ifndef SHA2_TYPES
#define SHA2_TYPES
typedef unsigned char uint8;
typedef unsigned int uint32;
typedef unsigned long long uint64;
#endif

#ifdef __cplusplus
extern "C" {
#endif

typedef struct {
    unsigned int tot_len;
    unsigned int len;
    unsigned char block[2 * SHA256_BLOCK_SIZE];
    uint32 h[8];
} sha256_ctx;

typedef struct {
    unsigned int tot_len;
    unsigned int len;
    unsigned char block[2 * SHA512_BLOCK_SIZE];
    uint64 h[8];
} sha512_ctx;

typedef sha512_ctx sha384_ctx;
typedef sha256_ctx sha224_ctx;

void hashed_storage_sha224(const unsigned char *message,
                           unsigned int len, unsigned char *digest);
void hashed_storage_sha256(const unsigned char *message,
                           unsigned int len, unsigned char *digest);
void hashed_storage_sha384(const unsigned char *message,
                           unsigned int len, unsigned char *digest);
void hashed_storage_sha512(const unsigned char *message,
                           unsigned int len, unsigned char *digest);

#ifdef __cplusplus
}
#endif

#endif /* !SHA2_H */
hashed-storage-0.5.10/Bundled/sha2.c0000644000000000000000000010066212025701732015243 0ustar0000000000000000/*
 * FIPS 180-2 SHA-224/256/384/512 implementation
 * Last update: 02/02/2007
 * Issue date:  04/30/2005
 *
 * Copyright (C) 2005, 2007 Olivier Gay
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the project nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
* * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if 0 #define UNROLL_LOOPS /* Enable loops unrolling */ #endif #include #include "sha2.h" #define SHFR(x, n) (x >> n) #define ROTR(x, n) ((x >> n) | (x << ((sizeof(x) << 3) - n))) #define ROTL(x, n) ((x << n) | (x >> ((sizeof(x) << 3) - n))) #define CH(x, y, z) ((x & y) ^ (~x & z)) #define MAJ(x, y, z) ((x & y) ^ (x & z) ^ (y & z)) #define SHA256_F1(x) (ROTR(x, 2) ^ ROTR(x, 13) ^ ROTR(x, 22)) #define SHA256_F2(x) (ROTR(x, 6) ^ ROTR(x, 11) ^ ROTR(x, 25)) #define SHA256_F3(x) (ROTR(x, 7) ^ ROTR(x, 18) ^ SHFR(x, 3)) #define SHA256_F4(x) (ROTR(x, 17) ^ ROTR(x, 19) ^ SHFR(x, 10)) #define SHA512_F1(x) (ROTR(x, 28) ^ ROTR(x, 34) ^ ROTR(x, 39)) #define SHA512_F2(x) (ROTR(x, 14) ^ ROTR(x, 18) ^ ROTR(x, 41)) #define SHA512_F3(x) (ROTR(x, 1) ^ ROTR(x, 8) ^ SHFR(x, 7)) #define SHA512_F4(x) (ROTR(x, 19) ^ ROTR(x, 61) ^ SHFR(x, 6)) #define UNPACK32(x, str) \ { \ *((str) + 3) = (uint8) ((x) ); \ *((str) + 2) = (uint8) ((x) >> 8); \ *((str) + 1) = (uint8) ((x) >> 16); \ *((str) + 0) = (uint8) ((x) >> 24); \ } #define PACK32(str, x) \ { \ *(x) = ((uint32) *((str) + 3) ) \ | ((uint32) *((str) + 2) << 8) \ | ((uint32) *((str) + 1) << 16) \ | ((uint32) *((str) + 0) << 24); \ } #define UNPACK64(x, str) \ { \ *((str) + 7) = (uint8) ((x) ); \ *((str) + 6) = (uint8) ((x) >> 8); \ *((str) + 5) = (uint8) ((x) >> 16); \ *((str) + 4) = (uint8) ((x) >> 24); \ *((str) + 3) = (uint8) ((x) >> 32); \ *((str) + 2) = (uint8) ((x) >> 40); \ *((str) + 1) = (uint8) ((x) >> 48); \ *((str) + 0) = (uint8) ((x) >> 56); \ } #define PACK64(str, x) \ { \ *(x) = ((uint64) *((str) + 7) ) \ | ((uint64) *((str) + 6) << 8) \ | ((uint64) *((str) + 5) << 16) \ | ((uint64) *((str) + 4) << 24) \ | ((uint64) *((str) + 3) << 32) \ | ((uint64) *((str) + 2) << 40) \ | ((uint64) *((str) + 1) << 48) \ | ((uint64) *((str) + 0) << 56); \ } /* Macros used for loops unrolling */ #define SHA256_SCR(i) \ { \ w[i] = SHA256_F4(w[i - 2]) + w[i - 7] \ + SHA256_F3(w[i - 15]) + w[i - 16]; \ } #define SHA512_SCR(i) \ { \ w[i] = SHA512_F4(w[i - 2]) + w[i - 7] \ + SHA512_F3(w[i - 15]) + w[i - 16]; \ } #define SHA256_EXP(a, b, c, d, e, f, g, h, j) \ { \ t1 = wv[h] + SHA256_F2(wv[e]) + CH(wv[e], wv[f], wv[g]) \ + sha256_k[j] + w[j]; \ t2 = SHA256_F1(wv[a]) + MAJ(wv[a], wv[b], wv[c]); \ wv[d] += t1; \ wv[h] = t1 + t2; \ } #define SHA512_EXP(a, b, c, d, e, f, g ,h, j) \ { \ t1 = wv[h] + SHA512_F2(wv[e]) + CH(wv[e], wv[f], wv[g]) \ + sha512_k[j] + w[j]; \ t2 = SHA512_F1(wv[a]) + MAJ(wv[a], wv[b], wv[c]); \ wv[d] += t1; \ wv[h] = t1 + t2; \ } static uint32 sha224_h0[8] = {0xc1059ed8, 0x367cd507, 0x3070dd17, 0xf70e5939, 0xffc00b31, 0x68581511, 0x64f98fa7, 0xbefa4fa4}; static uint32 sha256_h0[8] = {0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19}; static uint64 sha384_h0[8] 
= {0xcbbb9d5dc1059ed8ULL, 0x629a292a367cd507ULL, 0x9159015a3070dd17ULL, 0x152fecd8f70e5939ULL, 0x67332667ffc00b31ULL, 0x8eb44a8768581511ULL, 0xdb0c2e0d64f98fa7ULL, 0x47b5481dbefa4fa4ULL}; static uint64 sha512_h0[8] = {0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL, 0x3c6ef372fe94f82bULL, 0xa54ff53a5f1d36f1ULL, 0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL, 0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL}; static uint32 sha256_k[64] = {0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2}; static uint64 sha512_k[80] = {0x428a2f98d728ae22ULL, 0x7137449123ef65cdULL, 0xb5c0fbcfec4d3b2fULL, 0xe9b5dba58189dbbcULL, 0x3956c25bf348b538ULL, 0x59f111f1b605d019ULL, 0x923f82a4af194f9bULL, 0xab1c5ed5da6d8118ULL, 0xd807aa98a3030242ULL, 0x12835b0145706fbeULL, 0x243185be4ee4b28cULL, 0x550c7dc3d5ffb4e2ULL, 0x72be5d74f27b896fULL, 0x80deb1fe3b1696b1ULL, 0x9bdc06a725c71235ULL, 0xc19bf174cf692694ULL, 0xe49b69c19ef14ad2ULL, 0xefbe4786384f25e3ULL, 0x0fc19dc68b8cd5b5ULL, 0x240ca1cc77ac9c65ULL, 0x2de92c6f592b0275ULL, 0x4a7484aa6ea6e483ULL, 0x5cb0a9dcbd41fbd4ULL, 0x76f988da831153b5ULL, 0x983e5152ee66dfabULL, 0xa831c66d2db43210ULL, 0xb00327c898fb213fULL, 0xbf597fc7beef0ee4ULL, 0xc6e00bf33da88fc2ULL, 0xd5a79147930aa725ULL, 0x06ca6351e003826fULL, 0x142929670a0e6e70ULL, 0x27b70a8546d22ffcULL, 0x2e1b21385c26c926ULL, 0x4d2c6dfc5ac42aedULL, 0x53380d139d95b3dfULL, 0x650a73548baf63deULL, 0x766a0abb3c77b2a8ULL, 0x81c2c92e47edaee6ULL, 0x92722c851482353bULL, 0xa2bfe8a14cf10364ULL, 0xa81a664bbc423001ULL, 0xc24b8b70d0f89791ULL, 0xc76c51a30654be30ULL, 0xd192e819d6ef5218ULL, 0xd69906245565a910ULL, 0xf40e35855771202aULL, 0x106aa07032bbd1b8ULL, 0x19a4c116b8d2d0c8ULL, 0x1e376c085141ab53ULL, 0x2748774cdf8eeb99ULL, 0x34b0bcb5e19b48a8ULL, 0x391c0cb3c5c95a63ULL, 0x4ed8aa4ae3418acbULL, 0x5b9cca4f7763e373ULL, 0x682e6ff3d6b2b8a3ULL, 0x748f82ee5defb2fcULL, 0x78a5636f43172f60ULL, 0x84c87814a1f0ab72ULL, 0x8cc702081a6439ecULL, 0x90befffa23631e28ULL, 0xa4506cebde82bde9ULL, 0xbef9a3f7b2c67915ULL, 0xc67178f2e372532bULL, 0xca273eceea26619cULL, 0xd186b8c721c0c207ULL, 0xeada7dd6cde0eb1eULL, 0xf57d4f7fee6ed178ULL, 0x06f067aa72176fbaULL, 0x0a637dc5a2c898a6ULL, 0x113f9804bef90daeULL, 0x1b710b35131c471bULL, 0x28db77f523047d84ULL, 0x32caab7b40c72493ULL, 0x3c9ebe0a15c9bebcULL, 0x431d67c49c100d4cULL, 0x4cc5d4becb3e42b6ULL, 0x597f299cfc657e2aULL, 0x5fcb6fab3ad6faecULL, 0x6c44198c4a475817ULL}; /* SHA-256 functions */ static void sha256_transf(sha256_ctx *ctx, const unsigned char *message, unsigned int block_nb) { uint32 w[64]; uint32 wv[8]; uint32 t1, t2; const unsigned char *sub_block; int i; #ifndef UNROLL_LOOPS int j; #endif for (i = 0; i < (int) block_nb; i++) { sub_block = message + (i << 6); #ifndef UNROLL_LOOPS for (j = 0; j < 16; j++) { PACK32(&sub_block[j << 2], &w[j]); } for (j = 16; j < 64; j++) { SHA256_SCR(j); } for (j = 0; j < 8; j++) { wv[j] = 
ctx->h[j]; } for (j = 0; j < 64; j++) { t1 = wv[7] + SHA256_F2(wv[4]) + CH(wv[4], wv[5], wv[6]) + sha256_k[j] + w[j]; t2 = SHA256_F1(wv[0]) + MAJ(wv[0], wv[1], wv[2]); wv[7] = wv[6]; wv[6] = wv[5]; wv[5] = wv[4]; wv[4] = wv[3] + t1; wv[3] = wv[2]; wv[2] = wv[1]; wv[1] = wv[0]; wv[0] = t1 + t2; } for (j = 0; j < 8; j++) { ctx->h[j] += wv[j]; } #else PACK32(&sub_block[ 0], &w[ 0]); PACK32(&sub_block[ 4], &w[ 1]); PACK32(&sub_block[ 8], &w[ 2]); PACK32(&sub_block[12], &w[ 3]); PACK32(&sub_block[16], &w[ 4]); PACK32(&sub_block[20], &w[ 5]); PACK32(&sub_block[24], &w[ 6]); PACK32(&sub_block[28], &w[ 7]); PACK32(&sub_block[32], &w[ 8]); PACK32(&sub_block[36], &w[ 9]); PACK32(&sub_block[40], &w[10]); PACK32(&sub_block[44], &w[11]); PACK32(&sub_block[48], &w[12]); PACK32(&sub_block[52], &w[13]); PACK32(&sub_block[56], &w[14]); PACK32(&sub_block[60], &w[15]); SHA256_SCR(16); SHA256_SCR(17); SHA256_SCR(18); SHA256_SCR(19); SHA256_SCR(20); SHA256_SCR(21); SHA256_SCR(22); SHA256_SCR(23); SHA256_SCR(24); SHA256_SCR(25); SHA256_SCR(26); SHA256_SCR(27); SHA256_SCR(28); SHA256_SCR(29); SHA256_SCR(30); SHA256_SCR(31); SHA256_SCR(32); SHA256_SCR(33); SHA256_SCR(34); SHA256_SCR(35); SHA256_SCR(36); SHA256_SCR(37); SHA256_SCR(38); SHA256_SCR(39); SHA256_SCR(40); SHA256_SCR(41); SHA256_SCR(42); SHA256_SCR(43); SHA256_SCR(44); SHA256_SCR(45); SHA256_SCR(46); SHA256_SCR(47); SHA256_SCR(48); SHA256_SCR(49); SHA256_SCR(50); SHA256_SCR(51); SHA256_SCR(52); SHA256_SCR(53); SHA256_SCR(54); SHA256_SCR(55); SHA256_SCR(56); SHA256_SCR(57); SHA256_SCR(58); SHA256_SCR(59); SHA256_SCR(60); SHA256_SCR(61); SHA256_SCR(62); SHA256_SCR(63); wv[0] = ctx->h[0]; wv[1] = ctx->h[1]; wv[2] = ctx->h[2]; wv[3] = ctx->h[3]; wv[4] = ctx->h[4]; wv[5] = ctx->h[5]; wv[6] = ctx->h[6]; wv[7] = ctx->h[7]; SHA256_EXP(0,1,2,3,4,5,6,7, 0); SHA256_EXP(7,0,1,2,3,4,5,6, 1); SHA256_EXP(6,7,0,1,2,3,4,5, 2); SHA256_EXP(5,6,7,0,1,2,3,4, 3); SHA256_EXP(4,5,6,7,0,1,2,3, 4); SHA256_EXP(3,4,5,6,7,0,1,2, 5); SHA256_EXP(2,3,4,5,6,7,0,1, 6); SHA256_EXP(1,2,3,4,5,6,7,0, 7); SHA256_EXP(0,1,2,3,4,5,6,7, 8); SHA256_EXP(7,0,1,2,3,4,5,6, 9); SHA256_EXP(6,7,0,1,2,3,4,5,10); SHA256_EXP(5,6,7,0,1,2,3,4,11); SHA256_EXP(4,5,6,7,0,1,2,3,12); SHA256_EXP(3,4,5,6,7,0,1,2,13); SHA256_EXP(2,3,4,5,6,7,0,1,14); SHA256_EXP(1,2,3,4,5,6,7,0,15); SHA256_EXP(0,1,2,3,4,5,6,7,16); SHA256_EXP(7,0,1,2,3,4,5,6,17); SHA256_EXP(6,7,0,1,2,3,4,5,18); SHA256_EXP(5,6,7,0,1,2,3,4,19); SHA256_EXP(4,5,6,7,0,1,2,3,20); SHA256_EXP(3,4,5,6,7,0,1,2,21); SHA256_EXP(2,3,4,5,6,7,0,1,22); SHA256_EXP(1,2,3,4,5,6,7,0,23); SHA256_EXP(0,1,2,3,4,5,6,7,24); SHA256_EXP(7,0,1,2,3,4,5,6,25); SHA256_EXP(6,7,0,1,2,3,4,5,26); SHA256_EXP(5,6,7,0,1,2,3,4,27); SHA256_EXP(4,5,6,7,0,1,2,3,28); SHA256_EXP(3,4,5,6,7,0,1,2,29); SHA256_EXP(2,3,4,5,6,7,0,1,30); SHA256_EXP(1,2,3,4,5,6,7,0,31); SHA256_EXP(0,1,2,3,4,5,6,7,32); SHA256_EXP(7,0,1,2,3,4,5,6,33); SHA256_EXP(6,7,0,1,2,3,4,5,34); SHA256_EXP(5,6,7,0,1,2,3,4,35); SHA256_EXP(4,5,6,7,0,1,2,3,36); SHA256_EXP(3,4,5,6,7,0,1,2,37); SHA256_EXP(2,3,4,5,6,7,0,1,38); SHA256_EXP(1,2,3,4,5,6,7,0,39); SHA256_EXP(0,1,2,3,4,5,6,7,40); SHA256_EXP(7,0,1,2,3,4,5,6,41); SHA256_EXP(6,7,0,1,2,3,4,5,42); SHA256_EXP(5,6,7,0,1,2,3,4,43); SHA256_EXP(4,5,6,7,0,1,2,3,44); SHA256_EXP(3,4,5,6,7,0,1,2,45); SHA256_EXP(2,3,4,5,6,7,0,1,46); SHA256_EXP(1,2,3,4,5,6,7,0,47); SHA256_EXP(0,1,2,3,4,5,6,7,48); SHA256_EXP(7,0,1,2,3,4,5,6,49); SHA256_EXP(6,7,0,1,2,3,4,5,50); SHA256_EXP(5,6,7,0,1,2,3,4,51); SHA256_EXP(4,5,6,7,0,1,2,3,52); SHA256_EXP(3,4,5,6,7,0,1,2,53); SHA256_EXP(2,3,4,5,6,7,0,1,54); 
SHA256_EXP(1,2,3,4,5,6,7,0,55); SHA256_EXP(0,1,2,3,4,5,6,7,56); SHA256_EXP(7,0,1,2,3,4,5,6,57); SHA256_EXP(6,7,0,1,2,3,4,5,58); SHA256_EXP(5,6,7,0,1,2,3,4,59); SHA256_EXP(4,5,6,7,0,1,2,3,60); SHA256_EXP(3,4,5,6,7,0,1,2,61); SHA256_EXP(2,3,4,5,6,7,0,1,62); SHA256_EXP(1,2,3,4,5,6,7,0,63); ctx->h[0] += wv[0]; ctx->h[1] += wv[1]; ctx->h[2] += wv[2]; ctx->h[3] += wv[3]; ctx->h[4] += wv[4]; ctx->h[5] += wv[5]; ctx->h[6] += wv[6]; ctx->h[7] += wv[7]; #endif /* !UNROLL_LOOPS */ } } static void sha256_init(sha256_ctx *ctx) { #ifndef UNROLL_LOOPS int i; for (i = 0; i < 8; i++) { ctx->h[i] = sha256_h0[i]; } #else ctx->h[0] = sha256_h0[0]; ctx->h[1] = sha256_h0[1]; ctx->h[2] = sha256_h0[2]; ctx->h[3] = sha256_h0[3]; ctx->h[4] = sha256_h0[4]; ctx->h[5] = sha256_h0[5]; ctx->h[6] = sha256_h0[6]; ctx->h[7] = sha256_h0[7]; #endif /* !UNROLL_LOOPS */ ctx->len = 0; ctx->tot_len = 0; } static void sha256_update(sha256_ctx *ctx, const unsigned char *message, unsigned int len) { unsigned int block_nb; unsigned int new_len, rem_len, tmp_len; const unsigned char *shifted_message; tmp_len = SHA256_BLOCK_SIZE - ctx->len; rem_len = len < tmp_len ? len : tmp_len; memcpy(&ctx->block[ctx->len], message, rem_len); if (ctx->len + len < SHA256_BLOCK_SIZE) { ctx->len += len; return; } new_len = len - rem_len; block_nb = new_len / SHA256_BLOCK_SIZE; shifted_message = message + rem_len; sha256_transf(ctx, ctx->block, 1); sha256_transf(ctx, shifted_message, block_nb); rem_len = new_len % SHA256_BLOCK_SIZE; memcpy(ctx->block, &shifted_message[block_nb << 6], rem_len); ctx->len = rem_len; ctx->tot_len += (block_nb + 1) << 6; } static void sha256_final(sha256_ctx *ctx, unsigned char *digest) { unsigned int block_nb; unsigned int pm_len; unsigned int len_b; #ifndef UNROLL_LOOPS int i; #endif block_nb = (1 + ((SHA256_BLOCK_SIZE - 9) < (ctx->len % SHA256_BLOCK_SIZE))); len_b = (ctx->tot_len + ctx->len) << 3; pm_len = block_nb << 6; memset(ctx->block + ctx->len, 0, pm_len - ctx->len); ctx->block[ctx->len] = 0x80; UNPACK32(len_b, ctx->block + pm_len - 4); sha256_transf(ctx, ctx->block, block_nb); #ifndef UNROLL_LOOPS for (i = 0 ; i < 8; i++) { UNPACK32(ctx->h[i], &digest[i << 2]); } #else UNPACK32(ctx->h[0], &digest[ 0]); UNPACK32(ctx->h[1], &digest[ 4]); UNPACK32(ctx->h[2], &digest[ 8]); UNPACK32(ctx->h[3], &digest[12]); UNPACK32(ctx->h[4], &digest[16]); UNPACK32(ctx->h[5], &digest[20]); UNPACK32(ctx->h[6], &digest[24]); UNPACK32(ctx->h[7], &digest[28]); #endif /* !UNROLL_LOOPS */ } void hashed_storage_sha256(const unsigned char *message, unsigned int len, unsigned char *digest) { sha256_ctx ctx; sha256_init(&ctx); sha256_update(&ctx, message, len); sha256_final(&ctx, digest); } /* SHA-512 functions */ static void sha512_transf(sha512_ctx *ctx, const unsigned char *message, unsigned int block_nb) { uint64 w[80]; uint64 wv[8]; uint64 t1, t2; const unsigned char *sub_block; int i, j; for (i = 0; i < (int) block_nb; i++) { sub_block = message + (i << 7); #ifndef UNROLL_LOOPS for (j = 0; j < 16; j++) { PACK64(&sub_block[j << 3], &w[j]); } for (j = 16; j < 80; j++) { SHA512_SCR(j); } for (j = 0; j < 8; j++) { wv[j] = ctx->h[j]; } for (j = 0; j < 80; j++) { t1 = wv[7] + SHA512_F2(wv[4]) + CH(wv[4], wv[5], wv[6]) + sha512_k[j] + w[j]; t2 = SHA512_F1(wv[0]) + MAJ(wv[0], wv[1], wv[2]); wv[7] = wv[6]; wv[6] = wv[5]; wv[5] = wv[4]; wv[4] = wv[3] + t1; wv[3] = wv[2]; wv[2] = wv[1]; wv[1] = wv[0]; wv[0] = t1 + t2; } for (j = 0; j < 8; j++) { ctx->h[j] += wv[j]; } #else PACK64(&sub_block[ 0], &w[ 0]); PACK64(&sub_block[ 8], &w[ 1]); 
PACK64(&sub_block[ 16], &w[ 2]); PACK64(&sub_block[ 24], &w[ 3]); PACK64(&sub_block[ 32], &w[ 4]); PACK64(&sub_block[ 40], &w[ 5]); PACK64(&sub_block[ 48], &w[ 6]); PACK64(&sub_block[ 56], &w[ 7]); PACK64(&sub_block[ 64], &w[ 8]); PACK64(&sub_block[ 72], &w[ 9]); PACK64(&sub_block[ 80], &w[10]); PACK64(&sub_block[ 88], &w[11]); PACK64(&sub_block[ 96], &w[12]); PACK64(&sub_block[104], &w[13]); PACK64(&sub_block[112], &w[14]); PACK64(&sub_block[120], &w[15]); SHA512_SCR(16); SHA512_SCR(17); SHA512_SCR(18); SHA512_SCR(19); SHA512_SCR(20); SHA512_SCR(21); SHA512_SCR(22); SHA512_SCR(23); SHA512_SCR(24); SHA512_SCR(25); SHA512_SCR(26); SHA512_SCR(27); SHA512_SCR(28); SHA512_SCR(29); SHA512_SCR(30); SHA512_SCR(31); SHA512_SCR(32); SHA512_SCR(33); SHA512_SCR(34); SHA512_SCR(35); SHA512_SCR(36); SHA512_SCR(37); SHA512_SCR(38); SHA512_SCR(39); SHA512_SCR(40); SHA512_SCR(41); SHA512_SCR(42); SHA512_SCR(43); SHA512_SCR(44); SHA512_SCR(45); SHA512_SCR(46); SHA512_SCR(47); SHA512_SCR(48); SHA512_SCR(49); SHA512_SCR(50); SHA512_SCR(51); SHA512_SCR(52); SHA512_SCR(53); SHA512_SCR(54); SHA512_SCR(55); SHA512_SCR(56); SHA512_SCR(57); SHA512_SCR(58); SHA512_SCR(59); SHA512_SCR(60); SHA512_SCR(61); SHA512_SCR(62); SHA512_SCR(63); SHA512_SCR(64); SHA512_SCR(65); SHA512_SCR(66); SHA512_SCR(67); SHA512_SCR(68); SHA512_SCR(69); SHA512_SCR(70); SHA512_SCR(71); SHA512_SCR(72); SHA512_SCR(73); SHA512_SCR(74); SHA512_SCR(75); SHA512_SCR(76); SHA512_SCR(77); SHA512_SCR(78); SHA512_SCR(79); wv[0] = ctx->h[0]; wv[1] = ctx->h[1]; wv[2] = ctx->h[2]; wv[3] = ctx->h[3]; wv[4] = ctx->h[4]; wv[5] = ctx->h[5]; wv[6] = ctx->h[6]; wv[7] = ctx->h[7]; j = 0; do { SHA512_EXP(0,1,2,3,4,5,6,7,j); j++; SHA512_EXP(7,0,1,2,3,4,5,6,j); j++; SHA512_EXP(6,7,0,1,2,3,4,5,j); j++; SHA512_EXP(5,6,7,0,1,2,3,4,j); j++; SHA512_EXP(4,5,6,7,0,1,2,3,j); j++; SHA512_EXP(3,4,5,6,7,0,1,2,j); j++; SHA512_EXP(2,3,4,5,6,7,0,1,j); j++; SHA512_EXP(1,2,3,4,5,6,7,0,j); j++; } while (j < 80); ctx->h[0] += wv[0]; ctx->h[1] += wv[1]; ctx->h[2] += wv[2]; ctx->h[3] += wv[3]; ctx->h[4] += wv[4]; ctx->h[5] += wv[5]; ctx->h[6] += wv[6]; ctx->h[7] += wv[7]; #endif /* !UNROLL_LOOPS */ } } static void sha512_init(sha512_ctx *ctx) { #ifndef UNROLL_LOOPS int i; for (i = 0; i < 8; i++) { ctx->h[i] = sha512_h0[i]; } #else ctx->h[0] = sha512_h0[0]; ctx->h[1] = sha512_h0[1]; ctx->h[2] = sha512_h0[2]; ctx->h[3] = sha512_h0[3]; ctx->h[4] = sha512_h0[4]; ctx->h[5] = sha512_h0[5]; ctx->h[6] = sha512_h0[6]; ctx->h[7] = sha512_h0[7]; #endif /* !UNROLL_LOOPS */ ctx->len = 0; ctx->tot_len = 0; } static void sha512_update(sha512_ctx *ctx, const unsigned char *message, unsigned int len) { unsigned int block_nb; unsigned int new_len, rem_len, tmp_len; const unsigned char *shifted_message; tmp_len = SHA512_BLOCK_SIZE - ctx->len; rem_len = len < tmp_len ? 
len : tmp_len; memcpy(&ctx->block[ctx->len], message, rem_len); if (ctx->len + len < SHA512_BLOCK_SIZE) { ctx->len += len; return; } new_len = len - rem_len; block_nb = new_len / SHA512_BLOCK_SIZE; shifted_message = message + rem_len; sha512_transf(ctx, ctx->block, 1); sha512_transf(ctx, shifted_message, block_nb); rem_len = new_len % SHA512_BLOCK_SIZE; memcpy(ctx->block, &shifted_message[block_nb << 7], rem_len); ctx->len = rem_len; ctx->tot_len += (block_nb + 1) << 7; } static void sha512_final(sha512_ctx *ctx, unsigned char *digest) { unsigned int block_nb; unsigned int pm_len; unsigned int len_b; #ifndef UNROLL_LOOPS int i; #endif block_nb = 1 + ((SHA512_BLOCK_SIZE - 17) < (ctx->len % SHA512_BLOCK_SIZE)); len_b = (ctx->tot_len + ctx->len) << 3; pm_len = block_nb << 7; memset(ctx->block + ctx->len, 0, pm_len - ctx->len); ctx->block[ctx->len] = 0x80; UNPACK32(len_b, ctx->block + pm_len - 4); sha512_transf(ctx, ctx->block, block_nb); #ifndef UNROLL_LOOPS for (i = 0 ; i < 8; i++) { UNPACK64(ctx->h[i], &digest[i << 3]); } #else UNPACK64(ctx->h[0], &digest[ 0]); UNPACK64(ctx->h[1], &digest[ 8]); UNPACK64(ctx->h[2], &digest[16]); UNPACK64(ctx->h[3], &digest[24]); UNPACK64(ctx->h[4], &digest[32]); UNPACK64(ctx->h[5], &digest[40]); UNPACK64(ctx->h[6], &digest[48]); UNPACK64(ctx->h[7], &digest[56]); #endif /* !UNROLL_LOOPS */ } void hashed_storage_sha512(const unsigned char *message, unsigned int len, unsigned char *digest) { sha512_ctx ctx; sha512_init(&ctx); sha512_update(&ctx, message, len); sha512_final(&ctx, digest); } /* SHA-384 functions */ static void sha384_init(sha384_ctx *ctx) { #ifndef UNROLL_LOOPS int i; for (i = 0; i < 8; i++) { ctx->h[i] = sha384_h0[i]; } #else ctx->h[0] = sha384_h0[0]; ctx->h[1] = sha384_h0[1]; ctx->h[2] = sha384_h0[2]; ctx->h[3] = sha384_h0[3]; ctx->h[4] = sha384_h0[4]; ctx->h[5] = sha384_h0[5]; ctx->h[6] = sha384_h0[6]; ctx->h[7] = sha384_h0[7]; #endif /* !UNROLL_LOOPS */ ctx->len = 0; ctx->tot_len = 0; } static void sha384_update(sha384_ctx *ctx, const unsigned char *message, unsigned int len) { unsigned int block_nb; unsigned int new_len, rem_len, tmp_len; const unsigned char *shifted_message; tmp_len = SHA384_BLOCK_SIZE - ctx->len; rem_len = len < tmp_len ? 
len : tmp_len; memcpy(&ctx->block[ctx->len], message, rem_len); if (ctx->len + len < SHA384_BLOCK_SIZE) { ctx->len += len; return; } new_len = len - rem_len; block_nb = new_len / SHA384_BLOCK_SIZE; shifted_message = message + rem_len; sha512_transf(ctx, ctx->block, 1); sha512_transf(ctx, shifted_message, block_nb); rem_len = new_len % SHA384_BLOCK_SIZE; memcpy(ctx->block, &shifted_message[block_nb << 7], rem_len); ctx->len = rem_len; ctx->tot_len += (block_nb + 1) << 7; } static void sha384_final(sha384_ctx *ctx, unsigned char *digest) { unsigned int block_nb; unsigned int pm_len; unsigned int len_b; #ifndef UNROLL_LOOPS int i; #endif block_nb = (1 + ((SHA384_BLOCK_SIZE - 17) < (ctx->len % SHA384_BLOCK_SIZE))); len_b = (ctx->tot_len + ctx->len) << 3; pm_len = block_nb << 7; memset(ctx->block + ctx->len, 0, pm_len - ctx->len); ctx->block[ctx->len] = 0x80; UNPACK32(len_b, ctx->block + pm_len - 4); sha512_transf(ctx, ctx->block, block_nb); #ifndef UNROLL_LOOPS for (i = 0 ; i < 6; i++) { UNPACK64(ctx->h[i], &digest[i << 3]); } #else UNPACK64(ctx->h[0], &digest[ 0]); UNPACK64(ctx->h[1], &digest[ 8]); UNPACK64(ctx->h[2], &digest[16]); UNPACK64(ctx->h[3], &digest[24]); UNPACK64(ctx->h[4], &digest[32]); UNPACK64(ctx->h[5], &digest[40]); #endif /* !UNROLL_LOOPS */ } void hashed_storage_sha384(const unsigned char *message, unsigned int len, unsigned char *digest) { sha384_ctx ctx; sha384_init(&ctx); sha384_update(&ctx, message, len); sha384_final(&ctx, digest); } /* SHA-224 functions */ static void sha224_init(sha224_ctx *ctx) { #ifndef UNROLL_LOOPS int i; for (i = 0; i < 8; i++) { ctx->h[i] = sha224_h0[i]; } #else ctx->h[0] = sha224_h0[0]; ctx->h[1] = sha224_h0[1]; ctx->h[2] = sha224_h0[2]; ctx->h[3] = sha224_h0[3]; ctx->h[4] = sha224_h0[4]; ctx->h[5] = sha224_h0[5]; ctx->h[6] = sha224_h0[6]; ctx->h[7] = sha224_h0[7]; #endif /* !UNROLL_LOOPS */ ctx->len = 0; ctx->tot_len = 0; } static void sha224_update(sha224_ctx *ctx, const unsigned char *message, unsigned int len) { unsigned int block_nb; unsigned int new_len, rem_len, tmp_len; const unsigned char *shifted_message; tmp_len = SHA224_BLOCK_SIZE - ctx->len; rem_len = len < tmp_len ? 
len : tmp_len; memcpy(&ctx->block[ctx->len], message, rem_len); if (ctx->len + len < SHA224_BLOCK_SIZE) { ctx->len += len; return; } new_len = len - rem_len; block_nb = new_len / SHA224_BLOCK_SIZE; shifted_message = message + rem_len; sha256_transf(ctx, ctx->block, 1); sha256_transf(ctx, shifted_message, block_nb); rem_len = new_len % SHA224_BLOCK_SIZE; memcpy(ctx->block, &shifted_message[block_nb << 6], rem_len); ctx->len = rem_len; ctx->tot_len += (block_nb + 1) << 6; } static void sha224_final(sha224_ctx *ctx, unsigned char *digest) { unsigned int block_nb; unsigned int pm_len; unsigned int len_b; #ifndef UNROLL_LOOPS int i; #endif block_nb = (1 + ((SHA224_BLOCK_SIZE - 9) < (ctx->len % SHA224_BLOCK_SIZE))); len_b = (ctx->tot_len + ctx->len) << 3; pm_len = block_nb << 6; memset(ctx->block + ctx->len, 0, pm_len - ctx->len); ctx->block[ctx->len] = 0x80; UNPACK32(len_b, ctx->block + pm_len - 4); sha256_transf(ctx, ctx->block, block_nb); #ifndef UNROLL_LOOPS for (i = 0 ; i < 7; i++) { UNPACK32(ctx->h[i], &digest[i << 2]); } #else UNPACK32(ctx->h[0], &digest[ 0]); UNPACK32(ctx->h[1], &digest[ 4]); UNPACK32(ctx->h[2], &digest[ 8]); UNPACK32(ctx->h[3], &digest[12]); UNPACK32(ctx->h[4], &digest[16]); UNPACK32(ctx->h[5], &digest[20]); UNPACK32(ctx->h[6], &digest[24]); #endif /* !UNROLL_LOOPS */ } void hashed_storage_sha224(const unsigned char *message, unsigned int len, unsigned char *digest) { sha224_ctx ctx; sha224_init(&ctx); sha224_update(&ctx, message, len); sha224_final(&ctx, digest); } #ifdef TEST_VECTORS /* FIPS 180-2 Validation tests */ #include #include void test(const unsigned char *vector, unsigned char *digest, unsigned int digest_size) { unsigned char output[2 * SHA512_DIGEST_SIZE + 1]; int i; output[2 * digest_size] = '\0'; for (i = 0; i < (int) digest_size ; i++) { sprintf((char *) output + 2 * i, "%02x", digest[i]); } printf("H: %s\n", output); if (strcmp((char *) vector, (char *) output)) { fprintf(stderr, "Test failed.\n"); exit(EXIT_FAILURE); } } int main() { static const unsigned char *vectors[4][3] = { /* SHA-224 */ { "23097d223405d8228642a477bda255b32aadbce4bda0b3f7e36c9da7", "75388b16512776cc5dba5da1fd890150b0c6455cb4f58b1952522525", "20794655980c91d8bbb4c1ea97618a4bf03f42581948b2ee4ee7ad67", }, /* SHA-256 */ { "ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad", "248d6a61d20638b8e5c026930c3e6039a33ce45964ff2167f6ecedd419db06c1", "cdc76e5c9914fb9281a1c7e284d73e67f1809a48a497200e046d39ccc7112cd0", }, /* SHA-384 */ { "cb00753f45a35e8bb5a03d699ac65007272c32ab0eded1631a8b605a43ff5bed" "8086072ba1e7cc2358baeca134c825a7", "09330c33f71147e83d192fc782cd1b4753111b173b3b05d22fa08086e3b0f712" "fcc7c71a557e2db966c3e9fa91746039", "9d0e1809716474cb086e834e310a4a1ced149e9c00f248527972cec5704c2a5b" "07b8b3dc38ecc4ebae97ddd87f3d8985", }, /* SHA-512 */ { "ddaf35a193617abacc417349ae20413112e6fa4e89a97ea20a9eeee64b55d39a" "2192992a274fc1a836ba3c23a3feebbd454d4423643ce80e2a9ac94fa54ca49f", "8e959b75dae313da8cf4f72814fc143f8f7779c6eb9f7fa17299aeadb6889018" "501d289e4900f7e4331b99dec4b5433ac7d329eeb6dd26545e96e55b874be909", "e718483d0ce769644e2e42c7bc15b4638e1f98b13b2044285632a803afa973eb" "de0ff244877ea60a4cb0432ce577c31beb009c5c2c49aa2e4eadb217ad8cc09b" } }; static const unsigned char message1[] = "abc"; static const unsigned char message2a[] = "abcdbcdecdefdefgefghfghighijhi" "jkijkljklmklmnlmnomnopnopq"; static const unsigned char message2b[] = "abcdefghbcdefghicdefghijdefghijkefghij" "klfghijklmghijklmnhijklmnoijklmnopjklm" "nopqklmnopqrlmnopqrsmnopqrstnopqrstu"; unsigned 
char *message3; unsigned int message3_len = 1000000; unsigned char digest[SHA512_DIGEST_SIZE]; message3 = malloc(message3_len); if (message3 == NULL) { fprintf(stderr, "Can't allocate memory\n"); return -1; } memset(message3, 'a', message3_len); printf("SHA-2 FIPS 180-2 Validation tests\n\n"); printf("SHA-224 Test vectors\n"); sha224(message1, strlen((char *) message1), digest); test(vectors[0][0], digest, SHA224_DIGEST_SIZE); sha224(message2a, strlen((char *) message2a), digest); test(vectors[0][1], digest, SHA224_DIGEST_SIZE); sha224(message3, message3_len, digest); test(vectors[0][2], digest, SHA224_DIGEST_SIZE); printf("\n"); printf("SHA-256 Test vectors\n"); sha256(message1, strlen((char *) message1), digest); test(vectors[1][0], digest, SHA256_DIGEST_SIZE); sha256(message2a, strlen((char *) message2a), digest); test(vectors[1][1], digest, SHA256_DIGEST_SIZE); sha256(message3, message3_len, digest); test(vectors[1][2], digest, SHA256_DIGEST_SIZE); printf("\n"); printf("SHA-384 Test vectors\n"); sha384(message1, strlen((char *) message1), digest); test(vectors[2][0], digest, SHA384_DIGEST_SIZE); sha384(message2b, strlen((char *) message2b), digest); test(vectors[2][1], digest, SHA384_DIGEST_SIZE); sha384(message3, message3_len, digest); test(vectors[2][2], digest, SHA384_DIGEST_SIZE); printf("\n"); printf("SHA-512 Test vectors\n"); sha512(message1, strlen((char *) message1), digest); test(vectors[3][0], digest, SHA512_DIGEST_SIZE); sha512(message2b, strlen((char *) message2b), digest); test(vectors[3][1], digest, SHA512_DIGEST_SIZE); sha512(message3, message3_len, digest); test(vectors[3][2], digest, SHA512_DIGEST_SIZE); printf("\n"); printf("All tests passed.\n"); return 0; } #endif /* TEST_VECTORS */ hashed-storage-0.5.10/Bundled/SHA256.hs0000644000000000000000000000157212025701732015446 0ustar0000000000000000{-# LANGUAGE CPP, ForeignFunctionInterface #-} -- Private interface to a C implementation of SHA 256. Originally based on code -- by Zooko O'Whielacronx, but rewritten since. Therefore, BSD applies, as for -- the rest of hashed-storage. module Bundled.SHA256 ( sha256 ) where import Foreign import Foreign.C.Types import Data.ByteString.Unsafe (unsafeUseAsCStringLen) import qualified Data.ByteString.Internal as BSI sha256 :: BSI.ByteString -> BSI.ByteString sha256 p = unsafePerformIO $ do digest <- BSI.create 32 $ \digest -> unsafeUseAsCStringLen p $ \(ptr,n) -> c_sha256 ptr (fromIntegral n) digest return $! digest -- void sha256sum(const unsigned char *d, size_t n, unsigned char *md); foreign import ccall unsafe "sha2.h hashed_storage_sha256" c_sha256 :: Ptr CChar -> CSize -> Ptr Word8 -> IO ()
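-- Editor's illustrative sketch, not part of the original module: render a
-- digest as the conventional 64-character hex string (e.g. for comparison
-- with the hashed file names under _darcs/pristine.hashed/). Assumes two
-- extra imports, @qualified Data.ByteString as BS@ and @Numeric (showHex)@:
--
-- > sha256hex :: BSI.ByteString -> String
-- > sha256hex = concatMap hex . BS.unpack . sha256
-- >   where hex w = (if w < 16 then ('0':) else id) (showHex w "")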