tar-0.5.1.1/Codec/Archive/Tar.hs
{-# LANGUAGE CPP #-} ----------------------------------------------------------------------------- -- | -- Module : Codec.Archive.Tar -- Copyright : (c) 2007 Bjorn Bringert, -- 2008 Andrea Vezzosi, -- 2008-2012 Duncan Coutts -- License : BSD3 -- -- Maintainer : duncan@community.haskell.org -- Portability : portable -- -- Reading, writing and manipulating \"@.tar@\" archive files. -- -- This module uses common names and so is designed to be imported qualified: -- -- > import qualified Codec.Archive.Tar as Tar -- ----------------------------------------------------------------------------- module Codec.Archive.Tar ( -- | Tar archive files are used to store a collection of other files in a -- single file. They consist of a sequence of entries. Each entry describes -- a file or directory (or some other special kind of file). The entry stores -- a little bit of meta-data, in particular the file or directory name. -- -- Unlike some other archive formats, a tar file contains no index. The -- information about each entry is stored next to the entry. Because of this, -- tar files are almost always processed linearly rather than in a -- random-access fashion. -- -- The functions in this package are designed for working on tar files -- linearly and lazily. This makes it possible to do many operations in -- constant space rather than having to load the entire archive into memory. -- -- It can read and write standard POSIX tar files and also the GNU and old -- Unix V7 tar formats. The convenience functions that are provided in the -- "Codec.Archive.Tar.Entry" module for creating archive entries are -- primarily designed for standard portable archives. If you need to -- construct GNU format archives or exactly preserve file ownership and -- permissions then you will need to write some extra helper functions. -- -- This module contains just the simple high level operations without -- exposing all the details of tar files. If you need to inspect tar -- entries in more detail or construct them directly then you also need -- the module "Codec.Archive.Tar.Entry". -- * High level \"all in one\" operations create, extract, append, -- * Notes -- ** Compressed tar archives -- | Tar files are commonly used in conjunction with gzip compression, as in -- \"@.tar.gz@\" or \"@.tar.bz2@\" files. This module does not directly -- handle compressed tar files; however, they can be handled easily by -- composing functions from this module and the modules -- @Codec.Compression.GZip@ or @Codec.Compression.BZip@ -- (see the @zlib@ or @bzlib@ packages). -- -- Creating a compressed \"@.tar.gz@\" file is just a minor variation on the -- 'create' function, but where we throw compression into the pipeline: -- -- > BS.writeFile tar . GZip.compress . Tar.write =<< Tar.pack base dir -- -- Similarly, extracting a compressed \"@.tar.gz@\" is just a minor variation -- on the 'extract' function where we use decompression in the pipeline: -- -- > Tar.unpack dir . Tar.read . GZip.decompress =<< BS.readFile tar -- -- ** Security -- | This is pretty important. A maliciously constructed tar archive could -- contain entries that specify bad file names.
It could specify absolute -- file names like \"@\/etc\/passwd@\" or relative files outside of the -- archive like \"..\/..\/..\/something\". This security problem is commonly -- called a \"directory traversal vulnerability\". Historically, such -- vulnerabilities have been common in packages handling tar archives. -- -- The 'extract' and 'unpack' functions check for bad file names. See the -- 'checkSecurity' function for more details. If you need to do any custom -- unpacking then you should use this. -- ** Tarbombs -- | A \"tarbomb\" is a @.tar@ file where not all entries are in a -- subdirectory but instead files extract into the top level directory. The -- 'extract' function does not check for these; however, if you want to do -- that you can use the 'checkTarbomb' function like so: -- -- > Tar.unpack dir . Tar.checkTarbomb expectedDir -- >            . Tar.read =<< BS.readFile tar -- -- In this case extraction will fail if any file is outside of @expectedDir@. -- * Converting between internal and external representation -- | Note that you cannot expect @write . read@ to give exactly the same output -- as input. You can, however, expect the information to be preserved exactly. -- This is because 'read' accepts common format variations while 'write' -- produces the standard format. read, write, -- * Packing and unpacking files to\/from internal representation -- | These functions are for packing and unpacking portable archives. They -- are not suitable in cases where it is important to preserve file ownership -- and permissions or to archive special files like named pipes and Unix -- device files. pack, unpack, -- * Types -- ** Tar entry type -- | This module provides only very simple and limited read-only access to -- the 'Entry' type. If you need access to the details or if you need to -- construct your own entries then also import "Codec.Archive.Tar.Entry". Entry, entryPath, entryContent, EntryContent(..), -- ** Sequences of tar entries Entries(..), mapEntries, mapEntriesNoFail, foldEntries, foldlEntries, unfoldEntries, -- * Error handling -- | Reading tar files can fail if the data does not match the tar file -- format correctly. -- -- The style of error handling is to return structured errors. The pure -- functions in the library do not throw exceptions; they return the errors -- as data. The IO actions in the library can throw exceptions, in particular -- the 'unpack' action does this. All the error types used are instances of -- the standard 'Exception' class so it is possible to 'throw' and 'catch' -- them. -- ** Errors from reading tar files FormatError(..), #ifdef TESTS prop_write_read_ustar, prop_write_read_gnu, prop_write_read_v7, #endif ) where import Codec.Archive.Tar.Types import Codec.Archive.Tar.Read import Codec.Archive.Tar.Write import Codec.Archive.Tar.Pack import Codec.Archive.Tar.Unpack import Codec.Archive.Tar.Index (hSeekEndEntryOffset) import Codec.Archive.Tar.Check import Control.Exception (Exception, throw, catch) import qualified Data.ByteString.Lazy as BS import System.IO (withFile, IOMode(..)) import Prelude hiding (read) -- | Create a new @\".tar\"@ file from a directory of files. -- -- It is equivalent to calling the standard @tar@ program like so: -- -- @$ tar -f tarball.tar -C base -c dir@ -- -- This assumes a directory @.\/base\/dir@ with files inside, e.g. -- @.\/base\/dir\/foo.txt@. The file names inside the resulting tar file will be -- relative to @dir@, e.g. @dir\/foo.txt@. -- -- This is a high level \"all in one\" operation.
Since you may need variations -- on this function, it is instructive to see how it is written. It is just: -- -- > BS.writeFile tar . Tar.write =<< Tar.pack base paths -- -- Notes: -- -- The files and directories must not change during this operation or the -- result is not well defined. -- -- The intention of this function is to create tarballs that are portable -- between systems. It is /not/ suitable for doing file system backups because -- file ownership and permissions are not fully preserved. File ownership is -- not preserved at all. File permissions are set to simple portable values: -- -- * @rw-r--r--@ for normal files -- -- * @rwxr-xr-x@ for executable files -- -- * @rwxr-xr-x@ for directories -- create :: FilePath -- ^ Path of the \".tar\" file to write. -> FilePath -- ^ Base directory -> [FilePath] -- ^ Files and directories to archive, relative to base dir -> IO () create tar base paths = BS.writeFile tar . write =<< pack base paths -- | Extract all the files contained in a @\".tar\"@ file. -- -- It is equivalent to calling the standard @tar@ program like so: -- -- @$ tar -x -f tarball.tar -C dir@ -- -- So for example if the @tarball.tar@ file contains @foo\/bar.txt@ then this -- will extract it to @dir\/foo\/bar.txt@. -- -- This is a high level \"all in one\" operation. Since you may need variations -- on this function, it is instructive to see how it is written. It is just: -- -- > Tar.unpack dir . Tar.read =<< BS.readFile tar -- -- Notes: -- -- Extracting can fail for a number of reasons. The tarball may be incorrectly -- formatted. There may be IO or permission errors. In such cases an exception -- will be thrown and extraction will not continue. -- -- Since the extraction may fail part way through, it is not atomic. For this -- reason you may want to extract into an empty directory and, if the -- extraction fails, recursively delete the directory. -- -- Security: only files inside the target directory will be written. Tarballs -- containing entries that point outside of the target directory (either absolute -- paths or relative paths) will be caught and an exception will be thrown. -- extract :: FilePath -- ^ Destination directory -> FilePath -- ^ Tarball -> IO () extract dir tar = unpack dir . read =<< BS.readFile tar -- | Append new entries to a @\".tar\"@ file from a directory of files. -- -- This is much like 'create', except that all the entries are added to the -- end of an existing tar file. Or if the file does not already exist then -- it behaves the same as 'create'. -- append :: FilePath -- ^ Path of the \".tar\" file to write. -> FilePath -- ^ Base directory -> [FilePath] -- ^ Files and directories to archive, relative to base dir -> IO () append tar base paths = withFile tar ReadWriteMode $ \hnd -> do _ <- hSeekEndEntryOffset hnd Nothing BS.hPut hnd . 
write =<< pack base paths ------------------------- -- Correctness properties -- #ifdef TESTS prop_write_read_ustar :: [Entry] -> Bool prop_write_read_ustar entries = foldr Next Done entries' == read (write entries') where entries' = [ e { entryFormat = UstarFormat } | e <- entries ] prop_write_read_gnu :: [Entry] -> Bool prop_write_read_gnu entries = foldr Next Done entries' == read (write entries') where entries' = [ e { entryFormat = GnuFormat } | e <- entries ] prop_write_read_v7 :: [Entry] -> Bool prop_write_read_v7 entries = foldr Next Done entries' == read (write entries') where entries' = [ limitToV7FormatCompat e { entryFormat = V7Format } | e <- entries ] #endif
tar-0.5.1.1/Codec/Archive/Tar/Check.hs
{-# LANGUAGE DeriveDataTypeable #-} ----------------------------------------------------------------------------- -- | -- Module : Codec.Archive.Tar.Check -- Copyright : (c) 2008-2012 Duncan Coutts -- 2011 Max Bolingbroke -- License : BSD3 -- -- Maintainer : duncan@community.haskell.org -- Portability : portable -- -- Perform various checks on tar file entries. -- ----------------------------------------------------------------------------- module Codec.Archive.Tar.Check ( -- * Security checkSecurity, FileNameError(..), -- * Tarbombs checkTarbomb, TarBombError(..), -- * Portability checkPortability, PortabilityError(..), PortabilityPlatform, ) where import Codec.Archive.Tar.Types import Data.Typeable (Typeable) import Control.Exception (Exception) import Control.Monad (MonadPlus(mplus)) import qualified System.FilePath as FilePath.Native ( splitDirectories, isAbsolute, isValid ) import qualified System.FilePath.Windows as FilePath.Windows import qualified System.FilePath.Posix as FilePath.Posix -------------------------- -- Security -- -- | This function checks a sequence of tar entries for file name security -- problems. It checks that: -- -- * file paths are not absolute -- -- * file paths do not contain any path components that are \"@..@\" -- -- * file names are valid -- -- These checks are from the perspective of the current OS. That means we check -- for \"@C:\blah@\" files on Windows and \"\/blah\" files on Unix. For archive -- entry types 'HardLink' and 'SymbolicLink' the same checks are done for the -- link target. A failure in any entry terminates the sequence of entries with -- an error.
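--
-- As an illustration of custom unpacking (an editor's sketch, not part of
-- the original API: @processEntry@ is a hypothetical callback, and
-- @Tar.foldEntries@\/@Tar.read@ come from "Codec.Archive.Tar"), one can run
-- the check first and fail on the first offending entry:
--
-- > walkChecked :: (Entry -> IO ()) -> LBS.ByteString -> IO ()
-- > walkChecked processEntry =
-- >     Tar.foldEntries (\e io -> processEntry e >> io)  -- handle each entry
-- >                     (return ())                      -- end of archive
-- >                     (either throwIO throwIO)         -- format or name error
-- >   . checkSecurity
-- >   . Tar.read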
-- checkSecurity :: Entries e -> Entries (Either e FileNameError) checkSecurity = checkEntries checkEntrySecurity checkEntrySecurity :: Entry -> Maybe FileNameError checkEntrySecurity entry = case entryContent entry of HardLink link -> check (entryPath entry) `mplus` check (fromLinkTarget link) SymbolicLink link -> check (entryPath entry) `mplus` check (fromLinkTarget link) _ -> check (entryPath entry) where check name | FilePath.Native.isAbsolute name = Just $ AbsoluteFileName name | not (FilePath.Native.isValid name) = Just $ InvalidFileName name | any (=="..") (FilePath.Native.splitDirectories name) = Just $ InvalidFileName name | otherwise = Nothing -- | Errors arising from tar file names being in some way invalid or dangerous data FileNameError = InvalidFileName FilePath | AbsoluteFileName FilePath deriving (Typeable) instance Show FileNameError where show = showFileNameError Nothing instance Exception FileNameError showFileNameError :: Maybe PortabilityPlatform -> FileNameError -> String showFileNameError mb_plat err = case err of InvalidFileName path -> "Invalid" ++ plat ++ " file name in tar archive: " ++ show path AbsoluteFileName path -> "Absolute" ++ plat ++ " file name in tar archive: " ++ show path where plat = maybe "" (' ':) mb_plat -------------------------- -- Tarbombs -- -- | This function checks a sequence of tar entries for being a \"tar bomb\". -- This means that the tar file does not follow the standard convention that -- all entries are within a single subdirectory, e.g. a file \"foo.tar\" would -- usually have all entries within the \"foo/\" subdirectory. -- -- Given the expected subdirectory, this function checks that all entries are -- within that subdirectory. -- -- Note: This check must be used in conjunction with 'checkSecurity' -- (or 'checkPortability'). -- checkTarbomb :: FilePath -> Entries e -> Entries (Either e TarBombError) checkTarbomb expectedTopDir = checkEntries (checkEntryTarbomb expectedTopDir) checkEntryTarbomb :: FilePath -> Entry -> Maybe TarBombError checkEntryTarbomb _ entry | nonFilesystemEntry = Nothing where -- Ignore some special entries we will not unpack anyway nonFilesystemEntry = case entryContent entry of OtherEntryType 'g' _ _ -> True --PAX global header OtherEntryType 'x' _ _ -> True --PAX individual header _ -> False checkEntryTarbomb expectedTopDir entry = case FilePath.Native.splitDirectories (entryPath entry) of (topDir:_) | topDir == expectedTopDir -> Nothing _ -> Just $ TarBombError expectedTopDir -- | An error that occurs if a tar file is a \"tar bomb\" that would extract -- files outside of the intended directory. data TarBombError = TarBombError FilePath deriving (Typeable) instance Exception TarBombError instance Show TarBombError where show (TarBombError expectedTopDir) = "File in tar archive is not in the expected directory " ++ show expectedTopDir -------------------------- -- Portability -- -- | This function checks a sequence of tar entries for a number of portability -- issues. It will complain if: -- -- * The old \"Unix V7\" or \"gnu\" formats are used. For maximum portability -- only the POSIX standard \"ustar\" format should be used. -- -- * A non-portable entry type is used. Only ordinary files, hard links, -- symlinks and directories are portable. Device files, pipes and others are -- not portable between all common operating systems. -- -- * Non-ASCII characters are used in file names. There is no agreed portable -- convention for Unicode or other extended character sets in file names in -- tar archives.
-- -- * File names that would not be portable to both Unix and Windows. This check -- allows only characters that are valid on both systems and accounts for the -- \'/\' vs \'\\\' directory separator conventions. -- checkPortability :: Entries e -> Entries (Either e PortabilityError) checkPortability = checkEntries checkEntryPortability checkEntryPortability :: Entry -> Maybe PortabilityError checkEntryPortability entry | entryFormat entry `elem` [V7Format, GnuFormat] = Just $ NonPortableFormat (entryFormat entry) | not (portableFileType (entryContent entry)) = Just NonPortableFileType | not (all portableChar posixPath) = Just $ NonPortableEntryNameChar posixPath | not (FilePath.Posix.isValid posixPath) = Just $ NonPortableFileName "unix" (InvalidFileName posixPath) | not (FilePath.Windows.isValid windowsPath) = Just $ NonPortableFileName "windows" (InvalidFileName windowsPath) | FilePath.Posix.isAbsolute posixPath = Just $ NonPortableFileName "unix" (AbsoluteFileName posixPath) | FilePath.Windows.isAbsolute windowsPath = Just $ NonPortableFileName "windows" (AbsoluteFileName windowsPath) | any (=="..") (FilePath.Posix.splitDirectories posixPath) = Just $ NonPortableFileName "unix" (InvalidFileName posixPath) | any (=="..") (FilePath.Windows.splitDirectories windowsPath) = Just $ NonPortableFileName "windows" (InvalidFileName windowsPath) | otherwise = Nothing where tarPath = entryTarPath entry posixPath = fromTarPathToPosixPath tarPath windowsPath = fromTarPathToWindowsPath tarPath portableFileType ftype = case ftype of NormalFile {} -> True HardLink {} -> True SymbolicLink {} -> True Directory -> True _ -> False portableChar c = c <= '\127' -- | Portability problems in a tar archive data PortabilityError = NonPortableFormat Format | NonPortableFileType | NonPortableEntryNameChar FilePath | NonPortableFileName PortabilityPlatform FileNameError deriving (Typeable) -- | The name of a platform that portability issues arise from type PortabilityPlatform = String instance Exception PortabilityError instance Show PortabilityError where show (NonPortableFormat format) = "Archive is in the " ++ fmt ++ " format" where fmt = case format of V7Format -> "old Unix V7 tar" UstarFormat -> "ustar" -- I never generate this but a user might GnuFormat -> "GNU tar" show NonPortableFileType = "Non-portable file type in archive" show (NonPortableEntryNameChar posixPath) = "Non-portable character in archive entry name: " ++ show posixPath show (NonPortableFileName platform err) = showFileNameError (Just platform) err -------------------------- -- Utils -- checkEntries :: (Entry -> Maybe e') -> Entries e -> Entries (Either e e') checkEntries checkEntry = mapEntries (\entry -> maybe (Right entry) Left (checkEntry entry))
tar-0.5.1.1/Codec/Archive/Tar/Entry.hs
----------------------------------------------------------------------------- -- | -- Module : Codec.Archive.Tar.Entry -- Copyright : (c) 2007 Bjorn Bringert, -- 2008 Andrea Vezzosi, -- 2008-2009 Duncan Coutts -- License : BSD3 -- -- Maintainer : duncan@community.haskell.org -- Portability : portable -- -- Types and functions to manipulate tar entries. -- -- While the "Codec.Archive.Tar" module provides only the simple high level -- API, this module provides full access to the details of tar entries. This -- lets you inspect all the meta-data, construct entries and handle error cases -- more precisely.
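--
-- For example (an editor's sketch; the import alias is illustrative), a
-- normal file entry can be built from a 'TarPath' and some content using
-- 'toTarPath' and 'fileEntry':
--
-- > import qualified Data.ByteString.Lazy.Char8 as LBS.Char8
-- >
-- > helloEntry :: Either String Entry
-- > helloEntry =
-- >   case toTarPath False "dir/hello.txt" of   -- False: not a directory
-- >     Left err   -> Left err                  -- path too long for the format
-- >     Right path -> Right (fileEntry path (LBS.Char8.pack "hello world"))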
-- -- This module uses common names and so is designed to be imported qualified: -- -- > import qualified Codec.Archive.Tar as Tar -- > import qualified Codec.Archive.Tar.Entry as Tar -- ----------------------------------------------------------------------------- module Codec.Archive.Tar.Entry ( -- * Tar entry and associated types Entry(..), --TODO: should be the following with the Entry constructor not exported, -- but haddock cannot document that properly -- see http://trac.haskell.org/haddock/ticket/3 --Entry(filePath, fileMode, ownerId, groupId, fileSize, modTime, -- fileType, linkTarget, headerExt, fileContent), entryPath, EntryContent(..), Ownership(..), FileSize, Permissions, EpochTime, DevMajor, DevMinor, TypeCode, Format(..), -- * Constructing simple entry values simpleEntry, fileEntry, directoryEntry, -- * Standard file permissions -- | For maximum portability when constructing archives use only these file -- permissions. ordinaryFilePermissions, executableFilePermissions, directoryPermissions, -- * Constructing entries from disk files packFileEntry, packDirectoryEntry, getDirectoryContentsRecursive, -- * TarPath type TarPath, toTarPath, fromTarPath, fromTarPathToPosixPath, fromTarPathToWindowsPath, -- * LinkTarget type LinkTarget, toLinkTarget, fromLinkTarget, fromLinkTargetToPosixPath, fromLinkTargetToWindowsPath, ) where import Codec.Archive.Tar.Types import Codec.Archive.Tar.Pack
tar-0.5.1.1/Codec/Archive/Tar/Index.hs
{-# LANGUAGE CPP, BangPatterns, PatternGuards #-} {-# LANGUAGE GeneralizedNewtypeDeriving, DeriveDataTypeable #-} ----------------------------------------------------------------------------- -- | -- Module : Codec.Archive.Tar.Index -- Copyright : (c) 2010-2015 Duncan Coutts -- License : BSD3 -- -- Maintainer : duncan@community.haskell.org -- Portability : portable -- -- Random access to the content of a @.tar@ archive. -- -- This module uses common names and so is designed to be imported qualified: -- -- > import qualified Codec.Archive.Tar.Index as TarIndex -- ----------------------------------------------------------------------------- module Codec.Archive.Tar.Index ( -- | The @tar@ format does not contain an index of files within the -- archive. Normally, @tar@ files have to be processed linearly. It is -- sometimes useful, however, to be able to get random access to files -- within the archive. -- -- This module provides an index of a @tar@ file. A linear pass of the -- @tar@ file is needed to 'build' the 'TarIndex', but thereafter you can -- 'lookup' paths in the @tar@ file, and then use 'hReadEntry' to -- seek to the right part of the file and read the entry. -- -- An index cannot be used to look up 'Directory' entries in a tar file; -- instead, you will get a 'TarDir' entry listing all the entries in the -- directory.
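--
-- As a sketch of typical use (an editor's addition; @tarfile@, @path@ and
-- the error handling are illustrative, not part of this module):
--
-- > readIndexedFile :: FilePath -> FilePath -> IO ()
-- > readIndexedFile tarfile path = do
-- >     entries <- TarIndex.build . Tar.read <$> LBS.readFile tarfile
-- >     index   <- either throwIO return entries    -- FormatError is an Exception
-- >     case TarIndex.lookup index path of
-- >       Just (TarIndex.TarFileEntry offset) ->
-- >         withFile tarfile ReadMode $ \hnd -> do
-- >           entry <- TarIndex.hReadEntry hnd offset
-- >           putStrLn (Tar.entryPath entry)
-- >       _ -> putStrLn "not a plain file in this archive"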
-- * Index type TarIndex, -- * Index lookup lookup, TarIndexEntry(..), toList, -- ** I\/O operations TarEntryOffset, hReadEntry, hReadEntryHeader, -- * Index construction build, -- ** Incremental construction -- $incremental-construction IndexBuilder, empty, addNextEntry, skipNextEntry, finalise, unfinalise, -- * Serialising indexes serialise, deserialise, -- * Lower level operations with offsets and I\/O on tar files hReadEntryHeaderOrEof, hSeekEntryOffset, hSeekEntryContentOffset, hSeekEndEntryOffset, nextEntryOffset, indexEndEntryOffset, indexNextEntryOffset, -- * Deprecated aliases emptyIndex, finaliseIndex, #ifdef TESTS prop_lookup, prop_toList, prop_valid, prop_serialise_deserialise, prop_serialiseSize, prop_index_matches_tar, prop_finalise_unfinalise, #endif ) where import Data.Typeable (Typeable) import Codec.Archive.Tar.Types as Tar import Codec.Archive.Tar.Read as Tar import qualified Codec.Archive.Tar.Index.StringTable as StringTable import Codec.Archive.Tar.Index.StringTable (StringTable, StringTableBuilder) import qualified Codec.Archive.Tar.Index.IntTrie as IntTrie import Codec.Archive.Tar.Index.IntTrie (IntTrie, IntTrieBuilder) import qualified System.FilePath.Posix as FilePath import Data.Monoid (Monoid(..)) #if (MIN_VERSION_base(4,5,0)) import Data.Monoid ((<>)) #endif import Data.Word import Data.Int import Data.Bits import qualified Data.Array.Unboxed as A import Prelude hiding (lookup) import System.IO import Control.Exception (assert, throwIO) import Control.DeepSeq import qualified Data.ByteString as BS import qualified Data.ByteString.Char8 as BS.Char8 import qualified Data.ByteString.Lazy as LBS import qualified Data.ByteString.Unsafe as BS #if MIN_VERSION_bytestring(0,10,2) || defined(MIN_VERSION_bytestring_builder) import Data.ByteString.Builder as BS import Data.ByteString.Builder.Extra as BS (toLazyByteStringWith, untrimmedStrategy) #else import Data.ByteString.Lazy.Builder as BS import Data.ByteString.Lazy.Builder.Extras as BS (toLazyByteStringWith, untrimmedStrategy) #endif #ifdef TESTS import qualified Prelude import Test.QuickCheck import Test.QuickCheck.Property (ioProperty) import Control.Applicative ((<$>), (<*>)) import Control.Monad (unless) import Data.List (nub, sort, sortBy, stripPrefix, isPrefixOf) import Data.Maybe import Data.Function (on) import Control.Exception (SomeException, try) import Codec.Archive.Tar.Write as Tar import qualified Data.ByteString.Handle as HBS #endif -- | An index of the entries in a tar file. -- -- This index type is designed to be quite compact and suitable to store either -- on disk or in memory. -- data TarIndex = TarIndex -- As an example of how the mapping works, consider these example files: -- "foo/bar.hs" at offset 0 -- "foo/baz.hs" at offset 1024 -- -- We split the paths into components and enumerate them. -- { "foo" -> TokenId 0, "bar.hs" -> TokenId 1, "baz.hs" -> TokenId 2 } -- -- We convert paths into sequences of 'TokenId's, i.e. -- "foo/bar.hs" becomes [PathComponentId 0, PathComponentId 1] -- "foo/baz.hs" becomes [PathComponentId 0, PathComponentId 2] -- -- We use a trie mapping sequences of 'PathComponentId's to the entry offset: -- { [PathComponentId 0, PathComponentId 1] -> offset 0 -- , [PathComponentId 0, PathComponentId 2] -> offset 1024 } -- The mapping of filepath components as strings to ids. {-# UNPACK #-} !(StringTable PathComponentId) -- Mapping of sequences of filepath component ids to tar entry offsets. 
{-# UNPACK #-} !(IntTrie PathComponentId TarEntryOffset) -- The offset immediately after the last entry, where we would append any -- additional entries. {-# UNPACK #-} !TarEntryOffset deriving (Eq, Show, Typeable) instance NFData TarIndex where rnf (TarIndex _ _ _) = () -- fully strict by construction -- | The result of 'lookup' in a 'TarIndex'. It can either be a file directly, -- or a directory entry containing further entries (and all subdirectories -- recursively). Note that the subtrees are constructed lazily, so it's -- cheaper if you don't look at them. -- data TarIndexEntry = TarFileEntry {-# UNPACK #-} !TarEntryOffset | TarDir [(FilePath, TarIndexEntry)] deriving (Show, Typeable) newtype PathComponentId = PathComponentId Int deriving (Eq, Ord, Enum, Show, Typeable) -- | An offset within a tar file. Use 'hReadEntry', 'hReadEntryHeader' or -- 'hSeekEntryOffset'. -- -- This is actually a tar \"record\" number, not a byte offset. -- type TarEntryOffset = Word32 -- | Look up a given filepath in the 'TarIndex'. It may return a 'TarFileEntry' -- containing the 'TarEntryOffset' of the file within the tar file, or if -- the filepath identifies a directory then it returns a 'TarDir' containing -- the list of files within that directory. -- -- Given the 'TarEntryOffset' you can then use one of the I\/O operations: -- -- * 'hReadEntry' to read the whole entry; -- -- * 'hReadEntryHeader' to read just the file metadata (e.g. its length); -- lookup :: TarIndex -> FilePath -> Maybe TarIndexEntry lookup (TarIndex pathTable pathTrie _) path = do fpath <- toComponentIds pathTable path tentry <- IntTrie.lookup pathTrie fpath return (mkIndexEntry tentry) where mkIndexEntry (IntTrie.Entry offset) = TarFileEntry offset mkIndexEntry (IntTrie.Completions entries) = TarDir [ (fromComponentId pathTable key, mkIndexEntry entry) | (key, entry) <- entries ] toComponentIds :: StringTable PathComponentId -> FilePath -> Maybe [PathComponentId] toComponentIds table = lookupComponents [] . filter (/= BS.Char8.singleton '.') . splitDirectories . BS.Char8.pack where lookupComponents cs' [] = Just (reverse cs') lookupComponents cs' (c:cs) = case StringTable.lookup table c of Nothing -> Nothing Just cid -> lookupComponents (cid:cs') cs fromComponentId :: StringTable PathComponentId -> PathComponentId -> FilePath fromComponentId table = BS.Char8.unpack . StringTable.index table -- | All the files in the index with their corresponding 'TarEntryOffset's. -- -- Note that the files are in no special order. If you intend to read all or -- most files then it is recommended to sort by the 'TarEntryOffset'. -- toList :: TarIndex -> [(FilePath, TarEntryOffset)] toList (TarIndex pathTable pathTrie _) = [ (path, off) | (cids, off) <- IntTrie.toList pathTrie , let path = FilePath.joinPath (map (fromComponentId pathTable) cids) ] -- | Build a 'TarIndex' from a sequence of tar 'Entries'. The 'Entries' are -- assumed to start at offset @0@ within a file. -- build :: Entries e -> Either e TarIndex build = go empty where go !builder (Next e es) = go (addNextEntry e builder) es go !builder Done = Right $! finalise builder go !_ (Fail err) = Left err -- $incremental-construction -- If you need more control than 'build' then you can construct the index -- in an accumulator style using the 'IndexBuilder' and its operations. -- -- Start with 'empty' and use 'addNextEntry' (or 'skipNextEntry') for -- each 'Entry' in the tar file in order.
Every entry must be added or skipped in -- order, otherwise the resulting 'TarIndex' will report the wrong -- 'TarEntryOffset's. At the end use 'finalise' to get the 'TarIndex'. -- -- For example, 'build' is simply: -- -- > build = go empty -- >   where -- >     go !builder (Next e es) = go (addNextEntry e builder) es -- >     go !builder Done = Right $! finalise builder -- >     go !_ (Fail err) = Left err -- | The intermediate type used for incremental construction of a 'TarIndex'. -- data IndexBuilder = IndexBuilder !(StringTableBuilder PathComponentId) !(IntTrieBuilder PathComponentId TarEntryOffset) {-# UNPACK #-} !TarEntryOffset deriving (Eq, Show) instance NFData IndexBuilder where rnf (IndexBuilder _ _ _) = () -- fully strict by construction -- | The initial empty 'IndexBuilder'. -- empty :: IndexBuilder empty = IndexBuilder StringTable.empty IntTrie.empty 0 emptyIndex :: IndexBuilder emptyIndex = empty {-# DEPRECATED emptyIndex "Use TarIndex.empty" #-} -- | Add the next 'Entry' into the 'IndexBuilder'. -- addNextEntry :: Entry -> IndexBuilder -> IndexBuilder addNextEntry entry (IndexBuilder stbl itrie nextOffset) = IndexBuilder stbl' itrie' (nextEntryOffset entry nextOffset) where !entrypath = splitTarPath (entryTarPath entry) (stbl', cids) = StringTable.inserts entrypath stbl itrie' = IntTrie.insert cids nextOffset itrie -- | Use this function if you want to skip some entries and not add them to the -- final 'TarIndex'. -- skipNextEntry :: Entry -> IndexBuilder -> IndexBuilder skipNextEntry entry (IndexBuilder stbl itrie nextOffset) = IndexBuilder stbl itrie (nextEntryOffset entry nextOffset) -- | Finish accumulating 'Entry' information and build the compact 'TarIndex' -- lookup structure. -- finalise :: IndexBuilder -> TarIndex finalise (IndexBuilder stbl itrie finalOffset) = TarIndex pathTable pathTrie finalOffset where pathTable = StringTable.finalise stbl pathTrie = IntTrie.finalise itrie finaliseIndex :: IndexBuilder -> TarIndex finaliseIndex = finalise {-# DEPRECATED finaliseIndex "Use TarIndex.finalise" #-} -- | This is the offset immediately following the entry most recently added -- to the 'IndexBuilder'. You might use this if you need to know the offsets -- but don't want to use the 'TarIndex' lookup structure. -- Use with 'hSeekEntryOffset'. See also 'nextEntryOffset'. -- indexNextEntryOffset :: IndexBuilder -> TarEntryOffset indexNextEntryOffset (IndexBuilder _ _ off) = off -- | This is the offset immediately following the last entry in the tar file. -- This can be useful to append further entries into the tar file. -- Use with 'hSeekEntryOffset', or just use 'hSeekEndEntryOffset' directly. -- indexEndEntryOffset :: TarIndex -> TarEntryOffset indexEndEntryOffset (TarIndex _ _ off) = off -- | Calculate the 'TarEntryOffset' of the next entry, given the size and -- offset of the current entry. -- -- This is much like using 'skipNextEntry' and 'indexNextEntryOffset', but without -- using an 'IndexBuilder'.
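--
-- For instance (an editor's sketch), the running offsets of a list of
-- entries can be computed with a scan, since each offset is derived from the
-- previous one:
--
-- > entryOffsets :: [Entry] -> [TarEntryOffset]
-- > entryOffsets = scanl (flip nextEntryOffset) 0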
-- nextEntryOffset :: Entry -> TarEntryOffset -> TarEntryOffset nextEntryOffset entry offset = offset + 1 + case entryContent entry of NormalFile _ size -> blocks size OtherEntryType _ _ size -> blocks size _ -> 0 where -- NOTE: to avoid underflow, do the (fromIntegral :: Int64 -> Word32) last blocks :: Int64 -> TarEntryOffset blocks size = fromIntegral (1 + (size - 1) `div` 512) type FilePathBS = BS.ByteString splitTarPath :: TarPath -> [FilePathBS] splitTarPath (TarPath name prefix) = splitDirectories prefix ++ splitDirectories name splitDirectories :: FilePathBS -> [FilePathBS] splitDirectories bs = case BS.Char8.split '/' bs of c:cs | BS.null c -> BS.Char8.singleton '/' : filter (not . BS.null) cs cs -> filter (not . BS.null) cs ------------------------- -- Resume building an existing index -- -- | Resume building an existing index -- -- A 'TarIndex' is optimized for a highly compact and efficient in-memory -- representation. This, however, makes it read-only. If you have an existing -- 'TarIndex' for a large file, and want to add to it, you can translate the -- 'TarIndex' back to an 'IndexBuilder'. Be aware that this is a relatively -- costly operation (linear in the size of the 'TarIndex'), though still -- faster than starting again from scratch. -- -- This is the left inverse to 'finalise' (modulo ordering). -- unfinalise :: TarIndex -> IndexBuilder unfinalise (TarIndex pathTable pathTrie finalOffset) = IndexBuilder (StringTable.unfinalise pathTable) (IntTrie.unfinalise pathTrie) finalOffset ------------------------- -- I/O operations -- -- | Reads an entire 'Entry' at the given 'TarEntryOffset' in the tar file. -- The 'Handle' must be open for reading and be seekable. -- -- This reads the whole entry into memory strictly, not incrementally. For more -- control, use 'hReadEntryHeader' and then read the entry content manually. -- hReadEntry :: Handle -> TarEntryOffset -> IO Entry hReadEntry hnd off = do entry <- hReadEntryHeader hnd off case entryContent entry of NormalFile _ size -> do body <- LBS.hGet hnd (fromIntegral size) return entry { entryContent = NormalFile body size } OtherEntryType c _ size -> do body <- LBS.hGet hnd (fromIntegral size) return entry { entryContent = OtherEntryType c body size } _ -> return entry -- | Read the header for a 'Entry' at the given 'TarEntryOffset' in the tar -- file. The 'entryContent' will contain the correct metadata but an empty file -- content. The 'Handle' must be open for reading and be seekable. -- -- The 'Handle' position is advanced to the beginning of the entry content (if -- any). You must check the 'entryContent' to see if the entry is of type -- 'NormalFile'. If it is, the 'NormalFile' gives the content length and you -- are free to read this much data from the 'Handle'. -- -- > entry <- Tar.hReadEntryHeader hnd -- > case Tar.entryContent entry of -- > Tar.NormalFile _ size -> do content <- BS.hGet hnd size -- > ... -- -- Of course you don't have to read it all in one go (as 'hReadEntry' does), -- you can use any appropriate method to read it incrementally. -- -- In addition to I\/O errors, this can throw a 'FormatError' if the offset is -- wrong, or if the file is not valid tar format. -- -- There is also the lower level operation 'hSeekEntryOffset'. 
-- hReadEntryHeader :: Handle -> TarEntryOffset -> IO Entry hReadEntryHeader hnd blockOff = do hSeekEntryOffset hnd blockOff header <- LBS.hGet hnd 512 case Tar.read header of Tar.Next entry _ -> return entry Tar.Fail e -> throwIO e Tar.Done -> fail "hReadEntryHeader: impossible" -- | Set the 'Handle' position to the position corresponding to the given -- 'TarEntryOffset'. -- -- This position is where the entry metadata can be read. If you already know -- the entry has a body (and perhaps know its length), you may wish to seek to -- the body content directly using 'hSeekEntryContentOffset'. -- hSeekEntryOffset :: Handle -> TarEntryOffset -> IO () hSeekEntryOffset hnd blockOff = hSeek hnd AbsoluteSeek (fromIntegral blockOff * 512) -- | Set the 'Handle' position to the entry content position corresponding to -- the given 'TarEntryOffset'. -- -- This position is where the entry content can be read using ordinary I\/O -- operations (though you have to know in advance how big the entry content -- is). This is /only valid/ if you /already know/ the entry has a body (i.e. -- is a normal file). -- hSeekEntryContentOffset :: Handle -> TarEntryOffset -> IO () hSeekEntryContentOffset hnd blockOff = hSeekEntryOffset hnd (blockOff + 1) -- | This is a low-level variant of 'hReadEntryHeader' that can be used to -- iterate through a tar file, entry by entry. -- -- It has a few differences compared to 'hReadEntryHeader': -- -- * It returns an indication when the end of the tar file is reached. -- -- * It /does not/ move the 'Handle' position to the beginning of the entry -- content. -- -- * It returns the 'TarEntryOffset' of the next entry. -- -- After this action, the 'Handle' position is not in any useful place. If -- you want to skip to the next entry, take the 'TarEntryOffset' returned and -- use 'hReadEntryHeaderOrEof' again. Or if having inspected the 'Entry' -- header you want to read the entry content (if it has one) then use -- 'hSeekEntryContentOffset' on the original input 'TarEntryOffset'. -- hReadEntryHeaderOrEof :: Handle -> TarEntryOffset -> IO (Maybe (Entry, TarEntryOffset)) hReadEntryHeaderOrEof hnd blockOff = do hSeekEntryOffset hnd blockOff header <- LBS.hGet hnd 1024 case Tar.read header of Tar.Next entry _ -> let !blockOff' = nextEntryOffset entry blockOff in return (Just (entry, blockOff')) Tar.Done -> return Nothing Tar.Fail e -> throwIO e -- | Seek to the end of a tar file, to the position where new entries can -- be appended, and return that 'TarEntryOffset'. -- -- If you have a valid 'TarIndex' for this tar file then you should supply it -- because it allows seeking directly to the correct location. -- -- If you do not have an index, then this becomes an expensive linear -- operation because we have to read each tar entry header from the beginning -- to find the location immediately after the last entry (this is because tar -- files have a variable length trailer and we cannot reliably find that by -- starting at the end). In this mode, it will fail with an exception if the -- file is not in fact in the tar format.
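--
-- For example (an editor's sketch of the indexed case; @Tar.write@ is from
-- "Codec.Archive.Tar" and supplies the terminating zero blocks):
--
-- > appendEntries :: FilePath -> TarIndex -> [Entry] -> IO ()
-- > appendEntries tarfile index entries =
-- >   withFile tarfile ReadWriteMode $ \hnd -> do
-- >     _ <- hSeekEndEntryOffset hnd (Just index)  -- seek past the last entry
-- >     LBS.hPut hnd (Tar.write entries)           -- overwrite the old trailer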
-- hSeekEndEntryOffset :: Handle -> Maybe TarIndex -> IO TarEntryOffset hSeekEndEntryOffset hnd (Just index) = do let offset = indexEndEntryOffset index hSeekEntryOffset hnd offset return offset hSeekEndEntryOffset hnd Nothing = do size <- hFileSize hnd if size == 0 then return 0 else seekToEnd 0 where seekToEnd offset = do mbe <- hReadEntryHeaderOrEof hnd offset case mbe of Nothing -> do hSeekEntryOffset hnd offset return offset Just (_, offset') -> seekToEnd offset' ------------------------- -- (de)serialisation -- -- | The 'TarIndex' is compact in memory, and it has a similarly compact -- external representation. -- serialise :: TarIndex -> BS.ByteString serialise = toStrict . serialiseLBS -- we keep this version around just so we can check we got the size right. serialiseLBS :: TarIndex -> LBS.ByteString serialiseLBS index = BS.toLazyByteStringWith (BS.untrimmedStrategy (serialiseSize index) 512) LBS.empty (serialiseBuilder index) serialiseSize :: TarIndex -> Int serialiseSize (TarIndex stringTable intTrie _) = StringTable.serialiseSize stringTable + IntTrie.serialiseSize intTrie + 8 serialiseBuilder :: TarIndex -> BS.Builder serialiseBuilder (TarIndex stringTable intTrie finalOffset) = BS.word32BE 2 -- format version <> BS.word32BE finalOffset <> StringTable.serialise stringTable <> IntTrie.serialise intTrie -- | Read the external representation back into a 'TarIndex'. -- deserialise :: BS.ByteString -> Maybe (TarIndex, BS.ByteString) deserialise bs | BS.length bs < 8 = Nothing | let ver = readWord32BE bs 0 , ver == 1 = do let !finalOffset = readWord32BE bs 4 (stringTable, bs') <- StringTable.deserialiseV1 (BS.drop 8 bs) (intTrie, bs'') <- IntTrie.deserialise bs' return (TarIndex stringTable intTrie finalOffset, bs'') | let ver = readWord32BE bs 0 , ver == 2 = do let !finalOffset = readWord32BE bs 4 (stringTable, bs') <- StringTable.deserialiseV2 (BS.drop 8 bs) (intTrie, bs'') <- IntTrie.deserialise bs' return (TarIndex stringTable intTrie finalOffset, bs'') | otherwise = Nothing readWord32BE :: BS.ByteString -> Int -> Word32 readWord32BE bs i = assert (i >= 0 && i+3 <= BS.length bs - 1) $ fromIntegral (BS.unsafeIndex bs (i + 0)) `shiftL` 24 + fromIntegral (BS.unsafeIndex bs (i + 1)) `shiftL` 16 + fromIntegral (BS.unsafeIndex bs (i + 2)) `shiftL` 8 + fromIntegral (BS.unsafeIndex bs (i + 3)) ------------------------- -- Test properties -- #ifdef TESTS -- Not quite the properties of a finite mapping because we also have lookups -- that result in completions. 
prop_lookup :: ValidPaths -> NonEmptyFilePath -> Bool prop_lookup (ValidPaths paths) (NonEmptyFilePath p) = case (lookup index p, Prelude.lookup p paths) of (Nothing, Nothing) -> True (Just (TarFileEntry offset), Just (_,offset')) -> offset == offset' (Just (TarDir entries), Nothing) -> sort (nub (map fst entries)) == sort (nub completions) _ -> False where index = construct paths completions = [ head (FilePath.splitDirectories completion) | (path,_) <- paths , completion <- maybeToList $ stripPrefix (p ++ "/") path ] prop_toList :: ValidPaths -> Bool prop_toList (ValidPaths paths) = sort (toList index) == sort [ (path, off) | (path, (_sz, off)) <- paths ] where index = construct paths prop_valid :: ValidPaths -> Bool prop_valid (ValidPaths paths) | not $ StringTable.prop_valid pathbits = error "TarIndex: bad string table" | not $ IntTrie.prop_lookup intpaths = error "TarIndex: bad int trie" | not $ IntTrie.prop_completions intpaths = error "TarIndex: bad int trie" | not $ prop' = error "TarIndex: bad prop" | otherwise = True where index@(TarIndex pathTable _ _) = construct paths pathbits = concatMap (map BS.Char8.pack . FilePath.splitDirectories . fst) paths intpaths = [ (cids, offset) | (path, (_size, offset)) <- paths , let Just cids = toComponentIds pathTable path ] prop' = flip all paths $ \(file, (_size, offset)) -> case lookup index file of Just (TarFileEntry offset') -> offset' == offset _ -> False prop_serialise_deserialise :: ValidPaths -> Bool prop_serialise_deserialise (ValidPaths paths) = Just (index, BS.empty) == (deserialise . serialise) index where index = construct paths prop_serialiseSize :: ValidPaths -> Bool prop_serialiseSize (ValidPaths paths) = case (LBS.toChunks . serialiseLBS) index of [c1] -> BS.length c1 == serialiseSize index _ -> False where index = construct paths newtype NonEmptyFilePath = NonEmptyFilePath FilePath deriving Show instance Arbitrary NonEmptyFilePath where arbitrary = NonEmptyFilePath . FilePath.joinPath <$> listOf1 (elements ["a", "b", "c", "d"]) newtype ValidPaths = ValidPaths [(FilePath, (Int64, TarEntryOffset))] deriving Show instance Arbitrary ValidPaths where arbitrary = do paths <- makeNoPrefix <$> listOf arbitraryPath sizes <- vectorOf (length paths) (getNonNegative <$> arbitrary) let offsets = scanl (\o sz -> o + 1 + blocks sz) 0 sizes return (ValidPaths (zip paths (zip sizes offsets))) where arbitraryPath = FilePath.joinPath <$> listOf1 (elements ["a", "b", "c", "d"]) makeNoPrefix [] = [] makeNoPrefix (k:ks) | all (not . isPrefixOfOther k) ks = k : makeNoPrefix ks | otherwise = makeNoPrefix ks isPrefixOfOther a b = a `isPrefixOf` b || b `isPrefixOf` a blocks :: Int64 -> TarEntryOffset blocks size = fromIntegral (1 + ((size - 1) `div` 512)) -- Helper for bulk construction. construct :: [(FilePath, (Int64, TarEntryOffset))] -> TarIndex construct = either (\_ -> undefined) id . build . 
foldr (\(path, (size, _off)) es -> Next (testEntry path size) es) Done example0 :: Entries () example0 = testEntry "foo-1.0/foo-1.0.cabal" 1500 -- at block 0 `Next` testEntry "foo-1.0/LICENSE" 2000 -- at block 4 `Next` testEntry "foo-1.0/Data/Foo.hs" 1000 -- at block 9 `Next` Done example1 :: Entries () example1 = Next (testEntry "./" 1500) Done <> example0 testEntry :: FilePath -> Int64 -> Entry testEntry name size = simpleEntry path (NormalFile mempty size) where Right path = toTarPath False name -- | Simple tar archive containing regular files only data SimpleTarArchive = SimpleTarArchive { simpleTarEntries :: Tar.Entries () , simpleTarRaw :: [(FilePath, LBS.ByteString)] , simpleTarBS :: LBS.ByteString } instance Show SimpleTarArchive where show = show . simpleTarRaw prop_index_matches_tar :: SimpleTarArchive -> Property prop_index_matches_tar sta = ioProperty (try go >>= either (\e -> throwIO (e :: SomeException)) (\_ -> return True)) where go :: IO () go = do h <- HBS.readHandle True (simpleTarBS sta) goEntries h 0 (simpleTarEntries sta) goEntries :: Handle -> TarEntryOffset -> Tar.Entries () -> IO () goEntries _ _ Tar.Done = return () goEntries _ _ (Tar.Fail _) = throwIO (userError "Fail entry in SimpleTarArchive") goEntries h offset (Tar.Next e es) = do goEntry h offset e goEntries h (nextEntryOffset e offset) es goEntry :: Handle -> TarEntryOffset -> Tar.Entry -> IO () goEntry h offset e = do e' <- hReadEntry h offset case (Tar.entryContent e, Tar.entryContent e') of (Tar.NormalFile bs sz, Tar.NormalFile bs' sz') -> unless (sz == sz' && bs == bs') $ throwIO $ userError "Entry mismatch" _otherwise -> throwIO $ userError "unexpected entry types" instance Arbitrary SimpleTarArchive where arbitrary = do numEntries <- sized $ \n -> choose (0, n) rawEntries <- mkRaw numEntries let entries = mkList rawEntries return SimpleTarArchive { simpleTarEntries = mkEntries entries , simpleTarRaw = rawEntries , simpleTarBS = Tar.write entries } where mkRaw :: Int -> Gen [(FilePath, LBS.ByteString)] mkRaw 0 = return [] mkRaw n = do -- Pick a size around 0, 1, or 2 block boundaries sz <- sized $ \n -> elements (take n fileSizes) bs <- LBS.pack `fmap` vectorOf sz arbitrary es <- mkRaw (n - 1) return $ ("file" ++ show n, bs) : es mkList :: [(FilePath, LBS.ByteString)] -> [Tar.Entry] mkList [] = [] mkList ((fp, bs):es) = entry : mkList es where Right path = toTarPath False fp entry = simpleEntry path content content = NormalFile bs (LBS.length bs) mkEntries :: [Tar.Entry] -> Tar.Entries () mkEntries [] = Tar.Done mkEntries (e:es) = Tar.Next e (mkEntries es) -- Sizes around 0, 1, and 2 block boundaries fileSizes :: [Int] fileSizes = [ 0 , 1 , 2 , 510 , 511 , 512 , 513 , 514 , 1022 , 1023 , 1024 , 1025 , 1026 ] -- | 'IndexBuilder' constructed from a 'SimpleIndex' newtype SimpleIndexBuilder = SimpleIndexBuilder IndexBuilder deriving Show instance Arbitrary SimpleIndexBuilder where arbitrary = SimpleIndexBuilder . build' . simpleTarEntries <$> arbitrary where -- like 'build', but don't finalize build' :: Show e => Entries e -> IndexBuilder build' = go empty where go !builder (Next e es) = go (addNextEntry e builder) es go !builder Done = builder go !_ (Fail err) = error (show err) prop_finalise_unfinalise :: SimpleIndexBuilder -> Bool prop_finalise_unfinalise (SimpleIndexBuilder index) = unfinalise (finalise index) == index #endif toStrict :: LBS.ByteString -> BS.ByteString #if MIN_VERSION_bytestring(0,10,0) toStrict = LBS.toStrict #else toStrict = BS.concat . 
LBS.toChunks #endif #if !(MIN_VERSION_base(4,5,0)) (<>) :: Monoid m => m -> m -> m (<>) = mappend #endif
tar-0.5.1.1/Codec/Archive/Tar/Index/IntTrie.hs
{-# LANGUAGE CPP, BangPatterns, PatternGuards #-} {-# LANGUAGE DeriveDataTypeable, ScopedTypeVariables #-} module Codec.Archive.Tar.Index.IntTrie ( IntTrie, construct, toList, IntTrieBuilder, empty, insert, finalise, unfinalise, lookup, TrieLookup(..), serialise, serialiseSize, deserialise, #ifdef TESTS test1, test2, test3, ValidPaths(..), prop_lookup, prop_completions, prop_lookup_mono, prop_completions_mono, prop_construct_toList, prop_finalise_unfinalise, prop_serialise_deserialise, prop_serialiseSize, #endif ) where import Prelude hiding (lookup) import Data.Typeable (Typeable) import qualified Data.Array.Unboxed as A import Data.Array.IArray ((!)) import qualified Data.Bits as Bits import Data.Word (Word32) import Data.Bits import Data.Monoid (Monoid(..)) #if (MIN_VERSION_base(4,5,0)) import Data.Monoid ((<>)) #endif import qualified Data.ByteString as BS import qualified Data.ByteString.Lazy as LBS import qualified Data.ByteString.Unsafe as BS #if MIN_VERSION_bytestring(0,10,2) || defined(MIN_VERSION_bytestring_builder) import Data.ByteString.Builder as BS #else import Data.ByteString.Lazy.Builder as BS #endif import Control.Exception (assert) #if MIN_VERSION_containers(0,5,0) import qualified Data.Map.Strict as Map import qualified Data.IntMap.Strict as IntMap import Data.IntMap.Strict (IntMap) #else import qualified Data.Map as Map import qualified Data.IntMap as IntMap import Data.IntMap (IntMap) #endif import Data.List hiding (lookup, insert) import Data.Function (on) #ifdef TESTS import Test.QuickCheck import Control.Applicative ((<$>), (<*>)) #endif -- | A compact mapping from sequences of nats to nats. -- -- NOTE: The tries in this module have values /only/ at the leaves (which -- correspond to files), they do not have values at the branch points (which -- correspond to directories). newtype IntTrie k v = IntTrie (A.UArray Word32 Word32) deriving (Eq, Show, Typeable) -- Compact, read-only implementation of a trie. It's intended for use with file -- paths, but we do that via string ids. #ifdef TESTS -- Example mapping: -- example0 :: [(FilePath, Int)] example0 = [("foo-1.0/foo-1.0.cabal", 512) -- tar block 1 ,("foo-1.0/LICENSE", 2048) -- tar block 4 ,("foo-1.0/Data/Foo.hs", 4096)] -- tar block 8 -- After converting path components to integers this becomes: -- example1 :: [([Word32], Word32)] example1 = [([1,2], 512) ,([1,3], 2048) ,([1,4,5], 4096)] -- As a trie this looks like: -- [ (1, *) ] -- | -- [ (2, 512), (3, 2048), (4, *) ] -- | -- [ (5, 4096) ] -- We use an intermediate trie representation mktrie :: [(Int, TrieNode k v)] -> IntTrieBuilder k v mkleaf :: (Enum k, Enum v) => k -> v -> (Int, TrieNode k v) mknode :: Enum k => k -> IntTrieBuilder k v -> (Int, TrieNode k v) mktrie = IntTrieBuilder . 
IntMap.fromList mkleaf k v = (fromEnum k, TrieLeaf (enumToWord32 v)) mknode k t = (fromEnum k, TrieNode t) example2 :: IntTrieBuilder Word32 Word32 example2 = mktrie [ mknode 1 t1 ] where t1 = mktrie [ mkleaf 2 512, mkleaf 3 2048, mknode 4 t2 ] t2 = mktrie [ mkleaf 5 4096 ] example2' :: IntTrieBuilder Word32 Word32 example2' = mktrie [ mknode 0 t1 ] where t1 = mktrie [ mknode 3 t2 ] t2 = mktrie [ mknode 1 t3, mknode 2 t4 ] t3 = mktrie [ mkleaf 4 10608 ] t4 = mktrie [ mkleaf 4 10612 ] {- 0: [1,N0,3] 3: [1,N3,6] 6: [2,N1,N2,11,14] 11: [1,4,10608] 14: [1,4,10612] -} example2'' :: IntTrieBuilder Word32 Word32 example2'' = mktrie [ mknode 1 t1, mknode 2 t2 ] where t1 = mktrie [ mkleaf 4 10608 ] t2 = mktrie [ mkleaf 4 10612 ] example2''' :: IntTrieBuilder Word32 Word32 example2''' = mktrie [ mknode 0 t3 ] where t3 = mktrie [ mknode 4 t8, mknode 6 t11 ] t8 = mktrie [ mknode 1 t14 ] t11 = mktrie [ mkleaf 5 10605 ] t14 = mktrie [ mknode 2 t19, mknode 3 t22 ] t19 = mktrie [ mkleaf 7 10608 ] t22 = mktrie [ mkleaf 7 10612 ] {- 0: [1,N0,3] 3: [2,N4,N6,8,11] 8: [1,N1,14] 11: [1,5,10605] 14: [2,N2,N3,19,22] 19: [1,7,10608] 22: [1,7,10612] -} -- We convert from the 'Paths' to the 'IntTrieBuilder' using 'inserts': -- test1 = example2 == inserts example1 empty #endif -- Each node has a size and a sequence of keys followed by an equal length -- sequence of corresponding entries. Since we're going to flatten this into -- a single array, we will need to replace the trie structure with pointers -- represented as array offsets. -- Each node is a pair of arrays, one of keys and one of corresponding -- entries, where each entry is either a value or an internal pointer. -- We need to distinguish values from internal pointers. We use a tag bit: -- tagLeaf, tagNode, untag :: Word32 -> Word32 tagLeaf = id tagNode = flip Bits.setBit 31 untag = flip Bits.clearBit 31 isNode :: Word32 -> Bool isNode = flip Bits.testBit 31 -- So the overall array form of the above trie is: -- -- offset: 0 1 2 3 4 5 6 7 8 9 10 11 12 -- array: [ 1 | N1 | 3 ][ 3 | 2, 3, N4 | 512, 2048, 10 ][ 1 | 5 | 4096 ] -- \__/ \___/ #ifdef TESTS example3 :: [Word32] example3 = [1, tagNode 1, 3, 3, tagLeaf 2, tagLeaf 3, tagNode 4, 512, 2048, 10, 1, tagLeaf 5, 4096 ] -- We get the array form by using flattenTrie: -- test2 = example3 == flattenTrie example2 example4 :: IntTrie Int Int example4 = IntTrie (mkArray example3) mkArray :: [Word32] -> A.UArray Word32 Word32 mkArray xs = A.listArray (0, fromIntegral (length xs) - 1) xs test3 = case lookup example4 [1] of Just (Completions [(2,_),(3,_),(4,_)]) -> True _ -> False test1, test2, test3 :: Bool #endif ------------------------------------- -- Decoding the trie array form -- completionsFrom :: (Enum k, Enum v) => IntTrie k v -> Word32 -> Completions k v completionsFrom trie@(IntTrie arr) nodeOff = [ (word32ToEnum (untag key), next) | keyOff <- [keysStart..keysEnd] , let key = arr ! keyOff entry = arr ! (keyOff + nodeSize) next | isNode key = Completions (completionsFrom trie entry) | otherwise = Entry (word32ToEnum entry) ] where nodeSize = arr ! nodeOff keysStart = nodeOff + 1 keysEnd = nodeOff + nodeSize -- | Convert the trie to a list -- -- This is the left inverse to 'construct' (modulo ordering). toList :: forall k v. (Enum k, Enum v) => IntTrie k v -> [([k], v)] toList = concatMap (aux []) . 
(`completionsFrom` 0) where aux :: [k] -> (k, TrieLookup k v) -> [([k], v)] aux ks (k, Entry v) = [(reverse (k:ks), v)] aux ks (k, Completions cs) = concatMap (aux (k:ks)) cs ------------------------------------- -- Toplevel trie array construction -- -- So constructing the 'IntTrie' as a whole is just a matter of stringing -- together all the bits -- | Build an 'IntTrie' from a bunch of (key, value) pairs, where the keys -- are sequences. -- construct :: (Enum k, Enum v) => [([k], v)] -> IntTrie k v construct = finalise . flip inserts empty --------------------------------- -- Looking up in the trie array -- data TrieLookup k v = Entry !v | Completions (Completions k v) deriving Show type Completions k v = [(k, TrieLookup k v)] lookup :: forall k v. (Enum k, Enum v) => IntTrie k v -> [k] -> Maybe (TrieLookup k v) lookup trie@(IntTrie arr) = go 0 where go :: Word32 -> [k] -> Maybe (TrieLookup k v) go nodeOff [] = Just (completions nodeOff) go nodeOff (k:ks) = case search nodeOff (tagLeaf k') of Just entryOff | null ks -> Just (entry entryOff) | otherwise -> Nothing Nothing -> case search nodeOff (tagNode k') of Nothing -> Nothing Just entryOff -> go (arr ! entryOff) ks where k' = enumToWord32 k entry entryOff = Entry (word32ToEnum (arr ! entryOff)) completions nodeOff = Completions (completionsFrom trie nodeOff) search :: Word32 -> Word32 -> Maybe Word32 search nodeOff key = fmap (+nodeSize) (bsearch keysStart keysEnd key) where nodeSize = arr ! nodeOff keysStart = nodeOff + 1 keysEnd = nodeOff + nodeSize bsearch :: Word32 -> Word32 -> Word32 -> Maybe Word32 bsearch a b key | a > b = Nothing | otherwise = case compare key (arr ! mid) of LT -> bsearch a (mid-1) key EQ -> Just mid GT -> bsearch (mid+1) b key where mid = (a + b) `div` 2 enumToWord32 :: Enum n => n -> Word32 enumToWord32 = fromIntegral . fromEnum word32ToEnum :: Enum n => Word32 -> n word32ToEnum = toEnum . fromIntegral ------------------------- -- Building Tries -- newtype IntTrieBuilder k v = IntTrieBuilder (IntMap (TrieNode k v)) deriving (Show, Eq) data TrieNode k v = TrieLeaf {-# UNPACK #-} !Word32 | TrieNode !(IntTrieBuilder k v) deriving (Show, Eq) empty :: IntTrieBuilder k v empty = IntTrieBuilder IntMap.empty insert :: (Enum k, Enum v) => [k] -> v -> IntTrieBuilder k v -> IntTrieBuilder k v insert [] _v t = t insert (k:ks) v t = insertTrie (fromEnum k) (map fromEnum ks) (enumToWord32 v) t insertTrie :: Int -> [Int] -> Word32 -> IntTrieBuilder k v -> IntTrieBuilder k v insertTrie k ks v (IntTrieBuilder t) = IntTrieBuilder $ IntMap.alter (\t' -> Just $! 
maybe (freshTrieNode ks v) (insertTrieNode ks v) t') k t insertTrieNode :: [Int] -> Word32 -> TrieNode k v -> TrieNode k v insertTrieNode [] v _ = TrieLeaf v insertTrieNode (k:ks) v (TrieLeaf _) = TrieNode (freshTrie k ks v) insertTrieNode (k:ks) v (TrieNode t) = TrieNode (insertTrie k ks v t) freshTrie :: Int -> [Int] -> Word32 -> IntTrieBuilder k v freshTrie k [] v = IntTrieBuilder (IntMap.singleton k (TrieLeaf v)) freshTrie k (k':ks) v = IntTrieBuilder (IntMap.singleton k (TrieNode (freshTrie k' ks v))) freshTrieNode :: [Int] -> Word32 -> TrieNode k v freshTrieNode [] v = TrieLeaf v freshTrieNode (k:ks) v = TrieNode (freshTrie k ks v) inserts :: (Enum k, Enum v) => [([k], v)] -> IntTrieBuilder k v -> IntTrieBuilder k v inserts kvs t = foldl' (\t' (ks, v) -> insert ks v t') t kvs finalise :: IntTrieBuilder k v -> IntTrie k v finalise trie = IntTrie $ A.listArray (0, fromIntegral (flatTrieLength trie) - 1) (flattenTrie trie) unfinalise :: (Enum k, Enum v) => IntTrie k v -> IntTrieBuilder k v unfinalise trie = go (completionsFrom trie 0) where go kns = IntTrieBuilder $ IntMap.fromList [ (fromEnum k, t) | (k, n) <- kns , let t = case n of Entry v -> TrieLeaf (enumToWord32 v) Completions kns' -> TrieNode (go kns') ] --------------------------------- -- Flattening Tries -- type Offset = Int flatTrieLength :: IntTrieBuilder k v -> Int flatTrieLength (IntTrieBuilder tns) = 1 + 2 * IntMap.size tns + sum [ flatTrieLength n | TrieNode n <- IntMap.elems tns ] -- This is a breadth-first traversal. We keep a list of the tries that we are -- to write out next. Each of these has an offset allocated to it at the -- time we put it into the list. We keep a running offset so we know where -- to allocate next. -- flattenTrie :: IntTrieBuilder k v -> [Word32] flattenTrie trie = go (queue [trie]) (size trie) where size (IntTrieBuilder tns) = 1 + 2 * IntMap.size tns go :: Q (IntTrieBuilder k v) -> Offset -> [Word32] go todo !offset = case dequeue todo of Nothing -> [] Just (IntTrieBuilder tnodes, tries) -> flat ++ go tries' offset' where !count = IntMap.size tnodes flat = fromIntegral count : Map.keys keysValues ++ Map.elems keysValues (!offset', !keysValues, !tries') = #if MIN_VERSION_containers(0,4,2) IntMap.foldlWithKey' accumNodes (offset, Map.empty, tries) tnodes #else foldl' (\a (k,v) -> accumNodes a k v) (offset, Map.empty, tries) (IntMap.toList tnodes) #endif accumNodes :: (Offset, Map.Map Word32 Word32, Q (IntTrieBuilder k v)) -> Int -> TrieNode k v -> (Offset, Map.Map Word32 Word32, Q (IntTrieBuilder k v)) accumNodes (!off, !kvs, !tries) !k (TrieLeaf v) = (off, kvs', tries) where kvs' = Map.insert (tagLeaf (int2Word32 k)) v kvs accumNodes (!off, !kvs, !tries) !k (TrieNode t) = (off + size t, kvs', tries') where kvs' = Map.insert (tagNode (int2Word32 k)) (int2Word32 off) kvs tries' = enqueue tries t data Q a = Q [a] [a] queue :: [a] -> Q a queue xs = Q xs [] enqueue :: Q a -> a -> Q a enqueue (Q front back) x = Q front (x : back) dequeue :: Q a -> Maybe (a, Q a) dequeue (Q (x:xs) back) = Just (x, Q xs back) dequeue (Q [] back) = case reverse back of x:xs -> Just (x, Q xs []) [] -> Nothing int2Word32 :: Int -> Word32 int2Word32 = fromIntegral ------------------------- -- (de)serialisation -- serialise :: IntTrie k v -> BS.Builder serialise (IntTrie arr) = let (_, !ixEnd) = A.bounds arr in BS.word32BE (ixEnd+1) <> foldr (\n r -> BS.word32BE n <> r) mempty (A.elems arr) serialiseSize :: IntTrie k v -> Int serialiseSize (IntTrie arr) = let (_, ixEnd) = A.bounds arr in 4 + 4 * (fromIntegral ixEnd + 1)
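-- A round-trip sketch (an editor's note, mirroring the
-- prop_serialise_deserialise test below): 'deserialise' consumes exactly the
-- bytes produced by 'serialise' and returns any remaining input, i.e. for
-- any @trie@
--
-- > deserialise (toStrict (BS.toLazyByteString (serialise trie)))
-- >   == Just (trie, BS.empty)
--
-- (where @toStrict@ is the compatibility helper defined at the end of this
-- module).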
deserialise :: BS.ByteString -> Maybe (IntTrie k v, BS.ByteString) deserialise bs | BS.length bs >= 4 , let lenArr = readWord32BE bs 0 lenTotal = 4 + 4 * fromIntegral lenArr , BS.length bs >= 4 + 4 * fromIntegral lenArr , let !arr = A.array (0, lenArr-1) [ (i, readWord32BE bs off) | (i, off) <- zip [0..lenArr-1] [4,8 .. lenTotal - 4] ] !bs' = BS.drop lenTotal bs = Just (IntTrie arr, bs') | otherwise = Nothing readWord32BE :: BS.ByteString -> Int -> Word32 readWord32BE bs i = assert (i >= 0 && i+3 <= BS.length bs - 1) $ fromIntegral (BS.unsafeIndex bs (i + 0)) `shiftL` 24 + fromIntegral (BS.unsafeIndex bs (i + 1)) `shiftL` 16 + fromIntegral (BS.unsafeIndex bs (i + 2)) `shiftL` 8 + fromIntegral (BS.unsafeIndex bs (i + 3)) ------------------------- -- Correctness property -- #ifdef TESTS prop_lookup :: (Ord k, Enum k, Eq v, Enum v, Show k, Show v) => [([k], v)] -> Bool prop_lookup paths = flip all paths $ \(key, value) -> case lookup trie key of Just (Entry value') | value' == value -> True Just (Entry value') -> error $ "IntTrie: " ++ show (key, value, value') Nothing -> error $ "IntTrie: didn't find " ++ show key Just (Completions xs) -> error $ "IntTrie: " ++ show xs where trie = construct paths prop_completions :: forall k v. (Ord k, Enum k, Eq v, Enum v) => [([k], v)] -> Bool prop_completions paths = inserts paths empty == convertCompletions (completionsFrom (construct paths) 0) where convertCompletions :: Ord k => Completions k v -> IntTrieBuilder k v convertCompletions kls = IntTrieBuilder $ IntMap.fromList [ case l of Entry v -> mkleaf k v Completions kls' -> mknode k (convertCompletions kls') | (k, l) <- sortBy (compare `on` fst) kls ] prop_lookup_mono :: ValidPaths -> Bool prop_lookup_mono (ValidPaths paths) = prop_lookup paths prop_completions_mono :: ValidPaths -> Bool prop_completions_mono (ValidPaths paths) = prop_completions paths prop_construct_toList :: ValidPaths -> Bool prop_construct_toList (ValidPaths paths) = sortBy (compare `on` fst) (toList (construct paths)) == sortBy (compare `on` fst) paths prop_finalise_unfinalise :: ValidPaths -> Bool prop_finalise_unfinalise (ValidPaths paths) = builder == unfinalise (finalise builder) where builder :: IntTrieBuilder Char Char builder = inserts paths empty prop_serialise_deserialise :: ValidPaths -> Bool prop_serialise_deserialise (ValidPaths paths) = Just (trie, BS.empty) == (deserialise . toStrict . BS.toLazyByteString . serialise) trie where trie :: IntTrie Char Char trie = construct paths prop_serialiseSize :: ValidPaths -> Bool prop_serialiseSize (ValidPaths paths) = (fromIntegral . LBS.length . BS.toLazyByteString . serialise) trie == serialiseSize trie where trie :: IntTrie Char Char trie = construct paths newtype ValidPaths = ValidPaths [([Char], Char)] deriving Show instance Arbitrary ValidPaths where arbitrary = ValidPaths . makeNoPrefix <$> listOf ((,) <$> listOf1 arbitrary <*> arbitrary) where makeNoPrefix [] = [] makeNoPrefix ((k,v):kvs) | all (\(k', _) -> not (isPrefixOfOther k k')) kvs = (k,v) : makeNoPrefix kvs | otherwise = makeNoPrefix kvs shrink (ValidPaths kvs) = map ValidPaths . filter noPrefix . filter nonEmpty . shrink $ kvs where noPrefix [] = True noPrefix ((k,_):kvs') = all (\(k', _) -> not (isPrefixOfOther k k')) kvs' && noPrefix kvs' nonEmpty = all (not . null . fst) isPrefixOfOther a b = a `isPrefixOf` b || b `isPrefixOf` a toStrict :: LBS.ByteString -> BS.ByteString #if MIN_VERSION_bytestring(0,10,0) toStrict = LBS.toStrict #else toStrict = BS.concat . 
LBS.toChunks #endif #endif #if !(MIN_VERSION_base(4,5,0)) (<>) :: Monoid m => m -> m -> m (<>) = mappend #endif tar-0.5.1.1/Codec/Archive/Tar/Index/StringTable.hs0000644000000000000000000002514007346545000017602 0ustar0000000000000000{-# LANGUAGE CPP, BangPatterns, PatternGuards, DeriveDataTypeable #-} module Codec.Archive.Tar.Index.StringTable ( StringTable, lookup, index, construct, StringTableBuilder, empty, insert, inserts, finalise, unfinalise, serialise, serialiseSize, deserialiseV1, deserialiseV2, #ifdef TESTS prop_valid, prop_sorted, prop_finalise_unfinalise, prop_serialise_deserialise, prop_serialiseSize, #endif ) where import Data.Typeable (Typeable) import Prelude hiding (lookup, id) import Data.List hiding (lookup, insert) import Data.Function (on) import Data.Word (Word32) import Data.Int (Int32) import Data.Bits import Data.Monoid (Monoid(..)) #if (MIN_VERSION_base(4,5,0)) import Data.Monoid ((<>)) #endif import Control.Exception (assert) import qualified Data.Array.Unboxed as A import Data.Array.Unboxed ((!)) #if MIN_VERSION_containers(0,5,0) import qualified Data.Map.Strict as Map import Data.Map.Strict (Map) #else import qualified Data.Map as Map import Data.Map (Map) #endif import qualified Data.ByteString as BS import qualified Data.ByteString.Unsafe as BS import qualified Data.ByteString.Lazy as LBS #if MIN_VERSION_bytestring(0,10,2) || defined(MIN_VERSION_bytestring_builder) import Data.ByteString.Builder as BS import Data.ByteString.Builder.Extra as BS (byteStringCopy) #else import Data.ByteString.Lazy.Builder as BS import Data.ByteString.Lazy.Builder.Extras as BS (byteStringCopy) #endif -- | An efficient mapping from strings to a dense set of integers. -- data StringTable id = StringTable {-# UNPACK #-} !BS.ByteString -- all strings concatenated {-# UNPACK #-} !(A.UArray Int32 Word32) -- string offset table {-# UNPACK #-} !(A.UArray Int32 Int32) -- string index to id table {-# UNPACK #-} !(A.UArray Int32 Int32) -- string id to index table deriving (Show, Typeable) instance (Eq id, Enum id) => Eq (StringTable id) where tbl1 == tbl2 = unfinalise tbl1 == unfinalise tbl2 -- | Look up a string in the string table. If the string is present, return -- its corresponding index. -- lookup :: Enum id => StringTable id -> BS.ByteString -> Maybe id lookup (StringTable bs offsets ids _ixs) str = binarySearch 0 (topBound-1) str where (0, topBound) = A.bounds offsets binarySearch !a !b !key | a > b = Nothing | otherwise = case compare key (index' bs offsets mid) of LT -> binarySearch a (mid-1) key EQ -> Just $! toEnum (fromIntegral (ids ! mid)) GT -> binarySearch (mid+1) b key where mid = (a + b) `div` 2 index' :: BS.ByteString -> A.UArray Int32 Word32 -> Int32 -> BS.ByteString index' bs offsets i = BS.unsafeTake len . BS.unsafeDrop start $ bs where start, end, len :: Int start = fromIntegral (offsets ! i) end = fromIntegral (offsets ! (i+1)) len = end - start -- | Given the index of a string in the table, return the string. -- index :: Enum id => StringTable id -> id -> BS.ByteString index (StringTable bs offsets _ids ixs) = index' bs offsets . (ixs !) . fromIntegral . fromEnum -- | Given a list of strings, construct a 'StringTable' mapping those strings -- to a dense set of integers. -- construct :: Enum id => [BS.ByteString] -> StringTable id construct = finalise .
foldl' (\tbl s -> fst (insert s tbl)) empty data StringTableBuilder id = StringTableBuilder !(Map BS.ByteString id) {-# UNPACK #-} !Word32 deriving (Eq, Show, Typeable) empty :: StringTableBuilder id empty = StringTableBuilder Map.empty 0 insert :: Enum id => BS.ByteString -> StringTableBuilder id -> (StringTableBuilder id, id) insert str builder@(StringTableBuilder smap nextid) = case Map.lookup str smap of Just id -> (builder, id) Nothing -> let !id = toEnum (fromIntegral nextid) !smap' = Map.insert str id smap in (StringTableBuilder smap' (nextid+1), id) inserts :: Enum id => [BS.ByteString] -> StringTableBuilder id -> (StringTableBuilder id, [id]) inserts bss builder = mapAccumL (flip insert) builder bss finalise :: Enum id => StringTableBuilder id -> StringTable id finalise (StringTableBuilder smap _) = (StringTable strs offsets ids ixs) where strs = BS.concat (Map.keys smap) offsets = A.listArray (0, fromIntegral (Map.size smap)) . scanl (\off str -> off + fromIntegral (BS.length str)) 0 $ Map.keys smap ids = A.listArray (0, fromIntegral (Map.size smap) - 1) . map (fromIntegral . fromEnum) $ Map.elems smap ixs = A.array (A.bounds ids) [ (id,ix) | (ix,id) <- A.assocs ids ] unfinalise :: Enum id => StringTable id -> StringTableBuilder id unfinalise (StringTable strs offsets ids _) = StringTableBuilder smap nextid where smap = Map.fromAscList [ (index' strs offsets ix, toEnum (fromIntegral (ids ! ix))) | ix <- [0..h] ] (0,h) = A.bounds ids nextid = fromIntegral (h+1) ------------------------- -- (de)serialisation -- serialise :: StringTable id -> BS.Builder serialise (StringTable strs offs ids ixs) = let (_, !ixEnd) = A.bounds offs in BS.word32BE (fromIntegral (BS.length strs)) <> BS.word32BE (fromIntegral ixEnd + 1) <> BS.byteStringCopy strs <> foldr (\n r -> BS.word32BE n <> r) mempty (A.elems offs) <> foldr (\n r -> BS.int32BE n <> r) mempty (A.elems ids) <> foldr (\n r -> BS.int32BE n <> r) mempty (A.elems ixs) serialiseSize :: StringTable id -> Int serialiseSize (StringTable strs offs _ids _ixs) = let (_, !ixEnd) = A.bounds offs in 4 * 2 + BS.length strs + 4 * (fromIntegral ixEnd + 1) + 8 * fromIntegral ixEnd deserialiseV1 :: BS.ByteString -> Maybe (StringTable id, BS.ByteString) deserialiseV1 bs | BS.length bs >= 8 , let lenStrs = fromIntegral (readWord32BE bs 0) lenArr = fromIntegral (readWord32BE bs 4) lenTotal= 8 + lenStrs + 4 * lenArr , BS.length bs >= lenTotal , let strs = BS.take lenStrs (BS.drop 8 bs) arr = A.array (0, fromIntegral lenArr - 1) [ (i, readWord32BE bs off) | (i, off) <- zip [0 .. fromIntegral lenArr - 1] [offArrS,offArrS+4 .. offArrE] ] ids = A.array (0, fromIntegral lenArr - 1) [ (i,i) | i <- [0 .. 
fromIntegral lenArr - 1] ] ixs = ids -- two identity mappings offArrS = 8 + lenStrs offArrE = offArrS + 4 * lenArr - 1 !stringTable = StringTable strs arr ids ixs !bs' = BS.drop lenTotal bs = Just (stringTable, bs') | otherwise = Nothing deserialiseV2 :: BS.ByteString -> Maybe (StringTable id, BS.ByteString) deserialiseV2 bs | BS.length bs >= 8 , let lenStrs = fromIntegral (readWord32BE bs 0) lenArr = fromIntegral (readWord32BE bs 4) lenTotal= 8 -- the two length prefixes + lenStrs + 4 * lenArr +(4 * (lenArr - 1)) * 2 -- offsets array is 1 longer , BS.length bs >= lenTotal , let strs = BS.take lenStrs (BS.drop 8 bs) offs = A.listArray (0, fromIntegral lenArr - 1) [ readWord32BE bs off | off <- offsets offsOff ] -- the second two arrays are 1 shorter ids = A.listArray (0, fromIntegral lenArr - 2) [ readInt32BE bs off | off <- offsets idsOff ] ixs = A.listArray (0, fromIntegral lenArr - 2) [ readInt32BE bs off | off <- offsets ixsOff ] offsOff = 8 + lenStrs idsOff = offsOff + 4 * lenArr ixsOff = idsOff + 4 * (lenArr-1) offsets from = [from,from+4 .. from + 4 * (lenArr - 1)] !stringTable = StringTable strs offs ids ixs !bs' = BS.drop lenTotal bs = Just (stringTable, bs') | otherwise = Nothing readInt32BE :: BS.ByteString -> Int -> Int32 readInt32BE bs i = fromIntegral (readWord32BE bs i) readWord32BE :: BS.ByteString -> Int -> Word32 readWord32BE bs i = assert (i >= 0 && i+3 <= BS.length bs - 1) $ fromIntegral (BS.unsafeIndex bs (i + 0)) `shiftL` 24 + fromIntegral (BS.unsafeIndex bs (i + 1)) `shiftL` 16 + fromIntegral (BS.unsafeIndex bs (i + 2)) `shiftL` 8 + fromIntegral (BS.unsafeIndex bs (i + 3)) #ifdef TESTS prop_valid :: [BS.ByteString] -> Bool prop_valid strs = all lookupIndex (enumStrings tbl) && all indexLookup (enumIds tbl) where tbl :: StringTable Int tbl = construct strs lookupIndex str = index tbl ident == str where Just ident = lookup tbl str indexLookup ident = lookup tbl str == Just ident where str = index tbl ident -- this is important so we can use Map.fromAscList prop_sorted :: [BS.ByteString] -> Bool prop_sorted strings = isSorted [ index' strs offsets ix | ix <- A.range (A.bounds ids) ] where _tbl :: StringTable Int _tbl@(StringTable strs offsets ids _ixs) = construct strings isSorted xs = and (zipWith (<) xs (tail xs)) prop_finalise_unfinalise :: [BS.ByteString] -> Bool prop_finalise_unfinalise strs = builder == unfinalise (finalise builder) where builder :: StringTableBuilder Int builder = foldl' (\tbl s -> fst (insert s tbl)) empty strs prop_serialise_deserialise :: [BS.ByteString] -> Bool prop_serialise_deserialise strs = Just (strtable, BS.empty) == (deserialiseV2 . toStrict . BS.toLazyByteString . serialise) strtable where strtable :: StringTable Int strtable = construct strs prop_serialiseSize :: [BS.ByteString] -> Bool prop_serialiseSize strs = (fromIntegral . LBS.length . BS.toLazyByteString . serialise) strtable == serialiseSize strtable where strtable :: StringTable Int strtable = construct strs enumStrings :: Enum id => StringTable id -> [BS.ByteString] enumStrings (StringTable bs offsets _ _) = map (index' bs offsets) [0..h-1] where (0,h) = A.bounds offsets enumIds :: Enum id => StringTable id -> [id] enumIds (StringTable _ offsets _ _) = [toEnum 0 .. toEnum (fromIntegral (h-1))] where (0,h) = A.bounds offsets toStrict :: LBS.ByteString -> BS.ByteString #if MIN_VERSION_bytestring(0,10,0) toStrict = LBS.toStrict #else toStrict = BS.concat . 
LBS.toChunks #endif #endif #if !(MIN_VERSION_base(4,5,0)) (<>) :: Monoid m => m -> m -> m (<>) = mappend #endif tar-0.5.1.1/Codec/Archive/Tar/Pack.hs0000644000000000000000000001630107346545000015172 0ustar0000000000000000{-# LANGUAGE CPP #-} ----------------------------------------------------------------------------- -- | -- Module : Codec.Archive.Tar.Pack -- Copyright : (c) 2007 Bjorn Bringert, -- 2008 Andrea Vezzosi, -- 2008-2009, 2012, 2016 Duncan Coutts -- License : BSD3 -- -- Maintainer : duncan@community.haskell.org -- Portability : portable -- ----------------------------------------------------------------------------- module Codec.Archive.Tar.Pack ( pack, packFileEntry, packDirectoryEntry, getDirectoryContentsRecursive, ) where import Codec.Archive.Tar.Types import qualified Data.ByteString.Lazy as BS import System.FilePath ( (</>) ) import qualified System.FilePath as FilePath.Native ( addTrailingPathSeparator, hasTrailingPathSeparator ) import System.Directory ( getDirectoryContents, doesDirectoryExist, getModificationTime , Permissions(..), getPermissions ) #if MIN_VERSION_directory(1,2,0) -- The directory package switched to the new time package import Data.Time.Clock ( UTCTime ) import Data.Time.Clock.POSIX ( utcTimeToPOSIXSeconds ) #else import System.Time ( ClockTime(..) ) #endif import System.IO ( IOMode(ReadMode), openBinaryFile, hFileSize ) import System.IO.Unsafe (unsafeInterleaveIO) -- | Creates a tar archive from a list of directories or files. Any directories -- specified will have their contents included recursively. Paths in the -- archive will be relative to the given base directory. -- -- This is a portable implementation of packing suitable for portable archives. -- In particular it only constructs 'NormalFile' and 'Directory' entries. Hard -- links and symbolic links are treated like ordinary files. It cannot be used -- to pack directories containing recursive symbolic links. Special files like -- FIFOs (named pipes), sockets or device files will also cause problems. -- -- An exception will be thrown for any file names that are too long to -- represent as a 'TarPath'. -- -- * This function returns results lazily. Subdirectories are scanned -- and files are read one by one as the list of entries is consumed. -- pack :: FilePath -- ^ Base directory -> [FilePath] -- ^ Files and directories to pack, relative to the base dir -> IO [Entry] pack baseDir paths0 = preparePaths baseDir paths0 >>= packPaths baseDir preparePaths :: FilePath -> [FilePath] -> IO [FilePath] preparePaths baseDir paths = fmap concat $ interleave [ do isDir <- doesDirectoryExist (baseDir </> path) if isDir then do entries <- getDirectoryContentsRecursive (baseDir </> path) let entries' = map (path </>) entries dir = FilePath.Native.addTrailingPathSeparator path if null path then return entries' else return (dir : entries') else return [path] | path <- paths ] packPaths :: FilePath -> [FilePath] -> IO [Entry] packPaths baseDir paths = interleave [ do tarpath <- either fail return (toTarPath isDir relpath) if isDir then packDirectoryEntry filepath tarpath else packFileEntry filepath tarpath | relpath <- paths , let isDir = FilePath.Native.hasTrailingPathSeparator filepath filepath = baseDir </> relpath ] interleave :: [IO a] -> IO [a] interleave = unsafeInterleaveIO . go where go [] = return [] go (x:xs) = do x' <- x xs' <- interleave xs return (x':xs') -- | Construct a tar 'Entry' based on a local file. -- -- This sets the entry size, the data contained in the file and the file's -- modification time. If the file is executable then that information is also -- preserved. File ownership and detailed permissions are not preserved. -- -- * The file contents are read lazily. -- packFileEntry :: FilePath -- ^ Full path to find the file on the local disk -> TarPath -- ^ Path to use for the tar Entry in the archive -> IO Entry packFileEntry filepath tarpath = do mtime <- getModTime filepath perms <- getPermissions filepath file <- openBinaryFile filepath ReadMode size <- hFileSize file content <- BS.hGetContents file return (simpleEntry tarpath (NormalFile content (fromIntegral size))) { entryPermissions = if executable perms then executableFilePermissions else ordinaryFilePermissions, entryTime = mtime } -- | Construct a tar 'Entry' based on a local directory (but not its contents). -- -- The only attribute of the directory that is used is its modification time. -- Directory ownership and detailed permissions are not preserved. -- packDirectoryEntry :: FilePath -- ^ Full path to find the file on the local disk -> TarPath -- ^ Path to use for the tar Entry in the archive -> IO Entry packDirectoryEntry filepath tarpath = do mtime <- getModTime filepath return (directoryEntry tarpath) { entryTime = mtime } -- | This is a utility function, much like 'getDirectoryContents'. The -- difference is that it includes the contents of subdirectories. -- -- The paths returned are all relative to the top directory. Directory paths -- are distinguishable by having a trailing path separator -- (see 'FilePath.Native.hasTrailingPathSeparator'). -- -- All directories are listed before the files that they contain. Amongst the -- contents of a directory, subdirectories are listed after normal files. The -- overall result is that files within a directory will be together in a single -- contiguous group. This tends to improve file layout and IO performance when -- creating or extracting tar archives. -- -- * This function returns results lazily. Subdirectories are not scanned -- until the file entries in the parent directory have been consumed. -- getDirectoryContentsRecursive :: FilePath -> IO [FilePath] getDirectoryContentsRecursive dir0 = fmap tail (recurseDirectories dir0 [""]) recurseDirectories :: FilePath -> [FilePath] -> IO [FilePath] recurseDirectories _ [] = return [] recurseDirectories base (dir:dirs) = unsafeInterleaveIO $ do (files, dirs') <- collect [] [] =<< getDirectoryContents (base </> dir) files' <- recurseDirectories base (dirs' ++ dirs) return (dir : files ++ files') where collect files dirs' [] = return (reverse files, reverse dirs') collect files dirs' (entry:entries) | ignore entry = collect files dirs' entries collect files dirs' (entry:entries) = do let dirEntry = dir </> entry dirEntry' = FilePath.Native.addTrailingPathSeparator dirEntry isDirectory <- doesDirectoryExist (base </> dirEntry) if isDirectory then collect files (dirEntry':dirs') entries else collect (dirEntry:files) dirs' entries ignore ['.'] = True ignore ['.', '.'] = True ignore _ = False getModTime :: FilePath -> IO EpochTime getModTime path = do #if MIN_VERSION_directory(1,2,0) -- The directory package switched to the new time package t <- getModificationTime path return . floor . utcTimeToPOSIXSeconds $ t #else (TOD s _) <- getModificationTime path return $!
fromIntegral s #endif tar-0.5.1.1/Codec/Archive/Tar/Read.hs0000644000000000000000000002424007346545000015170 0ustar0000000000000000{-# LANGUAGE CPP, DeriveDataTypeable, BangPatterns #-} ----------------------------------------------------------------------------- -- | -- Module : Codec.Archive.Tar.Read -- Copyright : (c) 2007 Bjorn Bringert, -- 2008 Andrea Vezzosi, -- 2008-2009 Duncan Coutts, -- 2011 Max Bolingbroke -- License : BSD3 -- -- Maintainer : duncan@community.haskell.org -- Portability : portable -- ----------------------------------------------------------------------------- module Codec.Archive.Tar.Read (read, FormatError(..)) where import Codec.Archive.Tar.Types import Data.Char (ord) import Data.Int (Int64) import Data.Bits (Bits(shiftL)) import Control.Exception (Exception(..)) import Data.Typeable (Typeable) import Control.Applicative import Control.Monad import Control.DeepSeq import qualified Data.ByteString as BS import qualified Data.ByteString.Char8 as BS.Char8 import qualified Data.ByteString.Unsafe as BS import qualified Data.ByteString.Lazy as LBS import Prelude hiding (read) #if !MIN_VERSION_bytestring(0,10,0) import Data.Monoid (Monoid(..)) import qualified Data.ByteString.Lazy.Internal as LBS #endif -- | Errors that can be encountered when parsing a Tar archive. data FormatError = TruncatedArchive | ShortTrailer | BadTrailer | TrailingJunk | ChecksumIncorrect | NotTarFormat | UnrecognisedTarFormat | HeaderBadNumericEncoding #if MIN_VERSION_base(4,8,0) deriving (Eq, Show, Typeable) instance Exception FormatError where displayException TruncatedArchive = "truncated tar archive" displayException ShortTrailer = "short tar trailer" displayException BadTrailer = "bad tar trailer" displayException TrailingJunk = "tar file has trailing junk" displayException ChecksumIncorrect = "tar checksum error" displayException NotTarFormat = "data is not in tar format" displayException UnrecognisedTarFormat = "tar entry not in a recognised format" displayException HeaderBadNumericEncoding = "tar header is malformed (bad numeric encoding)" #else deriving (Eq, Typeable) instance Show FormatError where show TruncatedArchive = "truncated tar archive" show ShortTrailer = "short tar trailer" show BadTrailer = "bad tar trailer" show TrailingJunk = "tar file has trailing junk" show ChecksumIncorrect = "tar checksum error" show NotTarFormat = "data is not in tar format" show UnrecognisedTarFormat = "tar entry not in a recognised format" show HeaderBadNumericEncoding = "tar header is malformed (bad numeric encoding)" instance Exception FormatError #endif instance NFData FormatError where rnf !_ = () -- enumerations are fully strict by construction -- | Convert a data stream in the tar file format into an internal data -- structure. Decoding errors are reported by the 'Fail' constructor of the -- 'Entries' type. -- -- * The conversion is done lazily. -- read :: LBS.ByteString -> Entries FormatError read = unfoldEntries getEntry getEntry :: LBS.ByteString -> Either FormatError (Maybe (Entry, LBS.ByteString)) getEntry bs | BS.length header < 512 = Left TruncatedArchive -- Tar files end with at least two blocks of all '0'. Checking this serves -- two purposes. It checks the format but also forces the tail of the data -- which is necessary to close the file if it came from a lazily read file. 
| LBS.head bs == 0 = case LBS.splitAt 1024 bs of (end, trailing) | LBS.length end /= 1024 -> Left ShortTrailer | not (LBS.all (== 0) end) -> Left BadTrailer | not (LBS.all (== 0) trailing) -> Left TrailingJunk | otherwise -> Right Nothing | otherwise = partial $ do case (chksum_, format_) of (Ok chksum, _ ) | correctChecksum header chksum -> return () (Ok _, Ok _) -> Error ChecksumIncorrect _ -> Error NotTarFormat -- These fields are partial, have to check them format <- format_; mode <- mode_; uid <- uid_; gid <- gid_; size <- size_; mtime <- mtime_; devmajor <- devmajor_; devminor <- devminor_; let content = LBS.take size (LBS.drop 512 bs) padding = (512 - size) `mod` 512 bs' = LBS.drop (512 + size + padding) bs entry = Entry { entryTarPath = TarPath name prefix, entryContent = case typecode of '\0' -> NormalFile content size '0' -> NormalFile content size '1' -> HardLink (LinkTarget linkname) '2' -> SymbolicLink (LinkTarget linkname) _ | format == V7Format -> OtherEntryType typecode content size '3' -> CharacterDevice devmajor devminor '4' -> BlockDevice devmajor devminor '5' -> Directory '6' -> NamedPipe '7' -> NormalFile content size _ -> OtherEntryType typecode content size, entryPermissions = mode, entryOwnership = Ownership (BS.Char8.unpack uname) (BS.Char8.unpack gname) uid gid, entryTime = mtime, entryFormat = format } return (Just (entry, bs')) where #if MIN_VERSION_bytestring(0,10,0) header = LBS.toStrict (LBS.take 512 bs) #else header = toStrict (LBS.take 512 bs) toStrict = LBS.foldrChunks mappend mempty #endif name = getString 0 100 header mode_ = getOct 100 8 header uid_ = getOct 108 8 header gid_ = getOct 116 8 header size_ = getOct 124 12 header mtime_ = getOct 136 12 header chksum_ = getOct 148 8 header typecode = getByte 156 header linkname = getString 157 100 header magic = getChars 257 8 header uname = getString 265 32 header gname = getString 297 32 header devmajor_ = getOct 329 8 header devminor_ = getOct 337 8 header prefix = getString 345 155 header -- trailing = getBytes 500 12 header format_ | magic == ustarMagic = return UstarFormat | magic == gnuMagic = return GnuFormat | magic == v7Magic = return V7Format | otherwise = Error UnrecognisedTarFormat v7Magic, ustarMagic, gnuMagic :: BS.ByteString v7Magic = BS.Char8.pack "\0\0\0\0\0\0\0\0" ustarMagic = BS.Char8.pack "ustar\NUL00" gnuMagic = BS.Char8.pack "ustar \NUL" correctChecksum :: BS.ByteString -> Int -> Bool correctChecksum header checksum = checksum == checksum' where -- sum of all 512 bytes in the header block, -- treating each byte as an 8-bit unsigned value sumchars = BS.foldl' (\x y -> x + fromIntegral y) 0 -- treating the 8 bytes of chksum as blank characters. checksum' = sumchars (BS.take 148 header) + 256 -- 256 = sumchars (BS.Char8.replicate 8 ' ') + sumchars (BS.drop 156 header) -- * TAR format primitive input {-# SPECIALISE getOct :: Int -> Int -> BS.ByteString -> Partial FormatError Int #-} {-# SPECIALISE getOct :: Int -> Int -> BS.ByteString -> Partial FormatError Int64 #-} getOct :: (Integral a, Bits a) => Int -> Int -> BS.ByteString -> Partial FormatError a getOct off len = parseOct . BS.Char8.takeWhile (\c -> c /= '\NUL' && c /= ' ') . BS.Char8.dropWhile (== ' ') . getBytes off len where parseOct s | BS.null s = return 0 -- As a star extension, octal fields can hold a base-256 value if the high -- bit of the initial character is set. 
The initial character can be: -- 0x80 ==> trailing characters hold a positive base-256 value -- 0xFF ==> trailing characters hold a negative base-256 value -- -- In both cases, there won't be a trailing NUL/space. -- -- GNU tar seems to contain a half-implementation of code that deals with -- extra bits in the first character, but I don't think it works and the -- docs I can find on star seem to suggest that these will always be 0, -- which is what I will assume. parseOct s | BS.head s == 128 = return (readBytes (BS.tail s)) | BS.head s == 255 = return (negate (readBytes (BS.tail s))) parseOct s = case readOct s of Just x -> return x Nothing -> Error HeaderBadNumericEncoding readBytes :: (Integral a, Bits a) => BS.ByteString -> a readBytes = BS.foldl' (\acc x -> acc `shiftL` 8 + fromIntegral x) 0 getBytes :: Int -> Int -> BS.ByteString -> BS.ByteString getBytes off len = BS.take len . BS.drop off getByte :: Int -> BS.ByteString -> Char getByte off bs = BS.Char8.index bs off getChars :: Int -> Int -> BS.ByteString -> BS.ByteString getChars off len = getBytes off len getString :: Int -> Int -> BS.ByteString -> BS.ByteString getString off len = BS.copy . BS.Char8.takeWhile (/='\0') . getBytes off len -- These days we'd just use Either, but in older versions of base there was no -- Monad instance for Either; it was in mtl with an annoying Error constraint. -- data Partial e a = Error e | Ok a partial :: Partial e a -> Either e a partial (Error msg) = Left msg partial (Ok x) = Right x instance Functor (Partial e) where fmap = liftM instance Applicative (Partial e) where pure = Ok (<*>) = ap instance Monad (Partial e) where return = pure Error m >>= _ = Error m Ok x >>= k = k x #if !MIN_VERSION_base(4,13,0) fail = error "fail @(Partial e)" #endif {-# SPECIALISE readOct :: BS.ByteString -> Maybe Int #-} {-# SPECIALISE readOct :: BS.ByteString -> Maybe Int64 #-} readOct :: Integral n => BS.ByteString -> Maybe n readOct bs0 = case go 0 0 bs0 of -1 -> Nothing n -> Just n where go :: Integral n => Int -> n -> BS.ByteString -> n go !i !n !bs | BS.null bs = if i == 0 then -1 else n | otherwise = case BS.unsafeHead bs of w | w >= 0x30 && w <= 0x39 -> go (i+1) (n * 8 + (fromIntegral w - 0x30)) (BS.unsafeTail bs) | otherwise -> -1 tar-0.5.1.1/Codec/Archive/Tar/Types.hs0000644000000000000000000006041707346545000015427 0ustar0000000000000000{-# LANGUAGE CPP, GeneralizedNewtypeDeriving, BangPatterns #-} ----------------------------------------------------------------------------- -- | -- Module : Codec.Archive.Tar.Types -- Copyright : (c) 2007 Bjorn Bringert, -- 2008 Andrea Vezzosi, -- 2008-2009 Duncan Coutts, -- 2011 Max Bolingbroke -- License : BSD3 -- -- Maintainer : duncan@community.haskell.org -- Portability : portable -- -- Types to represent the content of @.tar@ archives.
-- ----------------------------------------------------------------------------- module Codec.Archive.Tar.Types ( Entry(..), entryPath, EntryContent(..), FileSize, Permissions, Ownership(..), EpochTime, TypeCode, DevMajor, DevMinor, Format(..), simpleEntry, fileEntry, directoryEntry, ordinaryFilePermissions, executableFilePermissions, directoryPermissions, TarPath(..), toTarPath, fromTarPath, fromTarPathToPosixPath, fromTarPathToWindowsPath, LinkTarget(..), toLinkTarget, fromLinkTarget, fromLinkTargetToPosixPath, fromLinkTargetToWindowsPath, Entries(..), mapEntries, mapEntriesNoFail, foldEntries, foldlEntries, unfoldEntries, #ifdef TESTS limitToV7FormatCompat #endif ) where import Data.Int (Int64) import Data.Monoid (Monoid(..)) import Data.Semigroup as Sem import qualified Data.ByteString as BS import qualified Data.ByteString.Char8 as BS.Char8 import qualified Data.ByteString.Lazy as LBS import Control.DeepSeq import qualified System.FilePath as FilePath.Native ( joinPath, splitDirectories, addTrailingPathSeparator ) import qualified System.FilePath.Posix as FilePath.Posix ( joinPath, splitPath, splitDirectories, hasTrailingPathSeparator , addTrailingPathSeparator ) import qualified System.FilePath.Windows as FilePath.Windows ( joinPath, addTrailingPathSeparator ) import System.Posix.Types ( FileMode ) #ifdef TESTS import Test.QuickCheck import Control.Applicative ((<$>), (<*>), pure) import Data.Word (Word16) #endif type FileSize = Int64 -- | The number of seconds since the UNIX epoch type EpochTime = Int64 type DevMajor = Int type DevMinor = Int type TypeCode = Char type Permissions = FileMode -- | Tar archive entry. -- data Entry = Entry { -- | The path of the file or directory within the archive. This is in a -- tar-specific form. Use 'entryPath' to get a native 'FilePath'. entryTarPath :: {-# UNPACK #-} !TarPath, -- | The real content of the entry. For 'NormalFile' this includes the -- file data. An entry usually contains a 'NormalFile' or a 'Directory'. entryContent :: !EntryContent, -- | File permissions (Unix style file mode). entryPermissions :: {-# UNPACK #-} !Permissions, -- | The user and group to which this file belongs. entryOwnership :: {-# UNPACK #-} !Ownership, -- | The time the file was last modified. entryTime :: {-# UNPACK #-} !EpochTime, -- | The tar format the archive is using. entryFormat :: !Format } deriving (Eq, Show) -- | Native 'FilePath' of the file or directory within the archive. -- entryPath :: Entry -> FilePath entryPath = fromTarPath . entryTarPath -- | The content of a tar archive entry, which depends on the type of entry. -- -- Portable archives should contain only 'NormalFile' and 'Directory'. -- data EntryContent = NormalFile LBS.ByteString {-# UNPACK #-} !FileSize | Directory | SymbolicLink !LinkTarget | HardLink !LinkTarget | CharacterDevice {-# UNPACK #-} !DevMajor {-# UNPACK #-} !DevMinor | BlockDevice {-# UNPACK #-} !DevMajor {-# UNPACK #-} !DevMinor | NamedPipe | OtherEntryType {-# UNPACK #-} !TypeCode LBS.ByteString {-# UNPACK #-} !FileSize deriving (Eq, Ord, Show) data Ownership = Ownership { -- | The owner user name. Should be set to @\"\"@ if unknown. ownerName :: String, -- | The owner group name. Should be set to @\"\"@ if unknown. groupName :: String, -- | Numeric owner user id. Should be set to @0@ if unknown. ownerId :: {-# UNPACK #-} !Int, -- | Numeric owner group id. Should be set to @0@ if unknown. 
groupId :: {-# UNPACK #-} !Int } deriving (Eq, Ord, Show) -- | There have been a number of extensions to the tar file format over the -- years. They all share the basic entry fields and put more meta-data in -- different extended headers. -- data Format = -- | This is the classic Unix V7 tar format. It does not support owner and -- group names, just numeric Ids. It also does not support device numbers. V7Format -- | The \"USTAR\" format is an extension of the classic V7 format. It was -- later standardised by POSIX. It has some restrictions but is the most -- portable format. -- | UstarFormat -- | The GNU tar implementation also extends the classic V7 format, though -- in a slightly different way from the USTAR format. In general for new -- archives the standard USTAR/POSIX should be used. -- | GnuFormat deriving (Eq, Ord, Show) instance NFData Entry where rnf (Entry _ c _ _ _ _) = rnf c instance NFData EntryContent where rnf x = case x of NormalFile c _ -> rnflbs c OtherEntryType _ c _ -> rnflbs c _ -> seq x () where #if MIN_VERSION_bytestring(0,10,0) rnflbs = rnf #else rnflbs = foldr (\ !_bs r -> r) () . LBS.toChunks #endif instance NFData Ownership where rnf (Ownership o g _ _) = rnf o `seq` rnf g -- | @rw-r--r--@ for normal files ordinaryFilePermissions :: Permissions ordinaryFilePermissions = 0o0644 -- | @rwxr-xr-x@ for executable files executableFilePermissions :: Permissions executableFilePermissions = 0o0755 -- | @rwxr-xr-x@ for directories directoryPermissions :: Permissions directoryPermissions = 0o0755 -- | An 'Entry' with all default values except for the file name and type. It -- uses the portable USTAR/POSIX format (see 'UstarHeader'). -- -- You can use this as a basis and override specific fields, eg: -- -- > (emptyEntry name HardLink) { linkTarget = target } -- simpleEntry :: TarPath -> EntryContent -> Entry simpleEntry tarpath content = Entry { entryTarPath = tarpath, entryContent = content, entryPermissions = case content of Directory -> directoryPermissions _ -> ordinaryFilePermissions, entryOwnership = Ownership "" "" 0 0, entryTime = 0, entryFormat = UstarFormat } -- | A tar 'Entry' for a file. -- -- Entry fields such as file permissions and ownership have default values. -- -- You can use this as a basis and override specific fields. For example if you -- need an executable file you could use: -- -- > (fileEntry name content) { fileMode = executableFileMode } -- fileEntry :: TarPath -> LBS.ByteString -> Entry fileEntry name fileContent = simpleEntry name (NormalFile fileContent (LBS.length fileContent)) -- | A tar 'Entry' for a directory. -- -- Entry fields such as file permissions and ownership have default values. -- directoryEntry :: TarPath -> Entry directoryEntry name = simpleEntry name Directory -- -- * Tar paths -- -- | The classic tar format allowed just 100 characters for the file name. The -- USTAR format extended this with an extra 155 characters, however it uses a -- complex method of splitting the name between the two sections. -- -- Instead of just putting any overflow into the extended area, it uses the -- extended area as a prefix. The aggravating insane bit however is that the -- prefix (if any) must only contain a directory prefix. That is the split -- between the two areas must be on a directory separator boundary. So there is -- no simple calculation to work out if a file name is too long. Instead we -- have to try to find a valid split that makes the name fit in the two areas. 
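--
-- For example (an illustrative sketch, not a path from any real archive): a
-- 180-character path has to be split at a @\'/\'@ boundary, putting as many
-- trailing components as fit (e.g. @dir\/file.txt@) into the 100-character
-- name area and the leading directories into the 155-character prefix area;
-- if no split point lands on a directory separator the path simply cannot
-- be represented.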
-- -- The rationale presumably was to make it a bit more compatible with old tar -- programs that only understand the classic format. A classic tar would be -- able to extract the file name and possibly some dir prefix, but not the -- full dir prefix. So the files would end up in the wrong place, but that's -- probably better than ending up with the wrong names too. -- -- So it's understandable but rather annoying. -- -- * Tar paths use Posix format (ie @\'/\'@ directory separators), irrespective -- of the local path conventions. -- -- * The directory separator between the prefix and name is /not/ stored. -- data TarPath = TarPath {-# UNPACK #-} !BS.ByteString -- path name, 100 characters max. {-# UNPACK #-} !BS.ByteString -- path prefix, 155 characters max. deriving (Eq, Ord) instance NFData TarPath where rnf (TarPath _ _) = () -- fully strict by construction instance Show TarPath where show = show . fromTarPath -- | Convert a 'TarPath' to a native 'FilePath'. -- -- The native 'FilePath' will use the native directory separator but it is not -- otherwise checked for validity or sanity. In particular: -- -- * The tar path may be invalid as a native path, eg the file name @\"nul\"@ -- is not valid on Windows. -- -- * The tar path may be an absolute path or may contain @\"..\"@ components. -- For security reasons this should not usually be allowed, but it is your -- responsibility to check for these conditions (eg using 'checkSecurity'). -- fromTarPath :: TarPath -> FilePath fromTarPath (TarPath namebs prefixbs) = adjustDirectory $ FilePath.Native.joinPath $ FilePath.Posix.splitDirectories prefix ++ FilePath.Posix.splitDirectories name where name = BS.Char8.unpack namebs prefix = BS.Char8.unpack prefixbs adjustDirectory | FilePath.Posix.hasTrailingPathSeparator name = FilePath.Native.addTrailingPathSeparator | otherwise = id -- | Convert a 'TarPath' to a Unix\/Posix 'FilePath'. -- -- The difference compared to 'fromTarPath' is that it always returns a Unix -- style path irrespective of the current operating system. -- -- This is useful to check how a 'TarPath' would be interpreted on a specific -- operating system, eg to perform portability checks. -- fromTarPathToPosixPath :: TarPath -> FilePath fromTarPathToPosixPath (TarPath namebs prefixbs) = adjustDirectory $ FilePath.Posix.joinPath $ FilePath.Posix.splitDirectories prefix ++ FilePath.Posix.splitDirectories name where name = BS.Char8.unpack namebs prefix = BS.Char8.unpack prefixbs adjustDirectory | FilePath.Posix.hasTrailingPathSeparator name = FilePath.Posix.addTrailingPathSeparator | otherwise = id -- | Convert a 'TarPath' to a Windows 'FilePath'. -- -- The only difference compared to 'fromTarPath' is that it always returns a -- Windows style path irrespective of the current operating system. -- -- This is useful to check how a 'TarPath' would be interpreted on a specific -- operating system, eg to perform portability checks. -- fromTarPathToWindowsPath :: TarPath -> FilePath fromTarPathToWindowsPath (TarPath namebs prefixbs) = adjustDirectory $ FilePath.Windows.joinPath $ FilePath.Posix.splitDirectories prefix ++ FilePath.Posix.splitDirectories name where name = BS.Char8.unpack namebs prefix = BS.Char8.unpack prefixbs adjustDirectory | FilePath.Posix.hasTrailingPathSeparator name = FilePath.Windows.addTrailingPathSeparator | otherwise = id -- | Convert a native 'FilePath' to a 'TarPath'. -- -- The conversion may fail if the 'FilePath' is too long. See 'TarPath' for a -- description of the problem with splitting long 'FilePath's. 
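--
-- A usage sketch (the paths are illustrative):
--
-- > toTarPath False "foo/bar.txt"   -- Right: an ordinary file path
-- > toTarPath True  "foo/baz"       -- Right: stored with a trailing "/"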
-- toTarPath :: Bool -- ^ Is the path for a directory? This is needed because for -- directories a 'TarPath' must always use a trailing @\/@. -> FilePath -> Either String TarPath toTarPath isDir = splitLongPath . addTrailingSep . FilePath.Posix.joinPath . FilePath.Native.splitDirectories where addTrailingSep | isDir = FilePath.Posix.addTrailingPathSeparator | otherwise = id -- | Take a sanitised path, split on directory separators and try to pack it -- into the 155 + 100 tar file name format. -- -- The strategy is this: take the name-directory components in reverse order -- and try to fit as many components into the 100 long name area as possible. -- If all the remaining components fit in the 155 name area then we win. -- splitLongPath :: FilePath -> Either String TarPath splitLongPath path = case packName nameMax (reverse (FilePath.Posix.splitPath path)) of Left err -> Left err Right (name, []) -> Right $! TarPath (BS.Char8.pack name) BS.empty Right (name, first:rest) -> case packName prefixMax remainder of Left err -> Left err Right (_ , (_:_)) -> Left "File name too long (cannot split)" Right (prefix, []) -> Right $! TarPath (BS.Char8.pack name) (BS.Char8.pack prefix) where -- drop the '/' between the name and prefix: remainder = init first : rest where nameMax, prefixMax :: Int nameMax = 100 prefixMax = 155 packName _ [] = Left "File name empty" packName maxLen (c:cs) | n > maxLen = Left "File name too long" | otherwise = Right (packName' maxLen n [c] cs) where n = length c packName' maxLen n ok (c:cs) | n' <= maxLen = packName' maxLen n' (c:ok) cs where n' = n + length c packName' _ _ ok cs = (FilePath.Posix.joinPath ok, cs) -- | The tar format allows just 100 ASCII characters for the 'SymbolicLink' and -- 'HardLink' entry types. -- newtype LinkTarget = LinkTarget BS.ByteString deriving (Eq, Ord, Show) instance NFData LinkTarget where #if MIN_VERSION_bytestring(0,10,0) rnf (LinkTarget bs) = rnf bs #else rnf (LinkTarget !_bs) = () #endif -- | Convert a native 'FilePath' to a tar 'LinkTarget'. This may fail if the -- string is longer than 100 characters or if it contains non-portable -- characters. -- toLinkTarget :: FilePath -> Maybe LinkTarget toLinkTarget path | length path <= 100 = Just $! LinkTarget (BS.Char8.pack path) | otherwise = Nothing -- | Convert a tar 'LinkTarget' to a native 'FilePath'. -- fromLinkTarget :: LinkTarget -> FilePath fromLinkTarget (LinkTarget pathbs) = adjustDirectory $ FilePath.Native.joinPath $ FilePath.Posix.splitDirectories path where path = BS.Char8.unpack pathbs adjustDirectory | FilePath.Posix.hasTrailingPathSeparator path = FilePath.Native.addTrailingPathSeparator | otherwise = id -- | Convert a tar 'LinkTarget' to a Unix/Posix 'FilePath'. -- fromLinkTargetToPosixPath :: LinkTarget -> FilePath fromLinkTargetToPosixPath (LinkTarget pathbs) = adjustDirectory $ FilePath.Posix.joinPath $ FilePath.Posix.splitDirectories path where path = BS.Char8.unpack pathbs adjustDirectory | FilePath.Posix.hasTrailingPathSeparator path = FilePath.Native.addTrailingPathSeparator | otherwise = id -- | Convert a tar 'LinkTarget' to a Windows 'FilePath'. -- fromLinkTargetToWindowsPath :: LinkTarget -> FilePath fromLinkTargetToWindowsPath (LinkTarget pathbs) = adjustDirectory $ FilePath.Windows.joinPath $ FilePath.Posix.splitDirectories path where path = BS.Char8.unpack pathbs adjustDirectory | FilePath.Posix.hasTrailingPathSeparator path = FilePath.Windows.addTrailingPathSeparator | otherwise = id -- -- * Entries type -- -- | A tar archive is a sequence of entries. 
-- -- The point of this type as opposed to just using a list is that it makes the -- failure case explicit. We need this because the sequence of entries we get -- from reading a tarball can include errors. -- -- It is a concrete data type so you can manipulate it directly but it is often -- clearer to use the provided functions for mapping, folding and unfolding. -- -- Converting from a list can be done with just @foldr Next Done@. Converting -- back into a list can be done with 'foldEntries' however in that case you -- must be prepared to handle the 'Fail' case inherent in the 'Entries' type. -- -- The 'Monoid' instance lets you concatenate archives or append entries to an -- archive. -- data Entries e = Next Entry (Entries e) | Done | Fail e deriving (Eq, Show) infixr 5 `Next` -- | This is like the standard 'unfoldr' function on lists, but for 'Entries'. -- It includes failure as an extra possibility that the stepper function may -- return. -- -- It can be used to generate 'Entries' from some other type. For example it is -- used internally to lazily unfold entries from a 'LBS.ByteString'. -- unfoldEntries :: (a -> Either e (Maybe (Entry, a))) -> a -> Entries e unfoldEntries f = unfold where unfold x = case f x of Left err -> Fail err Right Nothing -> Done Right (Just (e, x')) -> Next e (unfold x') -- | This is like the standard 'foldr' function on lists, but for 'Entries'. -- Compared to 'foldr' it takes an extra function to account for the -- possibility of failure. -- -- This is used to consume a sequence of entries. For example it could be used -- to scan a tarball for problems or to collect an index of the contents. -- foldEntries :: (Entry -> a -> a) -> a -> (e -> a) -> Entries e -> a foldEntries next done fail' = fold where fold (Next e es) = next e (fold es) fold Done = done fold (Fail err) = fail' err -- | A 'foldl'-like function on Entries. It either returns the final -- accumulator result, or the failure along with the intermediate accumulator -- value. -- foldlEntries :: (a -> Entry -> a) -> a -> Entries e -> Either (e, a) a foldlEntries f z = go z where go !acc (Next e es) = go (f acc e) es go !acc Done = Right acc go !acc (Fail err) = Left (err, acc) -- | This is like the standard 'map' function on lists, but for 'Entries'. It -- includes failure as an extra possible outcome of the mapping function. -- -- If your mapping function cannot fail it may be more convenient to use -- 'mapEntriesNoFail'. mapEntries :: (Entry -> Either e' Entry) -> Entries e -> Entries (Either e e') mapEntries f = foldEntries (\entry rest -> either (Fail . Right) (flip Next rest) (f entry)) Done (Fail . Left) -- | Like 'mapEntries' but the mapping function itself cannot fail. -- mapEntriesNoFail :: (Entry -> Entry) -> Entries e -> Entries e mapEntriesNoFail f = foldEntries (\entry -> Next (f entry)) Done Fail -- | @since 0.5.1.0 instance Sem.Semigroup (Entries e) where a <> b = foldEntries Next b Fail a instance Monoid (Entries e) where mempty = Done mappend = (Sem.<>) instance Functor Entries where fmap f = foldEntries Next Done (Fail .
f) instance NFData e => NFData (Entries e) where rnf (Next e es) = rnf e `seq` rnf es rnf Done = () rnf (Fail e) = rnf e ------------------------- -- QuickCheck instances -- #ifdef TESTS instance Arbitrary Entry where arbitrary = Entry <$> arbitrary <*> arbitrary <*> arbitraryPermissions <*> arbitrary <*> arbitraryEpochTime <*> arbitrary where arbitraryPermissions :: Gen Permissions arbitraryPermissions = fromIntegral <$> (arbitrary :: Gen Word16) arbitraryEpochTime :: Gen EpochTime arbitraryEpochTime = arbitraryOctal 11 shrink (Entry path content perms author time format) = [ Entry path' content' perms author' time' format | (path', content', author', time') <- shrink (path, content, author, time) ] ++ [ Entry path content perms' author time format | perms' <- shrinkIntegral perms ] instance Arbitrary TarPath where arbitrary = either error id . toTarPath False . FilePath.Posix.joinPath <$> listOf1ToN (255 `div` 5) (elements (map (replicate 4) "abcd")) shrink = map (either error id . toTarPath False) . map FilePath.Posix.joinPath . filter (not . null) . shrinkList shrinkNothing . FilePath.Posix.splitPath . fromTarPathToPosixPath instance Arbitrary LinkTarget where arbitrary = maybe (error "link target too large") id . toLinkTarget . FilePath.Native.joinPath <$> listOf1ToN (100 `div` 5) (elements (map (replicate 4) "abcd")) shrink = map (maybe (error "link target too large") id . toLinkTarget) . map FilePath.Posix.joinPath . filter (not . null) . shrinkList shrinkNothing . FilePath.Posix.splitPath . fromLinkTargetToPosixPath listOf1ToN :: Int -> Gen a -> Gen [a] listOf1ToN n g = sized $ \sz -> do n <- choose (1, min n (max 1 sz)) vectorOf n g listOf0ToN :: Int -> Gen a -> Gen [a] listOf0ToN n g = sized $ \sz -> do n <- choose (0, min n sz) vectorOf n g instance Arbitrary EntryContent where arbitrary = frequency [ (16, do bs <- arbitrary; return (NormalFile bs (LBS.length bs))) , (2, pure Directory) , (1, SymbolicLink <$> arbitrary) , (1, HardLink <$> arbitrary) , (1, CharacterDevice <$> arbitraryOctal 7 <*> arbitraryOctal 7) , (1, BlockDevice <$> arbitraryOctal 7 <*> arbitraryOctal 7) , (1, pure NamedPipe) , (1, do c <- elements (['A'..'Z']++['a'..'z']) bs <- arbitrary; return (OtherEntryType c bs (LBS.length bs))) ] shrink (NormalFile bs _) = [ NormalFile bs' (LBS.length bs') | bs' <- shrink bs ] shrink Directory = [] shrink (SymbolicLink link) = [ SymbolicLink link' | link' <- shrink link ] shrink (HardLink link) = [ HardLink link' | link' <- shrink link ] shrink (CharacterDevice ma mi) = [ CharacterDevice ma' mi' | (ma', mi') <- shrink (ma, mi) ] shrink (BlockDevice ma mi) = [ BlockDevice ma' mi' | (ma', mi') <- shrink (ma, mi) ] shrink NamedPipe = [] shrink (OtherEntryType c bs _) = [ OtherEntryType c bs' (LBS.length bs') | bs' <- shrink bs ] instance Arbitrary LBS.ByteString where arbitrary = fmap LBS.pack arbitrary shrink = map LBS.pack . shrink . LBS.unpack instance Arbitrary BS.ByteString where arbitrary = fmap BS.pack arbitrary shrink = map BS.pack . shrink . 
BS.unpack instance Arbitrary Ownership where arbitrary = Ownership <$> name <*> name <*> idno <*> idno where -- restrict user/group to posix ^[a-z][-a-z0-9]{0,30}$ name = do first <- choose ('a', 'z') rest <- listOf0ToN 30 (oneof [choose ('a', 'z'), choose ('0', '9'), pure '-']) return $ first : rest idno = arbitraryOctal 7 shrink (Ownership oname gname oid gid) = [ Ownership oname' gname' oid' gid' | (oname', gname', oid', gid') <- shrink (oname, gname, oid, gid) ] instance Arbitrary Format where arbitrary = elements [V7Format, UstarFormat, GnuFormat] --arbitraryOctal :: (Integral n, Random n) => Int -> Gen n arbitraryOctal n = oneof [ pure 0 , choose (0, upperBound) , pure upperBound ] where upperBound = 8^n-1 -- For QC tests it's useful to have a way to limit the info to that which can -- be expressed in the old V7 format limitToV7FormatCompat :: Entry -> Entry limitToV7FormatCompat entry@Entry { entryFormat = V7Format } = entry { entryContent = case entryContent entry of CharacterDevice _ _ -> OtherEntryType '3' LBS.empty 0 BlockDevice _ _ -> OtherEntryType '4' LBS.empty 0 Directory -> OtherEntryType '5' LBS.empty 0 NamedPipe -> OtherEntryType '6' LBS.empty 0 other -> other, entryOwnership = (entryOwnership entry) { groupName = "", ownerName = "" }, entryTarPath = let TarPath name _prefix = entryTarPath entry in TarPath name BS.empty } limitToV7FormatCompat entry = entry #endif tar-0.5.1.1/Codec/Archive/Tar/Unpack.hs0000644000000000000000000001113107346545000015531 0ustar0000000000000000{-# LANGUAGE CPP #-} ----------------------------------------------------------------------------- -- | -- Module : Codec.Archive.Tar.Unpack -- Copyright : (c) 2007 Bjorn Bringert, -- 2008 Andrea Vezzosi, -- 2008-2009, 2012, 2016 Duncan Coutts -- License : BSD3 -- -- Maintainer : duncan@community.haskell.org -- Portability : portable -- ----------------------------------------------------------------------------- module Codec.Archive.Tar.Unpack ( unpack, ) where import Codec.Archive.Tar.Types import Codec.Archive.Tar.Check import qualified Data.ByteString.Lazy as BS import System.FilePath ( (</>) ) import qualified System.FilePath as FilePath.Native ( takeDirectory ) import System.Directory ( createDirectoryIfMissing, copyFile ) import Control.Exception ( Exception, throwIO ) #if MIN_VERSION_directory(1,2,3) import System.Directory ( setModificationTime ) import Data.Time.Clock.POSIX ( posixSecondsToUTCTime ) import Control.Exception as Exception ( catch ) import System.IO.Error ( isPermissionError ) #endif -- | Create local files and directories based on the entries of a tar archive. -- -- This is a portable implementation of unpacking suitable for portable -- archives. It handles 'NormalFile' and 'Directory' entries and has simulated -- support for 'SymbolicLink' and 'HardLink' entries. Links are implemented by -- copying the target file. This therefore works on Windows as well as Unix. -- All other entry types are ignored, that is they are not unpacked and no -- exception is raised. -- -- If the 'Entries' ends in an error then it is raised as an exception. Any -- files or directories that have been unpacked before the error was -- encountered will not be deleted. For this reason you may want to unpack -- into an empty directory so that you can easily clean up if unpacking fails -- part-way. -- -- On its own, this function only checks for security (using 'checkSecurity'). -- You can do other checks by applying checking functions to the 'Entries' that -- you pass to this function. For example: -- -- > unpack dir (checkTarbomb expectedDir entries) -- -- If you care about the priority of the reported errors then you may want to -- use 'checkSecurity' before 'checkTarbomb' or other checks. -- unpack :: Exception e => FilePath -> Entries e -> IO () unpack baseDir entries = unpackEntries [] (checkSecurity entries) >>= emulateLinks where -- We're relying here on 'checkSecurity' to make sure we're not scribbling -- files all over the place. unpackEntries _ (Fail err) = either throwIO throwIO err unpackEntries links Done = return links unpackEntries links (Next entry es) = case entryContent entry of NormalFile file _ -> extractFile path file mtime >> unpackEntries links es Directory -> extractDir path mtime >> unpackEntries links es HardLink link -> (unpackEntries $! saveLink path link links) es SymbolicLink link -> (unpackEntries $! saveLink path link links) es _ -> unpackEntries links es --ignore other file types where path = entryPath entry mtime = entryTime entry extractFile path content mtime = do -- Note that tar archives do not make sure each directory is created -- before files they contain, indeed we may have to create several -- levels of directory. createDirectoryIfMissing True absDir BS.writeFile absPath content setModTime absPath mtime where absDir = baseDir </> FilePath.Native.takeDirectory path absPath = baseDir </> path extractDir path mtime = do createDirectoryIfMissing True absPath setModTime absPath mtime where absPath = baseDir </> path saveLink path link links = seq (length path) $ seq (length link') $ (path, link'):links where link' = fromLinkTarget link emulateLinks = mapM_ $ \(relPath, relLinkTarget) -> let absPath = baseDir </> relPath absTarget = FilePath.Native.takeDirectory absPath </> relLinkTarget in copyFile absTarget absPath setModTime :: FilePath -> EpochTime -> IO () #if MIN_VERSION_directory(1,2,3) -- functionality only supported as of directory-1.2.3.x setModTime path t = setModificationTime path (posixSecondsToUTCTime (fromIntegral t)) `Exception.catch` \e -> if isPermissionError e then return () else throwIO e #else setModTime _path _t = return () #endif tar-0.5.1.1/Codec/Archive/Tar/Write.hs0000644000000000000000000001111707346545000015406 0ustar0000000000000000----------------------------------------------------------------------------- -- | -- Module : Codec.Archive.Tar.Write -- Copyright : (c) 2007 Bjorn Bringert, -- 2008 Andrea Vezzosi, -- 2008-2009 Duncan Coutts -- License : BSD3 -- -- Maintainer : duncan@community.haskell.org -- Portability : portable -- ----------------------------------------------------------------------------- module Codec.Archive.Tar.Write (write) where import Codec.Archive.Tar.Types import Data.Char (ord) import Data.List (foldl') import Data.Monoid (mempty) import Numeric (showOct) import qualified Data.ByteString as BS import qualified Data.ByteString.Char8 as BS.Char8 import qualified Data.ByteString.Lazy as LBS import qualified Data.ByteString.Lazy.Char8 as LBS.Char8 -- | Create the external representation of a tar archive by serialising a list -- of tar entries. -- -- * The conversion is done lazily.
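--
-- A usage sketch (the file names are illustrative; 'toTarPath' and
-- 'fileEntry' are the entry-construction helpers from
-- "Codec.Archive.Tar.Entry"):
--
-- > case toTarPath False "hello.txt" of
-- >   Left err   -> error err
-- >   Right path -> LBS.writeFile "hello.tar"
-- >                   (write [fileEntry path (LBS.Char8.pack "hi")])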
-- write :: [Entry] -> LBS.ByteString write es = LBS.concat $ map putEntry es ++ [LBS.replicate (512*2) 0] putEntry :: Entry -> LBS.ByteString putEntry entry = case entryContent entry of NormalFile content size -> LBS.concat [ header, content, padding size ] OtherEntryType _ content size -> LBS.concat [ header, content, padding size ] _ -> header where header = putHeader entry padding size = LBS.replicate paddingSize 0 where paddingSize = fromIntegral (negate size `mod` 512) putHeader :: Entry -> LBS.ByteString putHeader entry = LBS.Char8.pack $ take 148 block ++ putOct 7 checksum ++ ' ' : drop 156 block -- ++ putOct 8 checksum -- ++ drop 156 block where block = putHeaderNoChkSum entry checksum = foldl' (\x y -> x + ord y) 0 block putHeaderNoChkSum :: Entry -> String putHeaderNoChkSum Entry { entryTarPath = TarPath name prefix, entryContent = content, entryPermissions = permissions, entryOwnership = ownership, entryTime = modTime, entryFormat = format } = concat [ putBString 100 $ name , putOct 8 $ permissions , putOct 8 $ ownerId ownership , putOct 8 $ groupId ownership , putOct 12 $ contentSize , putOct 12 $ modTime , fill 8 $ ' ' -- dummy checksum , putChar8 $ typeCode , putBString 100 $ linkTarget ] ++ case format of V7Format -> fill 255 '\NUL' UstarFormat -> concat [ putBString 8 $ ustarMagic , putString 32 $ ownerName ownership , putString 32 $ groupName ownership , putOct 8 $ deviceMajor , putOct 8 $ deviceMinor , putBString 155 $ prefix , fill 12 $ '\NUL' ] GnuFormat -> concat [ putBString 8 $ gnuMagic , putString 32 $ ownerName ownership , putString 32 $ groupName ownership , putGnuDev 8 $ deviceMajor , putGnuDev 8 $ deviceMinor , putBString 155 $ prefix , fill 12 $ '\NUL' ] where (typeCode, contentSize, linkTarget, deviceMajor, deviceMinor) = case content of NormalFile _ size -> ('0' , size, mempty, 0, 0) Directory -> ('5' , 0, mempty, 0, 0) SymbolicLink (LinkTarget link) -> ('2' , 0, link, 0, 0) HardLink (LinkTarget link) -> ('1' , 0, link, 0, 0) CharacterDevice major minor -> ('3' , 0, mempty, major, minor) BlockDevice major minor -> ('4' , 0, mempty, major, minor) NamedPipe -> ('6' , 0, mempty, 0, 0) OtherEntryType code _ size -> (code, size, mempty, 0, 0) putGnuDev w n = case content of CharacterDevice _ _ -> putOct w n BlockDevice _ _ -> putOct w n _ -> replicate w '\NUL' ustarMagic, gnuMagic :: BS.ByteString ustarMagic = BS.Char8.pack "ustar\NUL00" gnuMagic = BS.Char8.pack "ustar \NUL" -- * TAR format primitive output type FieldWidth = Int putBString :: FieldWidth -> BS.ByteString -> String putBString n s = BS.Char8.unpack (BS.take n s) ++ fill (n - BS.length s) '\NUL' putString :: FieldWidth -> String -> String putString n s = take n s ++ fill (n - length s) '\NUL' --TODO: check integer widths, eg for large file sizes putOct :: (Integral a, Show a) => FieldWidth -> a -> String putOct n x = let octStr = take (n-1) $ showOct x "" in fill (n - length octStr - 1) '0' ++ octStr ++ putChar8 '\NUL' putChar8 :: Char -> String putChar8 c = [c] fill :: FieldWidth -> Char -> String fill n c = replicate n c tar-0.5.1.1/LICENSE0000644000000000000000000000305507346545000011643 0ustar0000000000000000Copyright (c) 2007 Björn Bringert, 2008-2015 Duncan Coutts, 2011 Max Bolingbroke All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright
  notice, this list of conditions and the following disclaimer in the
  documentation and/or other materials provided with the distribution.

- Neither the names of the copyright owners nor the names of the
  contributors may be used to endorse or promote products derived
  from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
tar-0.5.1.1/Setup.lhs0000644000000000000000000000010307346545000012435 0ustar0000000000000000> import Distribution.Simple
> main :: IO ()
> main = defaultMain
tar-0.5.1.1/bench/0000755000000000000000000000000007346545000011712 5ustar0000000000000000tar-0.5.1.1/bench/Main.hs0000644000000000000000000000210407346545000013127 0ustar0000000000000000module Main where

import qualified Codec.Archive.Tar as Tar
import qualified Codec.Archive.Tar.Index as TarIndex
import qualified Data.ByteString.Lazy as BS

import Control.Exception
import Criterion
import Criterion.Main

main :: IO ()
main = defaultMain benchmarks

benchmarks :: [Benchmark]
benchmarks =
  [ env loadTarFile $ \tarfile ->
      bench "read" (nf Tar.read tarfile)
  , env loadTarEntriesList $ \entries ->
      bench "write" (nf Tar.write entries)
  , env loadTarEntries $ \entries ->
      bench "index build" (nf TarIndex.build entries)
  , env loadTarIndex $ \index ->
      bench "index rebuild" (nf (TarIndex.finalise . TarIndex.unfinalise) index)
  ]

loadTarFile :: IO BS.ByteString
loadTarFile = BS.readFile "01-index.tar"

loadTarEntries :: IO (Tar.Entries Tar.FormatError)
loadTarEntries = fmap Tar.read loadTarFile

loadTarEntriesList :: IO [Tar.Entry]
loadTarEntriesList = fmap (Tar.foldEntries (:) [] throw) loadTarEntries

loadTarIndex :: IO TarIndex.TarIndex
loadTarIndex = fmap (either throw id . TarIndex.build) loadTarEntries
tar-0.5.1.1/changelog.md0000755000000000000000000000534007346545000013111 0ustar0000000000000000See also http://pvp.haskell.org/faq

0.5.1.1 Herbert Valerio Riedel March 2018

  * Add support for GHC 8.8.1 / base-4.13

0.5.1.0 Herbert Valerio Riedel March 2018

  * Add support for GHC 8.4.1 / base-4.11
  * Add `Semigroup` instance for `Entries`

0.5.0.3 Duncan Coutts May 2016

  * Fix tarbomb logic to ignore special PAX entries. Was breaking many
    valid tarballs. https://github.com/haskell/cabal/issues/3390

0.5.0.2 Duncan Coutts April 2016

  * Fix compatibility when using ghc-7.4.x and directory >= 1.2.3

0.5.0.1 Duncan Coutts January 2016

  * Fix compatibility with directory-1.2.3+

0.5.0.0 Duncan Coutts January 2016

  * Work with old version of bytestring (using bytestring-builder package).
  * Builds with GHC 6.10 -- 8.0.
  * Change type of Index.serialise to be simply strict bytestring.
  * Preserve file timestamps on unpack (with directory-1.2.3+)

0.4.5.0 Duncan Coutts January 2016

  * Revert accidental minor API change in the 0.4.x series (the type of the
    owner and group name strings). The 0.4.3.0 and 0.4.4.0 releases
    contained the accidental API change.
  * Add a handy foldlEntries function

0.4.4.0 Duncan Coutts January 2016

  * Build and warning fixes for GHC 7.10 and 8.0
  * New Index module function `toList` to get all index entries

0.4.3.0 Duncan Coutts January 2016

  * New Index function `unfinalise` to extend an existing index
  * 9x faster reading
  * 9x faster index construction
  * 24x faster index extension
  * More compact entry types, using ByteStrings
  * More Eq and Show instances
  * Greater QC test coverage
  * Fix minor bug in reading non-standard v7 format entries

0.4.2.2 Edsko de Vries October 2015

  * Fix bug in Index

0.4.2.1 Duncan Coutts July 2015

  * Fix tests for the Index modules (the code was right)

0.4.2.0 Duncan Coutts July 2015

  * New Index module for random access to tar file contents
  * New lower-level tar file I/O actions
  * New tarball file 'append' action

0.4.1.0 Duncan Coutts January 2015

  * Build with GHC 7.10
  * Switch from the old-time to the time package
  * Added more instances for the Entries type

0.4.0.1 Duncan Coutts October 2012

  * Fixes to work with directory 1.2
  * More Eq/Ord instances

0.4.0.0 Duncan Coutts February 2012

  * More explicit error types and error handling
  * Support star base-256 number format
  * Improved API documentation
tar-0.5.1.1/tar.cabal0000644000000000000000000001205007346545000012403 0ustar0000000000000000cabal-version: 1.12
name:            tar
version:         0.5.1.1

license:         BSD3
license-file:    LICENSE
author:          Duncan Coutts
                 Bjorn Bringert
maintainer:      Duncan Coutts <duncan@community.haskell.org>
bug-reports:     https://github.com/haskell/tar/issues
copyright:       2007 Bjorn Bringert
                 2008-2016 Duncan Coutts
category:        Codec
synopsis:        Reading, writing and manipulating ".tar" archive files.
description:     This library is for working with \"@.tar@\" archive files.
                 It can read and write a range of common variations of the
                 archive format including V7, POSIX USTAR and GNU formats.
                 .
                 It provides support for packing and unpacking portable
                 archives. This makes it suitable for distribution but not
                 backup because details like file ownership and exact
                 permissions are not preserved.
                 .
                 It also provides features for random access to archive
                 content using an index.

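-- An illustrative (not canonical) invocation that selects the compatibility
-- flags declared below, for building against older dependency versions:
--
--   cabal configure -f old-time -f old-bytestring
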
build-type:      Simple
extra-source-files: changelog.md
tested-with:     GHC==7.0.4, GHC==7.2.2, GHC==7.4.2, GHC==7.6.3, GHC==7.8.4,
                 GHC==7.10.3, GHC==8.0.2, GHC==8.2.2, GHC==8.4.1

source-repository head
  type: git
  location: https://github.com/haskell/tar.git

flag old-time
  default: False

flag old-bytestring
  default: False

library
  build-depends: base >= 4 && < 4.14, filepath < 1.5,
                 array < 0.6, containers >= 0.2 && < 0.7,
                 deepseq >= 1.1 && < 1.5

  if flag(old-time)
    build-depends: directory < 1.2, old-time < 1.2
  else
    build-depends: directory >= 1.2 && < 1.4, time < 1.10

  if flag(old-bytestring)
    build-depends: bytestring-builder >= 0.10.4.0.2 && < 0.11,
                   bytestring == 0.9.*
  else
    build-depends: bytestring == 0.10.*

  if !impl(ghc >= 8.0)
    build-depends: semigroups == 0.18.*

  exposed-modules:
    Codec.Archive.Tar
    Codec.Archive.Tar.Entry
    Codec.Archive.Tar.Check
    Codec.Archive.Tar.Index
  other-modules:
    Codec.Archive.Tar.Types
    Codec.Archive.Tar.Read
    Codec.Archive.Tar.Write
    Codec.Archive.Tar.Pack
    Codec.Archive.Tar.Unpack
    Codec.Archive.Tar.Index.StringTable
    Codec.Archive.Tar.Index.IntTrie

  default-language: Haskell2010

  -- Previously, the package used GHC's default Haskell mode which implies
  -- NDI; so we keep it transitionally enabled here until we've reviewed the
  -- code to make sure there isn't any code that relies on NDI and keeps
  -- compiling, albeit with different semantics, even without NDI
  default-extensions: NondecreasingIndentation

  other-extensions:
    BangPatterns
    CPP
    DeriveDataTypeable
    GeneralizedNewtypeDeriving
    PatternGuards
    ScopedTypeVariables

  ghc-options: -Wall -fno-warn-unused-imports

test-suite properties
  type: exitcode-stdio-1.0
  build-depends: base, filepath,
                 array, containers,
                 deepseq,
                 bytestring-handle,
                 QuickCheck       == 2.*,
                 tasty            >= 0.10 && < 0.12,
                 tasty-quickcheck == 0.8.*

  if flag(old-time)
    build-depends: directory < 1.2, old-time
  else
    build-depends: directory >= 1.2, time

  if flag(old-bytestring)
    build-depends: bytestring-builder, bytestring >= 0.9 && < 0.10
  else
    build-depends: bytestring >= 0.10

  if !impl(ghc >= 8.0)
    build-depends: semigroups == 0.18.*

  default-language: Haskell2010

  hs-source-dirs: . test

  main-is: test/Properties.hs
  cpp-options: -DTESTS

  other-modules:
    Codec.Archive.Tar.Index
    Codec.Archive.Tar.Index.StringTable
    Codec.Archive.Tar.Index.IntTrie

  -- shared w/ lib:tar component
  other-modules:
    Codec.Archive.Tar
    Codec.Archive.Tar.Check
    Codec.Archive.Tar.Pack
    Codec.Archive.Tar.Read
    Codec.Archive.Tar.Types
    Codec.Archive.Tar.Unpack
    Codec.Archive.Tar.Write

  other-extensions:
    CPP
    BangPatterns
    DeriveDataTypeable
    ScopedTypeVariables

  ghc-options: -fno-ignore-asserts

benchmark bench
  type: exitcode-stdio-1.0
  hs-source-dirs: . bench
  main-is: bench/Main.hs

  build-depends: base,
                 bytestring >= 0.10,
                 filepath,
                 directory >= 1.2,
                 array, containers,
                 deepseq,
                 time,
                 criterion >= 1.0

  if !impl(ghc >= 8.0)
    build-depends: semigroups == 0.18.*

  default-language: Haskell2010

  -- shared w/ lib:tar component
  other-modules:
    Codec.Archive.Tar
    Codec.Archive.Tar.Check
    Codec.Archive.Tar.Index
    Codec.Archive.Tar.Index.IntTrie
    Codec.Archive.Tar.Index.StringTable
    Codec.Archive.Tar.Pack
    Codec.Archive.Tar.Read
    Codec.Archive.Tar.Types
    Codec.Archive.Tar.Unpack
    Codec.Archive.Tar.Write
tar-0.5.1.1/test/0000755000000000000000000000000007346545000011612 5ustar0000000000000000tar-0.5.1.1/test/Properties.hs0000644000000000000000000000400107346545000014277 0ustar0000000000000000module Main where

import qualified Codec.Archive.Tar.Index             as Index
import qualified Codec.Archive.Tar.Index.IntTrie     as IntTrie
import qualified Codec.Archive.Tar.Index.StringTable as StringTable
import qualified Codec.Archive.Tar                   as Tar

import qualified Data.ByteString as BS

import Test.Tasty
import Test.Tasty.QuickCheck

main :: IO ()
main =
  defaultMain $
    testGroup "tar tests"
      [ testGroup "write/read"
          [ testProperty "ustar format" Tar.prop_write_read_ustar
          , testProperty "gnu format"   Tar.prop_write_read_gnu
          , testProperty "v7 format"    Tar.prop_write_read_v7
          ]
      , testGroup "string table"
          [ testProperty "construction" StringTable.prop_valid
          , testProperty "sorted"       StringTable.prop_sorted
          , testProperty "serialise"    StringTable.prop_serialise_deserialise
          , testProperty "size"         StringTable.prop_serialiseSize
          , testProperty "unfinalise"   StringTable.prop_finalise_unfinalise
          ]
      , testGroup "int trie"
          [ testProperty "unit 1"      IntTrie.test1
          , testProperty "unit 2"      IntTrie.test2
          , testProperty "unit 3"      IntTrie.test3
          , testProperty "lookups"     IntTrie.prop_lookup_mono
          , testProperty "completions" IntTrie.prop_completions_mono
          , testProperty "toList"      IntTrie.prop_construct_toList
          , testProperty "serialise"   IntTrie.prop_serialise_deserialise
          , testProperty "size"        IntTrie.prop_serialiseSize
          , testProperty "unfinalise"  IntTrie.prop_finalise_unfinalise
          ]
      , testGroup "index"
          [ testProperty "lookup"      Index.prop_lookup
          , testProperty "valid"       Index.prop_valid
          , testProperty "toList"      Index.prop_toList
          , testProperty "serialise"   Index.prop_serialise_deserialise
          , testProperty "size"        Index.prop_serialiseSize
          , testProperty "matches tar" Index.prop_index_matches_tar
          , testProperty "unfinalise"  Index.prop_finalise_unfinalise
          ]
      ]
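
-- A sketch of how this test-suite is typically invoked (assuming
-- cabal-install; "properties" is the test-suite name declared in tar.cabal
-- above, and the library is compiled with -DTESTS to expose the properties):
--
-- $ cabal configure --enable-tests && cabal build && cabal test properties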