cassava-0.4.4.0/0000755000000000000000000000000012570632764011500 5ustar0000000000000000cassava-0.4.4.0/cassava.cabal0000644000000000000000000000556312570632764014116 0ustar0000000000000000Name: cassava Version: 0.4.4.0 Synopsis: A CSV parsing and encoding library Description: A CSV parsing and encoding library optimized for ease of use and high performance. Homepage: https://github.com/tibbe/cassava License: BSD3 License-file: LICENSE Bug-reports: https://github.com/tibbe/cassava/issues Copyright: (c) 2012 Johan Tibell (c) 2012 Bryan O'Sullivan (c) 2011 MailRank, Inc. Author: Johan Tibell Maintainer: johan.tibell@gmail.com Category: Text, Web, CSV Build-type: Simple Cabal-version: >=1.8 Extra-source-files: examples/*.hs Tested-with: GHC == 7.10.2, GHC == 7.8.4, GHC == 7.6.3 Library Exposed-modules: Data.Csv Data.Csv.Builder Data.Csv.Incremental Data.Csv.Parser Data.Csv.Streaming Other-modules: Data.Csv.Compat.Monoid Data.Csv.Conversion Data.Csv.Conversion.Internal Data.Csv.Encoding Data.Csv.Types Data.Csv.Util Build-depends: array < 0.6, attoparsec >= 0.10.2 && < 0.14, base >= 4.5 && < 5, blaze-builder < 0.5, bytestring < 0.11, containers < 0.6, deepseq < 1.5, hashable < 1.3, text < 1.3, unordered-containers < 0.3, vector < 0.12 ghc-options: -Wall -O2 -- GHC.Generics lived in `ghc-prim` for GHC 7.2 & GHC 7.4 only if impl(ghc < 7.6) build-depends: ghc-prim == 0.2.* Test-suite unit-tests Type: exitcode-stdio-1.0 Main-is: UnitTests.hs Build-depends: attoparsec, base >= 4.5, bytestring, cassava, hashable < 1.3, HUnit, QuickCheck >= 2.0, test-framework, test-framework-hunit, test-framework-quickcheck2, text, unordered-containers, vector hs-source-dirs: tests ghc-options: -Wall Benchmark benchmarks Type: exitcode-stdio-1.0 Main-is: Benchmarks.hs Other-modules: Data.Csv Data.Csv.Compat.Monoid Data.Csv.Conversion Data.Csv.Conversion.Internal Data.Csv.Encoding Data.Csv.Incremental Data.Csv.Parser Data.Csv.Streaming Data.Csv.Types Data.Csv.Util Build-depends: array < 0.6, attoparsec >= 0.10.2 && < 0.14, base >= 4.5 && < 5, blaze-builder < 0.5, bytestring < 0.11, containers < 0.6, criterion >= 1.0, deepseq < 1.5, hashable < 1.3, lazy-csv >= 0.5, text < 1.3, text, unordered-containers < 0.3, vector < 0.12 ghc-options: -Wall -O2 if impl(ghc >= 7.2.1) cpp-options: -DGENERICS -- GHC.Generics lived in `ghc-prim` for GHC 7.2 & GHC 7.4 only if impl(ghc < 7.6) build-depends: ghc-prim == 0.2.* -- We cannot depend on the library directly as that creates a -- dependency cycle. hs-source-dirs: . benchmarks source-repository head type: git location: https://github.com/tibbe/cassava.git cassava-0.4.4.0/LICENSE0000644000000000000000000000276112570632764012513 0ustar0000000000000000Copyright (c)2012, Johan Tibell All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of Johan Tibell nor the names of other contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. cassava-0.4.4.0/Setup.hs0000644000000000000000000000005612570632764013135 0ustar0000000000000000import Distribution.Simple main = defaultMain cassava-0.4.4.0/benchmarks/0000755000000000000000000000000012570632764013615 5ustar0000000000000000cassava-0.4.4.0/benchmarks/Benchmarks.hs0000644000000000000000000001271112570632764016230 0ustar0000000000000000{-# LANGUAGE BangPatterns, CPP, FlexibleInstances, OverloadedStrings, RecordWildCards, TypeSynonymInstances #-} {-# OPTIONS_GHC -funbox-strict-fields #-} module Main ( main ) where import Control.Applicative import Control.Exception (evaluate) import Control.DeepSeq import Criterion.Main import Data.ByteString (ByteString) import qualified Data.ByteString as B import qualified Data.ByteString.Lazy as BL import qualified Data.HashMap.Strict as HM import Control.Monad (mzero) import Data.Text (Text) import qualified Text.CSV.Lazy.ByteString as LazyCsv import Data.Vector (Vector) import qualified Data.Vector as V import Data.Csv import qualified Data.Csv.Streaming as Streaming #if !MIN_VERSION_bytestring(0,10,0) instance NFData (B.ByteString) where rnf !s = () #endif data President = President { presidency :: !Int , president :: !Text , wikipediaEntry :: !ByteString , tookOffice :: !ByteString , leftOffice :: !ByteString , party :: !Text , homeState :: !Text } instance NFData President where rnf (President {}) = () instance FromRecord President where parseRecord v | V.length v == 7 = President <$> v .!! 0 <*> v .!! 1 <*> v .!! 2 <*> v .!! 3 <*> v .!! 4 <*> v .!! 5 <*> v .!! 6 | otherwise = mzero -- | Unchecked version of '(.!)'. (.!!) :: FromField a => Record -> Int -> Parser a v .!! idx = parseField (V.unsafeIndex v idx) {-# INLINE (.!!) #-} infixl 9 .!! instance ToRecord President where toRecord (President {..}) = record [toField presidency, toField president, toField wikipediaEntry, toField tookOffice, toField leftOffice, toField party, toField homeState] instance FromNamedRecord President where parseNamedRecord m = President <$> m .: "Presidency" <*> m .: "President" <*> m .: "Wikipedia Entry" <*> m .: "Took office" <*> m .: "Left office" <*> m .: "Party" <*> m .: "Home State" instance ToNamedRecord President where toNamedRecord (President {..}) = namedRecord [ "Presidency" .= presidency , "President" .= president , "Wikipedia Entry" .= wikipediaEntry , "Took office" .= tookOffice , "Left office" .= leftOffice , "Party" .= party , "Home State" .= homeState ] fromStrict :: B.ByteString -> BL.ByteString fromStrict s = BL.fromChunks [s] type BSHashMap a = HM.HashMap B.ByteString a instance NFData LazyCsv.CSVField where rnf LazyCsv.CSVField {} = () rnf LazyCsv.CSVFieldError {} = () instance NFData LazyCsv.CSVError where rnf (LazyCsv.IncorrectRow !_ !_ !_ xs) = rnf xs rnf (LazyCsv.BlankLine _ _ _ field) = rnf field rnf (LazyCsv.FieldError field) = rnf field rnf (LazyCsv.DuplicateHeader _ _ s) = rnf s rnf LazyCsv.NoData = () main :: IO () main = do !csvData <- fromStrict `fmap` B.readFile "benchmarks/presidents.csv" !csvDataN <- fromStrict `fmap` B.readFile "benchmarks/presidents_with_header.csv" let (Right !presidents) = V.toList <$> decodePresidents csvData (Right (!hdr, !presidentsNV)) = decodePresidentsN csvDataN !presidentsN = V.toList presidentsNV evaluate (rnf [presidents, presidentsN]) defaultMain [ bgroup "positional" [ bgroup "decode" [ bench "presidents/without conversion" $ whnf idDecode csvData , bench "presidents/with conversion" $ whnf decodePresidents csvData , bgroup "streaming" [ bench "presidents/without conversion" $ nf idDecodeS csvData , bench "presidents/with conversion" $ nf decodePresidentsS csvData ] ] , bgroup "encode" [ bench "presidents/with conversion" $ whnf encode presidents ] ] , bgroup "named" [ bgroup "decode" [ bench "presidents/without conversion" $ whnf idDecodeN csvDataN , bench "presidents/with conversion" $ whnf decodePresidentsN csvDataN ] , bgroup "encode" [ bench "presidents/with conversion" $ whnf (encodeByName hdr) presidentsN ] ] , bgroup "comparison" [ bench "lazy-csv" $ nf LazyCsv.parseCSV csvData ] ] where decodePresidents :: BL.ByteString -> Either String (Vector President) decodePresidents = decode NoHeader decodePresidentsN :: BL.ByteString -> Either String (Header, Vector President) decodePresidentsN = decodeByName decodePresidentsS :: BL.ByteString -> Streaming.Records President decodePresidentsS = Streaming.decode NoHeader idDecode :: BL.ByteString -> Either String (Vector (Vector B.ByteString)) idDecode = decode NoHeader idDecodeN :: BL.ByteString -> Either String (Header, Vector (BSHashMap B.ByteString)) idDecodeN = decodeByName idDecodeS :: BL.ByteString -> Streaming.Records (Vector B.ByteString) idDecodeS = Streaming.decode NoHeader cassava-0.4.4.0/Data/0000755000000000000000000000000012570632764012351 5ustar0000000000000000cassava-0.4.4.0/Data/Csv.hs0000644000000000000000000002313312570632764013442 0ustar0000000000000000-- | This module implements encoding and decoding of CSV data. The -- implementation is RFC 4180 compliant, with the following -- extensions: -- -- * Empty lines are ignored. -- -- * Non-escaped fields may contain any characters except -- double-quotes, commas, carriage returns, and newlines. -- -- * Escaped fields may contain any characters (but double-quotes -- need to be escaped). module Data.Csv ( -- * Usage examples -- $example -- ** Encoding and decoding custom data types -- $example-instance -- *** Index-based record conversion -- $example-indexed-instance -- *** Name-based record conversion -- $example-named-instance -- * Treating CSV data as opaque byte strings -- $generic-processing -- * Custom type conversions -- $customtypeconversions -- ** Dealing with bad data -- $baddata -- * Encoding and decoding -- $encoding HasHeader(..) , decode , decodeByName , Quoting(..) , encode , encodeByName , encodeDefaultOrderedByName , DefaultOrdered(..) -- ** Encoding and decoding options -- $options , DecodeOptions(..) , defaultDecodeOptions , decodeWith , decodeByNameWith , EncodeOptions(..) , defaultEncodeOptions , encodeWith , encodeByNameWith , encodeDefaultOrderedByNameWith -- * Core CSV types , Csv , Record , Field , Header , Name , NamedRecord -- * Type conversion -- $typeconversion -- ** Index-based record conversion -- $indexbased , FromRecord(..) , Parser , runParser , index , (.!) , unsafeIndex , ToRecord(..) , record , Only(..) -- ** Name-based record conversion -- $namebased , FromNamedRecord(..) , lookup , (.:) , ToNamedRecord(..) , namedRecord , namedField , (.=) , header -- ** Field conversion -- $fieldconversion , FromField(..) , ToField(..) ) where import Prelude hiding (lookup) import Data.Csv.Conversion import Data.Csv.Encoding import Data.Csv.Types -- $example -- -- Encoding standard Haskell types: -- -- > >>> encode [("John" :: Text, 27), ("Jane", 28)] -- > "John,27\r\nJane,28\r\n" -- -- Since string literals are overloaded we have to supply a type -- signature as the compiler couldn't deduce which string type (i.e. -- 'String' or 'Text') we want to use. In most cases type inference -- will infer the type from the context and you can omit type -- signatures. -- -- Decoding standard Haskell types: -- -- > >>> decode NoHeader "John,27\r\nJane,28\r\n" :: Either String (Vector (Text, Int)) -- > Right [("John",27),("Jane",28)] -- -- We pass 'NoHeader' as the first argument to indicate that the CSV -- input data isn't preceded by a header. -- -- In practice, the return type of 'decode' rarely needs to be given, -- as it can often be inferred from the context. -- $example-instance -- -- To encode and decode your own data types you need to defined -- instances of either 'ToRecord' and 'FromRecord' or 'ToNamedRecord' -- and 'FromNamedRecord'. The former is used for encoding/decoding -- using the column index and the latter using column name. -- -- There are two ways to to define these instances, either by manually -- defining them or by using GHC generics to derive them automatically. -- $example-indexed-instance -- -- Derived: -- -- > {-# LANGUAGE DeriveGeneric #-} -- > -- > data Person = Person { name :: !Text , salary :: !Int } -- > deriving Generic -- > -- > instance FromRecord Person -- > instance ToRecord Person -- -- Manually defined: -- -- > data Person = Person { name :: !Text , salary :: !Int } -- > -- > instance FromRecord Person where -- > parseRecord v -- > | length v == 2 = Person <$> v .! 0 <*> v .! 1 -- > | otherwise = mzero -- > instance ToRecord Person where -- > toRecord (Person name age) = record [ -- > toField name, toField age] -- -- We can now use e.g. 'encode' and 'decode' to encode and decode our -- data type. -- -- Encoding: -- -- > >>> encode [Person ("John" :: Text) 27] -- > "John,27\r\n" -- -- Decoding: -- -- > >>> decode NoHeader "John,27\r\n" :: Either String (Vector Person) -- > Right (["name","salary"],[Person {name = "John", salary = 27}]) -- -- $example-named-instance -- -- Derived: -- -- > {-# LANGUAGE DeriveGeneric #-} -- > -- > data Person = Person { name :: !Text , salary :: !Int } -- > deriving Generic -- > -- > instance FromNamedRecord Person -- > instance ToNamedRecord Person -- > instance DefaultOrdered Person -- -- Manually defined: -- -- > data Person = Person { name :: !Text , salary :: !Int } -- > -- > instance FromNamedRecord Person where -- > parseNamedRecord m = Person <$> m .: "name" <*> m .: "salary" -- > instance ToNamedRecord Person -- > toNamedRecord (Person name salary) = namedRecord [ -- > "name" .= name, "salary" .= salary] -- > instance DefaultOrdered Person -- > where headerOrder = header ["name", "salary"] -- -- We can now use e.g. 'encodeDefaultOrderedByName' (or 'encodeByName' -- with an explicit header order) and 'decodeByName' to encode and -- decode our data type. -- -- Encoding: -- -- > >>> encodeDefaultOrderedByName [Person ("John" :: Text) 27] -- > "name,salary\r\nJohn,27\r\n" -- -- Decoding: -- -- > >>> decodeByName "name,salary\r\nJohn,27\r\n" :: Either String (Header, Vector Person) -- > Right (["name","salary"],[Person {name = "John", salary = 27}]) -- -- $generic-processing -- -- Sometimes you might want to work with a CSV file which contents is -- unknown to you. For example, you might want remove the second -- column of a file without knowing anything about its content. To -- parse a CSV file to a generic representation, just convert each -- record to a @'Vector' 'ByteString'@ value, like so: -- -- > >>> decode NoHeader "John,27\r\nJane,28\r\n" :: Either String (Vector (Vector ByteString)) -- > Right [["John","27"],["Jane","28"]] -- -- As the example output above shows, all the fields are returned as -- uninterpreted 'ByteString' values. -- $customtypeconversions -- -- Most of the time the existing 'FromField' and 'ToField' instances -- do what you want. However, if you need to parse a different format -- (e.g. hex) but use a type (e.g. 'Int') for which there's already a -- 'FromField' instance, you need to use a @newtype@. Example: -- -- > newtype Hex = Hex Int -- > -- > parseHex :: ByteString -> Parser Int -- > parseHex = ... -- > -- > instance FromField Hex where -- > parseField s = Hex <$> parseHex s -- -- Other than giving an explicit type signature, you can pattern match -- on the @newtype@ constructor to indicate which type conversion you -- want to have the library use: -- -- > case decode NoHeader "0xff,0xaa\r\n0x11,0x22\r\n" of -- > Left err -> putStrLn err -- > Right v -> forM_ v $ \ (Hex val1, Hex val2) -> -- > print (val1, val2) -- -- If a field might be in one several different formats, you can use a -- newtype to normalize the result: -- -- > newtype HexOrDecimal = HexOrDecimal Int -- > -- > instance FromField DefaultToZero where -- > parseField s = case runParser (parseField s :: Parser Hex) of -- > Left err -> HexOrDecimal <$> parseField s -- Uses Int instance -- > Right n -> pure $ HexOrDecimal n -- -- You can use the unit type, @()@, to ignore a column. The -- 'parseField' method for @()@ doesn't look at the 'Field' and thus -- always decodes successfully. Note that it lacks a corresponding -- 'ToField' instance. Example: -- -- > case decode NoHeader "foo,1\r\nbar,22" of -- > Left err -> putStrLn err -- > Right v -> forM_ v $ \ ((), i) -> print (i :: Int) -- $baddata -- -- If your input might contain invalid fields, you can write a custom -- 'FromField' instance to deal with them. Example: -- -- > newtype DefaultToZero = DefaultToZero Int -- > -- > instance FromField DefaultToZero where -- > parseField s = case runParser (parseField s) of -- > Left err -> pure $ DefaultToZero 0 -- > Right n -> pure $ DefaultToZero n -- $encoding -- -- Encoding and decoding is a two step process. To encode a value, it -- is first converted to a generic representation, using either -- 'ToRecord' or 'ToNamedRecord'. The generic representation is then -- encoded as CSV data. To decode a value the process is reversed and -- either 'FromRecord' or 'FromNamedRecord' is used instead. Both -- these steps are combined in the 'encode' and 'decode' functions. -- $typeconversion -- -- There are two ways to convert CSV records to and from and -- user-defined data types: index-based conversion and name-based -- conversion. -- $indexbased -- -- Index-based conversion lets you convert CSV records to and from -- user-defined data types by referring to a field's position (its -- index) in the record. The first column in a CSV file is given index -- 0, the second index 1, and so on. -- $namebased -- -- Name-based conversion lets you convert CSV records to and from -- user-defined data types by referring to a field's name. The names -- of the fields are defined by the first line in the file, also known -- as the header. Name-based conversion is more robust to changes in -- the file structure e.g. to reording or addition of columns, but can -- be a bit slower. -- $options -- -- These functions can be used to control how data is encoded and -- decoded. For example, they can be used to encode data in a -- tab-separated format instead of in a comma-separated format. -- $fieldconversion -- -- The 'FromField' and 'ToField' classes define how to convert between -- 'Field's and values you care about (e.g. 'Int's). Most of the time -- you don't need to write your own instances as the standard ones -- cover most use cases. cassava-0.4.4.0/Data/Csv/0000755000000000000000000000000012570632764013104 5ustar0000000000000000cassava-0.4.4.0/Data/Csv/Builder.hs0000644000000000000000000000512312570632764015027 0ustar0000000000000000{-# LANGUAGE ScopedTypeVariables #-} -- | Low-level bytestring builders. Most users want to use the more -- type-safe 'Data.Csv.Incremental' module instead. module Data.Csv.Builder ( -- * Encoding single records and headers encodeHeader , encodeRecord , encodeNamedRecord , encodeDefaultOrderedNamedRecord -- ** Encoding options , encodeHeaderWith , encodeRecordWith , encodeNamedRecordWith , encodeDefaultOrderedNamedRecordWith ) where import Data.Monoid import Blaze.ByteString.Builder as Builder import Data.Csv.Conversion import qualified Data.Csv.Encoding as Encoding import Data.Csv.Encoding (EncodeOptions(..)) import Data.Csv.Types hiding (toNamedRecord) -- | Encode a header. encodeHeader :: Header -> Builder.Builder encodeHeader = encodeRecord -- | Encode a single record. encodeRecord :: ToRecord a => a -> Builder.Builder encodeRecord = encodeRecordWith Encoding.defaultEncodeOptions -- | Encode a single named record, given the field order. encodeNamedRecord :: ToNamedRecord a => Header -> a -> Builder.Builder encodeNamedRecord = encodeNamedRecordWith Encoding.defaultEncodeOptions -- | Encode a single named record, using the default field order. encodeDefaultOrderedNamedRecord :: (DefaultOrdered a, ToNamedRecord a) => a -> Builder.Builder encodeDefaultOrderedNamedRecord = encodeDefaultOrderedNamedRecordWith Encoding.defaultEncodeOptions -- | Like 'encodeHeader', but lets you customize how the CSV data is -- encoded. encodeHeaderWith :: EncodeOptions -> Header -> Builder.Builder encodeHeaderWith = encodeRecordWith -- | Like 'encodeRecord', but lets you customize how the CSV data is -- encoded. encodeRecordWith :: ToRecord a => EncodeOptions -> a -> Builder.Builder encodeRecordWith opts r = Encoding.encodeRecord (encQuoting opts) (encDelimiter opts) (toRecord r) <> Encoding.recordSep (encUseCrLf opts) -- | Like 'encodeNamedRecord', but lets you customize how the CSV data -- is encoded. encodeNamedRecordWith :: ToNamedRecord a => EncodeOptions -> Header -> a -> Builder.Builder encodeNamedRecordWith opts hdr nr = Encoding.encodeNamedRecord hdr (encQuoting opts) (encDelimiter opts) (toNamedRecord nr) <> Encoding.recordSep (encUseCrLf opts) -- | Like 'encodeDefaultOrderedNamedRecord', but lets you customize -- how the CSV data is encoded. encodeDefaultOrderedNamedRecordWith :: forall a. (DefaultOrdered a, ToNamedRecord a) => EncodeOptions -> a -> Builder.Builder encodeDefaultOrderedNamedRecordWith opts = encodeNamedRecordWith opts (headerOrder (undefined :: a)) cassava-0.4.4.0/Data/Csv/Conversion.hs0000644000000000000000000012430012570632764015565 0ustar0000000000000000{-# LANGUAGE BangPatterns, CPP, DefaultSignatures, FlexibleContexts, FlexibleInstances, KindSignatures, MultiParamTypeClasses, OverlappingInstances, OverloadedStrings, Rank2Types, ScopedTypeVariables, TypeOperators, UndecidableInstances #-} module Data.Csv.Conversion ( -- * Type conversion Only(..) , FromRecord(..) , FromNamedRecord(..) , ToNamedRecord(..) , DefaultOrdered(..) , FromField(..) , ToRecord(..) , ToField(..) -- * Parser , Parser , runParser -- * Accessors , index , (.!) , unsafeIndex , lookup , (.:) , namedField , (.=) , record , namedRecord , header ) where import Control.Applicative (Alternative, (<|>), empty) import Control.Monad (MonadPlus, mplus, mzero) import Data.Attoparsec.ByteString.Char8 (double) import qualified Data.Attoparsec.ByteString.Char8 as A8 import qualified Data.ByteString as B import qualified Data.ByteString.Char8 as B8 import qualified Data.ByteString.Lazy as L #if MIN_VERSION_bytestring(0,10,4) import qualified Data.ByteString.Short as SBS #endif import Data.Hashable (Hashable) import qualified Data.HashMap.Lazy as HM import Data.Int (Int8, Int16, Int32, Int64) import qualified Data.IntMap as IM import qualified Data.Map as M import qualified Data.Text as T import qualified Data.Text.Encoding as T import qualified Data.Text.Lazy as LT import qualified Data.Text.Lazy.Encoding as LT import Data.Vector (Vector, (!)) import qualified Data.Vector as V import qualified Data.Vector.Unboxed as U import Data.Word (Word8, Word16, Word32, Word64) import GHC.Float (double2Float) import GHC.Generics import Prelude hiding (lookup, takeWhile) import Data.Csv.Conversion.Internal import Data.Csv.Types #if !MIN_VERSION_base(4,8,0) import Control.Applicative (Applicative, (<$>), (<*>), (<*), (*>), pure) import Data.Monoid (Monoid, mappend, mempty) import Data.Traversable (traverse) import Data.Word (Word) #endif ------------------------------------------------------------------------ -- bytestring compatibility toStrict :: L.ByteString -> B.ByteString fromStrict :: B.ByteString -> L.ByteString #if MIN_VERSION_bytestring(0,10,0) toStrict = L.toStrict fromStrict = L.fromStrict #else toStrict = B.concat . L.toChunks fromStrict = L.fromChunks . (:[]) #endif {-# INLINE toStrict #-} {-# INLINE fromStrict #-} ------------------------------------------------------------------------ -- Type conversion ------------------------------------------------------------------------ -- Index-based conversion -- | A type that can be converted from a single CSV record, with the -- possibility of failure. -- -- When writing an instance, use 'empty', 'mzero', or 'fail' to make a -- conversion fail, e.g. if a 'Record' has the wrong number of -- columns. -- -- Given this example data: -- -- > John,56 -- > Jane,55 -- -- here's an example type and instance: -- -- > data Person = Person { name :: !Text, age :: !Int } -- > -- > instance FromRecord Person where -- > parseRecord v -- > | length v == 2 = Person <$> -- > v .! 0 <*> -- > v .! 1 -- > | otherwise = mzero class FromRecord a where parseRecord :: Record -> Parser a default parseRecord :: (Generic a, GFromRecord (Rep a)) => Record -> Parser a parseRecord r = to <$> gparseRecord r -- | Haskell lacks a single-element tuple type, so if you CSV data -- with just one column you can use the 'Only' type to represent a -- single-column result. newtype Only a = Only { fromOnly :: a } deriving (Eq, Ord, Read, Show) -- | A type that can be converted to a single CSV record. -- -- An example type and instance: -- -- > data Person = Person { name :: !Text, age :: !Int } -- > -- > instance ToRecord Person where -- > toRecord (Person name age) = record [ -- > toField name, toField age] -- -- Outputs data on this form: -- -- > John,56 -- > Jane,55 class ToRecord a where -- | Convert a value to a record. toRecord :: a -> Record default toRecord :: (Generic a, GToRecord (Rep a) Field) => a -> Record toRecord = V.fromList . gtoRecord . from instance FromField a => FromRecord (Only a) where parseRecord v | n == 1 = Only <$> unsafeIndex v 0 | otherwise = lengthMismatch 1 v where n = V.length v -- TODO: Check if we want all toRecord conversions to be stricter. instance ToField a => ToRecord (Only a) where toRecord = V.singleton . toField . fromOnly instance (FromField a, FromField b) => FromRecord (a, b) where parseRecord v | n == 2 = (,) <$> unsafeIndex v 0 <*> unsafeIndex v 1 | otherwise = lengthMismatch 2 v where n = V.length v instance (ToField a, ToField b) => ToRecord (a, b) where toRecord (a, b) = V.fromList [toField a, toField b] instance (FromField a, FromField b, FromField c) => FromRecord (a, b, c) where parseRecord v | n == 3 = (,,) <$> unsafeIndex v 0 <*> unsafeIndex v 1 <*> unsafeIndex v 2 | otherwise = lengthMismatch 3 v where n = V.length v instance (ToField a, ToField b, ToField c) => ToRecord (a, b, c) where toRecord (a, b, c) = V.fromList [toField a, toField b, toField c] instance (FromField a, FromField b, FromField c, FromField d) => FromRecord (a, b, c, d) where parseRecord v | n == 4 = (,,,) <$> unsafeIndex v 0 <*> unsafeIndex v 1 <*> unsafeIndex v 2 <*> unsafeIndex v 3 | otherwise = lengthMismatch 4 v where n = V.length v instance (ToField a, ToField b, ToField c, ToField d) => ToRecord (a, b, c, d) where toRecord (a, b, c, d) = V.fromList [ toField a, toField b, toField c, toField d] instance (FromField a, FromField b, FromField c, FromField d, FromField e) => FromRecord (a, b, c, d, e) where parseRecord v | n == 5 = (,,,,) <$> unsafeIndex v 0 <*> unsafeIndex v 1 <*> unsafeIndex v 2 <*> unsafeIndex v 3 <*> unsafeIndex v 4 | otherwise = lengthMismatch 5 v where n = V.length v instance (ToField a, ToField b, ToField c, ToField d, ToField e) => ToRecord (a, b, c, d, e) where toRecord (a, b, c, d, e) = V.fromList [ toField a, toField b, toField c, toField d, toField e] instance (FromField a, FromField b, FromField c, FromField d, FromField e, FromField f) => FromRecord (a, b, c, d, e, f) where parseRecord v | n == 6 = (,,,,,) <$> unsafeIndex v 0 <*> unsafeIndex v 1 <*> unsafeIndex v 2 <*> unsafeIndex v 3 <*> unsafeIndex v 4 <*> unsafeIndex v 5 | otherwise = lengthMismatch 6 v where n = V.length v instance (ToField a, ToField b, ToField c, ToField d, ToField e, ToField f) => ToRecord (a, b, c, d, e, f) where toRecord (a, b, c, d, e, f) = V.fromList [ toField a, toField b, toField c, toField d, toField e, toField f] instance (FromField a, FromField b, FromField c, FromField d, FromField e, FromField f, FromField g) => FromRecord (a, b, c, d, e, f, g) where parseRecord v | n == 7 = (,,,,,,) <$> unsafeIndex v 0 <*> unsafeIndex v 1 <*> unsafeIndex v 2 <*> unsafeIndex v 3 <*> unsafeIndex v 4 <*> unsafeIndex v 5 <*> unsafeIndex v 6 | otherwise = lengthMismatch 7 v where n = V.length v instance (ToField a, ToField b, ToField c, ToField d, ToField e, ToField f, ToField g) => ToRecord (a, b, c, d, e, f, g) where toRecord (a, b, c, d, e, f, g) = V.fromList [ toField a, toField b, toField c, toField d, toField e, toField f, toField g] instance (FromField a, FromField b, FromField c, FromField d, FromField e, FromField f, FromField g, FromField h) => FromRecord (a, b, c, d, e, f, g, h) where parseRecord v | n == 8 = (,,,,,,,) <$> unsafeIndex v 0 <*> unsafeIndex v 1 <*> unsafeIndex v 2 <*> unsafeIndex v 3 <*> unsafeIndex v 4 <*> unsafeIndex v 5 <*> unsafeIndex v 6 <*> unsafeIndex v 7 | otherwise = lengthMismatch 8 v where n = V.length v instance (ToField a, ToField b, ToField c, ToField d, ToField e, ToField f, ToField g, ToField h) => ToRecord (a, b, c, d, e, f, g, h) where toRecord (a, b, c, d, e, f, g, h) = V.fromList [ toField a, toField b, toField c, toField d, toField e, toField f, toField g, toField h] instance (FromField a, FromField b, FromField c, FromField d, FromField e, FromField f, FromField g, FromField h, FromField i) => FromRecord (a, b, c, d, e, f, g, h, i) where parseRecord v | n == 9 = (,,,,,,,,) <$> unsafeIndex v 0 <*> unsafeIndex v 1 <*> unsafeIndex v 2 <*> unsafeIndex v 3 <*> unsafeIndex v 4 <*> unsafeIndex v 5 <*> unsafeIndex v 6 <*> unsafeIndex v 7 <*> unsafeIndex v 8 | otherwise = lengthMismatch 9 v where n = V.length v instance (ToField a, ToField b, ToField c, ToField d, ToField e, ToField f, ToField g, ToField h, ToField i) => ToRecord (a, b, c, d, e, f, g, h, i) where toRecord (a, b, c, d, e, f, g, h, i) = V.fromList [ toField a, toField b, toField c, toField d, toField e, toField f, toField g, toField h, toField i] instance (FromField a, FromField b, FromField c, FromField d, FromField e, FromField f, FromField g, FromField h, FromField i, FromField j) => FromRecord (a, b, c, d, e, f, g, h, i, j) where parseRecord v | n == 10 = (,,,,,,,,,) <$> unsafeIndex v 0 <*> unsafeIndex v 1 <*> unsafeIndex v 2 <*> unsafeIndex v 3 <*> unsafeIndex v 4 <*> unsafeIndex v 5 <*> unsafeIndex v 6 <*> unsafeIndex v 7 <*> unsafeIndex v 8 <*> unsafeIndex v 9 | otherwise = lengthMismatch 10 v where n = V.length v instance (ToField a, ToField b, ToField c, ToField d, ToField e, ToField f, ToField g, ToField h, ToField i, ToField j) => ToRecord (a, b, c, d, e, f, g, h, i, j) where toRecord (a, b, c, d, e, f, g, h, i, j) = V.fromList [ toField a, toField b, toField c, toField d, toField e, toField f, toField g, toField h, toField i, toField j] instance (FromField a, FromField b, FromField c, FromField d, FromField e, FromField f, FromField g, FromField h, FromField i, FromField j, FromField k) => FromRecord (a, b, c, d, e, f, g, h, i, j, k) where parseRecord v | n == 11 = (,,,,,,,,,,) <$> unsafeIndex v 0 <*> unsafeIndex v 1 <*> unsafeIndex v 2 <*> unsafeIndex v 3 <*> unsafeIndex v 4 <*> unsafeIndex v 5 <*> unsafeIndex v 6 <*> unsafeIndex v 7 <*> unsafeIndex v 8 <*> unsafeIndex v 9 <*> unsafeIndex v 10 | otherwise = lengthMismatch 11 v where n = V.length v instance (ToField a, ToField b, ToField c, ToField d, ToField e, ToField f, ToField g, ToField h, ToField i, ToField j, ToField k) => ToRecord (a, b, c, d, e, f, g, h, i, j, k) where toRecord (a, b, c, d, e, f, g, h, i, j, k) = V.fromList [ toField a, toField b, toField c, toField d, toField e, toField f, toField g, toField h, toField i, toField j, toField k] instance (FromField a, FromField b, FromField c, FromField d, FromField e, FromField f, FromField g, FromField h, FromField i, FromField j, FromField k, FromField l) => FromRecord (a, b, c, d, e, f, g, h, i, j, k, l) where parseRecord v | n == 12 = (,,,,,,,,,,,) <$> unsafeIndex v 0 <*> unsafeIndex v 1 <*> unsafeIndex v 2 <*> unsafeIndex v 3 <*> unsafeIndex v 4 <*> unsafeIndex v 5 <*> unsafeIndex v 6 <*> unsafeIndex v 7 <*> unsafeIndex v 8 <*> unsafeIndex v 9 <*> unsafeIndex v 10 <*> unsafeIndex v 11 | otherwise = lengthMismatch 12 v where n = V.length v instance (ToField a, ToField b, ToField c, ToField d, ToField e, ToField f, ToField g, ToField h, ToField i, ToField j, ToField k, ToField l) => ToRecord (a, b, c, d, e, f, g, h, i, j, k, l) where toRecord (a, b, c, d, e, f, g, h, i, j, k, l) = V.fromList [ toField a, toField b, toField c, toField d, toField e, toField f, toField g, toField h, toField i, toField j, toField k, toField l] instance (FromField a, FromField b, FromField c, FromField d, FromField e, FromField f, FromField g, FromField h, FromField i, FromField j, FromField k, FromField l, FromField m) => FromRecord (a, b, c, d, e, f, g, h, i, j, k, l, m) where parseRecord v | n == 13 = (,,,,,,,,,,,,) <$> unsafeIndex v 0 <*> unsafeIndex v 1 <*> unsafeIndex v 2 <*> unsafeIndex v 3 <*> unsafeIndex v 4 <*> unsafeIndex v 5 <*> unsafeIndex v 6 <*> unsafeIndex v 7 <*> unsafeIndex v 8 <*> unsafeIndex v 9 <*> unsafeIndex v 10 <*> unsafeIndex v 11 <*> unsafeIndex v 12 | otherwise = lengthMismatch 13 v where n = V.length v instance (ToField a, ToField b, ToField c, ToField d, ToField e, ToField f, ToField g, ToField h, ToField i, ToField j, ToField k, ToField l, ToField m) => ToRecord (a, b, c, d, e, f, g, h, i, j, k, l, m) where toRecord (a, b, c, d, e, f, g, h, i, j, k, l, m) = V.fromList [ toField a, toField b, toField c, toField d, toField e, toField f, toField g, toField h, toField i, toField j, toField k, toField l, toField m] instance (FromField a, FromField b, FromField c, FromField d, FromField e, FromField f, FromField g, FromField h, FromField i, FromField j, FromField k, FromField l, FromField m, FromField n) => FromRecord (a, b, c, d, e, f, g, h, i, j, k, l, m, n) where parseRecord v | n == 14 = (,,,,,,,,,,,,,) <$> unsafeIndex v 0 <*> unsafeIndex v 1 <*> unsafeIndex v 2 <*> unsafeIndex v 3 <*> unsafeIndex v 4 <*> unsafeIndex v 5 <*> unsafeIndex v 6 <*> unsafeIndex v 7 <*> unsafeIndex v 8 <*> unsafeIndex v 9 <*> unsafeIndex v 10 <*> unsafeIndex v 11 <*> unsafeIndex v 12 <*> unsafeIndex v 13 | otherwise = lengthMismatch 14 v where n = V.length v instance (ToField a, ToField b, ToField c, ToField d, ToField e, ToField f, ToField g, ToField h, ToField i, ToField j, ToField k, ToField l, ToField m, ToField n) => ToRecord (a, b, c, d, e, f, g, h, i, j, k, l, m, n) where toRecord (a, b, c, d, e, f, g, h, i, j, k, l, m, n) = V.fromList [ toField a, toField b, toField c, toField d, toField e, toField f, toField g, toField h, toField i, toField j, toField k, toField l, toField m, toField n] instance (FromField a, FromField b, FromField c, FromField d, FromField e, FromField f, FromField g, FromField h, FromField i, FromField j, FromField k, FromField l, FromField m, FromField n, FromField o) => FromRecord (a, b, c, d, e, f, g, h, i, j, k, l, m, n, o) where parseRecord v | n == 15 = (,,,,,,,,,,,,,,) <$> unsafeIndex v 0 <*> unsafeIndex v 1 <*> unsafeIndex v 2 <*> unsafeIndex v 3 <*> unsafeIndex v 4 <*> unsafeIndex v 5 <*> unsafeIndex v 6 <*> unsafeIndex v 7 <*> unsafeIndex v 8 <*> unsafeIndex v 9 <*> unsafeIndex v 10 <*> unsafeIndex v 11 <*> unsafeIndex v 12 <*> unsafeIndex v 13 <*> unsafeIndex v 14 | otherwise = lengthMismatch 15 v where n = V.length v instance (ToField a, ToField b, ToField c, ToField d, ToField e, ToField f, ToField g, ToField h, ToField i, ToField j, ToField k, ToField l, ToField m, ToField n, ToField o) => ToRecord (a, b, c, d, e, f, g, h, i, j, k, l, m, n, o) where toRecord (a, b, c, d, e, f, g, h, i, j, k, l, m, n, o) = V.fromList [ toField a, toField b, toField c, toField d, toField e, toField f, toField g, toField h, toField i, toField j, toField k, toField l, toField m, toField n, toField o] lengthMismatch :: Int -> Record -> Parser a lengthMismatch expected v = fail $ "cannot unpack array of length " ++ show n ++ " into a " ++ desired ++ ". Input record: " ++ show v where n = V.length v desired | expected == 1 = "Only" | expected == 2 = "pair" | otherwise = show expected ++ "-tuple" instance FromField a => FromRecord [a] where parseRecord = traverse parseField . V.toList instance ToField a => ToRecord [a] where toRecord = V.fromList . map toField instance FromField a => FromRecord (V.Vector a) where parseRecord = traverse parseField instance ToField a => ToRecord (Vector a) where toRecord = V.map toField instance (FromField a, U.Unbox a) => FromRecord (U.Vector a) where parseRecord = fmap U.convert . traverse parseField instance (ToField a, U.Unbox a) => ToRecord (U.Vector a) where toRecord = V.map toField . U.convert ------------------------------------------------------------------------ -- Name-based conversion -- | A type that can be converted from a single CSV record, with the -- possibility of failure. -- -- When writing an instance, use 'empty', 'mzero', or 'fail' to make a -- conversion fail, e.g. if a 'Record' has the wrong number of -- columns. -- -- Given this example data: -- -- > name,age -- > John,56 -- > Jane,55 -- -- here's an example type and instance: -- -- > {-# LANGUAGE OverloadedStrings #-} -- > -- > data Person = Person { name :: !Text, age :: !Int } -- > -- > instance FromNamedRecord Person where -- > parseNamedRecord m = Person <$> -- > m .: "name" <*> -- > m .: "age" -- -- Note the use of the @OverloadedStrings@ language extension which -- enables 'B8.ByteString' values to be written as string literals. class FromNamedRecord a where parseNamedRecord :: NamedRecord -> Parser a default parseNamedRecord :: (Generic a, GFromNamedRecord (Rep a)) => NamedRecord -> Parser a parseNamedRecord r = to <$> gparseNamedRecord r -- | A type that can be converted to a single CSV record. -- -- An example type and instance: -- -- > data Person = Person { name :: !Text, age :: !Int } -- > -- > instance ToNamedRecord Person where -- > toNamedRecord (Person name age) = namedRecord [ -- > "name" .= name, "age" .= age] class ToNamedRecord a where -- | Convert a value to a named record. toNamedRecord :: a -> NamedRecord default toNamedRecord :: (Generic a, GToRecord (Rep a) (B.ByteString, B.ByteString)) => a -> NamedRecord toNamedRecord = namedRecord . gtoRecord . from -- | A type that has a default field order when converted to CSV. This -- class lets you specify how to get the headers to use for a record -- type that's an instance of 'ToNamedRecord'. -- -- To derive an instance, the type is required to only have one -- constructor and that constructor must have named fields (also known -- as selectors) for all fields. -- -- Right: @data Foo = Foo { foo :: !Int }@ -- -- Wrong: @data Bar = Bar Int@ -- -- If you try to derive an instance using GHC generics and your type -- doesn't have named fields, you will get an error along the lines -- of: -- -- > :9:10: -- > No instance for (DefaultOrdered (M1 S NoSelector (K1 R Char) ())) -- > arising from a use of ‘Data.Csv.Conversion.$gdmheader’ -- > In the expression: Data.Csv.Conversion.$gdmheader -- > In an equation for ‘header’: -- > header = Data.Csv.Conversion.$gdmheader -- > In the instance declaration for ‘DefaultOrdered Foo’ -- class DefaultOrdered a where -- | The header order for this record. Should include the names -- used in the 'NamedRecord' returned by 'toNamedRecord'. Pass -- 'undefined' as the argument, together with a type annotation -- e.g. @'headerOrder' ('undefined' :: MyRecord)@. headerOrder :: a -> Header -- TODO: Add Generic implementation default headerOrder :: (Generic a, GToNamedRecordHeader (Rep a)) => a -> Header headerOrder = V.fromList. gtoNamedRecordHeader . from instance (FromField a, FromField b, Ord a) => FromNamedRecord (M.Map a b) where parseNamedRecord m = M.fromList <$> (traverse parseBoth $ HM.toList m) instance (ToField a, ToField b, Ord a) => ToNamedRecord (M.Map a b) where toNamedRecord = HM.fromList . map (\ (k, v) -> (toField k, toField v)) . M.toList instance (Eq a, FromField a, FromField b, Hashable a) => FromNamedRecord (HM.HashMap a b) where parseNamedRecord m = HM.fromList <$> (traverse parseBoth $ HM.toList m) instance (Eq a, ToField a, ToField b, Hashable a) => ToNamedRecord (HM.HashMap a b) where toNamedRecord = HM.fromList . map (\ (k, v) -> (toField k, toField v)) . HM.toList parseBoth :: (FromField a, FromField b) => (Field, Field) -> Parser (a, b) parseBoth (k, v) = (,) <$> parseField k <*> parseField v ------------------------------------------------------------------------ -- Individual field conversion -- | A type that can be converted from a single CSV field, with the -- possibility of failure. -- -- When writing an instance, use 'empty', 'mzero', or 'fail' to make a -- conversion fail, e.g. if a 'Field' can't be converted to the given -- type. -- -- Example type and instance: -- -- > {-# LANGUAGE OverloadedStrings #-} -- > -- > data Color = Red | Green | Blue -- > -- > instance FromField Color where -- > parseField s -- > | s == "R" = pure Red -- > | s == "G" = pure Green -- > | s == "B" = pure Blue -- > | otherwise = mzero class FromField a where parseField :: Field -> Parser a -- | A type that can be converted to a single CSV field. -- -- Example type and instance: -- -- > {-# LANGUAGE OverloadedStrings #-} -- > -- > data Color = Red | Green | Blue -- > -- > instance ToField Color where -- > toField Red = "R" -- > toField Green = "G" -- > toField Blue = "B" class ToField a where toField :: a -> Field -- | 'Nothing' if the 'Field' is 'B.empty', 'Just' otherwise. instance FromField a => FromField (Maybe a) where parseField s | B.null s = pure Nothing | otherwise = Just <$> parseField s {-# INLINE parseField #-} -- | 'Nothing' is encoded as an 'B.empty' field. instance ToField a => ToField (Maybe a) where toField = maybe B.empty toField {-# INLINE toField #-} -- | @'Left' field@ if conversion failed, 'Right' otherwise. instance FromField a => FromField (Either Field a) where parseField s = case runParser (parseField s) of Left _ -> pure $ Left s Right a -> pure $ Right a {-# INLINE parseField #-} -- | Ignores the 'Field'. Always succeeds. instance FromField () where parseField _ = pure () {-# INLINE parseField #-} -- | Assumes UTF-8 encoding. instance FromField Char where parseField s = case T.decodeUtf8' s of Left e -> fail $ show e Right t | T.compareLength t 1 == EQ -> pure (T.head t) | otherwise -> typeError "Char" s Nothing {-# INLINE parseField #-} -- | Uses UTF-8 encoding. instance ToField Char where toField = toField . T.encodeUtf8 . T.singleton {-# INLINE toField #-} -- | Accepts same syntax as 'rational'. Ignores whitespace. instance FromField Double where parseField = parseDouble {-# INLINE parseField #-} -- | Uses decimal notation or scientific notation, depending on the -- number. instance ToField Double where toField = realFloat {-# INLINE toField #-} -- | Accepts same syntax as 'rational'. Ignores whitespace. instance FromField Float where parseField s = double2Float <$> parseDouble s {-# INLINE parseField #-} -- | Uses decimal notation or scientific notation, depending on the -- number. instance ToField Float where toField = realFloat {-# INLINE toField #-} parseDouble :: B.ByteString -> Parser Double parseDouble s = case parseOnly (ws *> double <* ws) s of Left err -> typeError "Double" s (Just err) Right n -> pure n {-# INLINE parseDouble #-} -- | Accepts a signed decimal number. Ignores whitespace. instance FromField Int where parseField = parseSigned "Int" {-# INLINE parseField #-} -- | Uses decimal encoding with optional sign. instance ToField Int where toField = decimal {-# INLINE toField #-} -- | Accepts a signed decimal number. Ignores whitespace. instance FromField Integer where parseField = parseSigned "Integer" {-# INLINE parseField #-} -- | Uses decimal encoding with optional sign. instance ToField Integer where toField = decimal {-# INLINE toField #-} -- | Accepts a signed decimal number. Ignores whitespace. instance FromField Int8 where parseField = parseSigned "Int8" {-# INLINE parseField #-} -- | Uses decimal encoding with optional sign. instance ToField Int8 where toField = decimal {-# INLINE toField #-} -- | Accepts a signed decimal number. Ignores whitespace. instance FromField Int16 where parseField = parseSigned "Int16" {-# INLINE parseField #-} -- | Uses decimal encoding with optional sign. instance ToField Int16 where toField = decimal {-# INLINE toField #-} -- | Accepts a signed decimal number. Ignores whitespace. instance FromField Int32 where parseField = parseSigned "Int32" {-# INLINE parseField #-} -- | Uses decimal encoding with optional sign. instance ToField Int32 where toField = decimal {-# INLINE toField #-} -- | Accepts a signed decimal number. Ignores whitespace. instance FromField Int64 where parseField = parseSigned "Int64" {-# INLINE parseField #-} -- | Uses decimal encoding with optional sign. instance ToField Int64 where toField = decimal {-# INLINE toField #-} -- | Accepts an unsigned decimal number. Ignores whitespace. instance FromField Word where parseField = parseUnsigned "Word" {-# INLINE parseField #-} -- | Uses decimal encoding. instance ToField Word where toField = decimal {-# INLINE toField #-} -- | Accepts an unsigned decimal number. Ignores whitespace. instance FromField Word8 where parseField = parseUnsigned "Word8" {-# INLINE parseField #-} -- | Uses decimal encoding. instance ToField Word8 where toField = decimal {-# INLINE toField #-} -- | Accepts an unsigned decimal number. Ignores whitespace. instance FromField Word16 where parseField = parseUnsigned "Word16" {-# INLINE parseField #-} -- | Uses decimal encoding. instance ToField Word16 where toField = decimal {-# INLINE toField #-} -- | Accepts an unsigned decimal number. Ignores whitespace. instance FromField Word32 where parseField = parseUnsigned "Word32" {-# INLINE parseField #-} -- | Uses decimal encoding. instance ToField Word32 where toField = decimal {-# INLINE toField #-} -- | Accepts an unsigned decimal number. Ignores whitespace. instance FromField Word64 where parseField = parseUnsigned "Word64" {-# INLINE parseField #-} -- | Uses decimal encoding. instance ToField Word64 where toField = decimal {-# INLINE toField #-} instance FromField B.ByteString where parseField = pure {-# INLINE parseField #-} instance ToField B.ByteString where toField = id {-# INLINE toField #-} instance FromField L.ByteString where parseField = pure . fromStrict {-# INLINE parseField #-} instance ToField L.ByteString where toField = toStrict {-# INLINE toField #-} #if MIN_VERSION_bytestring(0,10,4) instance FromField SBS.ShortByteString where parseField = pure . SBS.toShort {-# INLINE parseField #-} instance ToField SBS.ShortByteString where toField = SBS.fromShort {-# INLINE toField #-} #endif -- | Assumes UTF-8 encoding. Fails on invalid byte sequences. instance FromField T.Text where parseField = either (fail . show) pure . T.decodeUtf8' {-# INLINE parseField #-} -- | Uses UTF-8 encoding. instance ToField T.Text where toField = toField . T.encodeUtf8 {-# INLINE toField #-} -- | Assumes UTF-8 encoding. Fails on invalid byte sequences. instance FromField LT.Text where parseField = either (fail . show) (pure . LT.fromStrict) . T.decodeUtf8' {-# INLINE parseField #-} -- | Uses UTF-8 encoding. instance ToField LT.Text where toField = toField . toStrict . LT.encodeUtf8 {-# INLINE toField #-} -- | Assumes UTF-8 encoding. Fails on invalid byte sequences. instance FromField [Char] where parseField = fmap T.unpack . parseField {-# INLINE parseField #-} -- | Uses UTF-8 encoding. instance ToField [Char] where toField = toField . T.pack {-# INLINE toField #-} parseSigned :: (Integral a, Num a) => String -> B.ByteString -> Parser a parseSigned typ s = case parseOnly (ws *> A8.signed A8.decimal <* ws) s of Left err -> typeError typ s (Just err) Right n -> pure n {-# INLINE parseSigned #-} parseUnsigned :: Integral a => String -> B.ByteString -> Parser a parseUnsigned typ s = case parseOnly (ws *> A8.decimal <* ws) s of Left err -> typeError typ s (Just err) Right n -> pure n {-# INLINE parseUnsigned #-} ws :: A8.Parser () ws = A8.skipWhile (\c -> c == ' ' || c == '\t') ------------------------------------------------------------------------ -- Custom version of attoparsec @parseOnly@ function which fails if -- there is leftover content after parsing a field. parseOnly :: A8.Parser a -> B.ByteString -> Either String a parseOnly parser input = go (A8.parse parser input) where go (A8.Fail _ _ err) = Left err go (A8.Partial f) = go2 (f B.empty) go (A8.Done leftover result) | B.null leftover = Right result | otherwise = Left ("incomplete field parse, leftover: " ++ show (B.unpack leftover)) go2 (A8.Fail _ _ err) = Left err go2 (A8.Partial _) = error "parseOnly: impossible error!" go2 (A8.Done leftover result) | B.null leftover = Right result | otherwise = Left ("incomplete field parse, leftover: " ++ show (B.unpack leftover)) {-# INLINE parseOnly #-} typeError :: String -> B.ByteString -> Maybe String -> Parser a typeError typ s mmsg = fail $ "expected " ++ typ ++ ", got " ++ show (B8.unpack s) ++ cause where cause = case mmsg of Just msg -> " (" ++ msg ++ ")" Nothing -> "" ------------------------------------------------------------------------ -- Constructors and accessors -- | Retrieve the /n/th field in the given record. The result is -- 'empty' if the value cannot be converted to the desired type. -- Raises an exception if the index is out of bounds. -- -- 'index' is a simple convenience function that is equivalent to -- @'parseField' (v '!' idx)@. If you're certain that the index is not -- out of bounds, using 'unsafeIndex' is somewhat faster. index :: FromField a => Record -> Int -> Parser a index v idx = parseField (v ! idx) {-# INLINE index #-} -- | Alias for 'index'. (.!) :: FromField a => Record -> Int -> Parser a (.!) = index {-# INLINE (.!) #-} infixl 9 .! -- | Like 'index' but without bounds checking. unsafeIndex :: FromField a => Record -> Int -> Parser a unsafeIndex v idx = parseField (V.unsafeIndex v idx) {-# INLINE unsafeIndex #-} -- | Retrieve a field in the given record by name. The result is -- 'empty' if the field is missing or if the value cannot be converted -- to the desired type. lookup :: FromField a => NamedRecord -> B.ByteString -> Parser a lookup m name = maybe (fail err) parseField $ HM.lookup name m where err = "no field named " ++ show (B8.unpack name) {-# INLINE lookup #-} -- | Alias for 'lookup'. (.:) :: FromField a => NamedRecord -> B.ByteString -> Parser a (.:) = lookup {-# INLINE (.:) #-} -- | Construct a pair from a name and a value. For use with -- 'namedRecord'. namedField :: ToField a => B.ByteString -> a -> (B.ByteString, B.ByteString) namedField name val = (name, toField val) {-# INLINE namedField #-} -- | Alias for 'namedField'. (.=) :: ToField a => B.ByteString -> a -> (B.ByteString, B.ByteString) (.=) = namedField {-# INLINE (.=) #-} -- | Construct a record from a list of 'B.ByteString's. Use 'toField' -- to convert values to 'B.ByteString's for use with 'record'. record :: [B.ByteString] -> Record record = V.fromList -- | Construct a named record from a list of name-value 'B.ByteString' -- pairs. Use '.=' to construct such a pair from a name and a value. namedRecord :: [(B.ByteString, B.ByteString)] -> NamedRecord namedRecord = HM.fromList -- | Construct a header from a list of 'B.ByteString's. header :: [B.ByteString] -> Header header = V.fromList ------------------------------------------------------------------------ -- Parser for converting records to data types -- | Failure continuation. type Failure f r = String -> f r -- | Success continuation. type Success a f r = a -> f r -- | Conversion of a field to a value might fail e.g. if the field is -- malformed. This possibility is captured by the 'Parser' type, which -- lets you compose several field conversions together in such a way -- that if any of them fail, the whole record conversion fails. newtype Parser a = Parser { unParser :: forall f r. Failure f r -> Success a f r -> f r } instance Monad Parser where m >>= g = Parser $ \kf ks -> let ks' a = unParser (g a) kf ks in unParser m kf ks' {-# INLINE (>>=) #-} return a = Parser $ \_kf ks -> ks a {-# INLINE return #-} fail msg = Parser $ \kf _ks -> kf msg {-# INLINE fail #-} instance Functor Parser where fmap f m = Parser $ \kf ks -> let ks' a = ks (f a) in unParser m kf ks' {-# INLINE fmap #-} instance Applicative Parser where pure = return {-# INLINE pure #-} (<*>) = apP {-# INLINE (<*>) #-} instance Alternative Parser where empty = fail "empty" {-# INLINE empty #-} (<|>) = mplus {-# INLINE (<|>) #-} instance MonadPlus Parser where mzero = fail "mzero" {-# INLINE mzero #-} mplus a b = Parser $ \kf ks -> let kf' _ = unParser b kf ks in unParser a kf' ks {-# INLINE mplus #-} instance Monoid (Parser a) where mempty = fail "mempty" {-# INLINE mempty #-} mappend = mplus {-# INLINE mappend #-} apP :: Parser (a -> b) -> Parser a -> Parser b apP d e = do b <- d a <- e return (b a) {-# INLINE apP #-} -- | Run a 'Parser', returning either @'Left' errMsg@ or @'Right' -- result@. Forces the value in the 'Left' or 'Right' constructors to -- weak head normal form. -- -- You most likely won't need to use this function directly, but it's -- included for completeness. runParser :: Parser a -> Either String a runParser p = unParser p left right where left !errMsg = Left errMsg right !x = Right x {-# INLINE runParser #-} ------------------------------------------------------------------------ -- Generics class GFromRecord f where gparseRecord :: Record -> Parser (f p) instance GFromRecordSum f Record => GFromRecord (M1 i n f) where gparseRecord v = case (IM.lookup n gparseRecordSum) of Nothing -> lengthMismatch n v Just p -> M1 <$> p v where n = V.length v class GFromNamedRecord f where gparseNamedRecord :: NamedRecord -> Parser (f p) instance GFromRecordSum f NamedRecord => GFromNamedRecord (M1 i n f) where gparseNamedRecord v = foldr (\f p -> p <|> M1 <$> f v) empty (IM.elems gparseRecordSum) class GFromRecordSum f r where gparseRecordSum :: IM.IntMap (r -> Parser (f p)) instance (GFromRecordSum a r, GFromRecordSum b r) => GFromRecordSum (a :+: b) r where gparseRecordSum = IM.unionWith (\a b r -> a r <|> b r) (fmap (L1 <$>) <$> gparseRecordSum) (fmap (R1 <$>) <$> gparseRecordSum) instance GFromRecordProd f r => GFromRecordSum (M1 i n f) r where gparseRecordSum = IM.singleton n (fmap (M1 <$>) f) where (n, f) = gparseRecordProd 0 class GFromRecordProd f r where gparseRecordProd :: Int -> (Int, r -> Parser (f p)) instance GFromRecordProd U1 r where gparseRecordProd n = (n, const (pure U1)) instance (GFromRecordProd a r, GFromRecordProd b r) => GFromRecordProd (a :*: b) r where gparseRecordProd n0 = (n2, f) where f r = (:*:) <$> fa r <*> fb r (n1, fa) = gparseRecordProd n0 (n2, fb) = gparseRecordProd n1 instance GFromRecordProd f Record => GFromRecordProd (M1 i n f) Record where gparseRecordProd n = fmap (M1 <$>) <$> gparseRecordProd n instance FromField a => GFromRecordProd (K1 i a) Record where gparseRecordProd n = (n + 1, \v -> K1 <$> parseField (V.unsafeIndex v n)) data Proxy s (f :: * -> *) a = Proxy instance (FromField a, Selector s) => GFromRecordProd (M1 S s (K1 i a)) NamedRecord where gparseRecordProd n = (n + 1, \v -> (M1 . K1) <$> v .: name) where name = T.encodeUtf8 (T.pack (selName (Proxy :: Proxy s f a))) class GToRecord a f where gtoRecord :: a p -> [f] instance GToRecord U1 f where gtoRecord U1 = [] instance (GToRecord a f, GToRecord b f) => GToRecord (a :*: b) f where gtoRecord (a :*: b) = gtoRecord a ++ gtoRecord b instance (GToRecord a f, GToRecord b f) => GToRecord (a :+: b) f where gtoRecord (L1 a) = gtoRecord a gtoRecord (R1 b) = gtoRecord b instance GToRecord a f => GToRecord (M1 D c a) f where gtoRecord (M1 a) = gtoRecord a instance GToRecord a f => GToRecord (M1 C c a) f where gtoRecord (M1 a) = gtoRecord a instance GToRecord a Field => GToRecord (M1 S c a) Field where gtoRecord (M1 a) = gtoRecord a instance ToField a => GToRecord (K1 i a) Field where gtoRecord (K1 a) = [toField a] instance (ToField a, Selector s) => GToRecord (M1 S s (K1 i a)) (B.ByteString, B.ByteString) where gtoRecord m@(M1 (K1 a)) = [T.encodeUtf8 (T.pack (selName m)) .= toField a] -- We statically fail on sum types and product types without selectors -- (field names). class GToNamedRecordHeader a where gtoNamedRecordHeader :: a p -> [Name] instance GToNamedRecordHeader U1 where gtoNamedRecordHeader _ = [] instance (GToNamedRecordHeader a, GToNamedRecordHeader b) => GToNamedRecordHeader (a :*: b) where gtoNamedRecordHeader _ = gtoNamedRecordHeader (undefined :: a p) ++ gtoNamedRecordHeader (undefined :: b p) instance GToNamedRecordHeader a => GToNamedRecordHeader (M1 D c a) where gtoNamedRecordHeader _ = gtoNamedRecordHeader (undefined :: a p) instance GToNamedRecordHeader a => GToNamedRecordHeader (M1 C c a) where gtoNamedRecordHeader _ = gtoNamedRecordHeader (undefined :: a p) -- | Instance to ensure that you cannot derive DefaultOrdered for -- constructors without selectors. instance DefaultOrdered (M1 S NoSelector a ()) => GToNamedRecordHeader (M1 S NoSelector a) where gtoNamedRecordHeader _ = error "You cannot derive DefaultOrdered for constructors without selectors." instance Selector s => GToNamedRecordHeader (M1 S s a) where gtoNamedRecordHeader m | null name = error "Cannot derive DefaultOrdered for constructors without selectors" | otherwise = [B8.pack (selName m)] where name = selName m cassava-0.4.4.0/Data/Csv/Encoding.hs0000644000000000000000000003425312570632764015175 0ustar0000000000000000{-# LANGUAGE BangPatterns, CPP, OverloadedStrings, ScopedTypeVariables #-} -- Module: Data.Csv.Encoding -- Copyright: (c) 2011 MailRank, Inc. -- (c) 2012 Johan Tibell -- License: BSD3 -- Maintainer: Johan Tibell -- Stability: experimental -- Portability: portable -- -- Encoding and decoding of data types into CSV. module Data.Csv.Encoding ( -- * Encoding and decoding HasHeader(..) , decode , decodeByName , Quoting(..) , encode , encodeByName , encodeDefaultOrderedByName -- ** Encoding and decoding options , DecodeOptions(..) , defaultDecodeOptions , decodeWith , decodeByNameWith , EncodeOptions(..) , defaultEncodeOptions , encodeWith , encodeByNameWith , encodeDefaultOrderedByNameWith -- ** Encoding and decoding single records , encodeRecord , encodeNamedRecord , recordSep ) where import Blaze.ByteString.Builder (Builder, fromByteString, fromWord8, toLazyByteString, toByteString) import Blaze.ByteString.Builder.Char8 (fromString) import Control.Applicative ((<|>), optional) import Data.Attoparsec.ByteString.Char8 (endOfInput) import qualified Data.Attoparsec.ByteString.Lazy as AL import qualified Data.ByteString as B import qualified Data.ByteString.Char8 as B8 import qualified Data.ByteString.Lazy as L import qualified Data.ByteString.Lazy.Char8 as BL8 import qualified Data.HashMap.Strict as HM import Data.Vector (Vector) import qualified Data.Vector as V import Data.Word (Word8) import Prelude hiding (unlines) import Data.Csv.Compat.Monoid ((<>)) import qualified Data.Csv.Conversion as Conversion import Data.Csv.Conversion (FromNamedRecord, FromRecord, ToNamedRecord, ToRecord, parseNamedRecord, parseRecord, runParser, toNamedRecord, toRecord) import Data.Csv.Parser hiding (csv, csvWithHeader) import qualified Data.Csv.Parser as Parser import Data.Csv.Types hiding (toNamedRecord) import qualified Data.Csv.Types as Types import Data.Csv.Util (blankLine, endOfLine) #if !MIN_VERSION_base(4,8,0) import Control.Applicative ((*>), pure) import Data.Monoid (mconcat, mempty) #endif -- TODO: 'encode' isn't as efficient as it could be. ------------------------------------------------------------------------ -- * Encoding and decoding -- | Efficiently deserialize CSV records from a lazy 'L.ByteString'. -- If this fails due to incomplete or invalid input, @'Left' msg@ is -- returned. Equivalent to @'decodeWith' 'defaultDecodeOptions'@. decode :: FromRecord a => HasHeader -- ^ Data contains header that should be -- skipped -> L.ByteString -- ^ CSV data -> Either String (Vector a) decode = decodeWith defaultDecodeOptions {-# INLINE decode #-} -- | Efficiently deserialize CSV records from a lazy 'L.ByteString'. -- If this fails due to incomplete or invalid input, @'Left' msg@ is -- returned. The data is assumed to be preceeded by a header. -- Equivalent to @'decodeByNameWith' 'defaultDecodeOptions'@. decodeByName :: FromNamedRecord a => L.ByteString -- ^ CSV data -> Either String (Header, Vector a) decodeByName = decodeByNameWith defaultDecodeOptions {-# INLINE decodeByName #-} -- | Efficiently serialize CSV records as a lazy 'L.ByteString'. encode :: ToRecord a => [a] -> L.ByteString encode = encodeWith defaultEncodeOptions {-# INLINE encode #-} -- | Efficiently serialize CSV records as a lazy 'L.ByteString'. The -- header is written before any records and dictates the field order. encodeByName :: ToNamedRecord a => Header -> [a] -> L.ByteString encodeByName = encodeByNameWith defaultEncodeOptions {-# INLINE encodeByName #-} -- | Like 'encodeByName', but header and field order is dictated by -- the 'Conversion.header' method. encodeDefaultOrderedByName :: (Conversion.DefaultOrdered a, ToNamedRecord a) => [a] -> L.ByteString encodeDefaultOrderedByName = encodeDefaultOrderedByNameWith defaultEncodeOptions {-# INLINE encodeDefaultOrderedByName #-} ------------------------------------------------------------------------ -- ** Encoding and decoding options -- | Like 'decode', but lets you customize how the CSV data is parsed. decodeWith :: FromRecord a => DecodeOptions -- ^ Decoding options -> HasHeader -- ^ Data contains header that should be -- skipped -> L.ByteString -- ^ CSV data -> Either String (Vector a) decodeWith = decodeWithC csv {-# INLINE [1] decodeWith #-} {-# RULES "idDecodeWith" decodeWith = idDecodeWith #-} -- | Same as 'decodeWith', but more efficient as no type -- conversion is performed. idDecodeWith :: DecodeOptions -> HasHeader -> L.ByteString -> Either String (Vector (Vector B.ByteString)) idDecodeWith = decodeWithC Parser.csv -- | Decode CSV data using the provided parser, skipping a leading -- header if 'hasHeader' is 'HasHeader'. Returns 'Left' @errMsg@ on -- failure. decodeWithC :: (DecodeOptions -> AL.Parser a) -> DecodeOptions -> HasHeader -> BL8.ByteString -> Either String a decodeWithC p !opts hasHeader = decodeWithP parser where parser = case hasHeader of HasHeader -> header (decDelimiter opts) *> p opts NoHeader -> p opts {-# INLINE decodeWithC #-} -- | Like 'decodeByName', but lets you customize how the CSV data is -- parsed. decodeByNameWith :: FromNamedRecord a => DecodeOptions -- ^ Decoding options -> L.ByteString -- ^ CSV data -> Either String (Header, Vector a) decodeByNameWith !opts = decodeWithP (csvWithHeader opts) -- | Should quoting be applied to fields, and at which level data Quoting = QuoteNone -- ^ No quotes | QuoteMinimal -- ^ Quotes according to RFC 4180 | QuoteAll -- ^ Always quote deriving (Eq, Show) -- | Options that controls how data is encoded. These options can be -- used to e.g. encode data in a tab-separated format instead of in a -- comma-separated format. -- -- To avoid having your program stop compiling when new fields are -- added to 'EncodeOptions', create option records by overriding -- values in 'defaultEncodeOptions'. Example: -- -- > myOptions = defaultEncodeOptions { -- > encDelimiter = fromIntegral (ord '\t') -- > } -- -- /N.B./ The 'encDelimiter' must /not/ be the quote character (i.e. -- @\"@) or one of the record separator characters (i.e. @\\n@ or -- @\\r@). data EncodeOptions = EncodeOptions { -- | Field delimiter. encDelimiter :: {-# UNPACK #-} !Word8 -- | Record separator selection. @True@ for CRLF (@\\r\\n@) and -- @False@ for LF (@\\n@). , encUseCrLf :: !Bool -- | Include a header row when encoding @ToNamedRecord@ -- instances. , encIncludeHeader :: !Bool -- | What kind of quoting should be applied to text fields. , encQuoting :: !Quoting } deriving (Eq, Show) -- | Encoding options for CSV files. defaultEncodeOptions :: EncodeOptions defaultEncodeOptions = EncodeOptions { encDelimiter = 44 -- comma , encUseCrLf = True , encIncludeHeader = True , encQuoting = QuoteMinimal } -- | Like 'encode', but lets you customize how the CSV data is -- encoded. encodeWith :: ToRecord a => EncodeOptions -> [a] -> L.ByteString encodeWith opts | validDelim (encDelimiter opts) = toLazyByteString . unlines (recordSep (encUseCrLf opts)) . map (encodeRecord (encQuoting opts) (encDelimiter opts) . toRecord) | otherwise = encodeOptionsError {-# INLINE encodeWith #-} -- | Check if the delimiter is valid. validDelim :: Word8 -> Bool validDelim delim = delim `notElem` [cr, nl, dquote] where nl = 10 cr = 13 dquote = 34 -- | Raises an exception indicating that the provided delimiter isn't -- valid. See 'validDelim'. -- -- Keep this message consistent with the documentation of -- 'EncodeOptions'. encodeOptionsError :: a encodeOptionsError = error $ "Data.Csv: " ++ "The 'encDelimiter' must /not/ be the quote character (i.e. " ++ "\") or one of the record separator characters (i.e. \\n or " ++ "\\r)" -- | Encode a single record, without the trailing record separator -- (i.e. newline). encodeRecord :: Quoting -> Word8 -> Record -> Builder encodeRecord qtng delim = mconcat . intersperse (fromWord8 delim) . map fromByteString . map (escape qtng delim) . V.toList {-# INLINE encodeRecord #-} -- | Encode a single named record, without the trailing record -- separator (i.e. newline), using the given field order. encodeNamedRecord :: Header -> Quoting -> Word8 -> NamedRecord -> Builder encodeNamedRecord hdr qtng delim = encodeRecord qtng delim . namedRecordToRecord hdr -- TODO: Optimize escape :: Quoting -> Word8 -> B.ByteString -> B.ByteString escape !qtng !delim !s | (qtng == QuoteMinimal && B.any (\ b -> b == dquote || b == delim || b == nl || b == cr || b == sp) s ) || qtng == QuoteAll = toByteString $ fromWord8 dquote <> B.foldl (\ acc b -> acc <> if b == dquote then fromByteString "\"\"" else fromWord8 b) mempty s <> fromWord8 dquote | otherwise = s where dquote = 34 nl = 10 cr = 13 sp = 32 -- | Like 'encodeByName', but lets you customize how the CSV data is -- encoded. encodeByNameWith :: ToNamedRecord a => EncodeOptions -> Header -> [a] -> L.ByteString encodeByNameWith opts hdr v | validDelim (encDelimiter opts) = toLazyByteString (rows (encIncludeHeader opts)) | otherwise = encodeOptionsError where rows False = records rows True = encodeRecord (encQuoting opts) (encDelimiter opts) hdr <> recordSep (encUseCrLf opts) <> records records = unlines (recordSep (encUseCrLf opts)) . map (encodeNamedRecord hdr (encQuoting opts) (encDelimiter opts) . toNamedRecord) $ v {-# INLINE encodeByNameWith #-} -- | Like 'encodeDefaultOrderedByNameWith', but lets you customize how -- the CSV data is encoded. encodeDefaultOrderedByNameWith :: forall a. (Conversion.DefaultOrdered a, ToNamedRecord a) => EncodeOptions -> [a] -> L.ByteString encodeDefaultOrderedByNameWith opts v | validDelim (encDelimiter opts) = toLazyByteString (rows (encIncludeHeader opts)) | otherwise = encodeOptionsError where hdr = (Conversion.headerOrder (undefined :: a)) rows False = records rows True = encodeRecord (encQuoting opts) (encDelimiter opts) hdr <> recordSep (encUseCrLf opts) <> records records = unlines (recordSep (encUseCrLf opts)) . map (encodeNamedRecord hdr (encQuoting opts) (encDelimiter opts) . toNamedRecord) $ v {-# INLINE encodeDefaultOrderedByNameWith #-} namedRecordToRecord :: Header -> NamedRecord -> Record namedRecordToRecord hdr nr = V.map find hdr where find n = case HM.lookup n nr of Nothing -> moduleError "namedRecordToRecord" $ "header contains name " ++ show (B8.unpack n) ++ " which is not present in the named record" Just v -> v moduleError :: String -> String -> a moduleError func msg = error $ "Data.Csv.Encoding." ++ func ++ ": " ++ msg {-# NOINLINE moduleError #-} recordSep :: Bool -> Builder recordSep False = fromWord8 10 -- new line (\n) recordSep True = fromString "\r\n" unlines :: Builder -> [Builder] -> Builder unlines _ [] = mempty unlines sep (b:bs) = b <> sep <> unlines sep bs intersperse :: Builder -> [Builder] -> [Builder] intersperse _ [] = [] intersperse sep (x:xs) = x : prependToAll sep xs prependToAll :: Builder -> [Builder] -> [Builder] prependToAll _ [] = [] prependToAll sep (x:xs) = sep <> x : prependToAll sep xs decodeWithP :: AL.Parser a -> L.ByteString -> Either String a decodeWithP p s = case AL.parse p s of AL.Done _ v -> Right v AL.Fail left _ msg -> Left errMsg where errMsg = "parse error (" ++ msg ++ ") at " ++ (if BL8.length left > 100 then (take 100 $ BL8.unpack left) ++ " (truncated)" else show (BL8.unpack left)) {-# INLINE decodeWithP #-} -- These alternative implementation of the 'csv' and 'csvWithHeader' -- parsers from the 'Parser' module performs the -- 'FromRecord'/'FromNamedRecord' conversions on-the-fly, thereby -- avoiding the need to hold a big 'CSV' value in memory. The 'CSV' -- type has a quite large memory overhead due to high constant -- overheads of 'B.ByteString' and 'V.Vector'. -- TODO: Check that the error messages don't duplicate prefixes, as in -- "parse error: conversion error: ...". -- | Parse a CSV file that does not include a header. csv :: FromRecord a => DecodeOptions -> AL.Parser (V.Vector a) csv !opts = do vals <- records _ <- optional endOfLine endOfInput return $! V.fromList vals where records = do !r <- record (decDelimiter opts) if blankLine r then (endOfLine *> records) <|> pure [] else case runParser (parseRecord r) of Left msg -> fail $ "conversion error: " ++ msg Right val -> do !vals <- (endOfLine *> records) <|> pure [] return (val : vals) {-# INLINE csv #-} -- | Parse a CSV file that includes a header. csvWithHeader :: FromNamedRecord a => DecodeOptions -> AL.Parser (Header, V.Vector a) csvWithHeader !opts = do !hdr <- header (decDelimiter opts) vals <- records hdr _ <- optional endOfLine endOfInput let !v = V.fromList vals return (hdr, v) where records hdr = do !r <- record (decDelimiter opts) if blankLine r then (endOfLine *> records hdr) <|> pure [] else case runParser (convert hdr r) of Left msg -> fail $ "conversion error: " ++ msg Right val -> do !vals <- (endOfLine *> records hdr) <|> pure [] return (val : vals) convert hdr = parseNamedRecord . Types.toNamedRecord hdr cassava-0.4.4.0/Data/Csv/Incremental.hs0000644000000000000000000003676512570632764015722 0ustar0000000000000000{-# LANGUAGE BangPatterns, CPP, DeriveFunctor, ScopedTypeVariables #-} -- | This module allows for incremental decoding and encoding of CSV -- data. This is useful if you e.g. want to interleave I\/O with -- parsing or if you want finer grained control over how you deal with -- type conversion errors. -- -- Decoding example: -- -- > main :: IO () -- > main = withFile "salaries.csv" ReadMode $ \ csvFile -> do -- > let loop !_ (Fail _ errMsg) = putStrLn errMsg >> exitFailure -- > loop acc (Many rs k) = loop (acc + sumSalaries rs) =<< feed k -- > loop acc (Done rs) = putStrLn $ "Total salaries: " ++ -- > show (sumSalaries rs + acc) -- > -- > feed k = do -- > isEof <- hIsEOF csvFile -- > if isEof -- > then return $ k B.empty -- > else k `fmap` B.hGetSome csvFile 4096 -- > loop 0 (decode NoHeader) -- > where -- > sumSalaries rs = sum [salary | Right (_ :: String, salary :: Int) <- rs] -- -- Encoding example: -- -- > data Person = Person { name :: !String, salary :: !Int } -- > deriving Generic -- > -- > instance FromNamedRecord Person -- > instance ToNamedRecord Person -- > instance DefaultOrdered Person -- > -- > persons :: [Person] -- > persons = [Person "John" 50000, Person "Jane" 60000] -- > -- > main :: IO () -- > main = putStrLn $ encodeDefaultOrderedByName (go persons) -- > where -- > go (x:xs) = encodeNamedRecord x <> go xs -- module Data.Csv.Incremental ( -- * Decoding HeaderParser(..) , decodeHeader , decodeHeaderWith -- $typeconversion , Parser(..) -- ** Index-based record conversion -- $indexbased , HasHeader(..) , decode , decodeWith -- ** Name-based record conversion -- $namebased , decodeByName , decodeByNameWith -- * Encoding -- ** Index-based record conversion -- $indexbased , encode , encodeWith , encodeRecord , Builder -- ** Name-based record conversion -- $namebased , encodeByName , encodeDefaultOrderedByName , encodeByNameWith , encodeDefaultOrderedByNameWith , encodeNamedRecord , NamedBuilder ) where import Control.Applicative ((<|>)) import qualified Data.Attoparsec.ByteString as A import Data.Attoparsec.ByteString.Char8 (endOfInput) import qualified Data.ByteString as B import qualified Blaze.ByteString.Builder as Builder import qualified Data.ByteString.Lazy as L import Data.Monoid ((<>)) import qualified Data.Vector as V import Data.Word (Word8) import Data.Csv.Conversion hiding (Parser, header, namedRecord, record, toNamedRecord) import qualified Data.Csv.Conversion as Conversion import qualified Data.Csv.Encoding as Encoding import Data.Csv.Encoding (EncodeOptions(..), Quoting(..), recordSep) import Data.Csv.Parser import Data.Csv.Types import Data.Csv.Util (endOfLine) #if !MIN_VERSION_base(4,8,0) import Data.Monoid (Monoid(mappend, mempty)) import Control.Applicative ((<*)) #endif -- $feed-header -- -- These functions are sometimes convenient when working with -- 'HeaderParser', but don't let you do anything you couldn't already -- do using the 'HeaderParser' constructors directly. -- $indexbased -- -- See documentation on index-based conversion in "Data.Csv" for more -- information. -- $namebased -- -- See documentation on name-based conversion in "Data.Csv" for more -- information. -- $feed-records -- -- These functions are sometimes convenient when working with -- 'Parser', but don't let you do anything you couldn't already do -- using the 'Parser' constructors directly. ------------------------------------------------------------------------ -- * Decoding headers -- | An incremental parser that when fed data eventually returns a -- parsed 'Header', or an error. data HeaderParser a = -- | The input data was malformed. The first field contains any -- unconsumed input and second field contains information about -- the parse error. FailH !B.ByteString String -- | The parser needs more input data before it can produce a -- result. Use an 'B.empty' string to indicate that no more -- input data is available. If fed an 'B.empty string', the -- continuation is guaranteed to return either 'FailH' or -- 'DoneH'. | PartialH (B.ByteString -> HeaderParser a) -- | The parse succeeded and produced the given 'Header'. | DoneH !Header a deriving Functor instance Show a => Show (HeaderParser a) where showsPrec d (FailH rest msg) = showParen (d > appPrec) showStr where showStr = showString "FailH " . showsPrec (appPrec+1) rest . showString " " . showsPrec (appPrec+1) msg showsPrec _ (PartialH _) = showString "PartialH " showsPrec d (DoneH hdr x) = showParen (d > appPrec) showStr where showStr = showString "DoneH " . showsPrec (appPrec+1) hdr . showString " " . showsPrec (appPrec+1) x -- Application has precedence one more than the most tightly-binding -- operator appPrec :: Int appPrec = 10 -- | Parse a CSV header in an incremental fashion. When done, the -- 'HeaderParser' returns any unconsumed input in the second field of -- the 'DoneH' constructor. decodeHeader :: HeaderParser B.ByteString decodeHeader = decodeHeaderWith defaultDecodeOptions -- | Like 'decodeHeader', but lets you customize how the CSV data is -- parsed. decodeHeaderWith :: DecodeOptions -> HeaderParser B.ByteString decodeHeaderWith !opts = PartialH (go . parser) where parser = A.parse (header $ decDelimiter opts) go (A.Fail rest _ msg) = FailH rest err where err = "parse error (" ++ msg ++ ")" -- TODO: Check empty and give attoparsec one last chance to return -- something: go (A.Partial k) = PartialH $ \ s -> go (k s) go (A.Done rest r) = DoneH r rest ------------------------------------------------------------------------ -- * Decoding records -- $typeconversion -- -- Just like in the case of non-incremental decoding, there are two -- ways to convert CSV records to and from and user-defined data -- types: index-based conversion and name-based conversion. -- | An incremental parser that when fed data eventually produces some -- parsed records, converted to the desired type, or an error in case -- of malformed input data. data Parser a = -- | The input data was malformed. The first field contains any -- unconsumed input and second field contains information about -- the parse error. Fail !B.ByteString String -- | The parser parsed and converted zero or more records. Any -- records that failed type conversion are returned as @'Left' -- errMsg@ and the rest as @'Right' val@. Feed a 'B.ByteString' -- to the continuation to continue parsing. Use an 'B.empty' -- string to indicate that no more input data is available. If -- fed an 'B.empty' string, the continuation is guaranteed to -- return either 'Fail' or 'Done'. | Many [Either String a] (B.ByteString -> Parser a) -- | The parser parsed and converted some records. Any records -- that failed type conversion are returned as @'Left' errMsg@ -- and the rest as @'Right' val@. | Done [Either String a] deriving Functor instance Show a => Show (Parser a) where showsPrec d (Fail rest msg) = showParen (d > appPrec) showStr where showStr = showString "Fail " . showsPrec (appPrec+1) rest . showString " " . showsPrec (appPrec+1) msg showsPrec d (Many rs _) = showParen (d > appPrec) showStr where showStr = showString "Many " . showsPrec (appPrec+1) rs . showString " " showsPrec d (Done rs) = showParen (d > appPrec) showStr where showStr = showString "Done " . showsPrec (appPrec+1) rs -- | Have we read all available input? data More = Incomplete | Complete deriving (Eq, Show) -- | Efficiently deserialize CSV in an incremental fashion. Equivalent -- to @'decodeWith' 'defaultDecodeOptions'@. decode :: FromRecord a => HasHeader -- ^ Data contains header that should be -- skipped -> Parser a decode = decodeWith defaultDecodeOptions -- | Like 'decode', but lets you customize how the CSV data is parsed. decodeWith :: FromRecord a => DecodeOptions -- ^ Decoding options -> HasHeader -- ^ Data contains header that should be -- skipped -> Parser a decodeWith !opts hasHeader = case hasHeader of HasHeader -> go (decodeHeaderWith opts) NoHeader -> Many [] $ \ s -> decodeWithP parseRecord opts s where go (FailH rest msg) = Fail rest msg go (PartialH k) = Many [] $ \ s' -> go (k s') go (DoneH _ rest) = decodeWithP parseRecord opts rest ------------------------------------------------------------------------ -- | Efficiently deserialize CSV in an incremental fashion. The data -- is assumed to be preceeded by a header. Returns a 'HeaderParser' -- that when done produces a 'Parser' for parsing the actual records. -- Equivalent to @'decodeByNameWith' 'defaultDecodeOptions'@. decodeByName :: FromNamedRecord a => HeaderParser (Parser a) decodeByName = decodeByNameWith defaultDecodeOptions -- | Like 'decodeByName', but lets you customize how the CSV data is -- parsed. decodeByNameWith :: FromNamedRecord a => DecodeOptions -- ^ Decoding options -> HeaderParser (Parser a) decodeByNameWith !opts = go (decodeHeaderWith opts) where go (FailH rest msg) = FailH rest msg go (PartialH k) = PartialH $ \ s -> go (k s) go (DoneH hdr rest) = DoneH hdr (decodeWithP (parseNamedRecord . toNamedRecord hdr) opts rest) ------------------------------------------------------------------------ -- TODO: 'decodeWithP' should probably not take an initial -- 'B.ByteString' input. -- | Like 'decode', but lets you customize how the CSV data is parsed. decodeWithP :: (Record -> Conversion.Parser a) -> DecodeOptions -> B.ByteString -> Parser a decodeWithP p !opts = go Incomplete [] . parser where go !_ !acc (A.Fail rest _ msg) | null acc = Fail rest err | otherwise = Many (reverse acc) (\ s -> Fail (rest `B.append` s) err) where err = "parse error (" ++ msg ++ ")" go Incomplete acc (A.Partial k) = Many (reverse acc) cont where cont s = go m [] (k s) where m | B.null s = Complete | otherwise = Incomplete go Complete _ (A.Partial _) = moduleError "decodeWithP" msg where msg = "attoparsec should never return Partial in this case" go m acc (A.Done rest r) | B.null rest = case m of Complete -> Done (reverse acc') Incomplete -> Many (reverse acc') (cont []) | otherwise = go m acc' (parser rest) where cont acc'' s | B.null s = Done (reverse acc'') | otherwise = go Incomplete acc'' (parser s) acc' | blankLine r = acc | otherwise = let !r' = convert r in r' : acc parser = A.parse (record (decDelimiter opts) <* (endOfLine <|> endOfInput)) convert = runParser . p {-# INLINE decodeWithP #-} blankLine :: V.Vector B.ByteString -> Bool blankLine v = V.length v == 1 && (B.null (V.head v)) ------------------------------------------------------------------------ -- * Encoding -- | Efficiently serialize records in an incremental -- fashion. Equivalent to @'encodeWith' 'defaultEncodeOptions'@. encode :: ToRecord a => Builder a -> L.ByteString encode = encodeWith Encoding.defaultEncodeOptions -- | Like 'encode', but lets you customize how the CSV data is -- encoded. encodeWith :: ToRecord a => EncodeOptions -> Builder a -> L.ByteString encodeWith opts b = Builder.toLazyByteString $ runBuilder b (encQuoting opts) (encDelimiter opts) (encUseCrLf opts) -- | Encode a single record. encodeRecord :: ToRecord a => a -> Builder a encodeRecord r = Builder $ \ qtng delim useCrLf -> Encoding.encodeRecord qtng delim (toRecord r) <> recordSep useCrLf -- | A builder for building the CSV data incrementally. Just like the -- @ByteString@ builder, this builder should be used in a -- right-associative, 'foldr' style. Using '<>' to compose builders in -- a left-associative, `foldl'` style makes the building not be -- incremental. newtype Builder a = Builder { runBuilder :: Quoting -> Word8 -> Bool -> Builder.Builder } instance Monoid (Builder a) where mempty = Builder (\ _ _ _ -> mempty) mappend (Builder f) (Builder g) = Builder $ \ qtng delim useCrlf -> f qtng delim useCrlf <> g qtng delim useCrlf ------------------------------------------------------------------------ -- ** Index-based record conversion -- | Efficiently serialize named records in an incremental fashion, -- including the leading header. Equivalent to @'encodeWith' -- 'defaultEncodeOptions'@. The header is written before any records -- and dictates the field order. encodeByName :: ToNamedRecord a => Header -> NamedBuilder a -> L.ByteString encodeByName = encodeByNameWith Encoding.defaultEncodeOptions -- | Like 'encodeByName', but header and field order is dictated by -- the 'Conversion.headerOrder' method. encodeDefaultOrderedByName :: (DefaultOrdered a, ToNamedRecord a) => NamedBuilder a -> L.ByteString encodeDefaultOrderedByName = encodeDefaultOrderedByNameWith Encoding.defaultEncodeOptions -- | Like 'encodeByName', but lets you customize how the CSV data is -- encoded. encodeByNameWith :: ToNamedRecord a => EncodeOptions -> Header -> NamedBuilder a -> L.ByteString encodeByNameWith opts hdr b = Builder.toLazyByteString $ Encoding.encodeRecord (encQuoting opts) (encDelimiter opts) hdr <> recordSep (encUseCrLf opts) <> runNamedBuilder b hdr (encQuoting opts) (encDelimiter opts) (encUseCrLf opts) -- | Like 'encodeDefaultOrderedByName', but lets you customize how the -- CSV data is encoded. encodeDefaultOrderedByNameWith :: forall a. (DefaultOrdered a, ToNamedRecord a) => EncodeOptions -> NamedBuilder a -> L.ByteString encodeDefaultOrderedByNameWith opts b = Builder.toLazyByteString $ Encoding.encodeRecord (encQuoting opts) (encDelimiter opts) hdr <> recordSep (encUseCrLf opts) <> runNamedBuilder b hdr (encQuoting opts) (encDelimiter opts) (encUseCrLf opts) where hdr = Conversion.headerOrder (undefined :: a) -- | Encode a single named record. encodeNamedRecord :: ToNamedRecord a => a -> NamedBuilder a encodeNamedRecord nr = NamedBuilder $ \ hdr qtng delim useCrLf -> Encoding.encodeNamedRecord hdr qtng delim (Conversion.toNamedRecord nr) <> recordSep useCrLf -- | A builder for building the CSV data incrementally. Just like the -- @ByteString@ builder, this builder should be used in a -- right-associative, 'foldr' style. Using '<>' to compose builders in -- a left-associative, `foldl'` style makes the building not be -- incremental. newtype NamedBuilder a = NamedBuilder { runNamedBuilder :: Header -> Quoting -> Word8 -> Bool -> Builder.Builder } instance Monoid (NamedBuilder a) where mempty = NamedBuilder (\ _ _ _ _ -> mempty) mappend (NamedBuilder f) (NamedBuilder g) = NamedBuilder $ \ hdr qtng delim useCrlf -> f hdr qtng delim useCrlf <> g hdr qtng delim useCrlf ------------------------------------------------------------------------ moduleError :: String -> String -> a moduleError func msg = error $ "Data.Csv.Incremental." ++ func ++ ": " ++ msg {-# NOINLINE moduleError #-} cassava-0.4.4.0/Data/Csv/Parser.hs0000644000000000000000000001472112570632764014701 0ustar0000000000000000{-# LANGUAGE BangPatterns, CPP #-} -- | A CSV parser. The parser defined here is RFC 4180 compliant, with -- the following extensions: -- -- * Empty lines are ignored. -- -- * Non-escaped fields may contain any characters except -- double-quotes, commas, carriage returns, and newlines. -- -- * Escaped fields may contain any characters (but double-quotes -- need to be escaped). -- -- The functions in this module can be used to implement e.g. a -- resumable parser that is fed input incrementally. module Data.Csv.Parser ( DecodeOptions(..) , defaultDecodeOptions , csv , csvWithHeader , header , record , name , field ) where import Blaze.ByteString.Builder (fromByteString, toByteString) import Blaze.ByteString.Builder.Char.Utf8 (fromChar) import Control.Applicative (optional) import Data.Attoparsec.ByteString.Char8 (char, endOfInput) import qualified Data.Attoparsec.ByteString as A import qualified Data.Attoparsec.Lazy as AL import qualified Data.Attoparsec.Zepto as Z import qualified Data.ByteString as S import qualified Data.ByteString.Unsafe as S import qualified Data.Vector as V import Data.Word (Word8) import Data.Csv.Types import Data.Csv.Util ((<$!>), blankLine, endOfLine, liftM2', cr, newline, doubleQuote) #if !MIN_VERSION_base(4,8,0) import Control.Applicative ((<$>), (*>), (<*), pure) import Data.Monoid (mappend, mempty) #endif -- | Options that controls how data is decoded. These options can be -- used to e.g. decode tab-separated data instead of comma-separated -- data. -- -- To avoid having your program stop compiling when new fields are -- added to 'DecodeOptions', create option records by overriding -- values in 'defaultDecodeOptions'. Example: -- -- > myOptions = defaultDecodeOptions { -- > decDelimiter = fromIntegral (ord '\t') -- > } data DecodeOptions = DecodeOptions { -- | Field delimiter. decDelimiter :: {-# UNPACK #-} !Word8 } deriving (Eq, Show) -- | Decoding options for parsing CSV files. defaultDecodeOptions :: DecodeOptions defaultDecodeOptions = DecodeOptions { decDelimiter = 44 -- comma } -- | Parse a CSV file that does not include a header. csv :: DecodeOptions -> AL.Parser Csv csv !opts = do vals <- sepByEndOfLine1' (record (decDelimiter opts)) _ <- optional endOfLine endOfInput let nonEmpty = removeBlankLines vals return $! V.fromList nonEmpty {-# INLINE csv #-} -- | Specialized version of 'sepBy1'' which is faster due to not -- accepting an arbitrary separator. sepByDelim1' :: AL.Parser a -> Word8 -- ^ Field delimiter -> AL.Parser [a] sepByDelim1' p !delim = liftM2' (:) p loop where loop = do mb <- A.peekWord8 case mb of Just b | b == delim -> liftM2' (:) (A.anyWord8 *> p) loop _ -> pure [] {-# INLINE sepByDelim1' #-} -- | Specialized version of 'sepBy1'' which is faster due to not -- accepting an arbitrary separator. sepByEndOfLine1' :: AL.Parser a -> AL.Parser [a] sepByEndOfLine1' p = liftM2' (:) p loop where loop = do mb <- A.peekWord8 case mb of Just b | b == cr -> liftM2' (:) (A.anyWord8 *> A.word8 newline *> p) loop | b == newline -> liftM2' (:) (A.anyWord8 *> p) loop _ -> pure [] {-# INLINE sepByEndOfLine1' #-} -- | Parse a CSV file that includes a header. csvWithHeader :: DecodeOptions -> AL.Parser (Header, V.Vector NamedRecord) csvWithHeader !opts = do !hdr <- header (decDelimiter opts) vals <- map (toNamedRecord hdr) . removeBlankLines <$> sepByEndOfLine1' (record (decDelimiter opts)) _ <- optional endOfLine endOfInput let !v = V.fromList vals return (hdr, v) -- | Parse a header, including the terminating line separator. header :: Word8 -- ^ Field delimiter -> AL.Parser Header header !delim = V.fromList <$!> name delim `sepByDelim1'` delim <* endOfLine -- | Parse a header name. Header names have the same format as regular -- 'field's. name :: Word8 -> AL.Parser Name name !delim = field delim removeBlankLines :: [Record] -> [Record] removeBlankLines = filter (not . blankLine) -- | Parse a record, not including the terminating line separator. The -- terminating line separate is not included as the last record in a -- CSV file is allowed to not have a terminating line separator. You -- most likely want to use the 'endOfLine' parser in combination with -- this parser. record :: Word8 -- ^ Field delimiter -> AL.Parser Record record !delim = V.fromList <$!> field delim `sepByDelim1'` delim {-# INLINE record #-} -- | Parse a field. The field may be in either the escaped or -- non-escaped format. The return value is unescaped. field :: Word8 -> AL.Parser Field field !delim = do mb <- A.peekWord8 -- We purposely don't use <|> as we want to commit to the first -- choice if we see a double quote. case mb of Just b | b == doubleQuote -> escapedField _ -> unescapedField delim {-# INLINE field #-} escapedField :: AL.Parser S.ByteString escapedField = do _ <- dquote -- The scan state is 'True' if the previous character was a double -- quote. We need to drop a trailing double quote left by scan. s <- S.init <$> (A.scan False $ \s c -> if c == doubleQuote then Just (not s) else if s then Nothing else Just False) if doubleQuote `S.elem` s then case Z.parse unescape s of Right r -> return r Left err -> fail err else return s unescapedField :: Word8 -> AL.Parser S.ByteString unescapedField !delim = A.takeWhile (\ c -> c /= doubleQuote && c /= newline && c /= delim && c /= cr) dquote :: AL.Parser Char dquote = char '"' unescape :: Z.Parser S.ByteString unescape = toByteString <$!> go mempty where go acc = do h <- Z.takeWhile (/= doubleQuote) let rest = do start <- Z.take 2 if (S.unsafeHead start == doubleQuote && S.unsafeIndex start 1 == doubleQuote) then go (acc `mappend` fromByteString h `mappend` fromChar '"') else fail "invalid CSV escape sequence" done <- Z.atEnd if done then return (acc `mappend` fromByteString h) else rest cassava-0.4.4.0/Data/Csv/Streaming.hs0000644000000000000000000001450212570632764015373 0ustar0000000000000000{-# LANGUAGE BangPatterns, CPP, DeriveFunctor #-} -- | This module allows for streaming decoding of CSV data. This is -- useful if you need to parse large amounts of input in constant -- space. The API also allows you to ignore type conversion errors on -- a per-record basis. module Data.Csv.Streaming ( -- * Usage example -- $example -- * Stream representation -- $stream-representation Records(..) -- * Decoding records -- $typeconversion -- ** Index-based record conversion -- $indexbased , HasHeader(..) , decode , decodeWith -- ** Name-based record conversion -- $namebased , decodeByName , decodeByNameWith ) where import Control.DeepSeq (NFData(rnf)) import qualified Data.ByteString as B import qualified Data.ByteString.Lazy as BL import qualified Data.ByteString.Lazy.Char8 as BL8 import Data.Foldable (Foldable(..)) import Prelude hiding (foldr) import Data.Csv.Conversion import Data.Csv.Incremental hiding (decode, decodeByName, decodeByNameWith, decodeWith) import qualified Data.Csv.Incremental as I import Data.Csv.Parser import Data.Csv.Types #if !MIN_VERSION_base(4,8,0) import Control.Applicative ((<$>), (<*>), pure) import Data.Traversable (Traversable(..)) #endif #if !MIN_VERSION_bytestring(0,10,0) import qualified Data.ByteString.Lazy.Internal as BL -- for constructors #endif -- $example -- -- A short usage example: -- -- > for_ (decode NoHeader "John,27\r\nJane,28\r\n") $ \ (name, age :: Int) -> -- > putStrLn $ name ++ " is " ++ show age ++ " years old" -- -- N.B. The 'Foldable' instance, which is used above, skips records -- that failed to convert. If you don't want this behavior, work -- directly with the 'Cons' and 'Nil' constructors. -- $stream-representation -- -- A stream of records is represented as a (lazy) list that may -- contain errors. -- $typeconversion -- -- Just like in the case of non-streaming decoding, there are two ways -- to convert CSV records to and from and user-defined data types: -- index-based conversion and name-based conversion. -- $indexbased -- -- See documentation on index-based conversion in "Data.Csv" for more -- information. -- $namebased -- -- See documentation on name-based conversion in "Data.Csv" for more -- information. -- | A stream of parsed records. If type conversion failed for the -- record, the error is returned as @'Left' errMsg@. data Records a = -- | A record or an error message, followed by more records. Cons (Either String a) (Records a) -- | End of stream, potentially due to a parse error. If a parse -- error occured, the first field contains the error message. -- The second field contains any unconsumed input. | Nil (Maybe String) BL.ByteString deriving (Eq, Functor, Show) -- | Skips records that failed to convert. instance Foldable Records where foldr = foldrRecords #if MIN_VERSION_base(4,6,0) foldl' = foldlRecords' #endif foldrRecords :: (a -> b -> b) -> b -> Records a -> b foldrRecords f = go where go z (Cons (Right x) rs) = f x (go z rs) go z _ = z {-# INLINE foldrRecords #-} #if MIN_VERSION_base(4,6,0) foldlRecords' :: (a -> b -> a) -> a -> Records b -> a foldlRecords' f = go where go z (Cons (Right x) rs) = let z' = f z x in z' `seq` go z' rs go z _ = z {-# INLINE foldlRecords' #-} #endif instance Traversable Records where traverse _ (Nil merr rest) = pure $ Nil merr rest traverse f (Cons x xs) = Cons <$> traverseElem x <*> traverse f xs where traverseElem (Left err) = pure $ Left err traverseElem (Right y) = Right <$> f y instance NFData a => NFData (Records a) where rnf (Cons r rs) = rnf r `seq` rnf rs #if MIN_VERSION_bytestring(0,10,0) rnf (Nil errMsg rest) = rnf errMsg `seq` rnf rest #else rnf (Nil errMsg rest) = rnf errMsg `seq` rnfLazyByteString rest rnfLazyByteString :: BL.ByteString -> () rnfLazyByteString BL.Empty = () rnfLazyByteString (BL.Chunk _ b) = rnfLazyByteString b #endif -- | Efficiently deserialize CSV records in a streaming fashion. -- Equivalent to @'decodeWith' 'defaultDecodeOptions'@. decode :: FromRecord a => HasHeader -- ^ Data contains header that should be -- skipped -> BL.ByteString -- ^ CSV data -> Records a decode = decodeWith defaultDecodeOptions -- | Like 'decode', but lets you customize how the CSV data is parsed. decodeWith :: FromRecord a => DecodeOptions -- ^ Decoding options -> HasHeader -- ^ Data contains header that should be -- skipped -> BL.ByteString -- ^ CSV data -> Records a decodeWith !opts hasHeader s0 = go (BL.toChunks s0) (I.decodeWith opts hasHeader) where go ss (Done xs) = foldr Cons (Nil Nothing (BL.fromChunks ss)) xs go ss (Fail rest err) = Nil (Just err) (BL.fromChunks (rest:ss)) go [] (Many xs k) = foldr Cons (go [] (k B.empty)) xs go (s:ss) (Many xs k) = foldr Cons (go ss (k s)) xs -- | Efficiently deserialize CSV in a streaming fashion. The data is -- assumed to be preceeded by a header. Returns @'Left' errMsg@ if -- parsing the header fails. Equivalent to @'decodeByNameWith' -- 'defaultDecodeOptions'@. decodeByName :: FromNamedRecord a => BL.ByteString -- ^ CSV data -> Either String (Header, Records a) decodeByName = decodeByNameWith defaultDecodeOptions -- TODO: Include something more in error messages? -- | Like 'decodeByName', but lets you customize how the CSV data is -- parsed. decodeByNameWith :: FromNamedRecord a => DecodeOptions -- ^ Decoding options -> BL.ByteString -- ^ CSV data -> Either String (Header, Records a) decodeByNameWith !opts s0 = go (BL.toChunks s0) (I.decodeByNameWith opts) where go ss (DoneH hdr p) = Right (hdr, go2 ss p) go ss (FailH rest err) = Left $ err ++ " at " ++ show (BL8.unpack . BL.fromChunks $ rest : ss) go [] (PartialH k) = go [] (k B.empty) go (s:ss) (PartialH k) = go ss (k s) go2 ss (Done xs) = foldr Cons (Nil Nothing (BL.fromChunks ss)) xs go2 ss (Fail rest err) = Nil (Just err) (BL.fromChunks (rest:ss)) go2 [] (Many xs k) = foldr Cons (go2 [] (k B.empty)) xs go2 (s:ss) (Many xs k) = foldr Cons (go2 ss (k s)) xs cassava-0.4.4.0/Data/Csv/Types.hs0000644000000000000000000000264212570632764014550 0ustar0000000000000000module Data.Csv.Types ( -- * Core CSV types Csv , Record , Header , Name , NamedRecord , Field , toNamedRecord -- * Header handling , HasHeader(..) ) where import qualified Data.ByteString as S import qualified Data.HashMap.Strict as HM import Data.Vector (Vector) import qualified Data.Vector as V -- | CSV data represented as a Haskell vector of vector of -- bytestrings. type Csv = Vector Record -- | A record corresponds to a single line in a CSV file. type Record = Vector Field -- | The header corresponds to the first line a CSV file. Not all CSV -- files have a header. type Header = Vector Name -- | A header has one or more names, describing the data in the column -- following the name. type Name = S.ByteString -- | A record corresponds to a single line in a CSV file, indexed by -- the column name rather than the column index. type NamedRecord = HM.HashMap S.ByteString S.ByteString -- | A single field within a record. type Field = S.ByteString -- | Convert a 'Record' to a 'NamedRecord' by attaching column names. -- The 'Header' and 'Record' must be of the same length. toNamedRecord :: Header -> Record -> NamedRecord toNamedRecord hdr v = HM.fromList . V.toList $ V.zip hdr v -- | Is the CSV data preceded by a header? data HasHeader = HasHeader -- ^ The CSV data is preceded by a header | NoHeader -- ^ The CSV data is not preceded by a header cassava-0.4.4.0/Data/Csv/Util.hs0000644000000000000000000000271212570632764014357 0ustar0000000000000000{-# LANGUAGE BangPatterns, CPP, OverloadedStrings #-} module Data.Csv.Util ( (<$!>) , blankLine , liftM2' , endOfLine , doubleQuote , newline , cr ) where import Control.Applicative ((<|>)) import Data.Word (Word8) import Data.Attoparsec.ByteString.Char8 (string) import qualified Data.Attoparsec.ByteString as A import qualified Data.ByteString as B import qualified Data.Vector as V import Data.Attoparsec.ByteString (Parser) #if !MIN_VERSION_base(4,8,0) import Control.Applicative ((*>)) #endif -- | A strict version of 'Data.Functor.<$>' for monads. (<$!>) :: Monad m => (a -> b) -> m a -> m b f <$!> m = do a <- m return $! f a {-# INLINE (<$!>) #-} infixl 4 <$!> -- | Is this an empty record (i.e. a blank line)? blankLine :: V.Vector B.ByteString -> Bool blankLine v = V.length v == 1 && (B.null (V.head v)) -- | A version of 'liftM2' that is strict in the result of its first -- action. liftM2' :: (Monad m) => (a -> b -> c) -> m a -> m b -> m c liftM2' f a b = do !x <- a y <- b return (f x y) {-# INLINE liftM2' #-} -- | Match either a single newline character @\'\\n\'@, or a carriage -- return followed by a newline character @\"\\r\\n\"@, or a single -- carriage return @\'\\r\'@. endOfLine :: Parser () endOfLine = (A.word8 newline *> return ()) <|> (string "\r\n" *> return ()) <|> (A.word8 cr *> return ()) {-# INLINE endOfLine #-} doubleQuote, newline, cr :: Word8 doubleQuote = 34 newline = 10 cr = 13 cassava-0.4.4.0/Data/Csv/Compat/0000755000000000000000000000000012570632764014327 5ustar0000000000000000cassava-0.4.4.0/Data/Csv/Compat/Monoid.hs0000644000000000000000000000036712570632764016116 0ustar0000000000000000{-# LANGUAGE CPP #-} module Data.Csv.Compat.Monoid ( (<>) ) where import Data.Monoid #if !MIN_VERSION_base(4,5,0) infixr 6 <> -- | An infix synonym for 'mappend'. (<>) :: Monoid m => m -> m -> m (<>) = mappend {-# INLINE (<>) #-} #endif cassava-0.4.4.0/Data/Csv/Conversion/0000755000000000000000000000000012570632764015231 5ustar0000000000000000cassava-0.4.4.0/Data/Csv/Conversion/Internal.hs0000644000000000000000000002375412570632764017354 0ustar0000000000000000module Data.Csv.Conversion.Internal ( decimal , realFloat ) where import Blaze.ByteString.Builder import Blaze.ByteString.Builder.Char8 import Data.Array.Base (unsafeAt) import Data.Array.IArray import qualified Data.ByteString as B import Data.Char (ord) import Data.Int import Data.Word import Data.Csv.Compat.Monoid ((<>)) ------------------------------------------------------------------------ -- Integers decimal :: Integral a => a -> B.ByteString decimal = toByteString . formatDecimal {-# INLINE decimal #-} -- TODO: Add an optimized version for Integer. formatDecimal :: Integral a => a -> Builder {-# RULES "formatDecimal/Int" formatDecimal = formatBoundedSigned :: Int -> Builder #-} {-# RULES "formatDecimal/Int8" formatDecimal = formatBoundedSigned :: Int8 -> Builder #-} {-# RULES "formatDecimal/Int16" formatDecimal = formatBoundedSigned :: Int16 -> Builder #-} {-# RULES "formatDecimal/Int32" formatDecimal = formatBoundedSigned :: Int32 -> Builder #-} {-# RULES "formatDecimal/Int64" formatDecimal = formatBoundedSigned :: Int64 -> Builder #-} {-# RULES "formatDecimal/Word" formatDecimal = formatPositive :: Word -> Builder #-} {-# RULES "formatDecimal/Word8" formatDecimal = formatPositive :: Word8 -> Builder #-} {-# RULES "formatDecimal/Word16" formatDecimal = formatPositive :: Word16 -> Builder #-} {-# RULES "formatDecimal/Word32" formatDecimal = formatPositive :: Word32 -> Builder #-} {-# RULES "formatDecimal/Word64" formatDecimal = formatPositive :: Word64 -> Builder #-} {-# NOINLINE formatDecimal #-} formatDecimal i | i < 0 = minus <> if i <= -128 then formatPositive (-(i `quot` 10)) <> digit (-(i `rem` 10)) else formatPositive (-i) | otherwise = formatPositive i formatBoundedSigned :: (Integral a, Bounded a) => a -> Builder {-# SPECIALIZE formatBoundedSigned :: Int -> Builder #-} {-# SPECIALIZE formatBoundedSigned :: Int8 -> Builder #-} {-# SPECIALIZE formatBoundedSigned :: Int16 -> Builder #-} {-# SPECIALIZE formatBoundedSigned :: Int32 -> Builder #-} {-# SPECIALIZE formatBoundedSigned :: Int64 -> Builder #-} formatBoundedSigned i | i < 0 = minus <> if i == minBound then formatPositive (-(i `quot` 10)) <> digit (-(i `rem` 10)) else formatPositive (-i) | otherwise = formatPositive i formatPositive :: Integral a => a -> Builder {-# SPECIALIZE formatPositive :: Int -> Builder #-} {-# SPECIALIZE formatPositive :: Int8 -> Builder #-} {-# SPECIALIZE formatPositive :: Int16 -> Builder #-} {-# SPECIALIZE formatPositive :: Int32 -> Builder #-} {-# SPECIALIZE formatPositive :: Int64 -> Builder #-} {-# SPECIALIZE formatPositive :: Word -> Builder #-} {-# SPECIALIZE formatPositive :: Word8 -> Builder #-} {-# SPECIALIZE formatPositive :: Word16 -> Builder #-} {-# SPECIALIZE formatPositive :: Word32 -> Builder #-} {-# SPECIALIZE formatPositive :: Word64 -> Builder #-} formatPositive = go where go n | n < 10 = digit n | otherwise = go (n `quot` 10) <> digit (n `rem` 10) minus :: Builder minus = fromWord8 45 zero :: Word8 zero = 48 digit :: Integral a => a -> Builder digit n = fromWord8 $! i2w (fromIntegral n) {-# INLINE digit #-} i2w :: Int -> Word8 i2w i = zero + fromIntegral i {-# INLINE i2w #-} ------------------------------------------------------------------------ -- Floating point numbers realFloat :: RealFloat a => a -> B.ByteString {-# SPECIALIZE realFloat :: Float -> B.ByteString #-} {-# SPECIALIZE realFloat :: Double -> B.ByteString #-} realFloat = toByteString . formatRealFloat Generic -- | Control the rendering of floating point numbers. data FPFormat = Exponent -- ^ Scientific notation (e.g. @2.3e123@). | Fixed -- ^ Standard decimal notation. | Generic -- ^ Use decimal notation for values between @0.1@ and -- @9,999,999@, and scientific notation otherwise. deriving (Enum, Read, Show) formatRealFloat :: RealFloat a => FPFormat -> a -> Builder {-# SPECIALIZE formatRealFloat :: FPFormat -> Float -> Builder #-} {-# SPECIALIZE formatRealFloat :: FPFormat -> Double -> Builder #-} formatRealFloat fmt x | isNaN x = fromString "NaN" | isInfinite x = if x < 0 then fromString "-Infinity" else fromString "Infinity" | x < 0 || isNegativeZero x = minus <> doFmt fmt (floatToDigits (-x)) | otherwise = doFmt fmt (floatToDigits x) where doFmt format (is, e) = let ds = map i2d is in case format of Generic -> doFmt (if e < 0 || e > 7 then Exponent else Fixed) (is,e) Exponent -> let show_e' = formatDecimal (e-1) in case ds of [48] -> fromString "0.0e0" [d] -> fromWord8 d <> fromString ".0e" <> show_e' (d:ds') -> fromWord8 d <> fromChar '.' <> fromWord8s ds' <> fromChar 'e' <> show_e' [] -> error "formatRealFloat/doFmt/Exponent: []" Fixed | e <= 0 -> fromString "0." <> fromByteString (B.replicate (-e) zero) <> fromWord8s ds | otherwise -> let f 0 s rs = mk0 (reverse s) <> fromChar '.' <> mk0 rs f n s [] = f (n-1) (zero:s) [] f n s (r:rs) = f (n-1) (r:s) rs in f e [] ds where mk0 ls = case ls of { [] -> fromWord8 zero ; _ -> fromWord8s ls} -- Based on "Printing Floating-Point Numbers Quickly and Accurately" -- by R.G. Burger and R.K. Dybvig in PLDI 96. -- This version uses a much slower logarithm estimator. It should be improved. -- | 'floatToDigits' takes a base and a non-negative 'RealFloat' number, -- and returns a list of digits and an exponent. -- In particular, if @x>=0@, and -- -- > floatToDigits base x = ([d1,d2,...,dn], e) -- -- then -- -- (1) @n >= 1@ -- -- (2) @x = 0.d1d2...dn * (base**e)@ -- -- (3) @0 <= di <= base-1@ floatToDigits :: (RealFloat a) => a -> ([Int], Int) {-# SPECIALIZE floatToDigits :: Float -> ([Int], Int) #-} {-# SPECIALIZE floatToDigits :: Double -> ([Int], Int) #-} floatToDigits 0 = ([0], 0) floatToDigits x = let (f0, e0) = decodeFloat x (minExp0, _) = floatRange x p = floatDigits x b = floatRadix x minExp = minExp0 - p -- the real minimum exponent -- Haskell requires that f be adjusted so denormalized numbers -- will have an impossibly low exponent. Adjust for this. (f, e) = let n = minExp - e0 in if n > 0 then (f0 `quot` (expt b n), e0+n) else (f0, e0) (r, s, mUp, mDn) = if e >= 0 then let be = expt b e in if f == expt b (p-1) then (f*be*b*2, 2*b, be*b, be) -- according to Burger and Dybvig else (f*be*2, 2, be, be) else if e > minExp && f == expt b (p-1) then (f*b*2, expt b (-e+1)*2, b, 1) else (f*2, expt b (-e)*2, 1, 1) k :: Int k = let k0 :: Int k0 = if b == 2 then -- logBase 10 2 is very slightly larger than 8651/28738 -- (about 5.3558e-10), so if log x >= 0, the approximation -- k1 is too small, hence we add one and need one fixup step less. -- If log x < 0, the approximation errs rather on the high side. -- That is usually more than compensated for by ignoring the -- fractional part of logBase 2 x, but when x is a power of 1/2 -- or slightly larger and the exponent is a multiple of the -- denominator of the rational approximation to logBase 10 2, -- k1 is larger than logBase 10 x. If k1 > 1 + logBase 10 x, -- we get a leading zero-digit we don't want. -- With the approximation 3/10, this happened for -- 0.5^1030, 0.5^1040, ..., 0.5^1070 and values close above. -- The approximation 8651/28738 guarantees k1 < 1 + logBase 10 x -- for IEEE-ish floating point types with exponent fields -- <= 17 bits and mantissae of several thousand bits, earlier -- convergents to logBase 10 2 would fail for long double. -- Using quot instead of div is a little faster and requires -- fewer fixup steps for negative lx. let lx = p - 1 + e0 k1 = (lx * 8651) `quot` 28738 in if lx >= 0 then k1 + 1 else k1 else -- f :: Integer, log :: Float -> Float, -- ceiling :: Float -> Int ceiling ((log (fromInteger (f+1) :: Float) + fromIntegral e * log (fromInteger b)) / log 10) --WAS: fromInt e * log (fromInteger b)) fixup n = if n >= 0 then if r + mUp <= expt 10 n * s then n else fixup (n+1) else if expt 10 (-n) * (r + mUp) <= s then n else fixup (n+1) in fixup k0 gen ds rn sN mUpN mDnN = let (dn, rn') = (rn * 10) `quotRem` sN mUpN' = mUpN * 10 mDnN' = mDnN * 10 in case (rn' < mDnN', rn' + mUpN' > sN) of (True, False) -> dn : ds (False, True) -> dn+1 : ds (True, True) -> if rn' * 2 < sN then dn : ds else dn+1 : ds (False, False) -> gen (dn:ds) rn' sN mUpN' mDnN' rds = if k >= 0 then gen [] r (s * expt 10 k) mUp mDn else let bk = expt 10 (-k) in gen [] (r * bk) s (mUp * bk) (mDn * bk) in (map fromIntegral (reverse rds), k) -- Exponentiation with a cache for the most common numbers. minExpt, maxExpt :: Int minExpt = 0 maxExpt = 1100 expt :: Integer -> Int -> Integer expt base n | base == 2 && n >= minExpt && n <= maxExpt = expts `unsafeAt` n | base == 10 && n <= maxExpt10 = expts10 `unsafeAt` n | otherwise = base^n expts :: Array Int Integer expts = array (minExpt,maxExpt) [(n,2^n) | n <- [minExpt .. maxExpt]] maxExpt10 :: Int maxExpt10 = 324 expts10 :: Array Int Integer expts10 = array (minExpt,maxExpt10) [(n,10^n) | n <- [minExpt .. maxExpt10]] -- | Unsafe conversion for decimal digits. {-# INLINE i2d #-} i2d :: Int -> Word8 i2d i = fromIntegral (ord '0' + i) cassava-0.4.4.0/examples/0000755000000000000000000000000012570632764013316 5ustar0000000000000000cassava-0.4.4.0/examples/IncrementalIndexedBasedDecode.hs0000644000000000000000000000144112570632764021457 0ustar0000000000000000{-# LANGUAGE BangPatterns, ScopedTypeVariables #-} import Control.Monad import qualified Data.ByteString as B import Data.Csv.Incremental import System.Exit import System.IO main :: IO () main = withFile "salaries.csv" ReadMode $ \ csvFile -> do let loop !_ (Fail _ errMsg) = putStrLn errMsg >> exitFailure loop acc (Many rs k) = loop (acc + sumSalaries rs) =<< feed k loop acc (Done rs) = putStrLn $ "Total salaries: " ++ show (sumSalaries rs + acc) feed k = do isEof <- hIsEOF csvFile if isEof then return $ k B.empty else k `fmap` B.hGetSome csvFile 4096 loop 0 (decode NoHeader) where sumSalaries rs = sum [salary | Right (_ :: String, salary :: Int) <- rs] cassava-0.4.4.0/examples/IncrementalNamedBasedEncode.hs0000644000000000000000000000110712570632764021134 0ustar0000000000000000{-# LANGUAGE DeriveGeneric #-} import qualified Data.ByteString.Lazy as L import Data.Csv hiding (encodeDefaultOrderedByName) import Data.Csv.Incremental import Data.Monoid import GHC.Generics data Person = Person { name :: !String , salary :: !Int } deriving Generic instance FromNamedRecord Person instance ToNamedRecord Person instance DefaultOrdered Person persons :: [Person] persons = [Person "John" 50000, Person "Jane" 60000] main :: IO () main = L.putStrLn $ encodeDefaultOrderedByName (go persons) where go (x:xs) = encodeNamedRecord x <> go xs cassava-0.4.4.0/examples/IndexBasedDecode.hs0000644000000000000000000000061412570632764016765 0ustar0000000000000000{-# LANGUAGE ScopedTypeVariables #-} import qualified Data.ByteString.Lazy as BL import Data.Csv import qualified Data.Vector as V main :: IO () main = do csvData <- BL.readFile "salaries.csv" case decode NoHeader csvData of Left err -> putStrLn err Right v -> V.forM_ v $ \ (name, salary :: Int) -> putStrLn $ name ++ " earns " ++ show salary ++ " dollars" cassava-0.4.4.0/examples/IndexBasedGeneric.hs0000644000000000000000000000122012570632764017150 0ustar0000000000000000{-# LANGUAGE ScopedTypeVariables, DeriveGeneric #-} import qualified Data.ByteString.Lazy as BL import Data.Csv import qualified Data.Vector as V import GHC.Generics data Person = Person String Int deriving Generic instance FromRecord Person instance ToRecord Person persons :: [Person] persons = [Person "John" 50000, Person "Jane" 60000] main :: IO () main = do BL.writeFile "salaries.csv" $ encode persons csvData <- BL.readFile "salaries.csv" case decode NoHeader csvData of Left err -> putStrLn err Right v -> V.forM_ v $ \ (Person name salary) -> putStrLn $ name ++ " earns " ++ show salary ++ " dollars" cassava-0.4.4.0/examples/NamedBasedDecode.hs0000644000000000000000000000111112570632764016733 0ustar0000000000000000{-# LANGUAGE OverloadedStrings #-} import Control.Applicative import qualified Data.ByteString.Lazy as BL import Data.Csv import qualified Data.Vector as V data Person = Person { name :: String , salary :: Int } instance FromNamedRecord Person where parseNamedRecord r = Person <$> r .: "name" <*> r .: "salary" main :: IO () main = do csvData <- BL.readFile "salaries.csv" case decodeByName csvData of Left err -> putStrLn err Right (_, v) -> V.forM_ v $ \ p -> putStrLn $ name p ++ " earns " ++ show (salary p) ++ " dollars" cassava-0.4.4.0/examples/NamedBasedGeneric.hs0000644000000000000000000000140412570632764017131 0ustar0000000000000000{-# LANGUAGE OverloadedStrings, DeriveGeneric #-} import qualified Data.ByteString.Lazy as BL import Data.Csv import Data.Text (Text) import qualified Data.Vector as V import GHC.Generics data Person = Person { name :: !String , salary :: !Int } deriving Generic instance FromNamedRecord Person instance ToNamedRecord Person instance DefaultOrdered Person persons :: [Person] persons = [Person "John" 50000, Person "Jane" 60000] main :: IO () main = do BL.writeFile "salaries.csv" $ encodeDefaultOrderedByName persons csvData <- BL.readFile "salaries.csv" case decodeByName csvData of Left err -> putStrLn err Right (_, v) -> V.forM_ v $ \ p -> putStrLn $ name p ++ " earns " ++ show (salary p) ++ " dollars" cassava-0.4.4.0/examples/StreamingIndexBasedDecode.hs0000644000000000000000000000063412570632764020641 0ustar0000000000000000{-# LANGUAGE ScopedTypeVariables #-} import qualified Data.ByteString.Lazy as BL import Data.Csv.Streaming import Data.Foldable (for_) main :: IO () main = do csvData <- BL.readFile "salaries.csv" -- N.B. The Foldable instance skips records that failed to -- convert. for_ (decode NoHeader csvData) $ \ (name, salary :: Int) -> putStrLn $ name ++ " earns " ++ show salary ++ " dollars" cassava-0.4.4.0/tests/0000755000000000000000000000000012570632764012642 5ustar0000000000000000cassava-0.4.4.0/tests/UnitTests.hs0000644000000000000000000003744412570632764015154 0ustar0000000000000000{-# LANGUAGE OverloadedStrings, ScopedTypeVariables #-} {-# OPTIONS_GHC -fno-warn-orphans #-} module Main ( main ) where import qualified Data.ByteString as B import qualified Data.ByteString.Lazy as BL import qualified Data.ByteString.Lazy.Char8 as BL8 import qualified Data.HashMap.Strict as HM import Data.Int import qualified Data.Text as T import qualified Data.Text.Lazy as LT import qualified Data.Vector as V import Data.Word import Test.HUnit import Test.Framework as TF import Test.Framework.Providers.HUnit as TF import Test.QuickCheck import Test.Framework.Providers.QuickCheck2 as TF import Data.Csv hiding (record) import qualified Data.Csv.Streaming as S ------------------------------------------------------------------------ -- Parse tests decodesAs :: BL.ByteString -> [[B.ByteString]] -> Assertion decodesAs input expected = assertResult input expected $ decode NoHeader input decodesWithAs :: DecodeOptions -> BL.ByteString -> [[B.ByteString]] -> Assertion decodesWithAs opts input expected = assertResult input expected $ decodeWith opts NoHeader input assertResult :: BL.ByteString -> [[B.ByteString]] -> Either String (V.Vector (V.Vector B.ByteString)) -> Assertion assertResult input expected res = case res of Right r -> V.fromList (map V.fromList expected) @=? r Left err -> assertFailure $ " input: " ++ show (BL8.unpack input) ++ "\n" ++ "parse error: " ++ err encodesAs :: [[B.ByteString]] -> BL.ByteString -> Assertion encodesAs input expected = encode (map V.fromList input) @?= expected encodesWithAs :: EncodeOptions -> [[B.ByteString]] -> BL.ByteString -> Assertion encodesWithAs opts input expected = encodeWith opts (map V.fromList input) @?= expected namedEncodesAs :: [B.ByteString] -> [[(B.ByteString, B.ByteString)]] -> BL.ByteString -> Assertion namedEncodesAs hdr input expected = encodeByName (V.fromList hdr) (map HM.fromList input) @?= expected namedEncodesWithAs :: EncodeOptions -> [B.ByteString] -> [[(B.ByteString, B.ByteString)]] -> BL.ByteString -> Assertion namedEncodesWithAs opts hdr input expected = encodeByNameWith opts (V.fromList hdr) (map HM.fromList input) @?= expected namedDecodesAs :: BL.ByteString -> [B.ByteString] -> [[(B.ByteString, B.ByteString)]] -> Assertion namedDecodesAs input ehdr expected = case decodeByName input of Right r -> (V.fromList ehdr, expected') @=? r Left err -> assertFailure $ " input: " ++ show (BL8.unpack input) ++ "\n" ++ "parse error: " ++ err where expected' = V.fromList $ map HM.fromList expected recordsToList :: S.Records a -> Either String [a] recordsToList (S.Nil (Just err) _) = Left err recordsToList (S.Nil Nothing _) = Right [] recordsToList (S.Cons (Left err) _) = Left err recordsToList (S.Cons (Right x) rs) = case recordsToList rs of l@(Left _) -> l (Right xs) -> Right (x : xs) decodesStreamingAs :: BL.ByteString -> [[B.ByteString]] -> Assertion decodesStreamingAs input expected = assertResult input expected $ fmap (V.fromList . map V.fromList) $ recordsToList $ S.decode NoHeader input decodesWithStreamingAs :: DecodeOptions -> BL.ByteString -> [[B.ByteString]] -> Assertion decodesWithStreamingAs opts input expected = assertResult input expected $ fmap (V.fromList . map V.fromList) $ recordsToList $ S.decodeWith opts NoHeader input namedDecodesStreamingAs :: BL.ByteString -> [B.ByteString] -> [[(B.ByteString, B.ByteString)]] -> Assertion namedDecodesStreamingAs input ehdr expected = case S.decodeByName input of Right (hdr, rs) -> case recordsToList rs of Right xs -> (V.fromList ehdr, expected') @=? (hdr, xs) Left err -> assertFailure $ " input: " ++ show (BL8.unpack input) ++ "\n" ++ "conversion error: " ++ err Left err -> assertFailure $ " input: " ++ show (BL8.unpack input) ++ "\n" ++ "parse error: " ++ err where expected' = map HM.fromList expected positionalTests :: [TF.Test] positionalTests = [ testGroup "encode" $ map encodeTest [ ("simple", [["abc"]], "abc\r\n") , ("quoted", [["\"abc\""]], "\"\"\"abc\"\"\"\r\n") , ("quote", [["a\"b"]], "\"a\"\"b\"\r\n") , ("quotedQuote", [["\"a\"b\""]], "\"\"\"a\"\"b\"\"\"\r\n") , ("leadingSpace", [[" abc"]], "\" abc\"\r\n") , ("comma", [["abc,def"]], "\"abc,def\"\r\n") , ("twoFields", [["abc","def"]], "abc,def\r\n") , ("twoRecords", [["abc"], ["def"]], "abc\r\ndef\r\n") , ("newline", [["abc\ndef"]], "\"abc\ndef\"\r\n") ] , testGroup "encode" $ map encodeTestUnqtd [ ("simple", [["abc"]], "abc\r\n") , ("quoted", [["\"abc\""]], "\"abc\"\r\n") , ("quote", [["a\"b"]], "a\"b\r\n") , ("quotedQuote", [["\"a\"b\""]], "\"a\"b\"\r\n") , ("leadingSpace", [[" abc"]], " abc\r\n") , ("comma", [["abc,def"]], "abc,def\r\n") , ("twoFields", [["abc","def"]], "abc,def\r\n") , ("twoRecords", [["abc"], ["def"]], "abc\r\ndef\r\n") , ("newline", [["abc\ndef"]], "abc\ndef\r\n") ] , testGroup "encode" $ map encodeTestAllqtd [ ("simple", [["abc"]], "\"abc\"\r\n") , ("quoted", [["\"abc\""]], "\"\"\"abc\"\"\"\r\n") , ("quote", [["a\"b"]], "\"a\"\"b\"\r\n") , ("quotedQuote", [["\"a\"b\""]], "\"\"\"a\"\"b\"\"\"\r\n") , ("leadingSpace", [[" abc"]], "\" abc\"\r\n") , ("comma", [["abc,def"]], "\"abc,def\"\r\n") , ("twoFields", [["abc","def"]], "\"abc\",\"def\"\r\n") , ("twoRecords", [["abc"], ["def"]], "\"abc\"\r\n\"def\"\r\n") , ("newline", [["abc\ndef"]], "\"abc\ndef\"\r\n") ] , testGroup "encodeWith" [ testCase "tab-delim" $ encodesWithAs (defEnc { encDelimiter = 9 }) [["1", "2"]] "1\t2\r\n" , testCase "newline" $ encodesWithAs (defEnc {encUseCrLf = False}) [["1", "2"], ["3", "4"]] "1,2\n3,4\n" ] , testGroup "decode" $ map decodeTest decodeTests , testGroup "decodeWith" $ map decodeWithTest decodeWithTests , testGroup "streaming" [ testGroup "decode" $ map streamingDecodeTest decodeTests , testGroup "decodeWith" $ map streamingDecodeWithTest decodeWithTests ] ] where rfc4180Input = BL8.pack $ "#field1,field2,field3\n" ++ "\"aaa\",\"bb\n" ++ "b\",\"ccc\"\n" ++ "\"a,a\",\"b\"\"bb\",\"ccc\"\n" ++ "zzz,yyy,xxx\n" rfc4180Output = [["#field1", "field2", "field3"], ["aaa", "bb\nb", "ccc"], ["a,a", "b\"bb", "ccc"], ["zzz", "yyy", "xxx"]] decodeTests = [ ("simple", "a,b,c\n", [["a", "b", "c"]]) , ("crlf", "a,b\r\nc,d\r\n", [["a", "b"], ["c", "d"]]) , ("noEol", "a,b,c", [["a", "b", "c"]]) , ("blankLine", "a,b,c\n\nd,e,f\n\n", [["a", "b", "c"], ["d", "e", "f"]]) , ("leadingSpace", " a, b, c\n", [[" a", " b", " c"]]) , ("rfc4180", rfc4180Input, rfc4180Output) ] decodeWithTests = [ ("tab-delim", defDec { decDelimiter = 9 }, "1\t2", [["1", "2"]]) ] encodeTest (name, input, expected) = testCase name $ input `encodesAs` expected encodeTestUnqtd (name, input, expected) = testCase name $ encodesWithAs defEncNoneEnq input expected encodeTestAllqtd (name, input, expected) = testCase name $ encodesWithAs defEncAllEnq input expected decodeTest (name, input, expected) = testCase name $ input `decodesAs` expected decodeWithTest (name, opts, input, expected) = testCase name $ decodesWithAs opts input expected streamingDecodeTest (name, input, expected) = testCase name $ input `decodesStreamingAs` expected streamingDecodeWithTest (name, opts, input, expected) = testCase name $ decodesWithStreamingAs opts input expected defEnc = defaultEncodeOptions defEncNoneEnq = defaultEncodeOptions { encQuoting = QuoteNone } defEncAllEnq = defaultEncodeOptions { encQuoting = QuoteAll } defDec = defaultDecodeOptions nameBasedTests :: [TF.Test] nameBasedTests = [ testGroup "encode" $ map encodeTest [ ("simple", ["field"], [[("field", "abc")]], "field\r\nabc\r\n") , ("twoFields", ["field1", "field2"], [[("field1", "abc"), ("field2", "def")]], "field1,field2\r\nabc,def\r\n") , ("twoRecords", ["field"], [[("field", "abc")], [("field", "def")]], "field\r\nabc\r\ndef\r\n") ] , testGroup "encodeWith" $ map encodeWithTest [ ("no header", defEnc {encIncludeHeader = False}, ["field"], [[("field", "abc")]], "abc\r\n") ] , testGroup "decode" $ map decodeTest decodeTests , testGroup "streaming" [ testGroup "decode" $ map streamingDecodeTest decodeTests ] ] where decodeTests = [ ("simple", "field\r\nabc\r\n", ["field"], [[("field", "abc")]]) , ("twoFields", "field1,field2\r\nabc,def\r\n", ["field1", "field2"], [[("field1", "abc"), ("field2", "def")]]) , ("twoRecords", "field\r\nabc\r\ndef\r\n", ["field"], [[("field", "abc")], [("field", "def")]]) , ("cr header", "field\rabc", ["field"], [[("field", "abc")]]) , ("cr trailing", "field\rabc\r", ["field"], [[("field", "abc")]]) , ("cr separator", "field\rabc\rdef", ["field"], [[("field", "abc")],[("field","def")]]) ] encodeTest (name, hdr, input, expected) = testCase name $ namedEncodesAs hdr input expected encodeWithTest (name, opts, hdr, input, expected) = testCase name $ namedEncodesWithAs opts hdr input expected decodeTest (name, input, hdr, expected) = testCase name $ namedDecodesAs input hdr expected streamingDecodeTest (name, input, hdr, expected) = testCase name $ namedDecodesStreamingAs input hdr expected defEnc = defaultEncodeOptions ------------------------------------------------------------------------ -- Conversion tests instance Arbitrary B.ByteString where arbitrary = B.pack `fmap` arbitrary instance Arbitrary BL.ByteString where arbitrary = BL.fromChunks `fmap` arbitrary instance Arbitrary T.Text where arbitrary = T.pack `fmap` arbitrary instance Arbitrary LT.Text where arbitrary = LT.fromChunks `fmap` arbitrary -- A single column with an empty string is indistinguishable from an -- empty line (which we will ignore.) We therefore encode at least two -- columns. roundTrip :: (Eq a, FromField a, ToField a) => a -> Bool roundTrip x = Right (V.fromList record) == decode NoHeader (encode record) where record = [(x, dummy)] dummy = 'a' roundTripUnicode :: T.Text -> Assertion roundTripUnicode x = Right (V.fromList record) @=? decode NoHeader (encode record) where record = [(x, dummy)] dummy = 'a' boundary :: forall a. (Bounded a, Eq a, FromField a, ToField a) => a -> Bool boundary _dummy = roundTrip (minBound :: a) && roundTrip (maxBound :: a) partialDecode :: Parser a -> Assertion partialDecode p = case runParser p of Left _ -> return () Right _ -> assertFailure "expected partial field decode" expect :: (Eq a, Show a) => Parser a -> a -> Assertion expect p a0 = case runParser p of Right a -> a @=? a0 Left e -> assertFailure e conversionTests :: [TF.Test] conversionTests = [ testGroup "roundTrip" [ testProperty "Char" (roundTrip :: Char -> Bool) , testProperty "ByteString" (roundTrip :: B.ByteString -> Bool) , testProperty "Int" (roundTrip :: Int -> Bool) , testProperty "Integer" (roundTrip :: Integer -> Bool) , testProperty "Int8" (roundTrip :: Int8 -> Bool) , testProperty "Int16" (roundTrip :: Int16 -> Bool) , testProperty "Int32" (roundTrip :: Int32 -> Bool) , testProperty "Int64" (roundTrip :: Int64 -> Bool) , testProperty "Word" (roundTrip :: Word -> Bool) , testProperty "Word8" (roundTrip :: Word8 -> Bool) , testProperty "Word16" (roundTrip :: Word16 -> Bool) , testProperty "Word32" (roundTrip :: Word32 -> Bool) , testProperty "Word64" (roundTrip :: Word64 -> Bool) , testProperty "lazy ByteString" (roundTrip :: BL.ByteString -> Bool) , testProperty "Text" (roundTrip :: T.Text -> Bool) , testProperty "lazy Text" (roundTrip :: LT.Text -> Bool) ] , testGroup "boundary" [ testProperty "Int" (boundary (undefined :: Int)) , testProperty "Int8" (boundary (undefined :: Int8)) , testProperty "Int16" (boundary (undefined :: Int16)) , testProperty "Int32" (boundary (undefined :: Int32)) , testProperty "Int64" (boundary (undefined :: Int64)) , testProperty "Word" (boundary (undefined :: Word)) , testProperty "Word8" (boundary (undefined :: Word8)) , testProperty "Word16" (boundary (undefined :: Word16)) , testProperty "Word32" (boundary (undefined :: Word32)) , testProperty "Word64" (boundary (undefined :: Word64)) ] , testGroup "Unicode" [ testCase "Chinese" (roundTripUnicode "我能吞下玻璃而不伤身体。") , testCase "Icelandic" (roundTripUnicode "Sævör grét áðan því úlpan var ónýt.") , testCase "Turkish" (roundTripUnicode "Cam yiyebilirim, bana zararı dokunmaz.") ] , testGroup "Partial Decodes" [ testCase "Int" (partialDecode (parseField "12.7" :: Parser Int)) , testCase "Word" (partialDecode (parseField "12.7" :: Parser Word)) , testCase "Double" (partialDecode (parseField "1.0+" :: Parser Double)) , testCase "Integer" (partialDecode (parseField "1e6" :: Parser Integer)) ] , testGroup "Space trimming" [ testCase "_Int" (expect (parseField " 12" :: Parser Int) 12) , testCase "Int_" (expect (parseField "12 " :: Parser Int) 12) , testCase "_Int_" (expect (parseField " 12 " :: Parser Int) 12) , testCase "_Word" (expect (parseField " 12" :: Parser Word) 12) , testCase "Word_" (expect (parseField "12 " :: Parser Word) 12) , testCase "_Word_" (expect (parseField " 12 " :: Parser Word) 12) , testCase "_Double" (expect (parseField " 1.2e1" :: Parser Double) 12) , testCase "Double_" (expect (parseField "1.2e1 " :: Parser Double) 12) , testCase "_Double_" (expect (parseField " 1.2e1 " :: Parser Double) 12) ] ] ------------------------------------------------------------------------ -- Custom options tests customDelim :: Word8 -> B.ByteString -> B.ByteString -> Property customDelim delim f1 f2 = delim `notElem` [cr, nl, dquote] ==> (decodeWith decOpts NoHeader (encodeWith encOpts [V.fromList [f1, f2]]) == Right (V.fromList [V.fromList [f1, f2]])) where encOpts = defaultEncodeOptions { encDelimiter = delim } decOpts = defaultDecodeOptions { decDelimiter = delim } nl = 10 cr = 13 dquote = 34 customOptionsTests :: [TF.Test] customOptionsTests = [ testProperty "customDelim" customDelim ] ------------------------------------------------------------------------ -- Test harness allTests :: [TF.Test] allTests = [ testGroup "positional" positionalTests , testGroup "named" nameBasedTests , testGroup "conversion" conversionTests , testGroup "custom-options" customOptionsTests ] main :: IO () main = defaultMain allTests