s-cargot-0.1.6.0/0000755000000000000000000000000007346545000011564 5ustar0000000000000000s-cargot-0.1.6.0/CHANGELOG.md0000644000000000000000000000335507346545000013403 0ustar0000000000000000v0.1.6.0 ======= Fixes: * Some fixes to avoid trailing whitespace in pretty-printing. (Thank you, @kquick!) v0.1.5.0 ======= Fixes: * Bumped the `text` version to allow GHC 9.4 compatibility. v0.1.4.0 ======= Features: * Added `encodeLazy` and `encodeOneLazy` functions to producing lazy text Fixes: * Added the `Located` type for source location tracking for `atom` values (thanks ckoparkar!) * Added `unconstrainedPrint`, which does not try to restrict a printed s-expression to a fixed width but will attempt to indent it in a reasonable way nonetheless. v0.1.3.0 ======= Features: * Added the `Located` type for source location tracking for `atom` values (thanks ckoparkar!) * Added `unconstrainedPrint`, which does not try to restrict a printed s-expression to a fixed width but will attempt to indent it in a reasonable way nonetheless. Fixes: * Pretty-printing configurations created with `flatPrint` now use a _much_ more efficient pretty-printer. * Internally, pretty-printers use a richer type which improves performance somewhat by cutting down on repeated intermediate printing, and future work will build on this to make printing even more efficient. v0.1.2.0 ======= * Added `atom` and `mkAtomParser` helper functions for new user-defined atom types. * New parsers for various atom types: * Exported parsers for individual Haskell literals, to allow building new variations on the `HaskLike` atom type. * Added syntaxes for arbitrary-base numeric literals in the style of Common Lisp and M4 * Added a suite of basic QuickCheck tests * Compatibility fix: GHC 7.8 didn't allow type signatures on pattern synonyms. v0.1.1.0 ======= * Strongly considered but did not keep a changelog. …sorry. s-cargot-0.1.6.0/Data/0000755000000000000000000000000007346545000012435 5ustar0000000000000000s-cargot-0.1.6.0/Data/SCargot.hs0000644000000000000000000000654207346545000014342 0ustar0000000000000000module Data.SCargot ( -- * SCargot Basics -- $intro -- * Parsing and Printing decode , decodeOne , encode , encodeOne -- * Parser Construction -- ** Specifying a Parser , SExprParser , Reader , Comment , mkParser , setCarrier , addReader , setComment , asRich , asWellFormed , withQuote -- * Printer Construction -- * Specifying a Pretty-Printer , SExprPrinter , Indent(..) , basicPrint , flatPrint , unconstrainedPrint , setFromCarrier , setMaxWidth , removeMaxWidth , setIndentAmount , setIndentStrategy ) where import Data.SCargot.Parse import Data.SCargot.Print {- $intro The S-Cargot library is a library for parsing and emitting , designed to be as flexible as possible. Despite some efforts at , s-expressions are a general approach to describing a data format that can very often differ in subtle, incompatible ways: the s-expressions understood by Common Lisp are different from the s-expressions understood by Scheme, and even the different revisions of the Scheme language understand s-expressions in a slightly different way. To accomodate this, the S-Cargot library provides a toolbox for defining variations on s-expressions, complete with the ability to select various comment syntaxes, reader macros, and atom types. If all you want is to read some s-expressions and don't care about the edge cases of the format, or all you want is a new configuration format, try the "Data.SCargot.Language.Basic" or "Data.SCargot.Language.HaskLike" modules, which define an s-expression language whose atoms are plain strings and Haskell literals, respectively. The S-Cargot library works by specifying values which contain all the information needed to either parse or print an s-expression. The actual s-expression structure is parsed as a structure of as represented by the 'SExpr' type, but can alternately be exposed as the isomorphic 'RichSExpr' type or the less expressive but easier-to-work-with 'WellFormedSExpr' type. Modules devoted to each representation type (in "Data.SCargot.Repr.Basic", "Data.SCargot.Repr.Rich", and "Data.SCargot.Repr.WellFormed") provide helper functions, lenses, and pattern synonyms to make creating and processing these values easier. The details of how to parse a given structure are represented by building up a 'SExprParser' value, which is defined in "Data.SCargot.Parse" and re-exported here. A minimal 'SExprParser' defines only how to parse the atoms of the language; helper functions can define comment syntaxes, reader macros, and transformations over the parsed structure. The details of how to print a given structure are represented by building up a 'SExprPrinter' value, which is defined in "Data.SCargot.Print" and re-exported here. A minimal 'SExprPrinter' defines only how to print the atoms of the language; helper functions help with the layout of the pretty-printed s-expression in terms of how to indent the surrounding expression. Other helper modules define useful primitives for building up s-expression languages: the "Data.SCargot.Common" module provides parsers for common literals, while the "Data.SCargot.Comments" module provides parsers for comment syntaxes borrowed from various other languages. -} s-cargot-0.1.6.0/Data/SCargot/0000755000000000000000000000000007346545000013777 5ustar0000000000000000s-cargot-0.1.6.0/Data/SCargot/Atom.hs0000644000000000000000000000216307346545000015235 0ustar0000000000000000module Data.SCargot.Atom ( -- $intro atom , mkAtomParser ) where import Data.SCargot.Parse (SExprParser, mkParser) import Data.SCargot.Repr (SExpr) import Text.Parsec (choice) import Text.Parsec.Text (Parser) -- | A convenience function for defining an atom parser from a wrapper -- function and a parser. This is identical to 'fmap' specialized to -- operate over 'Parser' values, and is provided as sugar. atom :: (t -> atom) -> Parser t -> Parser atom atom = fmap -- | A convenience function for defining a 'SExprSpec' from a list of -- possible atom parsers, which will be tried in sequence before failing. mkAtomParser :: [Parser atom] -> SExprParser atom (SExpr atom) mkAtomParser = mkParser . choice {- $intro This module defines small convenience functions for building an atom type from several individual parsers. This is easy to do without these functions, but these functions communicate intent more directly: > data Atom > = Ident Text > | Num Integer > > myParser :: SExprParser Atom (SExpr Atom) > myParser = mkAtomParser > [ atom Ident parseR7RSIdent > , atom Num signedDecNumber > ] -} s-cargot-0.1.6.0/Data/SCargot/Comments.hs0000644000000000000000000001343707346545000016130 0ustar0000000000000000{-# LANGUAGE OverloadedStrings #-} module Data.SCargot.Comments ( -- $intro -- * Lisp-Style Syntax -- $lisp withLispComments -- * Other Existing Comment Syntaxes -- ** Scripting Language Syntax -- $script , withOctothorpeComments -- ** Prolog- or Matlab-Style Syntax , withPercentComments , withPercentBlockComments -- ** C-Style Syntax -- $clike , withCLikeLineComments , withCLikeBlockComments , withCLikeComments -- ** Haskell-Style Syntax -- $haskell , withHaskellLineComments , withHaskellBlockComments , withHaskellComments -- * Comment Syntax Helper Functions , lineComment , simpleBlockComment ) where import Text.Parsec ( (<|>) , anyChar , manyTill , noneOf , skipMany , string ) import Data.SCargot.Parse ( Comment , SExprParser , setComment ) -- | Given a string, produce a comment parser that matches that -- initial string and ignores everything until the end of the -- line. lineComment :: String -> Comment lineComment s = string s >> skipMany (noneOf "\n") >> return () -- | Given two strings, a begin and an end delimiter, produce a -- parser that matches the beginning delimiter and then ignores -- everything until it finds the end delimiter. This does not -- consider nesting, so, for example, a comment created with -- -- > curlyComment :: Comment -- > curlyComment = simpleBlockComment "{" "}" -- -- will consider -- -- > { this { comment } -- -- to be a complete comment, despite the apparent improper nesting. -- This is analogous to standard C-style comments in which -- -- > /* this /* comment */ -- -- is a complete comment. simpleBlockComment :: String -> String -> Comment simpleBlockComment begin end = string begin >> manyTill anyChar (string end) >> return () -- | Lisp-style line-oriented comments start with @;@ and last -- until the end of the line. This is usually the comment -- syntax you want. withLispComments :: SExprParser t a -> SExprParser t a withLispComments = setComment (lineComment ";") -- | C++-like line-oriented comment start with @//@ and last -- until the end of the line. withCLikeLineComments :: SExprParser t a -> SExprParser t a withCLikeLineComments = setComment (lineComment "//") -- | C-like block comments start with @/*@ and end with @*/@. -- They do not nest. withCLikeBlockComments :: SExprParser t a -> SExprParser t a withCLikeBlockComments = setComment (simpleBlockComment "/*" "*/") -- | C-like comments include both line- and block-comments, the -- former starting with @//@ and the latter contained within -- @//* ... *//@. withCLikeComments :: SExprParser t a -> SExprParser t a withCLikeComments = setComment (lineComment "//" <|> simpleBlockComment "/*" "*/") -- | Haskell line-oriented comments start with @--@ and last -- until the end of the line. withHaskellLineComments :: SExprParser t a -> SExprParser t a withHaskellLineComments = setComment (lineComment "--") -- | Haskell block comments start with @{-@ and end with @-}@. -- They do not nest. withHaskellBlockComments :: SExprParser t a -> SExprParser t a withHaskellBlockComments = setComment (simpleBlockComment "{-" "-}") -- | Haskell comments include both the line-oriented @--@ comments -- and the block-oriented @{- ... -}@ comments withHaskellComments :: SExprParser t a -> SExprParser t a withHaskellComments = setComment (lineComment "--" <|> simpleBlockComment "{-" "-}") -- | Many scripting and shell languages use these, which begin with -- @#@ and last until the end of the line. withOctothorpeComments :: SExprParser t a -> SExprParser t a withOctothorpeComments = setComment (lineComment "#") -- | MATLAB, Prolog, PostScript, and others use comments which begin -- with @%@ and last until the end of the line. withPercentComments :: SExprParser t a -> SExprParser t a withPercentComments = setComment (lineComment "%") -- | MATLAB block comments are started with @%{@ and end with @%}@. withPercentBlockComments :: SExprParser t a -> SExprParser t a withPercentBlockComments = setComment (simpleBlockComment "%{" "%}") {- $intro By default a 'SExprParser' will not understand any kind of comment syntax. Most varieties of s-expression will, however, want some kind of commenting capability, so the below functions will produce a new 'SExprParser' which understands various kinds of comment syntaxes. For example: > mySpec :: SExprParser Text (SExpr Text) > mySpec = asWellFormed $ mkParser (pack <$> many1 alphaNum) > > myLispySpec :: SExprParser Text (SExpr Text) > myLispySpec = withLispComments mySpec > > myCLikeSpec :: SExprParser Text (SExpr Text) > myCLikeSpec = withCLikeComment mySpec We can then use these to parse s-expressions with different kinds of comment syntaxes: >>> decode mySpec "(foo ; a lisp comment\n bar)\n" Left "(line 1, column 6):\nunexpected \";\"\nexpecting space or atom" >>> decode myLispySpec "(foo ; a lisp comment\n bar)\n" Right [WFSList [WFSAtom "foo", WFSAtom "bar"]] >>> decode mySpec "(foo /* a c-like\n comment */ bar)\n" Left "(line 1, column 6):\nunexpected \"/\"\nexpecting space or atom" >>> decode myCLikeSpec "(foo /* a c-like\n comment */ bar)\n" Right [WFSList [WFSAtom "foo", WFSAtom "bar"]] -} {- $lisp > (one ; a comment > two ; another one > three) -} {- $script > (one # a comment > two # another one > three) -} {- $clike > (one // a comment > two /* another > one */ > three) -} -- $haskell -- > (one -- a comment -- > two {- another -- > one -} -- > three) s-cargot-0.1.6.0/Data/SCargot/Common.hs0000644000000000000000000003516107346545000015571 0ustar0000000000000000module Data.SCargot.Common ( -- $intro -- * Identifier Syntaxes parseR5RSIdent , parseR6RSIdent , parseR7RSIdent , parseXIDIdentStrict , parseXIDIdentGeneral , parseHaskellIdent , parseHaskellVariable , parseHaskellConstructor -- * Numeric Literal Parsers , signed , prefixedNumber , signedPrefixedNumber , binNumber , signedBinNumber , octNumber , signedOctNumber , decNumber , signedDecNumber , dozNumber , signedDozNumber , hexNumber , signedHexNumber -- ** Numeric Literals for Arbitrary Bases , commonLispNumberAnyBase , gnuM4NumberAnyBase -- ** Source locations , Location(..), Located(..), located, dLocation ) where #if !MIN_VERSION_base(4,8,0) import Control.Applicative hiding ((<|>), many) #endif import Control.Monad (guard) import Data.Char import Data.Text (Text) import qualified Data.Text as T import Text.Parsec import Text.Parsec.Pos (newPos) import Text.Parsec.Text (Parser) -- | Parse an identifier according to the R5RS Scheme standard. This -- will not normalize case, even though the R5RS standard specifies -- that all identifiers be normalized to lower case first. -- -- An R5RS identifier is, broadly speaking, alphabetic or numeric -- and may include various symbols, but no escapes. parseR5RSIdent :: Parser Text parseR5RSIdent = T.pack <$> ((:) <$> initial <*> many subsequent <|> peculiar) where initial = letter <|> oneOf "!$%&*/:<=>?^_~" subsequent = initial <|> digit <|> oneOf "+-.@" peculiar = string "+" <|> string "-" <|> string "..." hasCategory :: Char -> [GeneralCategory] -> Bool hasCategory c cs = generalCategory c `elem` cs -- | Parse an identifier according to the R6RS Scheme standard. An -- R6RS identifier may include inline hexadecimal escape sequences -- so that, for example, @foo@ is equivalent to @f\\x6f;o@, and is -- more liberal than R5RS as to which Unicode characters it may -- accept. parseR6RSIdent :: Parser Text parseR6RSIdent = T.pack <$> ((:) <$> initial <*> many subsequent <|> peculiar) where initial = constituent <|> oneOf "!$%&*/:<=>?^_~" <|> inlineHex constituent = letter <|> uniClass (\ c -> isLetter c || isSymbol c || hasCategory c [ NonSpacingMark , LetterNumber , OtherNumber , DashPunctuation , ConnectorPunctuation , OtherPunctuation , PrivateUse ]) inlineHex = (chr . fromIntegral) <$> (string "\\x" *> hexNumber <* char ';') subsequent = initial <|> digit <|> oneOf "+-.@" <|> uniClass (\ c -> hasCategory c [ DecimalNumber , SpacingCombiningMark , EnclosingMark ]) peculiar = string "+" <|> string "-" <|> string "..." <|> ((++) <$> string "->" <*> many subsequent) uniClass :: (Char -> Bool) -> Parser Char uniClass sp = satisfy (\ c -> c > '\x7f' && sp c) -- | Parse an identifier according to the R7RS Scheme standard. An -- R7RS identifier, in addition to a typical identifier format, -- can also be a chunk of text surrounded by vertical bars that -- can contain spaces and other characters. Unlike R6RS, it does -- not allow escapes to be included in identifiers unless those -- identifiers are surrounded by vertical bars. parseR7RSIdent :: Parser Text parseR7RSIdent = T.pack <$> ( (:) <$> initial <*> many subsequent <|> char '|' *> many1 symbolElement <* char '|' <|> peculiar ) where initial = letter <|> specInit specInit = oneOf "!$%&*/:<=>?^_~" subsequent = initial <|> digit <|> specSubsequent specSubsequent = expSign <|> oneOf ".@" expSign = oneOf "+-" symbolElement = noneOf "\\|" <|> hexEscape <|> mnemEscape <|> ('|' <$ string "\\|") hexEscape = chr . fromIntegral <$> (string "\\x" *> hexNumber <* char ';') mnemEscape = '\a' <$ string "\\a" <|> '\b' <$ string "\\b" <|> '\t' <$ string "\\t" <|> '\n' <$ string "\\n" <|> '\r' <$ string "\\r" peculiar = (:[]) <$> expSign <|> cons2 <$> expSign <*> signSub <*> many subsequent <|> cons3 <$> expSign <*> char '.' <*> dotSub <*> many subsequent <|> cons2 <$> char '.' <*> dotSub <*> many subsequent dotSub = signSub <|> char '.' signSub = initial <|> expSign <|> char '@' cons2 a b cs = a : b : cs cons3 a b c ds = a : b : c : ds -- | Parse a Haskell variable identifier: a sequence of alphanumeric -- characters, underscores, or single quote that begins with a -- lower-case letter. parseHaskellVariable :: Parser Text parseHaskellVariable = T.pack <$> ((:) <$> small <*> many (small <|> large <|> digit' <|> char '\'' <|> char '_')) where small = satisfy isLower large = satisfy isUpper digit' = satisfy isDigit -- | Parse a Haskell constructor: a sequence of alphanumeric -- characters, underscores, or single quote that begins with an -- upper-case letter. parseHaskellConstructor :: Parser Text parseHaskellConstructor = T.pack <$> ((:) <$> large <*> many (small <|> large <|> digit' <|> char '\'' <|> char '_')) where small = satisfy isLower large = satisfy isUpper digit' = satisfy isDigit -- | Parse a Haskell identifer: a sequence of alphanumeric -- characters, underscores, or a single quote. This matches both -- variable and constructor names. parseHaskellIdent :: Parser Text parseHaskellIdent = T.pack <$> ((:) <$> (large <|> small) <*> many (small <|> large <|> digit' <|> char '\'' <|> char '_')) where small = satisfy isLower large = satisfy isUpper digit' = satisfy isDigit -- Ensure that a given character has the given Unicode category hasCat :: [GeneralCategory] -> Parser Char hasCat cats = satisfy (flip hasCategory cats) xidStart :: [GeneralCategory] xidStart = [ UppercaseLetter , LowercaseLetter , TitlecaseLetter , ModifierLetter , OtherLetter , LetterNumber ] xidContinue :: [GeneralCategory] xidContinue = xidStart ++ [ NonSpacingMark , SpacingCombiningMark , DecimalNumber , ConnectorPunctuation ] -- | Parse an identifier of unicode characters of the form -- @ *@, which corresponds strongly -- to the identifiers found in most C-like languages. Note that -- the @XID_Start@ category does not include the underscore, -- so @__foo@ is not a valid XID identifier. To parse -- identifiers that may include leading underscores, use -- 'parseXIDIdentGeneral'. parseXIDIdentStrict :: Parser Text parseXIDIdentStrict = T.pack <$> ((:) <$> hasCat xidStart <*> many (hasCat xidContinue)) -- | Parse an identifier of unicode characters of the form -- @( | '_') *@, which corresponds -- strongly to the identifiers found in most C-like languages. -- Unlike 'parseXIDIdentStrict', this will also accept an -- underscore as leading character, which corresponds more -- closely to programming languages like C and Java, but -- deviates somewhat from the -- . parseXIDIdentGeneral :: Parser Text parseXIDIdentGeneral = T.pack <$> ((:) <$> (hasCat xidStart <|> char '_') <*> many (hasCat xidContinue)) -- | A helper function for defining parsers for arbitrary-base integers. -- The first argument will be the base, and the second will be the -- parser for the individual digits. number :: Integer -> Parser Char -> Parser Integer number base digits = foldl go 0 <$> many1 digits where go x d = base * x + toInteger (value d) value c | c >= 'a' && c <= 'z' = 0xa + (fromEnum c - fromEnum 'a') | c >= 'A' && c <= 'Z' = 0xa + (fromEnum c - fromEnum 'A') | c >= '0' && c <= '9' = fromEnum c - fromEnum '0' | c == '\x218a' = 0xa | c == '\x218b' = 0xb | otherwise = error ("Unknown letter in number: " ++ show c) digitsFor :: Int -> [Char] digitsFor n | n <= 10 = take n ['0'..'9'] | n <= 36 = take (n-10) ['A'..'Z'] ++ take (n-10) ['a'..'z'] ++ ['0'..'9'] | otherwise = error ("Invalid base for parser: " ++ show n) anyBase :: Integer -> Parser Integer anyBase n = number n (oneOf (digitsFor (fromIntegral n))) -- | A parser for Common Lisp's arbitrary-base number syntax, of -- the form @#[base]r[number]@, where the base is given in -- decimal. Note that this syntax begins with a @#@, which -- means it might conflict with defined reader macros. commonLispNumberAnyBase :: Parser Integer commonLispNumberAnyBase = do _ <- char '#' n <- decNumber guard (n >= 2 && n <= 36) _ <- char 'r' signed (anyBase n) -- | A parser for GNU m4's arbitrary-base number syntax, of -- the form @0r[base]:[number]@, where the base is given in -- decimal. gnuM4NumberAnyBase :: Parser Integer gnuM4NumberAnyBase = do _ <- string "0r" n <- decNumber guard (n >= 2 && n <= 36) _ <- char ':' signed (anyBase n) sign :: Num a => Parser (a -> a) sign = (pure id <* char '+') <|> (pure negate <* char '-') <|> pure id -- | Given a parser for some kind of numeric literal, this will attempt to -- parse a leading @+@ or a leading @-@ followed by the numeric literal, -- and if a @-@ is found, negate that literal. signed :: Num a => Parser a -> Parser a signed p = ($) <$> sign <*> p -- | Parses a number in the same way as 'prefixedNumber', with an optional -- leading @+@ or @-@. signedPrefixedNumber :: Parser Integer signedPrefixedNumber = signed prefixedNumber -- | Parses a number, determining which numeric base to use by examining -- the literal's prefix: @0x@ for a hexadecimal number, @0z@ for a -- dozenal number, @0o@ for an octal number, and @0b@ for a binary -- number (as well as the upper-case versions of the same.) If the -- base is omitted entirely, then it is treated as a decimal number. prefixedNumber :: Parser Integer prefixedNumber = (string "0x" <|> string "0X") *> hexNumber <|> (string "0o" <|> string "0O") *> octNumber <|> (string "0z" <|> string "0Z") *> dozNumber <|> (string "0b" <|> string "0B") *> binNumber <|> decNumber -- | A parser for non-signed binary numbers binNumber :: Parser Integer binNumber = number 2 (char '0' <|> char '1') -- | A parser for signed binary numbers, with an optional leading @+@ or @-@. signedBinNumber :: Parser Integer signedBinNumber = signed binNumber -- | A parser for non-signed octal numbers octNumber :: Parser Integer octNumber = number 8 (oneOf "01234567") -- | A parser for signed octal numbers, with an optional leading @+@ or @-@. signedOctNumber :: Parser Integer signedOctNumber = ($) <$> sign <*> octNumber -- | A parser for non-signed decimal numbers decNumber :: Parser Integer decNumber = number 10 digit -- | A parser for signed decimal numbers, with an optional leading @+@ or @-@. signedDecNumber :: Parser Integer signedDecNumber = ($) <$> sign <*> decNumber dozDigit :: Parser Char dozDigit = digit <|> oneOf "AaBb\x218a\x218b" -- | A parser for non-signed duodecimal (dozenal) numbers. This understands both -- the ASCII characters @'a'@ and @'b'@ and the Unicode characters @'\x218a'@ (↊) -- and @'\x218b'@ (↋) as digits with the decimal values @10@ and @11@ -- respectively. dozNumber :: Parser Integer dozNumber = number 12 dozDigit -- | A parser for signed duodecimal (dozenal) numbers, with an optional leading @+@ or @-@. signedDozNumber :: Parser Integer signedDozNumber = ($) <$> sign <*> dozNumber -- | A parser for non-signed hexadecimal numbers hexNumber :: Parser Integer hexNumber = number 16 hexDigit -- | A parser for signed hexadecimal numbers, with an optional leading @+@ or @-@. signedHexNumber :: Parser Integer signedHexNumber = ($) <$> sign <*> hexNumber -- | data Location = Span !SourcePos !SourcePos deriving (Eq, Ord, Show) -- | Add support for source locations while parsing S-expressions, as described in this -- -- thread. data Located a = At !Location a deriving (Eq, Ord, Show) -- | Adds a source span to a parser. located :: Parser a -> Parser (Located a) located parser = do begin <- getPosition result <- parser end <- getPosition return $ At (Span begin end) result -- | A default location value dLocation :: Location dLocation = Span dPos dPos where dPos = newPos "" 0 0 {- $intro This module contains a selection of parsers for different kinds of identifiers and literals, from which more elaborate parsers can be assembled. These can afford the user a quick way of building parsers for different atom types. -} s-cargot-0.1.6.0/Data/SCargot/Language/0000755000000000000000000000000007346545000015522 5ustar0000000000000000s-cargot-0.1.6.0/Data/SCargot/Language/Basic.hs0000644000000000000000000000657107346545000017110 0ustar0000000000000000{-# LANGUAGE OverloadedStrings #-} module Data.SCargot.Language.Basic ( -- * Spec -- $descr basicParser , basicPrinter , locatedBasicParser , locatedBasicPrinter ) where import Control.Applicative ((<$>)) import Data.Char (isAlphaNum) import Text.Parsec (many1, satisfy) import Data.Text (Text, pack) import Data.Functor.Identity (Identity) import Text.Parsec.Prim (ParsecT) import Data.SCargot.Common (Located(..), located) import Data.SCargot.Repr.Basic (SExpr) import Data.SCargot ( SExprParser , SExprPrinter , mkParser , flatPrint ) isAtomChar :: Char -> Bool isAtomChar c = isAlphaNum c || c == '-' || c == '*' || c == '/' || c == '+' || c == '<' || c == '>' || c == '=' || c == '!' || c == '?' pToken :: ParsecT Text a Identity Text pToken = pack <$> many1 (satisfy isAtomChar) -- $descr -- The 'basicSpec' describes S-expressions whose atoms are simply -- text strings that contain alphanumeric characters and a small -- set of punctuation. It does no parsing of numbers or other data -- types, and will accept tokens that typical Lisp implementations -- would find nonsensical (like @77foo@). -- -- Atoms recognized by the 'basicSpec' are any string matching the -- regular expression @[A-Za-z0-9+*<>/=!?-]+@. -- | A 'SExprParser' that understands atoms to be sequences of -- alphanumeric characters as well as the punctuation -- characters @[-*/+<>=!?]@, and does no processing of them. -- -- >>> decode basicParser "(1 elephant)" -- Right [SCons (SAtom "1") (SCons (SAtom "elephant") SNil)] basicParser :: SExprParser Text (SExpr Text) basicParser = mkParser pToken -- | A 'basicParser' which produces 'Located' values -- -- >>> decode locatedBasicParser $ pack "(1 elephant)" -- Right [SCons (SAtom (At (Span (line 1, column 2) (line 1, column 3)) "1")) (SCons (SAtom (At (Span (line 1, column 4) (line 1, column 12)) "elephant")) SNil)] -- -- >>> decode locatedBasicParser $ pack "(let ((x 1))\n x)" -- Right [SCons (SAtom (At (Span (line 1, column 2) (line 1, column 5)) "let")) (SCons (SCons (SCons (SAtom (At (Span (line 1, column 8) (line 1, column 9)) "x")) (SCons (SAtom (At (Span (line 1, column 10) (line 1, column 11)) "1")) SNil)) SNil) (SCons (SAtom (At (Span (line 2, column 3) (line 2, column 4)) "x")) SNil))] locatedBasicParser :: SExprParser (Located Text) (SExpr (Located Text)) locatedBasicParser = mkParser $ located pToken -- | A 'SExprPrinter' that prints textual atoms directly (without quoting -- or any other processing) onto a single line. -- -- >>> encode basicPrinter [L [A "1", A "elephant"]] -- "(1 elephant)" basicPrinter :: SExprPrinter Text (SExpr Text) basicPrinter = flatPrint id -- | A 'SExprPrinter' for 'Located' values. Works exactly like 'basicPrinter' -- It ignores the location tags when printing the result. -- -- >>> let (Right dec) = decode locatedBasicParser $ pack "(1 elephant)" -- [SCons (SAtom (At (Span (line 1, column 2) (line 1, column 3)) "1")) (SCons (SAtom (At (Span (line 1, column 4) (line 1, column 12)) "elephant")) SNil)] -- -- >>> encode locatedBasicPrinter dec -- "(1 elephant)" locatedBasicPrinter :: SExprPrinter (Located Text) (SExpr (Located Text)) locatedBasicPrinter = flatPrint unLoc where unLoc (At _loc e) = e s-cargot-0.1.6.0/Data/SCargot/Language/HaskLike.hs0000644000000000000000000001723207346545000017556 0ustar0000000000000000{-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE OverloadedStrings #-} module Data.SCargot.Language.HaskLike ( -- $info HaskLikeAtom(..) , haskLikeParser , haskLikePrinter , locatedHaskLikeParser , locatedHaskLikePrinter -- * Individual Parsers , parseHaskellString , parseHaskellFloat , parseHaskellInt ) where #if !MIN_VERSION_base(4,8,0) import Control.Applicative ((<$>), (<$)) #endif import Data.Maybe (catMaybes) import Data.String (IsString(..)) import Data.Text (Text, pack) import Text.Parsec import Text.Parsec.Text (Parser) import Prelude hiding (concatMap) import Data.SCargot.Common import Data.SCargot.Repr.Basic (SExpr) import Data.SCargot (SExprParser, SExprPrinter, mkParser, flatPrint) {- $info This module is intended for simple, ad-hoc configuration or data formats that might not need their on rich structure but might benefit from a few various kinds of literals. The 'haskLikeParser' understands identifiers as defined by R5RS, as well as string, integer, and floating-point literals as defined by the Haskell 2010 spec. It does __not__ natively understand other data types, such as booleans, vectors, bitstrings. -} -- | An atom type that understands Haskell-like values as well as -- Scheme-like identifiers. data HaskLikeAtom = HSIdent Text -- ^ An identifier, parsed according to the R5RS Scheme -- standard | HSString Text -- ^ A string, parsed according to the syntax for string -- literals in the Haskell report | HSInt Integer -- ^ An arbitrary-sized integer value, parsed according to -- the syntax for integer literals in the Haskell report | HSFloat Double -- ^ A double-precision floating-point value, parsed -- according to the syntax for floats in the Haskell -- report deriving (Eq, Show) instance IsString HaskLikeAtom where fromString = HSIdent . fromString instance IsString (Located HaskLikeAtom) where fromString = (At dLocation) . HSIdent . fromString -- | Parse a Haskell string literal as defined by the Haskell 2010 -- language specification. parseHaskellString :: Parser Text parseHaskellString = pack . catMaybes <$> between (char '"') (char '"') (many (val <|> esc)) where val = Just <$> satisfy (\ c -> c /= '"' && c /= '\\' && c > '\026') esc = do _ <- char '\\' Nothing <$ (gap <|> char '&') <|> Just <$> code gap = many1 space >> char '\\' code = eEsc <|> eNum <|> eCtrl <|> eAscii eCtrl = char '^' >> unCtrl <$> upper eNum = (toEnum . fromInteger) <$> (decNumber <|> (char 'o' >> octNumber) <|> (char 'x' >> hexNumber)) eEsc = choice [ char a >> return b | (a, b) <- escMap ] eAscii = choice [ try (string a >> return b) | (a, b) <- asciiMap ] unCtrl c = toEnum (fromEnum c - fromEnum 'A' + 1) escMap :: [(Char, Char)] escMap = zip "abfntv\\\"\'" "\a\b\f\n\r\t\v\\\"\'" asciiMap :: [(String, Char)] asciiMap = zip ["BS","HT","LF","VT","FF","CR","SO","SI","EM" ,"FS","GS","RS","US","SP","NUL","SOH","STX","ETX" ,"EOT","ENQ","ACK","BEL","DLE","DC1","DC2","DC3" ,"DC4","NAK","SYN","ETB","CAN","SUB","ESC","DEL"] ("\BS\HT\LF\VT\FF\CR\SO\SI\EM\FS\GS\RS\US\SP\NUL\SOH" ++ "\STX\ETX\EOT\ENQ\ACK\BEL\DLE\DC1\DC2\DC3\DC4\NAK" ++ "\SYN\ETB\CAN\SUB\ESC\DEL") -- | Parse a Haskell floating-point number as defined by the Haskell -- 2010 language specification. parseHaskellFloat :: Parser Double parseHaskellFloat = do n <- decNumber withDot n <|> noDot n where withDot n = do _ <- char '.' m <- decNumber e <- option 1.0 expn return ((fromIntegral n + asDec m 0) * e) noDot n = do e <- expn return (fromIntegral n * e) expn = do _ <- oneOf "eE" s <- power x <- decNumber return (10 ** s (fromIntegral x)) asDec 0 k = k asDec n k = asDec (n `div` 10) ((fromIntegral (n `rem` 10) + k) * 0.1) power :: Num a => Parser (a -> a) power = negate <$ char '-' <|> id <$ char '+' <|> return id -- | Parse a Haskell integer literal as defined by the Haskell 2010 -- language specification. parseHaskellInt :: Parser Integer parseHaskellInt = do s <- power n <- pZeroNum <|> decNumber return (fromIntegral (s n)) pZeroNum :: Parser Integer pZeroNum = char '0' >> ( (oneOf "xX" >> hexNumber) <|> (oneOf "oO" >> octNumber) <|> decNumber <|> return 0 ) pHaskLikeAtom :: Parser HaskLikeAtom pHaskLikeAtom = HSFloat <$> (try parseHaskellFloat "float") <|> HSInt <$> (try parseHaskellInt "integer") <|> HSString <$> (parseHaskellString "string literal") <|> HSIdent <$> (parseR5RSIdent "token") sHaskLikeAtom :: HaskLikeAtom -> Text sHaskLikeAtom (HSIdent t) = t sHaskLikeAtom (HSString s) = pack (show s) sHaskLikeAtom (HSInt i) = pack (show i) sHaskLikeAtom (HSFloat f) = pack (show f) -- | This `SExprParser` understands s-expressions that contain -- Scheme-like tokens, as well as string literals, integer -- literals, and floating-point literals. Each of these values -- is parsed according to the lexical rules in the Haskell -- report, so the same set of string escapes, numeric bases, -- and floating-point options are available. This spec does -- not parse comments and does not understand any reader -- macros. -- -- >>> decode haskLikeParser "(0x01 \"\\x65lephant\")" -- Right [SCons (SAtom (HSInt 1)) (SCons (SAtom (HSString "elephant")) SNil)] haskLikeParser :: SExprParser HaskLikeAtom (SExpr HaskLikeAtom) haskLikeParser = mkParser pHaskLikeAtom -- | A 'haskLikeParser' which produces 'Located' values -- -- >>> decode locatedHaskLikeParser $ pack "(0x01 \"\\x65lephant\")" -- Right [SCons (SAtom (At (Span (line 1, column 2) (line 1, column 6)) (HSInt 1))) (SCons (SAtom (At (Span (line 1, column 7) (line 1, column 20)) (HSString "elephant"))) SNil)] -- -- >>> decode locatedHaskLikeParser $ pack "(1 elephant)" -- Right [SCons (SAtom (At (Span (line 1, column 2) (line 1, column 3)) (HSInt 1))) (SCons (SAtom (At (Span (line 1, column 4) (line 1, column 12)) (HSIdent "elephant"))) SNil)] locatedHaskLikeParser :: SExprParser (Located HaskLikeAtom) (SExpr (Located HaskLikeAtom)) locatedHaskLikeParser = mkParser $ located pHaskLikeAtom -- | This 'SExprPrinter' emits s-expressions that contain Scheme-like -- tokens as well as string literals, integer literals, and floating-point -- literals, which will be emitted as the literals produced by Haskell's -- 'show' function. This printer will produce a flat s-expression with -- no indentation of any kind. -- -- >>> encode haskLikePrinter [L [A (HSInt 1), A (HSString "elephant")]] -- "(1 \"elephant\")" haskLikePrinter :: SExprPrinter HaskLikeAtom (SExpr HaskLikeAtom) haskLikePrinter = flatPrint sHaskLikeAtom -- | Ignore location tags when packing values into text sLocatedHasklikeAtom :: Located HaskLikeAtom -> Text sLocatedHasklikeAtom (At _loc e) = sHaskLikeAtom e -- | A 'SExprPrinter' for 'Located' values. Works exactly like 'haskLikePrinter' -- It ignores the location tags when printing the result. -- -- >>> let (Right dec) = decode locatedHaskLikeParser $ pack "(1 elephant)" -- [SCons (SAtom (At (Span (line 1, column 2) (line 1, column 3)) (HSInt 1))) (SCons (SAtom (At (Span (line 1, column 4) (line 1, column 12)) (HSIdent "elephant"))) SNil)] -- -- >>> encode locatedHaskLikePrinter dec -- "(1 elephant)" locatedHaskLikePrinter :: SExprPrinter (Located HaskLikeAtom) (SExpr (Located HaskLikeAtom)) locatedHaskLikePrinter = flatPrint sLocatedHasklikeAtom s-cargot-0.1.6.0/Data/SCargot/Parse.hs0000644000000000000000000002321307346545000015406 0ustar0000000000000000{-# LANGUAGE ViewPatterns #-} {-# LANGUAGE OverloadedStrings #-} module Data.SCargot.Parse ( -- * Parsing decode , decodeOne -- * Parsing Control , SExprParser , Reader , Comment , mkParser , setCarrier , addReader , setComment -- * Specific SExprParser Conversions , asRich , asWellFormed , withQuote ) where #if !MIN_VERSION_base(4,8,0) import Control.Applicative ((<$>), (<*), pure) #endif import Control.Monad ((>=>)) import Data.Map.Strict (Map) import qualified Data.Map.Strict as M import Data.Text (Text) import Data.String (IsString) import Text.Parsec ( (<|>) , () , char , eof , lookAhead , many1 , runParser , skipMany ) import Text.Parsec.Char (anyChar, space) import Text.Parsec.Text (Parser) import Data.SCargot.Repr ( SExpr(..) , RichSExpr , WellFormedSExpr , toRich , toWellFormed ) type ReaderMacroMap atom = Map Char (Reader atom) -- | A 'Reader' represents a reader macro: it takes a parser for -- the S-Expression type and performs as much or as little -- parsing as it would like, and then returns an S-expression. type Reader atom = (Parser (SExpr atom) -> Parser (SExpr atom)) -- | A 'Comment' represents any kind of skippable comment. This -- parser __must__ be able to fail if a comment is not being -- recognized, and it __must__ not consume any input in case -- of failure. type Comment = Parser () -- | A 'SExprParser' describes a parser for a particular value -- that has been serialized as an s-expression. The @atom@ parameter -- corresponds to a Haskell type used to represent the atoms, -- and the @carrier@ parameter corresponds to the parsed S-Expression -- structure. data SExprParser atom carrier = SExprParser { sesPAtom :: Parser atom , readerMap :: ReaderMacroMap atom , comment :: Maybe Comment , postparse :: SExpr atom -> Either String carrier } -- | Create a basic 'SExprParser' when given a parser -- for an atom type. -- -- >>> import Text.Parsec (alphaNum, many1) -- >>> let parser = mkParser (many1 alphaNum) -- >>> decode parser "(ele phant)" -- Right [SCons (SAtom "ele") (SCons (SAtom "phant") SNil)] mkParser :: Parser atom -> SExprParser atom (SExpr atom) mkParser parser = SExprParser { sesPAtom = parser , readerMap = M.empty , comment = Nothing , postparse = return } -- | Modify the carrier type for a 'SExprParser'. This is -- used internally to convert between various 'SExpr' representations, -- but could also be used externally to add an extra conversion layer -- onto a 'SExprParser'. -- -- >>> import Text.Parsec (alphaNum, many1) -- >>> import Data.SCargot.Repr (toRich) -- >>> let parser = setCarrier (return . toRich) (mkParser (many1 alphaNum)) -- >>> decode parser "(ele phant)" -- Right [RSlist [RSAtom "ele",RSAtom "phant"]] setCarrier :: (b -> Either String c) -> SExprParser a b -> SExprParser a c setCarrier f spec = spec { postparse = postparse spec >=> f } -- | Convert the final output representation from the 'SExpr' type -- to the 'RichSExpr' type. -- -- >>> import Text.Parsec (alphaNum, many1) -- >>> let parser = asRich (mkParser (many1 alphaNum)) -- >>> decode parser "(ele phant)" -- Right [RSlist [RSAtom "ele",RSAtom "phant"]] asRich :: SExprParser a (SExpr b) -> SExprParser a (RichSExpr b) asRich = setCarrier (return . toRich) -- | Convert the final output representation from the 'SExpr' type -- to the 'WellFormedSExpr' type. -- -- >>> import Text.Parsec (alphaNum, many1) -- >>> let parser = asWellFormed (mkParser (many1 alphaNum)) -- >>> decode parser "(ele phant)" -- Right [WFSList [WFSAtom "ele",WFSAtom "phant"]] asWellFormed :: SExprParser a (SExpr b) -> SExprParser a (WellFormedSExpr b) asWellFormed = setCarrier toWellFormed -- | Add the ability to execute some particular reader macro, as -- defined by its initial character and the 'Parser' which returns -- the parsed S-Expression. The 'Reader' is passed a 'Parser' which -- can be recursively called to parse more S-Expressions, and begins -- parsing after the reader character has been removed from the -- stream. -- -- >>> import Text.Parsec (alphaNum, char, many1) -- >>> let vecReader p = (char ']' *> pure SNil) <|> (SCons <$> p <*> vecReader p) -- >>> let parser = addReader '[' vecReader (mkParser (many1 alphaNum)) -- >>> decode parser "(an [ele phant])" -- Right [SCons (SAtom "an") (SCons (SCons (SAtom "ele") (SCons (SAtom "phant") SNil)) SNil)] addReader :: Char -> Reader a -> SExprParser a c -> SExprParser a c addReader c reader spec = spec { readerMap = M.insert c reader (readerMap spec) } -- | Add the ability to ignore some kind of comment. This gets -- factored into whitespace parsing, and it's very important that -- the parser supplied __be able to fail__ (as otherwise it will -- cause an infinite loop), and also that it __not consume any input__ -- (which may require it to be wrapped in 'try'.) -- -- >>> import Text.Parsec (alphaNum, anyChar, manyTill, many1, string) -- >>> let comment = string "//" *> manyTill anyChar newline *> pure () -- >>> let parser = setComment comment (mkParser (many1 alphaNum)) -- >>> decode parser "(ele //a comment\n phant)" -- Right [SCons (SAtom "ele") (SCons (SAtom "phant") SNil)] setComment :: Comment -> SExprParser a c -> SExprParser a c setComment c spec = spec { comment = Just (c "comment") } -- | Add the ability to understand a quoted S-Expression. -- Many Lisps use @'sexpr@ as sugar for @(quote sexpr)@. This -- assumes that the underlying atom type implements the "IsString" -- class, and will create the @quote@ atom using @fromString "quote"@. -- -- >>> import Text.Parsec (alphaNum, many1) -- >>> let parser = withQuote (mkParser (many1 alphaNum)) -- >>> decode parser "'elephant" -- Right [SCons (SAtom "quote") (SCons (SAtom "foo") SNil)] withQuote :: IsString t => SExprParser t (SExpr t) -> SExprParser t (SExpr t) withQuote = addReader '\'' (fmap go) where go s = SCons "quote" (SCons s SNil) peekChar :: Parser (Maybe Char) peekChar = Just <$> lookAhead anyChar <|> pure Nothing parseGenericSExpr :: Parser atom -> ReaderMacroMap atom -> Parser () -> Parser (SExpr atom) parseGenericSExpr atom reader skip = do let sExpr = parseGenericSExpr atom reader skip "s-expr" skip c <- peekChar r <- case c of Nothing -> fail "Unexpected end of input" Just '(' -> char '(' >> skip >> parseList sExpr skip Just (flip M.lookup reader -> Just r) -> anyChar >> r sExpr _ -> SAtom `fmap` atom skip return r parseList :: Parser (SExpr atom) -> Parser () -> Parser (SExpr atom) parseList sExpr skip = do i <- peekChar case i of Nothing -> fail "Unexpected end of input" Just ')' -> char ')' >> return SNil _ -> do car <- sExpr skip c <- peekChar case c of Just '.' -> do _ <- char '.' cdr <- sExpr skip _ <- char ')' skip return (SCons car cdr) Just ')' -> do _ <- char ')' skip return (SCons car SNil) _ -> do cdr <- parseList sExpr skip return (SCons car cdr) -- | Given a CommentMap, create the corresponding parser to -- skip those comments (if they exist). buildSkip :: Maybe (Parser ()) -> Parser () buildSkip Nothing = skipMany space buildSkip (Just c) = alternate where alternate = skipMany space >> ((c >> alternate) <|> return ()) doParse :: Parser a -> Text -> Either String a doParse p t = case runParser p () "" t of Left err -> Left (show err) Right x -> Right x -- | Decode a single S-expression. If any trailing input is left after -- the S-expression (ignoring comments or whitespace) then this -- will fail: for those cases, use 'decode', which returns a list of -- all the S-expressions found at the top level. decodeOne :: SExprParser atom carrier -> Text -> Either String carrier decodeOne spec = doParse (parser <* eof) >=> (postparse spec) where parser = parseGenericSExpr (sesPAtom spec) (readerMap spec) (buildSkip (comment spec)) -- | Decode several S-expressions according to a given 'SExprParser'. This -- will return a list of every S-expression that appears at the top-level -- of the document. decode :: SExprParser atom carrier -> Text -> Either String [carrier] decode spec = doParse (many1 parser <* eof) >=> mapM (postparse spec) where parser = parseGenericSExpr (sesPAtom spec) (readerMap spec) (buildSkip (comment spec)) {- -- | Encode (without newlines) a single S-expression. encodeSExpr :: SExpr atom -> (atom -> Text) -> Text encodeSExpr SNil _ = "()" encodeSExpr (SAtom s) t = t s encodeSExpr (SCons x xs) t = go xs (encodeSExpr x t) where go (SAtom s) rs = "(" <> rs <> " . " <> t s <> ")" go SNil rs = "(" <> rs <> ")" go (SCons x xs) rs = go xs (rs <> " " <> encodeSExpr x t) -- | Emit an S-Expression in a machine-readable way. This does no -- pretty-printing or indentation, and produces no comments. encodeOne :: SExprParser atom carrier -> carrier -> Text encodeOne spec c = encodeSExpr (preserial spec c) (sesSAtom spec) encode :: SExprParser atom carrier -> [carrier] -> Text encode spec cs = T.concat (map (encodeOne spec) cs) -} s-cargot-0.1.6.0/Data/SCargot/Print.hs0000644000000000000000000004447007346545000015440 0ustar0000000000000000{-# LANGUAGE RecordWildCards #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ScopedTypeVariables #-} module Data.SCargot.Print ( -- * Pretty-Printing encodeOne , encode , encodeOneLazy , encodeLazy -- * Pretty-Printing Control , SExprPrinter , Indent(..) , setFromCarrier , setMaxWidth , removeMaxWidth , setIndentAmount , setIndentStrategy -- * Default Printing Strategies , basicPrint , flatPrint , unconstrainedPrint ) where import qualified Data.Foldable as F import Data.Monoid ((<>)) import qualified Data.Sequence as Seq import Data.Text (Text) import qualified Data.Text as T import qualified Data.Text.Lazy as TL import qualified Data.Text.Lazy.Builder as B import qualified Data.Traversable as T import Data.SCargot.Repr -- | The 'Indent' type is used to determine how to indent subsequent -- s-expressions in a list, after printing the head of the list. This only -- applies if the entire list cannot be printed within the allowable -- 'SExprPrinter.maxWidth'; a sub-maxWidth printing will not add newlines or -- indentation. data Indent = Swing -- ^ A 'Swing' indent will indent subsequent exprs some fixed -- amount more than the current line. -- -- > (foo -- > bar -- > baz -- > quux) -- -- Any 'SExprPrinter.indentAmount' applies relative to the entry at the -- head of the list; in the above example, the indentAmount is 1. | SwingAfter Int -- ^ A 'SwingAfter' @n@ indent will try to print the -- first @n@ expressions after the head on the same -- line as the head, and all after will be swung. -- 'SwingAfter' @0@ is equivalent to 'Swing'. -- -- > (foo bar -- > baz -- > quux) -- -- The 'SExprPrinter.indentAmount' is handled in the same way -- as for the 'Swing' setting. | Align -- ^ An 'Align' indent will print the first expression after the head -- on the same line, and subsequent expressions will be aligned with -- that one. Note that this ignores any 'SExprPrinter.indentAmount' -- specified for the printer. -- -- > (foo bar -- > baz -- > quux) deriving (Eq, Show) -- | A 'SExprPrinter' value describes how to print a given value as an -- s-expression. The @carrier@ type parameter indicates the value -- that will be printed, and the @atom@ parameter indicates the type -- that will represent tokens in an s-expression structure. data SExprPrinter atom carrier = SExprPrinter { atomPrinter :: atom -> Text -- ^ How to serialize a given atom to 'Text'. , fromCarrier :: carrier -> SExpr atom -- ^ How to turn a carrier type back into a 'Sexpr'. , swingIndent :: SExpr atom -> Indent -- ^ How to indent subsequent expressions, as determined by -- the head of the list. , indentAmount :: Int -- ^ How much to indent after a swung indentation, relative to the *head* -- element. , maxWidth :: Maybe Int -- ^ The maximum width (if any) If this is 'None' then the resulting -- s-expression might be printed on one line (if -- 'SExprPrinter.indentPrint' is 'False') and might be pretty-printed in -- the most naive way possible (if 'SExprPrinter.indentPrint' is 'True'). , indentPrint :: Bool -- ^ Whether to indent or not. } -- | A default 'SExprPrinter' struct that will always print a 'SExpr' -- as a single line. flatPrint :: (atom -> Text) -> SExprPrinter atom (SExpr atom) flatPrint = (\p -> p { indentPrint = False}) . removeMaxWidth . basicPrint -- | A default 'SExprPrinter' struct that will always swing subsequent -- expressions onto later lines if they're too long, indenting them -- by two spaces, and uses a soft maximum width of 80 characters basicPrint :: (atom -> Text) -> SExprPrinter atom (SExpr atom) basicPrint printer = SExprPrinter { atomPrinter = printer , fromCarrier = id , swingIndent = const Swing , indentAmount = 2 , maxWidth = Just 80 , indentPrint = True } -- | A default 'SExprPrinter' struct that will always swing subsequent -- expressions onto later lines if they're too long, indenting them by -- two spaces, but makes no effort to keep the pretty-printed sources -- inside a maximum width. In the case that we want indented printing -- but don't care about a "maximum" width, we can print more -- efficiently than in other situations. unconstrainedPrint :: (atom -> Text) -> SExprPrinter atom (SExpr atom) unconstrainedPrint = removeMaxWidth . basicPrint data Size = Size { sizeSum :: !Int , sizeMax :: !Int } deriving (Show) -- | This is an intermediate representation which is like (but not -- identical to) a RichSExpr representation. In particular, it has a -- special case for empty lists, and it also keeps a single piece of -- indent information around for each list data Intermediate = IAtom Text -- ^ An atom, already serialized | IList Indent Size Intermediate (Seq.Seq Intermediate) (Maybe Text) -- ^ A (possibly-improper) list, with the intended indentation -- strategy, the head of the list, the main set of elements, and the -- final improper element (if it exists) | IEmpty -- ^ An empty list deriving Show sizeOf :: Intermediate -> Size sizeOf IEmpty = Size 2 2 sizeOf (IAtom t) = Size len len where len = T.length t sizeOf (IList _ (Size n m) _ _ _) = Size (n + 2) (m + 2) concatSize :: Size -> Size -> Size concatSize l r = Size { sizeSum = sizeSum l + 1 + sizeSum r -- 1 for the ' ' between elements , sizeMax = sizeMax l `max` sizeMax r } toIntermediate :: SExprPrinter a (SExpr a) -> SExpr a -> Intermediate toIntermediate SExprPrinter { atomPrinter = printAtom , swingIndent = swing } = headOf where headOf (SAtom a) = IAtom (printAtom a) headOf SNil = IEmpty headOf (SCons x xs) = gather (swing x) hd Seq.empty xs (sizeOf hd) where hd = headOf x gather sw hd rs SNil sz = IList sw sz hd rs Nothing gather sw hd rs (SAtom a) sz = IList sw (sz `concatSize` aSize) hd rs (Just aStr) where aSize = Size aLen aLen aLen = T.length aStr + 2 -- 2 for the ". " between the pair aStr = printAtom a gather sw hd rs (SCons x xs) sz = gather sw hd (rs Seq.|> x') xs (sz `concatSize` sizeOf x') where x' = headOf x unboundIndentPrintSExpr :: SExprPrinter a (SExpr a) -> SExpr a -> TL.Text unboundIndentPrintSExpr spec = finalize . go . toIntermediate spec where finalize = B.toLazyText . joinLinesS go :: Intermediate -> Seq.Seq B.Builder go (IAtom t) = Seq.singleton (B.fromText t) go IEmpty = Seq.singleton (B.fromString "()") -- this case should never be called with an empty argument to -- @values@, as that should have been translated to @IEmpty@ -- instead. go (IList iv _ initial values rest) -- if we're looking at an s-expression that has no nested -- s-expressions, then we might as well consider it flat and let -- it take the whole line | Just strings <- T.traverse ppBasic (initial Seq.<| values) = Seq.singleton (B.singleton '(' <> buildUnwords strings <> pTail rest) -- it's not "flat", so we might want to swing after the first thing | Swing <- iv = -- if this match fails, then it means we've failed to -- convert to an Intermediate correctly! let butLast = insertParen (go initial) <> fmap doIndent (F.foldMap go values) in handleTail rest butLast -- ...or after several things | SwingAfter n <- iv = let (hs, xs) = Seq.splitAt n (initial Seq.<| values) hd = B.singleton '(' <> buildUnwords (F.foldMap go hs) butLast = hd Seq.<| fmap doIndent (F.foldMap go xs) in handleTail rest butLast -- the 'align' choice is clunkier because we need to know how -- deep to indent, so we have to force the first builder to grab its size | otherwise = let -- so we grab that and figure out its length plus two (for -- the leading paren and the following space). This uses a -- max because it's possible the first thing is itself a -- multi-line s-expression (in which case it seems like -- using the Align strategy is a terrible idea, but who am -- I to quarrel with the wild fruits upon the Tree of -- Life?) len = 2 + F.maximum (fmap (TL.length . B.toLazyText) (go initial)) in case Seq.viewl values of -- if there's nothing after the head of the expression, then -- we simply close it Seq.EmptyL -> insertParen (insertCloseParen (go initial)) -- otherwise, we put the first two things on the same line -- with spaces and everything else gets indended the -- forementioned length y Seq.:< ys -> let hd = B.singleton '(' <> buildUnwords (F.foldMap go (Seq.fromList [initial, y])) butLast = hd Seq.<| fmap (doIndentOf (fromIntegral len)) (F.foldMap go ys) in handleTail rest butLast doIndent :: B.Builder -> B.Builder doIndent = doIndentOf (indentAmount spec + 1) -- 1 for '(' doIndentOf :: Int -> B.Builder -> B.Builder doIndentOf n b = B.fromText (T.replicate n " ") <> b insertParen :: Seq.Seq B.Builder -> Seq.Seq B.Builder insertParen s = case Seq.viewl s of Seq.EmptyL -> s x Seq.:< xs -> (B.singleton '(' <> x) Seq.<| xs handleTail :: Maybe Text -> Seq.Seq B.Builder -> Seq.Seq B.Builder handleTail Nothing = insertCloseParen handleTail (Just t) = let txtInd = B.fromText $ T.replicate (indentAmount spec) " " sep = B.fromString " . " in (Seq.|> (txtInd <> sep <> B.fromText t <> B.singleton ')')) insertCloseParen :: Seq.Seq B.Builder -> Seq.Seq B.Builder insertCloseParen s = case Seq.viewr s of Seq.EmptyR -> Seq.singleton (B.singleton ')') xs Seq.:> x -> xs Seq.|> (x <> B.singleton ')') buildUnwords sq = case Seq.viewl sq of Seq.EmptyL -> mempty t Seq.:< ts -> t <> F.foldMap (\ x -> B.singleton ' ' <> x) ts pTail Nothing = B.singleton ')' pTail (Just t) = B.fromString " . " <> B.fromText t <> B.singleton ')' ppBasic (IAtom t) = Just (B.fromText t) ppBasic (IEmpty) = Just (B.fromString "()") ppBasic _ = Nothing -- | Modify the carrier type of a 'SExprPrinter' by describing how -- to convert the new type back to the previous type. For example, -- to pretty-print a well-formed s-expression, we can modify the -- 'SExprPrinter' value as follows: -- -- >>> let printer = setFromCarrier fromWellFormed (basicPrint id) -- >>> encodeOne printer (WFSList [WFSAtom "ele", WFSAtom "phant"]) -- "(ele phant)" setFromCarrier :: (c -> b) -> SExprPrinter a b -> SExprPrinter a c setFromCarrier fc pr = pr { fromCarrier = fromCarrier pr . fc } -- | Dictate a maximum width for pretty-printed s-expressions. -- -- >>> let printer = setMaxWidth 8 (basicPrint id) -- >>> encodeOne printer (L [A "one", A "two", A "three"]) -- "(one \n two\n three)" setMaxWidth :: Int -> SExprPrinter atom carrier -> SExprPrinter atom carrier setMaxWidth n pr = pr { maxWidth = Just n } -- | Allow the serialized s-expression to be arbitrarily wide. This -- makes all pretty-printing happen on a single line. -- -- >>> let printer = removeMaxWidth (basicPrint id) -- >>> encodeOne printer (L [A "one", A "two", A "three"]) -- "(one two three)" removeMaxWidth :: SExprPrinter atom carrier -> SExprPrinter atom carrier removeMaxWidth pr = pr { maxWidth = Nothing } -- | Set the number of spaces that a subsequent line will be indented -- after a swing indentation. -- -- >>> let printer = setMaxWidth 12 (basicPrint id) -- >>> encodeOne printer (L [A "elephant", A "pachyderm"]) -- "(elephant \n pachyderm)" -- >>> encodeOne (setIndentAmount 4) (L [A "elephant", A "pachyderm"]) -- "(elephant \n pachyderm)" setIndentAmount :: Int -> SExprPrinter atom carrier -> SExprPrinter atom carrier setIndentAmount n pr = pr { indentAmount = n } -- | Dictate how to indent subsequent lines based on the leading -- subexpression in an s-expression. For details on how this works, -- consult the documentation of the 'Indent' type. -- -- >>> let indent (A "def") = SwingAfter 1; indent _ = Swing -- >>> let printer = setIndentStrategy indent (setMaxWidth 8 (basicPrint id)) -- >>> encodeOne printer (L [ A "def", L [ A "func", A "arg" ], A "body" ]) -- "(def (func arg)\n body)" -- >>> encodeOne printer (L [ A "elephant", A "among", A "pachyderms" ]) -- "(elephant \n among\n pachyderms)" setIndentStrategy :: (SExpr atom -> Indent) -> SExprPrinter atom carrier -> SExprPrinter atom carrier setIndentStrategy st pr = pr { swingIndent = st } spaceDot :: B.Builder spaceDot = B.singleton ' ' <> B.singleton '.' <> B.singleton ' ' -- Indents a line by n spaces indent :: Int -> B.Builder -> B.Builder indent n ts = mconcat [ B.singleton ' ' | _ <- [1..n]] <> ts -- Sort of like 'unlines' but without the trailing newline joinLinesS :: Seq.Seq B.Builder -> B.Builder joinLinesS s = case Seq.viewl s of Seq.EmptyL -> "" t Seq.:< ts | F.null ts -> t | otherwise -> t <> B.fromString "\n" <> joinLinesS ts -- Sort of like 'unlines' but without the trailing newline unwordsS :: Seq.Seq B.Builder -> B.Builder unwordsS s = case Seq.viewl s of Seq.EmptyL -> "" t Seq.:< ts | F.null ts -> t | otherwise -> t <> " " <> unwordsS ts -- Indents every line n spaces, and adds a newline to the beginning -- used in swung indents indentAllS :: Int -> Seq.Seq B.Builder -> B.Builder indentAllS n s = if Seq.null s then "" else ("\n" <>) $ joinLinesS $ fmap (indent n) s -- Indents every line but the first by some amount -- used in aligned indents indentSubsequentS :: Int -> Seq.Seq B.Builder -> B.Builder indentSubsequentS n s = case Seq.viewl s of Seq.EmptyL -> "" t Seq.:< ts | F.null ts -> t | otherwise -> joinLinesS (t Seq.<| fmap (indent n) ts) -- oh god this code is so disgusting -- i'm sorry to everyone i let down by writing this -- i swear i'll do better in the future i promise i have to -- for my sake and for everyone's -- | Pretty-print a 'SExpr' according to the options in a -- 'LayoutOptions' value. prettyPrintSExpr :: SExprPrinter a (SExpr a) -> SExpr a -> TL.Text prettyPrintSExpr pr@SExprPrinter { .. } expr = case maxWidth of Nothing | indentPrint -> unboundIndentPrintSExpr pr (fromCarrier expr) | otherwise -> flatPrintSExpr (fmap atomPrinter (fromCarrier expr)) Just w -> indentPrintSExpr' w pr expr indentPrintSExpr' :: Int -> SExprPrinter a (SExpr a) -> SExpr a -> TL.Text indentPrintSExpr' maxAmt pr@SExprPrinter { .. } = B.toLazyText . pp 0 . toIntermediate pr where pp _ IEmpty = B.fromString "()" pp _ (IAtom t) = B.fromText t pp ind (IList i sz h values end) = -- we always are going to have a head, a (possibly empty) body, -- and a (possibly empty) tail in our list formats B.singleton '(' <> hd <> body <> tl <> B.singleton ')' where -- the tail is either nothing, or the final dotted pair tl = case end of Nothing -> mempty Just x -> B.fromString " . " <> B.fromText x -- the head is the pretty-printed head, with an ambient -- indentation of +1 to account for the left paren hd = pp (ind+1) h indented = case i of SwingAfter n -> let (l, ls) = Seq.splitAt n values t = unwordsS (fmap (pp (ind+1+1)) l) -- 1 for (, 1 for ' ' nextInd = ind + indentAmount + 1 -- 1 for ( ts = indentAllS nextInd (fmap (pp nextInd) ls) in B.singleton ' ' <> t <> ts Swing -> let nextInd = ind + indentAmount + 1 -- 1 for ( in indentAllS nextInd (fmap (pp nextInd) values) Align -> let headWidth = sizeSum (sizeOf h) nextInd = ind + headWidth + 1 + 1 -- 1 for (, 1 for ' ' below in B.singleton ' ' <> indentSubsequentS nextInd (fmap (pp nextInd) values) body -- if there's nothing here, then we don't have anything to -- indent | length values == 0 = mempty -- if we can't fit the whole next s-expression on the same -- line, then we use the indented form | sizeSum sz + ind > maxAmt = indented | otherwise = -- otherwise we print the whole thing on one line! B.singleton ' ' <> unwordsS (fmap (pp (ind + 1)) values) -- if we don't indent anything, then we can ignore a bunch of the -- details above flatPrintSExpr :: SExpr Text -> TL.Text flatPrintSExpr = B.toLazyText . pHead where pHead (SCons x xs) = B.singleton '(' <> pHead x <> pTail xs pHead (SAtom t) = B.fromText t pHead SNil = B.singleton '(' <> B.singleton ')' pTail (SCons x xs) = B.singleton ' ' <> pHead x <> pTail xs pTail (SAtom t) = spaceDot <> B.fromText t <> B.singleton ')' pTail SNil = B.singleton ')' -- | Turn a single s-expression into a string according to a given -- 'SExprPrinter'. encodeOne :: SExprPrinter atom carrier -> carrier -> Text encodeOne s@(SExprPrinter { .. }) = TL.toStrict . prettyPrintSExpr (s { fromCarrier = id }) . fromCarrier -- | Turn a list of s-expressions into a single string according to -- a given 'SExprPrinter'. encode :: SExprPrinter atom carrier -> [carrier] -> Text encode spec = T.intercalate "\n\n" . map (encodeOne spec) -- | Turn a single s-expression into a lazy 'Text' according to a given -- 'SExprPrinter'. encodeOneLazy :: SExprPrinter atom carrier -> carrier -> TL.Text encodeOneLazy s@(SExprPrinter { .. }) = prettyPrintSExpr (s { fromCarrier = id }) . fromCarrier -- | Turn a list of s-expressions into a lazy 'Text' according to -- a given 'SExprPrinter'. encodeLazy :: SExprPrinter atom carrier -> [carrier] -> TL.Text encodeLazy spec = TL.intercalate "\n\n" . map (encodeOneLazy spec) s-cargot-0.1.6.0/Data/SCargot/Repr.hs0000644000000000000000000001375207346545000015253 0ustar0000000000000000{-# LANGUAGE DeriveDataTypeable #-} {-# LANGUAGE DeriveFoldable #-} {-# LANGUAGE DeriveFunctor #-} {-# LANGUAGE DeriveTraversable #-} {-# LANGUAGE TypeFamilies #-} module Data.SCargot.Repr ( -- $reprs -- * Elementary SExpr representation SExpr(..) -- * Rich SExpr representation , RichSExpr(..) , toRich , fromRich -- * Well-Formed SExpr representation , WellFormedSExpr(..) , toWellFormed , fromWellFormed ) where import Data.Data (Data) import Data.Foldable (Foldable(..)) import Data.Traversable (Traversable(..)) import Data.Typeable (Typeable) import GHC.Exts (IsList(..), IsString(..)) #if !MIN_VERSION_base(4,8,0) import Prelude hiding (foldr) #endif -- | All S-Expressions can be understood as a sequence -- of @cons@ cells (represented here by 'SCons'), the -- empty list @nil@ (represented by 'SNil') or an -- @atom@. data SExpr atom = SCons (SExpr atom) (SExpr atom) | SAtom atom | SNil deriving (Eq, Show, Read, Functor, Data, Typeable, Foldable, Traversable) instance IsString atom => IsString (SExpr atom) where fromString = SAtom . fromString instance IsList (SExpr atom) where type Item (SExpr atom) = SExpr atom fromList = foldr SCons SNil toList = go where go (SCons x xs) = x : go xs go SNil = [] go (SAtom {}) = error "Unable to turn atom into list" -- | Sometimes the cons-based interface is too low -- level, and we'd rather have the lists themselves -- exposed. In this case, we have 'RSList' to -- represent a well-formed cons list, and 'RSDotted' -- to represent an improper list of the form -- @(a b c . d)@. This representation is based on -- the structure of the parsed S-Expression, and not on -- how it was originally represented: thus, @(a . (b))@ is going to -- be represented as @RSList[RSAtom a, RSAtom b]@ -- despite having been originally represented as a -- dotted list. data RichSExpr atom = RSList [RichSExpr atom] | RSDotted [RichSExpr atom] atom | RSAtom atom deriving (Eq, Show, Read, Functor, Data, Typeable, Foldable, Traversable) instance IsString atom => IsString (RichSExpr atom) where fromString = RSAtom . fromString instance IsList (RichSExpr atom) where type Item (RichSExpr atom) = RichSExpr atom fromList = RSList toList (RSList xs) = xs toList (RSDotted {}) = error "Unable to turn dotted list into haskell list" toList (RSAtom {}) = error "Unable to turn atom into Haskell list" -- | It should always be true that -- -- > fromRich (toRich x) == x -- -- and that -- -- > toRich (fromRich x) == x toRich :: SExpr atom -> RichSExpr atom toRich (SAtom a) = RSAtom a toRich (SCons x xs) = go xs (toRich x:) where go (SAtom a) rs = RSDotted (rs []) a go SNil rs = RSList (rs []) go (SCons y ys) rs = go ys (rs . (toRich y:)) toRich SNil = RSList [] -- | This follows the same laws as 'toRich'. fromRich :: RichSExpr atom -> SExpr atom fromRich (RSAtom a) = SAtom a fromRich (RSList xs) = foldr SCons SNil (map fromRich xs) fromRich (RSDotted xs x) = foldr SCons (SAtom x) (map fromRich xs) -- | A well-formed s-expression is one which does not -- contain any dotted lists. This means that not -- every value of @SExpr a@ can be converted to a -- @WellFormedSExpr a@, although the opposite is -- fine. data WellFormedSExpr atom = WFSList [WellFormedSExpr atom] | WFSAtom atom deriving (Eq, Show, Read, Functor, Data, Typeable, Foldable, Traversable) instance IsList (WellFormedSExpr atom) where type Item (WellFormedSExpr atom) = WellFormedSExpr atom fromList = WFSList toList (WFSList xs) = xs toList (WFSAtom {}) = error "Unable to turn atom into Haskell list" instance IsString atom => IsString (WellFormedSExpr atom) where fromString = WFSAtom . fromString -- | This will be @Nothing@ if the argument contains an -- improper list. It should hold that -- -- > toWellFormed (fromWellFormed x) == Right x -- -- and also (more tediously) that -- -- > case toWellFormed x of -- > Left _ -> True -- > Right y -> x == fromWellFormed y toWellFormed :: SExpr atom -> Either String (WellFormedSExpr atom) toWellFormed SNil = return (WFSList []) toWellFormed (SAtom a) = return (WFSAtom a) toWellFormed (SCons x xs) = do x' <- toWellFormed x go xs (x':) where go (SAtom _) _ = Left "Found atom in cdr position" go SNil rs = return (WFSList (rs [])) go (SCons y ys) rs = do y' <- toWellFormed y go ys (rs . (y':)) -- | Convert a WellFormedSExpr back into a SExpr. fromWellFormed :: WellFormedSExpr atom -> SExpr atom fromWellFormed (WFSAtom a) = SAtom a fromWellFormed (WFSList xs) = foldr SCons SNil (map fromWellFormed xs) {- $reprs This module contains several different representations for s-expressions. The s-cargot library underlying uses the 'SExpr' type as its representation type, which is a binary tree representation with an arbitrary type for its leaves. This type is not always convenient to manipulate in Haskell code, this module defines two alternate representations which turn a sequence of nested right-branching cons pairs into Haskell lists: that is to say, they transform between @ SCons a (SCons b (SCons c SNil)) \<=\> RSList [a, b, c] @ These two types differ in how they handle non-well-formed lists, i.e. lists that end with an atom. The 'RichSExpr' format handles this with a special constructor for lists that end in an atom: @ SCons a (SCons b (SAtom c)) \<=\> RSDotted [a, b] c @ On the other hand, the 'WellFormedSExpr' type elects not to handle this case. This is unusual for Lisp source code, but is a reasonable choice for configuration or data storage formats that use s-expressions, where non-well-formed lists would be an unnecessary complication. To make working with these types less verbose, there are other modules that export pattern aliases and helper functions: these can be found at "Data.SCargot.Repr.Basic", "Data.SCargot.Repr.Rich", and "Data.SCargot.Repr.WellFormed". -} s-cargot-0.1.6.0/Data/SCargot/Repr/0000755000000000000000000000000007346545000014707 5ustar0000000000000000s-cargot-0.1.6.0/Data/SCargot/Repr/Basic.hs0000644000000000000000000001725307346545000016274 0ustar0000000000000000{-# LANGUAGE PatternSynonyms #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE ViewPatterns #-} module Data.SCargot.Repr.Basic ( -- * Basic 'SExpr' representation R.SExpr(..) -- * Constructing and Deconstructing , cons , uncons -- * Shorthand Patterns , pattern (:::) , pattern A , pattern L , pattern DL , pattern Nil -- * Lenses , _car , _cdr -- * Useful processing functions , fromPair , fromList , fromAtom , asPair , asList , isAtom , asAtom , asAssoc ) where #if !MIN_VERSION_base(4,8,0) import Control.Applicative (Applicative, (<$>), (<*>), pure) #endif import Data.SCargot.Repr as R -- | A traversal with access to the first element of a pair. -- -- >>> import Lens.Family -- >>> set _car (A "elephant") (A "one" ::: A "two" ::: A "three" ::: Nil) -- A "elelphant" ::: A "two" ::: A "three" ::: Nil -- >>> set _car (A "two" ::: A "three" ::: Nil) (A "one" ::: A "elephant") -- (A "two" ::: A "three" ::: Nil) ::: A "elephant" _car :: Applicative f => (SExpr a -> f (SExpr a)) -> SExpr a -> f (SExpr a) _car f (SCons x xs) = (:::) <$> f x <*> pure xs _car _ (SAtom a) = pure (A a) _car _ SNil = pure SNil -- | A traversal with access to the second element of a pair. -- -- >>> import Lens.Family -- >>> set _cdr (A "elephant") (A "one" ::: A "two" ::: A "three" ::: Nil) -- A "one" ::: A "elephant" -- >>> set _cdr (A "two" ::: A "three" ::: Nil) (A "one" ::: A "elephant") -- A "one" ::: A "two" ::: A "three" ::: Nil _cdr :: Applicative f => (SExpr a -> f (SExpr a)) -> SExpr a -> f (SExpr a) _cdr f (SCons x xs) = (:::) <$> pure x <*> f xs _cdr _ (SAtom a) = pure (A a) _cdr _ SNil = pure Nil -- | Produce the head and tail of the s-expression (if possible). -- -- >>> uncons (A "el" ::: A "eph" ::: A "ant" ::: Nil) -- Just (A "el",SCons (SAtom "eph") (SCons (SAtom "ant") SNil)) uncons :: SExpr a -> Maybe (SExpr a, SExpr a) uncons (SCons x xs) = Just (x, xs) uncons _ = Nothing -- | Combine the two s-expressions into a new one. -- -- >>> cons (A "el") (L ["eph", A "ant"]) -- SCons (SAtom "el) (SCons (SAtom "eph") (SCons (SAtom "ant") SNil)) cons :: SExpr a -> SExpr a -> SExpr a cons = SCons gatherDList :: SExpr a -> Maybe ([SExpr a], a) gatherDList SNil = Nothing gatherDList SAtom {} = Nothing gatherDList sx = go sx where go SNil = Nothing go (SAtom a) = return ([], a) go (SCons x xs) = do (ys, a) <- go xs return (x:ys, a) infixr 5 ::: -- | A shorter infix alias for `SCons` -- -- >>> A "pachy" ::: A "derm" -- SCons (SAtom "pachy") (SAtom "derm") #if MIN_VERSION_base(4,8,0) pattern (:::) :: SExpr a -> SExpr a -> SExpr a #endif pattern x ::: xs = SCons x xs -- | A shorter alias for `SAtom` -- -- >>> A "elephant" -- SAtom "elephant" #if MIN_VERSION_base(4,8,0) pattern A :: a -> SExpr a #endif pattern A x = SAtom x -- | A (slightly) shorter alias for `SNil` -- -- >>> Nil -- SNil #if MIN_VERSION_base(4,8,0) pattern Nil :: SExpr a #endif pattern Nil = SNil -- | An alias for matching a proper list. -- -- >>> L [A "pachy", A "derm"] -- SExpr (SAtom "pachy") (SExpr (SAtom "derm") SNil) #if MIN_VERSION_base(4,8,0) pattern L :: [SExpr a] -> SExpr a #endif pattern L xs <- (gatherList -> Right xs) #if MIN_VERSION_base(4,8,0) where L [] = SNil L (x:xs) = SCons x (L xs) #endif -- | An alias for matching a dotted list. -- -- >>> DL [A "pachy"] A "derm" -- SExpr (SAtom "pachy") (SAtom "derm") #if MIN_VERSION_base(4,8,0) pattern DL :: [SExpr a] -> a -> SExpr a #endif pattern DL xs x <- (gatherDList -> Just (xs, x)) #if MIN_VERSION_base(4,8,0) where DL [] a = SAtom a DL (x:xs) a = SCons x (DL xs a) #endif getShape :: SExpr a -> String getShape Nil = "empty list" getShape sx = go (0 :: Int) sx where go n SNil = "list of length " ++ show n go n SAtom {} = "dotted list of length " ++ show n go n (SCons _ xs) = go (n+1) xs -- | Utility function for parsing a pair of things. -- -- >>> fromPair (isAtom "pachy") (asAtom return) (A "pachy" ::: A "derm" ::: Nil) -- Right ((), "derm") -- >>> fromPair (isAtom "pachy") fromAtom (A "pachy" ::: Nil) -- Left "Expected two-element list" fromPair :: (SExpr t -> Either String a) -> (SExpr t -> Either String b) -> SExpr t -> Either String (a, b) fromPair pl pr (l ::: r ::: Nil) = (,) <$> pl l <*> pr r fromPair _ _ sx = Left ("fromPair: expected two-element list; found " ++ getShape sx) -- | Utility function for parsing a list of things. fromList :: (SExpr t -> Either String a) -> SExpr t -> Either String [a] fromList p (s ::: ss) = (:) <$> p s <*> fromList p ss fromList _ Nil = pure [] fromList _ sx = Left ("fromList: expected list; found " ++ getShape sx) -- | Utility function for parsing a single atom fromAtom :: SExpr t -> Either String t fromAtom (A a) = return a fromAtom sx = Left ("fromAtom: expected atom; found list" ++ getShape sx) gatherList :: SExpr t -> Either String [SExpr t] gatherList (x ::: xs) = (:) <$> pure x <*> gatherList xs gatherList Nil = pure [] gatherList sx = Left ("gatherList: expected list; found " ++ getShape sx) -- | Parse a two-element list (NOT a dotted pair) using the -- provided function. -- -- >>> let go (A l) (A r) = return (l ++ r); go _ _ = Left "expected atoms" -- >>> asPair go (A "pachy" ::: A "derm" ::: Nil) -- Right "pachyderm" -- >>> asPair go (A "elephant" ::: Nil) -- Left "asPair: expected two-element list; found list of length 1" asPair :: ((SExpr t, SExpr t) -> Either String a) -> SExpr t -> Either String a asPair f (l ::: r ::: SNil) = f (l, r) asPair _ sx = Left ("asPair: expected two-element list; found " ++ getShape sx) -- | Parse an arbitrary-length list using the provided function. -- -- >>> let go xs = concat <$> mapM fromAtom xs -- >>> asList go (A "el" ::: A "eph" ::: A "ant" ::: Nil) -- Right "elephant" -- >>> asList go (A "el" ::: A "eph" ::: A "ant") -- Left "asList: expected list; found dotted list of length 3" asList :: ([SExpr t] -> Either String a) -> SExpr t -> Either String a asList f ls = gatherList ls >>= f -- | Match a given literal atom, failing otherwise. -- -- >>> isAtom "elephant" (A "elephant") -- Right () -- >>> isAtom "elephant" (A "elephant" ::: Nil) -- Left "isAtom: expected atom; found list" isAtom :: Eq t => t -> SExpr t -> Either String () isAtom s (A s') | s == s' = return () | otherwise = Left "isAtom: failed to match atom" isAtom _ sx = Left ("isAtom: expected atom; found " ++ getShape sx) -- | Parse an atom using the provided function. -- -- >>> import Data.Char (toUpper) -- >>> asAtom (return . map toUpper) (A "elephant") -- Right "ELEPHANT" -- >>> asAtom (return . map toUpper) Nil -- Left "asAtom: expected atom; found empty list" asAtom :: (t -> Either String a) -> SExpr t -> Either String a asAtom f (A s) = f s asAtom _ sx = Left ("asAtom: expected atom; found " ++ getShape sx) -- | Parse an assoc-list using the provided function. -- -- >>> let def (x, y) = do { a <- fromAtom x; b <- fromAtom y; return (a ++ ": " ++ b) } -- >>> let defList xs = do { defs <- mapM def xs; return (unlines defs) } -- >>> asAssoc defList ((A "legs" ::: A "four" ::: Nil) ::: (A "trunk" ::: A "one" ::: Nil) ::: Nil) -- Right "legs: four\ntrunk: one\n" -- >>> asAssoc defList ((A "legs" ::: A "four" ::: Nil) ::: (A "elephant") ::: Nil) -- Left "asAssoc: expected pair; found list of length 1" asAssoc :: ([(SExpr t, SExpr t)] -> Either String a) -> SExpr t -> Either String a asAssoc f ss = gatherList ss >>= mapM go >>= f where go (a ::: b ::: Nil) = return (a, b) go sx = Left ("asAssoc: expected pair; found " ++ getShape sx) s-cargot-0.1.6.0/Data/SCargot/Repr/Rich.hs0000644000000000000000000002336307346545000016137 0ustar0000000000000000{-# LANGUAGE PatternSynonyms #-} {-# LANGUAGE ViewPatterns #-} module Data.SCargot.Repr.Rich ( -- * 'RichSExpr' representation R.RichSExpr(..) , R.toRich , R.fromRich -- * Constructing and Deconstructing , cons , uncons -- * Useful pattern synonyms , pattern (:::) , pattern A , pattern L , pattern DL , pattern Nil -- * Lenses , _car , _cdr -- * Useful processing functions , fromPair , fromList , fromAtom , asPair , asList , isAtom , isNil , asAtom , asAssoc , car , cdr ) where #if !MIN_VERSION_base(4,8,0) import Control.Applicative (Applicative, (<$>), (<*>), pure) #endif import Data.SCargot.Repr as R -- | A traversal with access to the first element of a pair. -- -- >>> import Lens.Family -- >>> set _car (A "elephant") (L [A "one", A "two", A "three"]) -- L [A "elelphant",A "two",A "three"] -- >>> set _car (L [A "two", A "three"]) (DL [A "one"] "elephant") -- DL [L[A "two",A "three"]] "elephant" _car :: Applicative f => (RichSExpr a -> f (RichSExpr a)) -> RichSExpr a -> f (RichSExpr a) _car f (RSList (x:xs)) = (\ y -> L (y:xs)) `fmap` f x _car f (RSDotted (x:xs) a) = (\ y -> DL (y:xs) a) `fmap` f x _car _ (RSAtom a) = pure (A a) _car _ (RSList []) = pure Nil _car _ (RSDotted [] a) = pure (A a) -- | A traversal with access to the second element of a pair. Using -- this to modify an s-expression may result in changing the -- constructor used, changing a list to a dotted list or vice -- versa. -- -- >>> import Lens.Family -- >>> set _cdr (A "elephant") (L [A "one", A "two", A "three"]) -- DL [A "one"] "elephant" -- >>> set _cdr (L [A "two", A "three"]) (DL [A "one"] "elephant") -- L [A "one",A "two",A "three"] _cdr :: Applicative f => (RichSExpr a -> f (RichSExpr a)) -> RichSExpr a -> f (RichSExpr a) _cdr f (RSList (x:xs)) = let go (RSList []) = L [x] go (RSAtom a) = DL [x] a go (RSList xs') = L (x:xs') go (RSDotted ys a') = DL (x:ys) a' in go `fmap` f (L xs) _cdr f (RSDotted [x] a) = let go (RSList []) = L [x] go (RSAtom a') = DL [x] a' go (RSList xs) = L (x:xs) go (RSDotted ys a') = DL (x:ys) a' in go `fmap` f (A a) _cdr f (RSDotted (x:xs) a) = let go (RSList []) = L [x] go (RSAtom a') = DL [x] a' go (RSList ys) = L (x:ys) go (RSDotted ys a') = DL (x:ys) a' in go `fmap` f (DL xs a) _cdr _ (RSAtom a) = pure (A a) _cdr _ (RSList []) = pure Nil _cdr _ (RSDotted [] a) = pure (A a) -- | Produce the head and tail of the s-expression (if possible). -- -- >>> uncons (L [A "el", A "eph", A "ant"]) -- Just (A "el",L [A "eph",A "ant"]) uncons :: RichSExpr a -> Maybe (RichSExpr a, RichSExpr a) uncons (R.RSList (x:xs)) = Just (x, R.RSList xs) uncons (R.RSDotted (x:xs) a) = Just (x, R.RSDotted xs a) uncons _ = Nothing -- | Combine the two s-expressions into a new one. -- -- >>> cons (A "el") (L [A "eph", A "ant"]) -- L [A "el",A "eph",A "ant"] cons :: RichSExpr a -> RichSExpr a -> RichSExpr a cons x (R.RSList xs) = R.RSList (x:xs) cons x (R.RSDotted xs a) = R.RSDotted (x:xs) a cons x (R.RSAtom a) = R.RSDotted [x] a -- | A shorter infix alias to grab the head -- and tail of an `RSList`. -- -- >>> A "one" ::: L [A "two", A "three"] -- RSList [RSAtom "one",RSAtom "two",RSAtom "three"] #if MIN_VERSION_base(4,8,0) pattern (:::) :: RichSExpr a -> RichSExpr a -> RichSExpr a #endif pattern x ::: xs <- (uncons -> Just (x, xs)) #if MIN_VERSION_base(4,8,0) where x ::: xs = cons x xs #endif -- | A shorter alias for `RSAtom` -- -- >>> A "elephant" -- RSAtom "elephant" #if MIN_VERSION_base(4,8,0) pattern A :: a -> RichSExpr a #endif pattern A a = R.RSAtom a -- | A shorter alias for `RSList` -- -- >>> L [A "pachy", A "derm"] -- RSList [RSAtom "pachy",RSAtom "derm"] #if MIN_VERSION_base(4,8,0) pattern L :: [RichSExpr a] -> RichSExpr a #endif pattern L xs = R.RSList xs -- | A shorter alias for `RSDotted` -- -- >>> DL [A "pachy"] "derm" -- RSDotted [RSAtom "pachy"] "derm" #if MIN_VERSION_base(4,8,0) pattern DL :: [RichSExpr a] -> a -> RichSExpr a #endif pattern DL xs x = R.RSDotted xs x -- | A shorter alias for `RSList` @[]@ -- -- >>> Nil -- RSList [] #if MIN_VERSION_base(4,8,0) pattern Nil :: RichSExpr a #endif pattern Nil = R.RSList [] -- | Utility function for parsing a pair of things: this parses a two-element list, -- and not a cons pair. -- -- >>> fromPair (isAtom "pachy") (asAtom return) (L [A "pachy", A "derm"]) -- Right ((), "derm") -- >>> fromPair (isAtom "pachy") fromAtom (L [A "pachy"]) -- Left "Expected two-element list" fromPair :: (RichSExpr t -> Either String a) -> (RichSExpr t -> Either String b) -> RichSExpr t -> Either String (a, b) fromPair pl pr = asPair $ \(l,r) -> (,) <$> pl l <*> pr r -- | Utility function for parsing a proper list of things. -- -- >>> fromList fromAtom (L [A "this", A "that", A "the-other"]) -- Right ["this","that","the-other"] -- >>> fromList fromAtom (DL [A "this", A "that"] "the-other"]) -- Left "asList: expected proper list; found dotted list" fromList :: (RichSExpr t -> Either String a) -> RichSExpr t -> Either String [a] fromList p = asList $ \ss -> mapM p ss -- | Utility function for parsing a single atom -- -- >>> fromAtom (A "elephant") -- Right "elephant" -- >>> fromAtom (L [A "elephant"]) -- Left "fromAtom: expected atom; found list" fromAtom :: RichSExpr t -> Either String t fromAtom (RSList _) = Left "fromAtom: expected atom; found list" fromAtom (RSDotted _ _) = Left "fromAtom: expected atom; found dotted list" fromAtom (RSAtom a) = return a -- | Parses a two-element list using the provided function. -- -- >>> let go (A l) (A r) = return (l ++ r); go _ _ = Left "expected atoms" -- >>> asPair go (L [A "pachy", A "derm"]) -- Right "pachyderm" -- >>> asPair go (L [A "elephant"]) -- Left "asPair: expected two-element list; found list of length 1" asPair :: ((RichSExpr t, RichSExpr t) -> Either String a) -> RichSExpr t -> Either String a asPair f (RSList [l, r]) = f (l, r) asPair _ (RSList ls) = Left ("asPair: expected two-element list; found list of lenght " ++ show (length ls)) asPair _ RSDotted {} = Left ("asPair: expected two-element list; found dotted list") asPair _ RSAtom {} = Left ("asPair: expected two-element list; found atom") -- | Parse an arbitrary-length list using the provided function. -- -- >>> let go xs = concat <$> mapM fromAtom xs -- >>> asList go (L [A "el", A "eph", A "ant"]) -- Right "elephant" -- >>> asList go (DL [A "el", A "eph"] "ant") -- Left "asList: expected list; found dotted list" asList :: ([RichSExpr t] -> Either String a) -> RichSExpr t -> Either String a asList f (RSList ls) = f ls asList _ RSDotted {} = Left ("asList: expected list; found dotted list") asList _ RSAtom { } = Left ("asList: expected list; found dotted list") -- | Match a given literal atom, failing otherwise. -- -- >>> isAtom "elephant" (A "elephant") -- Right () -- >>> isAtom "elephant" (L [A "elephant"]) -- Left "isAtom: expected atom; found list" isAtom :: Eq t => t -> RichSExpr t -> Either String () isAtom s (RSAtom s') | s == s' = return () | otherwise = Left "isAtom: failed to match atom" isAtom _ RSList {} = Left "isAtom: expected atom; found list" isAtom _ RSDotted {} = Left "isAtom: expected atom; found dotted list" -- | Match an empty list, failing otherwise. -- -- >>> isNil (L []) -- Right () -- >>> isNil (A "elephant") -- Left "isNil: expected nil; found atom" isNil :: RichSExpr t -> Either String () isNil (RSList []) = return () isNil RSList {} = Left "isNil: expected nil; found non-nil list" isNil RSDotted {} = Left "isNil: expected nil; found dotted list" isNil RSAtom {} = Left "isNil: expected nil; found atom" -- | Parse an atom using the provided function. -- -- >>> import Data.Char (toUpper) -- >>> asAtom (return . map toUpper) (A "elephant") -- Right "ELEPHANT" -- >>> asAtom (return . map toUpper) (L []) -- Left "asAtom: expected atom; found list" asAtom :: (t -> Either String a) -> RichSExpr t -> Either String a asAtom f (RSAtom s) = f s asAtom _ RSList {} = Left ("asAtom: expected atom; found list") asAtom _ RSDotted {} = Left ("asAtom: expected atom; found dotted list") -- | Parse an assoc-list using the provided function. -- -- >>> let def (x, y) = do { a <- fromAtom x; b <- fromAtom y; return (a ++ ": " ++ b) } -- >>> let defList xs = do { defs <- mapM def xs; return (unlines defs) } -- >>> asAssoc defList (L [ L [A "legs", A "four"], L [ A "trunk", A "one"] ]) -- Right "legs: four\ntrunk: one\n" -- >>> asAssoc defList (L [ L [A "legs", A "four"], L [ A "elephant"] ]) -- Left "asAssoc: expected pair; found list of length 1" asAssoc :: ([(RichSExpr t, RichSExpr t)] -> Either String a) -> RichSExpr t -> Either String a asAssoc f (RSList ss) = gatherPairs ss >>= f where gatherPairs (RSList [a, b] : ts) = (:) <$> pure (a, b) <*> gatherPairs ts gatherPairs [] = pure [] gatherPairs (RSAtom {} : _) = Left ("asAssoc: expected pair; found atom") gatherPairs (RSDotted {} : _) = Left ("asAssoc: expected pair; found dotted list") gatherPairs (RSList ls : _) = Left ("asAssoc: expected pair; found list of length " ++ show (length ls)) asAssoc _ RSDotted {} = Left "asAssoc: expected assoc list; found dotted list" asAssoc _ RSAtom {} = Left "asAssoc: expected assoc list; found atom" car :: (RichSExpr t -> Either String t') -> [RichSExpr t] -> Either String t' car f (x:_) = f x car _ [] = Left "car: Taking car of zero-element list" cdr :: ([RichSExpr t] -> Either String t') -> [RichSExpr t] -> Either String t' cdr f (_:xs) = f xs cdr _ [] = Left "cdr: Taking cdr of zero-element list" s-cargot-0.1.6.0/Data/SCargot/Repr/WellFormed.hs0000644000000000000000000001731007346545000017305 0ustar0000000000000000{-# LANGUAGE PatternSynonyms #-} {-# LANGUAGE ViewPatterns #-} module Data.SCargot.Repr.WellFormed ( -- * 'WellFormedSExpr' representation R.WellFormedSExpr(..) , R.toWellFormed , R.fromWellFormed -- * Constructing and Deconstructing , cons , uncons -- * Useful pattern synonyms , pattern (:::) , pattern L , pattern A , pattern Nil -- * Useful processing functions , fromPair , fromList , fromAtom , asPair , asList , isAtom , isNil , asAtom , asAssoc , car , cdr ) where #if !MIN_VERSION_base(4,8,0) import Control.Applicative ((<$>), (<*>), pure) #endif import Data.SCargot.Repr as R -- | Produce the head and tail of the s-expression (if possible). -- -- >>> uncons (L [A "el", A "eph", A "ant"]) -- Just (WFSAtom "el",WFSList [WFSAtom "eph",WFSAtom "ant"]) uncons :: WellFormedSExpr a -> Maybe (WellFormedSExpr a, WellFormedSExpr a) uncons R.WFSAtom {} = Nothing uncons (R.WFSList []) = Nothing uncons (R.WFSList (x:xs)) = Just (x, R.WFSList xs) -- | Combine the two-expressions into a new one. This will return -- @Nothing@ if the resulting s-expression is not well-formed. -- -- >>> cons (A "el") (L [A "eph", A "ant"]) -- Just (WFSList [WFSAtom "el",WFSAtom "eph",WFSAtom "ant"]) -- >>> cons (A "pachy") (A "derm")) -- Nothing cons :: WellFormedSExpr a -> WellFormedSExpr a -> Maybe (WellFormedSExpr a) cons _ (R.WFSAtom {}) = Nothing cons x (R.WFSList xs) = Just (R.WFSList (x:xs)) -- | A shorter infix alias to grab the head and tail of a `WFSList`. This -- pattern is unidirectional, because it cannot be guaranteed that it -- is used to construct well-formed s-expressions; use the function "cons" -- instead. -- -- >>> let sum (x ::: xs) = x + sum xs; sum Nil = 0 #if MIN_VERSION_base(4,8,0) pattern (:::) :: WellFormedSExpr a -> WellFormedSExpr a -> WellFormedSExpr a #endif pattern x ::: xs <- (uncons -> Just (x, xs)) -- | A shorter alias for `WFSList` -- -- >>> L [A "pachy", A "derm"] -- WFSList [WFSAtom "pachy",WFSAtom "derm"] #if MIN_VERSION_base(4,8,0) pattern L :: [WellFormedSExpr t] -> WellFormedSExpr t #endif pattern L xs = R.WFSList xs -- | A shorter alias for `WFSAtom` -- -- >>> A "elephant" -- WFSAtom "elephant" #if MIN_VERSION_base(4,8,0) pattern A :: t -> WellFormedSExpr t #endif pattern A a = R.WFSAtom a -- | A shorter alias for `WFSList` @[]@ -- -- >>> Nil -- WFSList [] #if MIN_VERSION_base(4,8,0) pattern Nil :: WellFormedSExpr t #endif pattern Nil = R.WFSList [] getShape :: WellFormedSExpr a -> String getShape WFSAtom {} = "atom" getShape (WFSList []) = "empty list" getShape (WFSList sx) = "list of length " ++ show (length sx) -- | Utility function for parsing a pair of things. -- -- >>> fromPair (isAtom "pachy") (asAtom return) (L [A "pachy", A "derm"]) -- Right ((), "derm") -- >>> fromPair (isAtom "pachy") fromAtom (L [A "pachy"]) -- Left "Expected two-element list" fromPair :: (WellFormedSExpr t -> Either String a) -> (WellFormedSExpr t -> Either String b) -> WellFormedSExpr t -> Either String (a, b) fromPair pl pr (L [l, r]) = (,) <$> pl l <*> pr r fromPair _ _ sx = Left ("fromPair: expected two-element list; found " ++ getShape sx) -- | Utility function for parsing a list of things. -- -- >>> fromList fromAtom (L [A "this", A "that", A "the-other"]) -- Right ["this","that","the-other"] -- >>> fromList fromAtom (A "pachyderm") -- Left "asList: expected proper list; found dotted list" fromList :: (WellFormedSExpr t -> Either String a) -> WellFormedSExpr t -> Either String [a] fromList p (L ss) = mapM p ss fromList _ sx = Left ("fromList: expected list; found " ++ getShape sx) -- | Utility function for parsing a single atom -- -- >>> fromAtom (A "elephant") -- Right "elephant" -- >>> fromAtom (L [A "elephant"]) -- Left "fromAtom: expected atom; found list" fromAtom :: WellFormedSExpr t -> Either String t fromAtom (A a) = return a fromAtom sx = Left ("fromAtom: expected atom; found " ++ getShape sx) -- | Parses a two-element list using the provided function. -- -- >>> let go (A l) (A r) = return (l ++ r); go _ _ = Left "expected atoms" -- >>> asPair go (L [A "pachy", A "derm"]) -- Right "pachyderm" -- >>> asPair go (L [A "elephant"]) -- Left "asPair: expected two-element list; found list of length 1" asPair :: ((WellFormedSExpr t, WellFormedSExpr t) -> Either String a) -> WellFormedSExpr t -> Either String a asPair f (L [l, r]) = f (l, r) asPair _ sx = Left ("asPair: expected two-element list; found " ++ getShape sx) -- | Parse an arbitrary-length list using the provided function. -- -- >>> let go xs = concat <$> mapM fromAtom xs -- >>> asList go (L [A "el", A "eph", A "ant"]) -- Right "elephant" -- >>> asList go (A "pachyderm") -- Left "asList: expected list; found atom" asList :: ([WellFormedSExpr t] -> Either String a) -> WellFormedSExpr t -> Either String a asList f (L ls) = f ls asList _ sx = Left ("asList: expected list; found " ++ getShape sx) -- | Match a given literal atom, failing otherwise. -- -- >>> isAtom "elephant" (A "elephant") -- Right () -- >>> isAtom "elephant" (L [A "elephant"]) -- Left "isAtom: expected atom; found list" isAtom :: Eq t => t -> WellFormedSExpr t -> Either String () isAtom s (A s') | s == s' = return () | otherwise = Left "isAtom: failed to match atom" isAtom _ sx = Left ("isAtom: expected atom; found " ++ getShape sx) -- | Match an empty list, failing otherwise. -- -- >>> isNil (L []) -- Right () -- >>> isNil (A "elephant") -- Left "isNil: expected nil; found atom" isNil :: WellFormedSExpr t -> Either String () isNil Nil = return () isNil sx = Left ("isNil: expected nil; found " ++ getShape sx) -- | Parse an atom using the provided function. -- -- >>> import Data.Char (toUpper) -- >>> asAtom (return . map toUpper) (A "elephant") -- Right "ELEPHANT" -- >>> asAtom (return . map toUpper) (L []) -- Left "asAtom: expected atom; found list" asAtom :: (t -> Either String a) -> WellFormedSExpr t -> Either String a asAtom f (A s) = f s asAtom _ sx = Left ("asAtom: expected atom; found " ++ getShape sx) -- | Parse an assoc-list using the provided function. -- -- >>> let def (x, y) = do { a <- fromAtom x; b <- fromAtom y; return (a ++ ": " ++ b) } -- >>> let defList xs = do { defs <- mapM def xs; return (unlines defs) } -- >>> asAssoc defList (L [ L [A "legs", A "four"], L [ A "trunk", A "one"] ]) -- Right "legs: four\ntrunk: one\n" -- >>> asAssoc defList (L [ L [A "legs", A "four"], L [ A "elephant"] ]) -- Left "asAssoc: expected pair; found list of length 1" asAssoc :: ([(WellFormedSExpr t, WellFormedSExpr t)] -> Either String a) -> WellFormedSExpr t -> Either String a asAssoc f (L ss) = gatherPairs ss >>= f where gatherPairs (L [a, b] : ts) = (:) <$> pure (a, b) <*> gatherPairs ts gatherPairs [] = pure [] gatherPairs (sx:_) = Left ("asAssoc: expected pair; found " ++ getShape sx) asAssoc _ sx = Left ("asAssoc: expected list; found " ++ getShape sx) -- | Run the parser on the first element of a Haskell list of "WellFormedSExpr" values, -- failing if the list is empty. This is useful in conjunction with the `asList` -- function. car :: (WellFormedSExpr t -> Either String t') -> [WellFormedSExpr t] -> Either String t' car f (x:_) = f x car _ [] = Left "car: Taking car of zero-element list" -- | Run the parser on all but the first element of a Haskell list of "WellFormedSExpr" values, -- failing if the list is empty. This is useful in conjunction with the `asList` -- function. cdr :: ([WellFormedSExpr t] -> Either String t') -> [WellFormedSExpr t] -> Either String t' cdr f (_:xs) = f xs cdr _ [] = Left "cdr: Taking cdr of zero-element list" s-cargot-0.1.6.0/LICENSE0000644000000000000000000000276207346545000012600 0ustar0000000000000000Copyright (c) 2014, Getty Ritter All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of Getty Ritter nor the names of other contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. s-cargot-0.1.6.0/README.md0000644000000000000000000005551707346545000013060 0ustar0000000000000000[![Hackage](https://img.shields.io/hackage/v/s-cargot.svg)](https://hackage.haskell.org/package/s-cargot) ![stability: stable](https://img.shields.io/badge/stability-stable-green.svg) S-Cargot is a library for parsing and emitting S-expressions, designed to be flexible, customizable, and extensible. Different uses of S-expressions often understand subtly different variations on what an S-expression is. The goal of S-Cargot is to create several reusable components that can be repurposed to nearly any S-expression variant. S-Cargot does _not_ aim to be the fastest or most efficient s-expression library. If you need speed, then it would probably be best to roll your own [AttoParsec]() parser. Wherever there's a choice, S-Cargot errs on the side of maximum flexibility, which means that it should be easy to plug together components to understand various existing flavors of s-expressions or to extend it in various ways to accomodate new flavors. ## What Are S-Expressions? S-expressions were originally the data representation format in Lisp implementations, but have found broad uses outside of that as a data representation and storage format. S-expressions are often understood as a representation for binary trees with optional values in the leaf nodes: an empty leaf is represented with empty parens `()`, a non-empty leaf is represented as the scalar value it contains (often tokens like `x` or other programming language literals), and an internal node is represented as `(x . y)` where `x` and `y` are standing in for other s-expressions. In Lisp parlance, an internal node is called a _cons cell_, and the first and second elements inside it are called the _car_ and the _cdr_, for historical reasons. Non-empty lef nodes are referred to in the s-cargot library as _atoms_. Often, s-expressions are used to represent lists, in which case the list is treated as a right-branching tree with an empty leaf as the far right child of the tree. S-expression languages have a shorthand way of representing these lists: instead of writing successsively nested pairs, as in `(1 . (2 . (3 . ()))`, they allow the sugar `(1 2 3)`. This is the most common way of writing s-expressions, even in languages that allow raw cons cells (or "dotted pairs") to be written. The s-cargot library refers to expressions where every right-branching sequence ends in an empty leaf as _well-formed s-expressions_. Note that any s-expression which can be written without using a dotted pair is necessarily well-formed. Unfortunately, while in common use, s-expressions do not have a single formal standard. They are often defined in an ad-hoc way, which means that s-expressions used in different contexts will, despite sharing a common parentheses-delimited structure, differ in various respects. Additionally, because s-expressions are used as the concrete syntax for languages of the Lisp family, they often have conveniences (such as comment syntaxes) and other bits of syntactic sugar (such as _reader macros_, which are described more fully later) that make parsing them much more complicated. Even ignoring those features, the _atoms_ recognized by a given s-expression variation can differ widely. The s-cargot library was designed to accomodate several different kinds of s-expression formats, so that an s-expression format can be easily expressed as a combination of existing features. It includes a few basic variations on s-expression languages as well as the tools for parsing and emitting more elaborate s-expressions variations without having to reimplement the basic plumbing yourself. ## Using the Library The central way of interacting with the S-Cargot library is by creating and modifying datatypes which represent specifications for parsing and printing s-expressions. Each of those types has two type parameters, which are often called `atom` and `carrier`: ~~~~ +------ the type that represents an atom or value | | +- the Haskell representation of the SExpr itself | | parser :: SExprParser atom carrier printer :: SExprPrinter atom carrier ~~~~ Various functions will be provided that modify the carrier type (i.e. the output type of parsing or input type of serialization) or the language recognized by the parsing. ## Representing S-expressions There are three built-in representations of S-expression lists: two of them are isomorphic, as one or the other might be convenient for working with S-expression data in a particular circumstance, while the third represents only the "well-formed" subset of possible S-expressions, which is often convenient when using s-expressions for configuration or data storage. ~~~~.haskell -- cons-based representation data SExpr atom = SCons (SExpr atom) (SExpr atom) | SNil | SAtom atom -- list-based representation data RichSExpr atom = RSList [RichSExpr atom] | RSDotList [RichSExpr atom] atom | RSAtom atom -- well-formed representation data WellFormedSExpr atom = WFSList [WellFormedSExpr atom] | WFSAtom atom ~~~~ The `WellFormedSExpr` representation should be structurally identical to the `RichSExpr` representation in all cases where no improper lists appear in the source. Both of those representations are often more convenient than writing multiple nested `SCons` constructors, in the same way that the `[1,2,3]` syntax in Haskell is often less tedious than writing `1:2:3:[]`. Functions for converting back and forth between representations are provided, but you can also modify a `SExprSpec` to parse to or serialize from a particular representation using the `asRich` and `asWellFormed` functions. ~~~~.haskell >>> decode basicParser "(a b)" Right [SCons (SAtom "a") (SCons (SAtom "b") SNil)] >>> decode (asRich basicParser) "(a b)" Right [RSList [RSAtom "a",RSAtom "b"]] >>> decode (asWellFormed basicParser) "(a b)" Right [WFSList [WFSAtom "a",WFSAtom "b"]] >>> decode basicParser "(a . b)" Right [SCons (SAtom "a") (SAtom "b")] >>> decode (asRich basicParser) "(a . b)" Right [RSDotted [RSAtom "a"] "b"] >>> decode (asWellFormed basicParser) "(a . b)" Left "Found atom in cdr position" ~~~~ These names and patterns can be quite long, especially when you're constructing or matching on S-expression representations in Haskell source, so S-Cargot also exports several pattern synonyms that can be used both as expressions and in pattern-matching. These are each contained in their own module, as their names conflict with each other, so it's recommended to only import the module corresponding to the type that you plan on working with: ~~~~.haskell >>> import Data.SCargot.Repr.Basic >>> A 2 ::: A 3 ::: A 4 ::: Nil SCons (SAtom 2) (SCons (SAtom 3) (SCons (SAtom 4) SNil)) ~~~~ ~~~~.haskell >>> import Data.SCargot.Repr.WellFormed >>> L [A 1,A 2,A 3] WFSList [WFSAtom 1,WFSAtom 2,WFSAtom 3] >>> let sexprSum (L xs) = sum (map sexprSum xs); sexprSum (A n) = n >>> :t sexprSum sexprSum :: Num a => WellFormedSExpr a -> a >>> sexprSum (L [A 2, L [A 3, A 4]]) 9 ~~~~ If you are using GHC 7.10 or later, several of these will be powerful bidirectional pattern synonyms that allow both constructing and pattern-matching on s-expressions in non-trivial ways: ~~~~.haskell >>> import Data.SCargot.Repr.Basic >>> L [ A 2, A 3, A 4 ] SCons (SAtom 2) (SCons (SAtom 3) (SCons (SAtom 4) SNil)) ~~~~ ## Atom Types Any type can serve as an underlying atom type in an S-expression parser or serializer, provided that it has a Parsec parser or a serializer (i.e. a way of turning it into `Text`.) For these examples, I'm going to use a very simple serializer that is roughly like the one found in `Data.SCargot.Basic`, which parses symbolic tokens of letters, numbers, and some punctuation characters. This means that the 'serializer' here is just the identity function which returns the relevant `Text` value: ~~~~.haskell parser :: SExprParser Text (SExpr Text) parser = mkParser (pack <$> many1 (alphaNum <|> oneOf "+-*/!?")) printer :: SExprPrinter Text (SExpr Text) printer = flatPrint id ~~~~ A more elaborate atom type might distinguish between different varieties of token. A small example (that understands just alphabetic identifiers and decimal numbers) would look like this: ~~~~.haskell import Data.Text (Text, pack) data Atom = Ident Text | Num Int deriving (Eq, Show) pAtom :: Parser Atom pAtom = ((Num . read) <$> many1 digit) <|> (Ident . pack) <$> takeWhile1 isAlpha) sAtom :: Atom -> Text sAtom (Ident t) = t sAtom (Num n) = pack (show n) myParser :: SExprParser Atom (SExpr Atom) myParser = mkParser pAtom myPrinter :: SExprPrinter Atom (SExpr Atom) myPrinter = flatPrint sAtom ~~~~ We can then use this newly created atom type within an S-expression for both parsing and serialization: ~~~~.haskell >>> decode myParser "(foo 1)" Right [SCons (SAtom (Ident "foo")) (SCons (SAtom (Num 1)) SNil)] >>> encode mySpec [L [A (Num 0), A (Ident "bar")]] "(0 bar)" ~~~~ Several common atom types appear in the module [`Data.SCargot.Common`](https://hackage.haskell.org/package/s-cargot-0.1.0.0/docs/Data-SCargot-Common.html), including various kinds of identifiers and number literals. The long-term plan for S-Cargot is to include more and more kinds of built-in atoms, in order to make putting together an S-Expression parser even easier. If you have a common syntax for an atom type that you think should be represented there, please [suggest it in an issue](https://github.com/aisamanra/s-cargot/issues)! To make it easier to build up parsers for atom types without having to use Parsec manually, S-Cargot also exports `Data.SCargot.Atom`, which provides a shorthand way of building up a `SExprParser` from a list of parser-constructor pairs: ~~~~.haskell import Data.SCargot.Atom (atom, mkParserFromAtoms) import Data.SCargot.Common (parseR7RSIdent, signedDecNumber) -- we want our atom type to understand R7RS identifiers and -- signed decimal numbers data Atom = Ident Text | Num Integer deriving (Eq, Show) myParser :: SExprParser Atom (SExpr Atom) myParser = mkAtomParser [ atom Ident parseR7RSIdent , atom Num signedDecNumber ] ~~~~ ## Carrier Types As pointed out above, there are three different "carrier" types that are used to represent S-expressions by the library, but you can use any type as a carrier type for a spec. This is particularly useful when you want to parse into your own custom tree-like type. For example, if we wanted to parse a small S-expression-based arithmetic language, we could define a data type and transformations from and to an S-expression type: ~~~~.haskell import Data.Char (isDigit) import Data.Text (Text) import qualified Data.Text as T data Expr = Add Expr Expr | Num Int deriving (Eq, Show) toExpr :: RichSExpr Text -> Either String Expr toExpr (L [A "+", l, r]) = Add <$> toExpr l <*> toExpr r toExpr (A c) | T.all isDigit c = pure (Num (read (T.unpack c))) | otherwise = Left "Non-numeric token as argument" toExpr _ = Left "Unrecognized s-expr" fromExpr :: Expr -> RichSExpr Text fromExpr (Add x y) = L [A "+", fromExpr x, fromExpr y] fromExpr (Num n) = A (T.pack (show n)) ~~~~ then we could use the `convertSpec` function to add this directly to the `SExprSpec`: ~~~~.haskell >>> let parser' = setCarrier toExpr (asRich myParser) >>> :t parser' SExprParser Atom Expr >>> decode parser' "(+ 1 2)" Right [Add (Num 1) (Num 2)] >>> decode parser' "(0 1 2)" Left "Unrecognized s-expr" ~~~~ ## Comments By default, an S-expression parser does not include a comment syntax, but the provided `withLispComments` function will cause it to understand traditional Lisp line-oriented comments that begin with a semicolon: ~~~~.haskell >>> decode basicParser "(this ; has a comment\n inside)\n" Left "(line 1, column 7):\nunexpected \";\"\nexpecting space or atom" >>> decode (withLispComments basicParser) "(this ; has a comment\n inside)\n" Right [SCons (SAtom "this") (SCons (SAtom "inside") SNil)] ~~~~ Additionally, you can provide your own comment syntax in the form of an Parsec parser. Any Parsec parser can be used, so long as it meets the following criteria: - it is capable of failing (as is called until SCargot believes that there are no more comments) - it does not consume any input in the case of failure, which may involve wrapping the parser in a call to `try` For example, the following adds C++-style comments to an S-expression format: ~~~~.haskell >>> let cppComment = string "//" >> manyTill newline >> return () >>> decode (setComment cppComment basicParser) "(a //comment\n b)\n" Right [SCons (SAtom "a") (SCons (SAtom "b") SNil)] ~~~~ The [`Data.SCargot.Comments`](https://hackage.haskell.org/package/s-cargot/docs/Data-SCargot-Comments.html) module defines some helper functions for creating comment syntaxes, so the `cppComment` parser above could be defined as simply ~~~~.haskell >>> let cppComment = lineComment "//" >>> decode (setComment cppComment basicParser) "(a //comment\n b)\n" Right [SCons (SAtom "a") (SCons (SAtom "b") SNil)] ~~~~ Additionally, a handful of common comment syntaxes are defined in [`Data.SCargot.Comments`](https://hackage.haskell.org/package/s-cargot/docs/Data-SCargot-Comments.html), including C-style, Haskell-style, and generic scripting-language-style comments, so in practice, we could write the above example as ~~~~.haskell >>> decode (withCLikeLineComments basicParser) "(a //comment\n b)\n" Right [SCons (SAtom "a") (SCons (SAtom "b") SNil)] ~~~~ ## Reader Macros In Lisp variants, a _reader macro_ is a macro---a function that operates on syntactic structures---which is invoked during the _scanning_, or lexing, phase of a Lisp parser. This allows the _lexical_ syntax of a Lisp to be modified. A very common reader macro in most Lisp variants is the single quote, which allows the syntax `'expr` to stand as sugar for the literal s-expression `(quote expr)`. The S-Cargot library accomodates this by keeping a map from characters to Haskell functions that can be used analogously to reader macros. This is a common enough special case that there are shorthand ways of writing this, but we could support the `'expr` syntax by creating a Haskell function to turn `expr` into `(quote expr)` and adding that as a reader macro associated with the character `'`: ~~~~.haskell >>> let quote expr = SCons (SAtom "quote") (SCons expr SNil) >>> :t quote quote :: IsString atom => SExpr atom -> SExpr atom >>> let addQuoteReader = addReader '\'' (\ parse -> fmap quote parse) >>> addQuoteReader :: IsString atom => SExprParser atom c -> SExprParser atom c >>> decode (addQuoteReader basicParser) "'foo" Right [SCons (SAtom "quote") (SCons (SAtom "foo") SNil)] ~~~~ A reader macro is passed the an s-expression parser so that it can perform recursive parse calls, and it can return any `SExpr` it would like. It may also take as much or as little of the remaining parse stream as it would like. For example, the following reader macro does not bother parsing anything else and merely returns a new token: ~~~~.haskell >>> let qmReader = addReader '?' (\ _ -> pure (SAtom "huh")) >>> decode (qmReader basicParser) "(?1 2)" Right [SCons (SAtom "huh") (SCons (SAtom "1") (SCons (SAtom "2") SNil))] ~~~~ We can define a similar reader macro directly in Common Lisp, although it's important to note that Common Lisp converts all identifiers to uppercase, and also that the quote in line `[3]` is necessary so that the Common Lisp REPL doesn't attempt to evaluate `(huh 1 2)` as code: ~~~~.lisp [1]> (defun qm-reader (stream char) 'huh) QM-READER [2]> (set-macro-character #\? #'qm-reader) T [3]> '(?1 2) (HUH 1 2) ~~~~ Reader macros in S-Cargot can be used to define bits of Lisp syntax that are not typically considered the purview of S-expression parsers. For example, some Lisp-derived languages allow square brackets as a subsitute for proper lists, and to support this we could define a reader macro that is indicated by the `[` character and repeatedly calls the parser until a `]` character is reached: ~~~~.haskell >>> let vec p = (char ']' *> pure SNil) <|> (SCons <$> p <*> vec p) >>> :t vec vec :: Stream s m Char => ParsecT s u m (SExpr atom) -> ParsecT s u m (SExpr atom) >>> let withVecReader = addReader '[' vec >>> decode (asRich (withVecReader basicParser)) "(1 [2 3])" Right [RSList [RSAtom "1",RSList [RSAtom "2",RSAtom "3"]]] ~~~~ ## Pretty-Printing and Indentation The s-cargot library also includes a simple but often adequate pretty-printing system for S-expressions. A printer that prints a single-line s-expression is created with `flatPrint`: ~~~~.haskell >>> let printer = flatPrint id >>> :t printer SExprPrinter Text (SCargot Text) >>> Text.putStrLn $ encode printer [L [A "foo", A "bar"]] (foo bar) ~~~~ A printer that tries to pretty-print an s-expression to fit attractively within an 80-character limit can be created with `basicPrint`: ~~~~.haskell >>> let printer = basicPrint id >>> let sentence = "this stupendously preposterously supercalifragilisticexpialidociously long s-expression" >>> let longSexpr = L [A word | word <- Text.words sentence ] >>> Text.putStrLn $ encodeOne printer longSexpr (this stupendously preposterously supercalifragilisticexpialidociously long s-expression) ~~~~ A printer created with `basicPrint` will "swing" things that are too long onto the subsequent line, indenting it a fixed number of spaces. We can modify the number of spaces with `setIndentAmount`: ~~~~.haskell >>> let printer = setIndentAmount 4 (basicPrint id) >>> Text.putStrLn $ encodeOne printer longSexpr (this stupendously preposterously supercalifragilisticexpialidociously long s-expression) ~~~~ We can also modify what counts as the 'maximum width', which for a `basicPrint` printer is 80 by default: ~~~~.haskell >>> let printer = setMaxWidth 8 (basicPrint id) >>> Text.putStrLn $ encodeOne printer (L [A "one", A "two", A "three"]) (one two three) ~~~~ Or remove the maximum, which will always put the whole s-expression onto one line, regardless of its length: ~~~~.haskell >>> let printer = removeMaxWidth (basicPrint id) >>> Text.putStrLn $ encodeOne printer longSexpr (this stupendously preposterously supercalifragilisticexpialidociously long s-expression) ~~~~ We can also specify an _indentation strategy_, which decides how to indent subsequent expressions based on the head of a given expression. The default is to always "swing" subsequent expressions to the next line, but we could also specify the `Align` constructor, which will print the first two expressions on the same line and then any subsequent expressions horizontally aligned with the second one, like so: ~~~~.haskell >>> let printer = setIndentStrategy (\ _ -> Align) (setMaxWidth 8 (basicPrint id)) >>> Text.putStrLn $ encodeOne printer (L [A "one", A "two", A "three", A "four"]) (one two three four) ~~~~ Or we could choose to keep some number of expressions on the same line and afterwards swing the subsequent ones: ~~~~.haskell >>> let printer = setIndentStrategy (\ _ -> SwingAfter 1) (setMaxWidth 8 (basicPrint id)) >>> Text.putStrLn $ encodeOne printer (L [A "one", A "two", A "three", A "four"]) (one two three four) ~~~~ In many situations, we might want to choose a different indentation strategy based on the first expression within a proper list: for example, Common Lisp source code is often formatted so that, following a `defun` token, the function name and arguments are on the same line, and then the body of the function is indented a fixed amount. We can express an approximation of that strategy like this: ~~~~.haskell >>> let strategy (A ident) | "def" `Text.isPrefixOf` ident = SwingAfter 2; strategy _ = Align >>> let printer = setIndentStrategy strategy (setMaxWidth 20 (basicPrint id)) >>> let fact = L [A "defun", A "fact", L [A "x"], L [A "product", L [A "range", A "1", A "x"]]] >>> Text.putStrLn $ encodeOne printer fact (defun fact (x) (product (range 1 x))) >>> let app = L [A "apply", L [A "lambda", L [A "y"], L [A "fact", A "y"]], L [A "+", A "2", A "3"]] (apply (lambda (y) (fact y) (+ 2 3)) ~~~~ ## Putting It All Together Here is a final example which implements a limited arithmetic language with Haskell-style line comments and a special reader macro to understand hex literals: ~~~~.haskell {-# LANGUAGE OverloadedStrings #-} module SCargotExample where import Control.Applicative ((<|>)) import Data.Char (isDigit) import Data.SCargot import Data.SCargot.Repr.Basic import Data.Text (Text, pack) import Numeric (readHex) import Text.Parsec (anyChar, char, digit, many1, manyTill, newline, satisfy, string) import Text.Parsec.Text (Parser) -- Our operators are going to represent addition, subtraction, or -- multiplication data Op = Add | Sub | Mul deriving (Eq, Show) -- The atoms of our language are either one of the aforementioned -- operators, or positive integers data Atom = AOp Op | ANum Int deriving (Eq, Show) -- Once parsed, our language will consist of the applications of -- binary operators with literal integers at the leaves data Expr = EOp Op Expr Expr | ENum Int deriving (Eq, Show) -- Conversions to and from our Expr type toExpr :: SExpr Atom -> Either String Expr toExpr (A (AOp op) ::: l ::: r ::: Nil) = EOp op <$> toExpr l <*> toExpr r toExpr (A (ANum n)) = pure (ENum n) toExpr sexpr = Left ("Unable to parse expression: " ++ show sexpr) fromExpr :: Expr -> SExpr Atom fromExpr (EOp op l r) = A (AOp op) ::: fromExpr l ::: fromExpr r ::: Nil fromExpr (ENum n) = A (ANum n) ::: Nil -- Parser and serializer for our Atom type pAtom :: Parser Atom pAtom = ((ANum . read) <$> many1 digit) <|> (char '+' *> pure (AOp Add)) <|> (char '-' *> pure (AOp Sub)) <|> (char '*' *> pure (AOp Mul)) sAtom :: Atom -> Text sAtom (AOp Add) = "+" sAtom (AOp Sub) = "-" sAtom (AOp Mul) = "*" sAtom (ANum n) = pack (show n) -- Our comment syntax is going to be Haskell-like: hsComment :: Parser () hsComment = string "--" >> manyTill anyChar newline >> return () -- Our custom reader macro: grab the parse stream and read a -- hexadecimal number from it: hexReader :: Reader Atom hexReader _ = (A . ANum . rd) <$> many1 (satisfy isHexDigit) where isHexDigit c = isDigit c || c `elem` hexChars rd = fst . head . readHex hexChars :: String hexChars = "AaBbCcDdEeFf" -- Our final s-expression parser and printer: myLangParser :: SExprParser Atom Expr myLangParser = setComment hsComment -- set comment syntax to be Haskell-style $ addReader '#' hexReader -- add hex reader $ setCarrier toExpr -- convert final repr to Expr $ mkParser pAtom -- create spec with Atom type mkLangPrinter :: SExprPrinter Atom Expr mkLangPrinter = setFromCarrier fromExpr $ setIndentStrategy (const Align) $ basicPrint sAtom >>> decode myLangParser "(+ (* 2 20) 10) (* 10 10)" [EOp Add (EOp Mul (ENum 2) (ENum 20)) (ENum 10),EOp Mul (ENum 10) (ENum 10)] ~~~~ Keep in mind that you often won't need to write all this by hand, as you can often use a variety of built-in atom types, reader macros, comment types, and representations, but it's a useful illustration of all the options that are available to you should you need them! s-cargot-0.1.6.0/Setup.hs0000644000000000000000000000005607346545000013221 0ustar0000000000000000import Distribution.Simple main = defaultMain s-cargot-0.1.6.0/example/0000755000000000000000000000000007346545000013217 5ustar0000000000000000s-cargot-0.1.6.0/example/example.hs0000644000000000000000000000554207346545000015214 0ustar0000000000000000{-# LANGUAGE OverloadedStrings #-} module Main where import Control.Applicative ((<|>)) import Data.Char (isDigit) import Data.SCargot import Data.SCargot.Repr.Basic import Data.Text (Text, pack) import Numeric (readHex) import System.Environment (getArgs) import Text.Parsec (anyChar, char, digit, many1, manyTill, newline, satisfy, string) import Text.Parsec.Text (Parser) -- Our operators are going to represent addition, subtraction, or -- multiplication data Op = Add | Sub | Mul deriving (Eq, Show) -- The atoms of our language are either one of the aforementioned -- operators, or positive integers data Atom = AOp Op | ANum Int deriving (Eq, Show) -- Once parsed, our language will consist of the applications of -- binary operators with literal integers at the leaves data Expr = EOp Op Expr Expr | ENum Int deriving (Eq, Show) -- Conversions to and from our Expr type toExpr :: SExpr Atom -> Either String Expr toExpr (A (AOp op) ::: l ::: r ::: Nil) = EOp op <$> toExpr l <*> toExpr r toExpr (A (ANum n)) = pure (ENum n) toExpr sexpr = Left ("Unable to parse expression: " ++ show sexpr) fromExpr :: Expr -> SExpr Atom fromExpr (EOp op l r) = A (AOp op) ::: fromExpr l ::: fromExpr r ::: Nil fromExpr (ENum n) = A (ANum n) ::: Nil -- Parser and serializer for our Atom type pAtom :: Parser Atom pAtom = ((ANum . read) <$> many1 digit) <|> (char '+' *> pure (AOp Add)) <|> (char '-' *> pure (AOp Sub)) <|> (char '*' *> pure (AOp Mul)) sAtom :: Atom -> Text sAtom (AOp Add) = "+" sAtom (AOp Sub) = "-" sAtom (AOp Mul) = "*" sAtom (ANum n) = pack (show n) -- Our comment syntax is going to be Haskell-like: hsComment :: Parser () hsComment = string "--" >> manyTill anyChar newline >> return () -- Our custom reader macro: grab the parse stream and read a -- hexadecimal number from it: hexReader :: Reader Atom hexReader _ = (A . ANum . rd) <$> many1 (satisfy isHexDigit) where isHexDigit c = isDigit c || c `elem` hexChars rd = fst . head . readHex hexChars :: String hexChars = "AaBbCcDdEeFf" -- Our final s-expression parser and printer: myLangParser :: SExprParser Atom Expr myLangParser = setComment hsComment -- set comment syntax to be Haskell-style $ addReader '#' hexReader -- add hex reader $ setCarrier toExpr -- convert final repr to Expr $ mkParser pAtom -- create spec with Atom type mkLangPrinter :: SExprPrinter Atom Expr mkLangPrinter = setFromCarrier fromExpr $ setIndentStrategy (const Align) $ basicPrint sAtom main :: IO () main = do sExprText <- pack <$> getContents either putStrLn print (decode myLangParser sExprText) {- Example usage: $ dist/build/example/example < -- you can put comments in the code! > (+ 10 (* 20 20)) > -- and more than one s-expression! > (* 10 10) > EOF [EOp Add (ENum 10) (EOp Mul (ENum 20) (ENum 20)),EOp Mul (ENum 10) (ENum 10)] -} s-cargot-0.1.6.0/s-cargot.cabal0000644000000000000000000000704007346545000014270 0ustar0000000000000000name: s-cargot version: 0.1.6.0 synopsis: A flexible, extensible s-expression library. homepage: https://github.com/aisamanra/s-cargot description: S-Cargot is a library for working with s-expressions in a modular and extensible way, opting for genericity and flexibility instead of speed. Instead of understanding one particular form of s-expression, the S-Cargot library exposes tools for parsing or emitting different kinds of s-expressions, including features not normally included in an s-expression library like reader macros or tight control over indentation in pretty-printing. license: BSD3 license-file: LICENSE author: Getty Ritter maintainer: Getty Ritter copyright: ©2018 Getty Ritter category: Data build-type: Simple cabal-version: >=1.10 bug-reports: https://github.com/aisamanra/s-cargot/issues extra-source-files: README.md, CHANGELOG.md, test/*.sexp tested-with: GHC == 8.8.4, GHC == 8.10.7, GHC == 9.0.2, GHC == 9.2.4, GHC == 9.4.2 source-repository head type: git location: git://github.com/aisamanra/s-cargot.git flag build-example description: Build example application default: False library exposed-modules: Data.SCargot, Data.SCargot.Repr, Data.SCargot.Repr.Basic, Data.SCargot.Repr.Rich, Data.SCargot.Repr.WellFormed, Data.SCargot.Parse, Data.SCargot.Print, Data.SCargot.Atom, Data.SCargot.Comments, Data.SCargot.Common, Data.SCargot.Language.Basic, Data.SCargot.Language.HaskLike build-depends: base >=4.7 && <5, parsec >=3.1 && <4, text >=1.2 && <3, containers >=0.5 && <1 default-language: Haskell2010 default-extensions: CPP ghc-options: -Wall executable example if !flag(build-example) buildable: False main-is: example.hs hs-source-dirs: example build-depends: base >=4.7 && <5, containers >=0.5 && <1, parsec >=3.1 && <4, s-cargot , text >=1.2 && <3 default-language: Haskell2010 ghc-options: -threaded -rtsopts -with-rtsopts=-N test-suite s-cargot-qc default-language: Haskell2010 type: exitcode-stdio-1.0 hs-source-dirs: test main-is: SCargotQC.hs build-depends: s-cargot, base >=4.7 && <5, parsec >=3.1 && <4, QuickCheck >=2.8 && <3, text >=1.2 && <3 test-suite s-cargot-printparse default-language: Haskell2010 type: exitcode-stdio-1.0 hs-source-dirs: test main-is: SCargotPrintParse.hs build-depends: s-cargot, base >=4.7 && <5, parsec >=3.1 && <4, HUnit >=1.6 && <1.7, text >=1.2 && <3 s-cargot-0.1.6.0/test/0000755000000000000000000000000007346545000012543 5ustar0000000000000000s-cargot-0.1.6.0/test/SCargotPrintParse.hs0000644000000000000000000005422107346545000016455 0ustar0000000000000000{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE LambdaCase #-} module Main where import Data.Either import Data.SCargot import Data.SCargot.Comments import Data.SCargot.Repr import Data.Semigroup import qualified Data.Text as T import qualified Data.Text.IO as TIO import System.Exit import Test.HUnit import Text.Parsec as P import Text.Parsec.Text (Parser) import Text.Printf ( printf ) main = do putStrLn "Parsing a large S-expression" srcs <- mapM (\n -> (,) n <$> TIO.readFile n) [ "test/small-sample.sexp" , "test/med-sample.sexp" , "test/med2-sample.sexp" , "test/big-sample.sexp" ] counts <- runTestTT $ TestList $ let -- l2p = list of 2 pairs l2p = (SCons (SCons (SAtom (AIdent "hi")) (SAtom (AIdent "hallo"))) (SCons (SAtom (AIdent "world")) (SAtom (AIdent "welt")))) -- l3sp = list of 3 starting in a pair l3sp = (SCons (SCons (SAtom (AIdent "hi")) (SAtom (AIdent "world"))) (SCons (SAtom (AIdent "hallo")) (SCons (SAtom (AIdent "welt")) SNil))) -- l3ep = list of 3 ending in a pair l3ep = (SCons (SAtom (AIdent "hi")) (SCons (SAtom (AIdent "world")) (SCons (SAtom (AIdent "hallo")) (SAtom (AIdent "welt"))))) -- l3 = list of 3 l3 = (SCons (SAtom (AIdent "hi")) (SCons (SAtom (AIdent "world")) (SCons (SAtom (AIdent "hallo")) SNil))) -- l5p = list of 5 pairs l5p = (SCons (SCons (SAtom (AIdent "hi")) (SAtom (AIdent "world"))) (SCons (SCons (SAtom (AIdent "hallo")) (SAtom (AIdent "welt"))) (SCons (SCons (SAtom (AIdent "bonjour")) (SAtom (AIdent "monde"))) (SCons (SCons (SAtom (AIdent "hola")) (SAtom (AIdent "mundo"))) (SCons (SAtom (AIdent "ciao")) (SAtom (AIdent "mundo"))))))) -- l2 = list of 2 l2 = (SCons (SAtom (AIdent "hi")) (SCons (SAtom (AIdent "world")) SNil)) -- l1 = list of 1 l1 = (SCons (SAtom (AIdent "hi")) SNil) pair = (SCons (SAtom (AIdent "hi")) (SAtom (AIdent "world"))) -- linl = list within a list linl = (SCons (SAtom (AIdent "hi")) (SCons (SCons (SAtom (AIdent "world")) (SCons (SAtom (AIdent "and")) (SCons (SAtom (AIdent "people")) SNil))) (SCons (SAtom (AIdent "hallo")) (SCons (SAtom (AIdent "welt")) (SCons (SAtom (AIdent "und")) (SCons (SAtom (AIdent "leute")) SNil)))))) in [ TestLabel "basic checks" $ TestList [ TestLabel "flat print" $ TestList [ TestLabel "flatprint SNil" $ "()" ~=? printSExpr SNil , TestLabel "flatprint SAtom" $ "hi" ~=? printSExpr (SAtom (AIdent "hi")) , TestLabel "flatprint pair" $ "(hi . world)" ~=? printSExpr pair , TestLabel "flatprint list of 1" $ "(hi)" ~=? printSExpr l1 , TestLabel "flatprint list of 2" $ "(hi world)" ~=? printSExpr l2 , TestLabel "flatprint list of 2 pairs" $ "((hi . hallo) world . welt)" ~=? printSExpr l2p , TestLabel "flatprint list of 3 starting in a pair" $ -- pairs count as a single element "((hi . world) hallo welt)" ~=? printSExpr l3sp , TestLabel "flatprint list of 3 ending in a pair" $ "(hi world hallo . welt)" ~=? printSExpr l3ep , TestLabel "flatprint list of 3" $ "(hi world hallo)" ~=? printSExpr l3 , TestLabel "flatprint pair of list of 4" $ "(hi (world and people) hallo welt und leute)" ~=? printSExpr linl , TestLabel "flatprint list of 5 pairs" $ "((hi . world) (hallo . welt) (bonjour . monde) (hola . mundo) ciao . mundo)" ~=? printSExpr l5p ] , TestLabel "pretty print width 40" $ let pprintIt = pprintSExpr 40 Swing in TestList [ TestLabel "pretty print SNil" $ "()" ~=? pprintIt SNil , TestLabel "pretty print SAtom" $ "hi" ~=? pprintIt (SAtom (AIdent "hi")) , TestLabel "pretty print pair" $ "(hi . world)" ~=? pprintIt pair , TestLabel "pretty print list of 1" $ "(hi)" ~=? pprintIt l1 , TestLabel "pretty print list of 2" $ "(hi world)" ~=? pprintIt l2 , TestLabel "pretty print list of 2 pairs" $ "((hi . hallo) world . welt)" ~=? pprintIt l2p , TestLabel "pretty print list of 3 starting in a pair" $ -- pairs count as a single element "((hi . world) hallo welt)" ~=? pprintIt l3sp , TestLabel "pretty print list of 3 ending in a pair" $ "(hi world hallo . welt)" ~=? pprintIt l3ep , TestLabel "pretty print list of 3" $ "(hi world hallo)" ~=? pprintIt l3 , TestLabel "pretty print pair of list of 4" $ "(hi\n (world and people)\n hallo\n welt\n und\n leute)" ~=? pprintIt linl , TestLabel "pretty print list of 5 pairs" $ "((hi . world)\n (hallo . welt)\n (bonjour . monde)\n (hola . mundo)\n ciao . mundo)" ~=? pprintIt l5p ] , TestLabel "pretty print width 10" $ let pprintIt = pprintSExpr 10 Swing in TestList [ TestLabel "pretty print SNil" $ "()" ~=? pprintIt SNil , TestLabel "pretty print SAtom" $ "hi" ~=? pprintIt (SAtom (AIdent "hi")) , TestLabel "pretty print pair" $ "(hi . world)" ~=? pprintIt pair , TestLabel "pretty print list of 1" $ "(hi)" ~=? pprintIt l1 , TestLabel "pretty print list of 2" $ "(hi world)" ~=? pprintIt l2 , TestLabel "pretty print list of 2 pairs" $ "((hi . hallo)\n world . welt)" ~=? pprintIt l2p , TestLabel "pretty print list of 3 starting in a pair" $ -- pairs count as a single element "((hi . world)\n hallo\n welt)" ~=? pprintIt l3sp , TestLabel "pretty print list of 3 ending in a pair" $ "(hi\n world\n hallo . welt)" ~=? pprintIt l3ep , TestLabel "pretty print list of 3" $ "(hi\n world\n hallo)" ~=? pprintIt l3 , TestLabel "pretty print pair of list of 4" $ "(hi\n (world\n and\n people)\n hallo\n welt\n und\n leute)" ~=? pprintIt linl , TestLabel "pretty print list of 5 pairs" $ "((hi . world)\n (hallo . welt)\n (bonjour . monde)\n (hola . mundo)\n ciao . mundo)" ~=? pprintIt l5p ] , TestLabel "pretty print width 10 indent 5" $ let pprintIt = encodeOne (setIndentStrategy (const Swing) $ setIndentAmount 5 $ setMaxWidth 10 $ basicPrint printAtom) in TestList [ TestLabel "pretty print SNil" $ "()" ~=? pprintIt SNil , TestLabel "pretty print SAtom" $ "hi" ~=? pprintIt (SAtom (AIdent "hi")) , TestLabel "pretty print pair" $ "(hi . world)" ~=? pprintIt pair , TestLabel "pretty print list of 1" $ "(hi)" ~=? pprintIt l1 , TestLabel "pretty print list of 2" $ "(hi world)" ~=? pprintIt l2 , TestLabel "pretty print list of 2 pairs" $ "((hi . hallo)\n world . welt)" ~=? pprintIt l2p , TestLabel "pretty print list of 3 starting in a pair" $ -- pairs count as a single element "((hi . world)\n hallo\n welt)" ~=? pprintIt l3sp , TestLabel "pretty print list of 3 ending in a pair" $ "(hi\n world\n hallo . welt)" ~=? pprintIt l3ep , TestLabel "pretty print list of 3" $ "(hi\n world\n hallo)" ~=? pprintIt l3 , TestLabel "pretty print pair of list of 4" $ "(hi\n (world\n and\n people)\n hallo\n welt\n und\n leute)" ~=? pprintIt linl , TestLabel "pretty print list of 5 pairs" $ "((hi . world)\n (hallo . welt)\n (bonjour . monde)\n (hola . mundo)\n ciao . mundo)" ~=? pprintIt l5p ] , TestLabel "pretty print width 10 swing-after 3" $ let pprintIt = pprintSExpr 10 (SwingAfter 3) in TestList [ TestLabel "pretty print SNil" $ "()" ~=? pprintIt SNil , TestLabel "pretty print SAtom" $ "hi" ~=? pprintIt (SAtom (AIdent "hi")) , TestLabel "pretty print pair" $ "(hi . world)" ~=? pprintIt pair , TestLabel "pretty print list of 1" $ "(hi)" ~=? pprintIt l1 , TestLabel "pretty print list of 2" $ "(hi world)" ~=? pprintIt l2 , TestLabel "pretty print list of 2 pairs" $ -- pairs are not split internally "((hi . hallo) world . welt)" ~=? pprintIt l2p , TestLabel "pretty print list of 3 starting in a pair" $ -- pairs count as a single element "((hi . world) hallo welt)" ~=? pprintIt l3sp , TestLabel "pretty print list of 3 ending in a pair" $ -- pairs are not split internally "(hi world hallo . welt)" ~=? pprintIt l3ep , TestLabel "pretty print list of 3" $ "(hi world hallo)" ~=? pprintIt l3 , TestLabel "pretty print pair of list of 4" $ -- atom list pair\n ... "(hi (world and people) hallo welt\n und\n leute)" ~=? pprintIt linl , TestLabel "pretty print list of 5 pairs" $ "((hi . world) (hallo . welt) (bonjour . monde) (hola . mundo)\n ciao . mundo)" ~=? pprintIt l5p ] , TestLabel "pretty print width 10 aligned" $ let pprintIt = pprintSExpr 10 Align in TestList [ TestLabel "pretty print SNil" $ "()" ~=? pprintIt SNil , TestLabel "pretty print SAtom" $ "hi" ~=? pprintIt (SAtom (AIdent "hi")) , TestLabel "pretty print pair" $ "(hi . world)" ~=? pprintIt pair , TestLabel "pretty print list of 1" $ "(hi)" ~=? pprintIt l1 , TestLabel "pretty print list of 2" $ "(hi world)" ~=? pprintIt l2 , TestLabel "pretty print list of 2 pairs" $ "((hi . hallo) world . welt)" ~=? pprintIt l2p , TestLabel "pretty print list of 3 starting in a pair" $ -- pairs count as a single element "((hi . world) hallo\n welt)" ~=? pprintIt l3sp , TestLabel "pretty print list of 3 ending in a pair" $ "(hi world\n hallo . welt)" ~=? pprintIt l3ep , TestLabel "pretty print list of 3" $ "(hi world\n hallo)" ~=? pprintIt l3 , TestLabel "pretty print pair of list of 4" $ "(hi (world and\n people)\n hallo\n welt\n und\n leute)" ~=? pprintIt linl , TestLabel "pretty print list of 5 pairs" $ "((hi . world) (hallo . welt)\n (bonjour . monde)\n (hola . mundo)\n ciao . mundo)" ~=? pprintIt l5p ] , TestLabel "pretty print width 400" $ let pprintIt = pprintSExpr 400 Swing in TestList [ TestLabel "pretty print SNil" $ "()" ~=? pprintIt SNil , TestLabel "pretty print SAtom" $ "hi" ~=? pprintIt (SAtom (AIdent "hi")) , TestLabel "pretty print pair" $ "(hi . world)" ~=? pprintIt pair , TestLabel "pretty print list of 1" $ "(hi)" ~=? pprintIt l1 , TestLabel "pretty print list of 2" $ "(hi world)" ~=? pprintIt l2 , TestLabel "pretty print list of 2 pairs" $ -- No Swing if it fits on a line "((hi . hallo) world . welt)" ~=? pprintIt l2p , TestLabel "pretty print list of 3 starting in a pair" $ -- pairs count as a single element "((hi . world) hallo welt)" ~=? pprintIt l3sp , TestLabel "pretty print list of 3 ending in a pair" $ "(hi world hallo . welt)" ~=? pprintIt l3ep , TestLabel "pretty print list of 3" $ "(hi world hallo)" ~=? pprintIt l3 , TestLabel "pretty print pair of list of 4" $ "(hi (world and people) hallo welt und leute)" ~=? pprintIt linl , TestLabel "pretty print list of 5 pairs" $ "((hi . world) (hallo . welt) (bonjour . monde) (hola . mundo) ciao . mundo)" ~=? pprintIt l5p ] , TestLabel "unconstrained print" $ let pprintIt = ucPrintSExpr Swing in TestList [ TestLabel "pretty print SNil" $ "()" ~=? pprintIt SNil , TestLabel "pretty print SAtom" $ "hi" ~=? pprintIt (SAtom (AIdent "hi")) , TestLabel "pretty print pair" $ "(hi . world)" ~=? pprintIt pair , TestLabel "pretty print list of 1" $ "(hi)" ~=? pprintIt l1 , TestLabel "pretty print list of 2" $ "(hi world)" ~=? pprintIt l2 , TestLabel "pretty print list of 2 pairs" $ "((hi . hallo)\n world\n . welt)" ~=? pprintIt l2p , TestLabel "pretty print list of 3 starting in a pair" $ -- pairs count as a single element "((hi . world)\n hallo\n welt)" ~=? pprintIt l3sp , TestLabel "pretty print list of 3 ending in a pair" $ "(hi world hallo . welt)" ~=? pprintIt l3ep , TestLabel "pretty print list of 3" $ "(hi world hallo)" ~=? pprintIt l3 , TestLabel "pretty print pair of list of 4" $ "(hi\n (world and people)\n hallo\n welt\n und\n leute)" ~=? pprintIt linl , TestLabel "pretty print list of 5 pairs" $ "((hi . world)\n (hallo . welt)\n (bonjour . monde)\n (hola . mundo)\n ciao\n . mundo)" ~=? pprintIt l5p ] ] , TestLabel "round-trip" $ TestList $ concatMap (\t -> map t srcs) $ [ testParsePrint ] ] if errors counts + failures counts > 0 then exitFailure else exitSuccess testParsePrint :: (String, T.Text) -> Test testParsePrint (n,s) = TestList [ testParseFlatPrint n s , testParseUnconstrainedPrint Swing n s , testParseUnconstrainedPrint Align n s , testParsePPrint 80 Swing n s , testParsePPrint 60 Swing n s , testParsePPrint 40 Swing n s , testParsePPrint 20 Swing n s , testParsePPrint 15 Swing n s , testParsePPrint 10 Swing n s , testParsePPrint 80 Align n s , testParsePPrint 40 Align n s , testParsePPrint 10 Align n s ] testParseFlatPrint testName src = testRoundTrip (testName <> " flat print") (fromRight (error "Failed parse") . parseSExpr) printSExpr stripAllText src testParseUnconstrainedPrint indentStyle testName src = testRoundTrip (testName <> " unconstrained print") (fromRight (error "Failed parse") . parseSExpr) (ucPrintSExpr indentStyle) stripAllText src testParsePPrint width indentStyle testName src = testRoundTrip (testName <> " pretty print") (fromRight (error "Failed parse") . parseSExpr) (pprintSExpr width indentStyle) stripAllText src stripAllText = T.unwords . concatMap T.words . T.lines testRoundTrip nm there back prep src = TestList [ TestLabel (nm <> " round trip") $ TestCase $ (prep src) @=? (prep $ back $ there src) , TestLabel (nm <> " round trip twice") $ TestCase $ (prep src) @=? (prep $ back $ there $ back $ there src) ] ------------------------------------------------------------------------ data FAtom = AIdent String | AQuoted String | AString String | AInt Integer | ABV Int Integer deriving (Eq, Show) string :: String -> SExpr FAtom string = SAtom . AString -- | Lift an unquoted identifier. ident :: String -> SExpr FAtom ident = SAtom . AIdent -- | Lift a quoted identifier. quoted :: String -> SExpr FAtom quoted = SAtom . AQuoted -- | Lift an integer. int :: Integer -> SExpr FAtom int = SAtom . AInt printAtom :: FAtom -> T.Text printAtom a = case a of AIdent s -> T.pack s AQuoted s -> T.pack ('\'' : s) AString s -> T.pack (show s) AInt i -> T.pack (show i) ABV w val -> formatBV w val printSExpr :: SExpr FAtom -> T.Text printSExpr = encodeOne (flatPrint printAtom) pprintSExpr :: Int -> Indent -> SExpr FAtom -> T.Text pprintSExpr w i = encodeOne (setIndentStrategy (const i) $ setMaxWidth w $ setIndentAmount 1 $ basicPrint printAtom) ucPrintSExpr :: Indent -> SExpr FAtom -> T.Text ucPrintSExpr i = encodeOne (setIndentStrategy (const i) $ setIndentAmount 1 $ unconstrainedPrint printAtom) getIdent :: FAtom -> Maybe String getIdent (AIdent s) = Just s getIdent _ = Nothing formatBV :: Int -> Integer -> T.Text formatBV w val = T.pack (prefix ++ printf fmt val) where (prefix, fmt) | w `rem` 4 == 0 = ("#x", "%0" ++ show (w `div` 4) ++ "x") | otherwise = ("#b", "%0" ++ show w ++ "b") parseIdent :: Parser String parseIdent = (:) <$> first <*> P.many rest where first = P.letter P.<|> P.oneOf "+-=<>_" rest = P.letter P.<|> P.digit P.<|> P.oneOf "+-=<>_" parseString :: Parser String parseString = do _ <- P.char '"' s <- P.many (P.noneOf ['"']) _ <- P.char '"' return s parseBV :: Parser (Int, Integer) parseBV = P.char '#' >> ((P.char 'b' >> parseBin) P.<|> (P.char 'x' >> parseHex)) where parseBin = P.oneOf "10" >>= \d -> parseBin' (1, if d == '1' then 1 else 0) parseBin' :: (Int, Integer) -> Parser (Int, Integer) parseBin' (bits, x) = do P.optionMaybe (P.oneOf "10") >>= \case Just d -> parseBin' (bits + 1, x * 2 + (if d == '1' then 1 else 0)) Nothing -> return (bits, x) parseHex = (\s -> (length s * 4, read ("0x" ++ s))) <$> P.many1 P.hexDigit parseAtom :: Parser FAtom parseAtom = AIdent <$> parseIdent P.<|> AQuoted <$> (P.char '\'' >> parseIdent) P.<|> AString <$> parseString P.<|> AInt . read <$> P.many1 P.digit P.<|> uncurry ABV <$> parseBV parserLL :: SExprParser FAtom (SExpr FAtom) parserLL = withLispComments (mkParser parseAtom) parseSExpr :: T.Text -> Either String (SExpr FAtom) parseSExpr = decodeOne parserLL s-cargot-0.1.6.0/test/SCargotQC.hs0000644000000000000000000001360307346545000014670 0ustar0000000000000000{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ScopedTypeVariables #-} {-# OPTIONS_GHC -fno-warn-orphans #-} module Main where import Data.SCargot import Data.SCargot.Comments import Data.SCargot.Repr import Data.Monoid ((<>)) import Data.Text (Text) import qualified Data.Text as T import Test.QuickCheck import Text.Parsec (char) instance Arbitrary a => Arbitrary (SExpr a) where arbitrary = sized $ \n -> if n <= 0 then pure SNil else oneof [ SAtom <$> arbitrary , do k <- choose (0, n) elems <- sequence [ resize (n-k) arbitrary | _ <- [0..k] ] rest <- oneof [ SAtom <$> arbitrary , pure SNil ] pure (foldr SCons rest elems) ] instance Arbitrary a => Arbitrary (RichSExpr a) where arbitrary = toRich `fmap` arbitrary instance Arbitrary a => Arbitrary (WellFormedSExpr a) where arbitrary = sized $ \n -> oneof [ WFSAtom <$> arbitrary , do k <- choose (0, n) WFSList <$> sequence [ resize (n-k) arbitrary | _ <- [0..k] ] ] data EncodedSExpr = EncodedSExpr { encoding :: Text , original :: SExpr () } deriving (Eq, Show) instance Arbitrary EncodedSExpr where arbitrary = do sexpr :: SExpr () <- arbitrary let chunks = T.words (encodeOne printer sexpr) whitespace <- sequence [ mkWs | _ <- chunks ] pure (EncodedSExpr { encoding = T.concat (zipWith (<>) chunks whitespace) , original = sexpr }) where mkWs = do n :: Int <- choose (1, 10) T.pack <$> sequence [ elements " \t\r\n" | _ <- [0..n] ] parser :: SExprParser () (SExpr ()) parser = mkParser (() <$ char 'X') printer :: SExprPrinter () (SExpr ()) printer = flatPrint (const "X") prettyPrinter :: SExprPrinter () (SExpr ()) prettyPrinter = basicPrint (const "X") widePrinter :: SExprPrinter () (SExpr ()) widePrinter = unconstrainedPrint (const "X") richIso :: SExpr () -> Bool richIso s = fromRich (toRich s) == s richIsoBk :: RichSExpr () -> Bool richIsoBk s = toRich (fromRich s) == s wfIso :: SExpr () -> Bool wfIso s = case toWellFormed s of Left _ -> True Right y -> s == fromWellFormed y wfIsoBk :: WellFormedSExpr () -> Bool wfIsoBk s = toWellFormed (fromWellFormed s) == Right s encDec :: SExpr () -> Bool encDec s = decodeOne parser (encodeOne printer s) == Right s encDecPretty :: SExpr () -> Bool encDecPretty s = decodeOne parser (encodeOne prettyPrinter s) == Right s encDecWide :: SExpr () -> Bool encDecWide s = decodeOne parser (encodeOne widePrinter s) == Right s decEnc :: EncodedSExpr -> Bool decEnc s = decodeOne parser (encoding s) == Right (original s) encDecRich :: RichSExpr () -> Bool encDecRich s = decodeOne (asRich parser) (encodeOne printer (fromRich s)) == Right s encDecRichPretty :: RichSExpr () -> Bool encDecRichPretty s = decodeOne (asRich parser) (encodeOne prettyPrinter (fromRich s)) == Right s encDecRichWide :: RichSExpr () -> Bool encDecRichWide s = decodeOne (asRich parser) (encodeOne widePrinter (fromRich s)) == Right s decEncRich :: EncodedSExpr -> Bool decEncRich s = decodeOne (asRich parser) (encoding s) == Right (toRich (original s)) encDecWF :: WellFormedSExpr () -> Bool encDecWF s = decodeOne (asWellFormed parser) (encodeOne printer (fromWellFormed s)) == Right s encDecWFPretty :: WellFormedSExpr () -> Bool encDecWFPretty s = decodeOne (asWellFormed parser) (encodeOne prettyPrinter (fromWellFormed s)) == Right s encDecWFWide :: WellFormedSExpr () -> Bool encDecWFWide s = decodeOne (asWellFormed parser) (encodeOne widePrinter (fromWellFormed s)) == Right s decEncWF :: EncodedSExpr -> Bool decEncWF s = decodeOne (asWellFormed parser) (encoding s) == toWellFormed (original s) insertComments :: Text -> Text -> Text -> Text insertComments lc rc sexpr = T.replace " " (" " <> lc <> "blahblahblah" <> rc <> " ") sexpr encDecLineComments :: SExpr () -> Bool encDecLineComments s = decodeOne (withLispComments parser) (insertComments ";" "\n" (encodeOne printer s)) == Right s encDecBlockComments :: SExpr () -> Bool encDecBlockComments s = decodeOne (withHaskellBlockComments parser) (insertComments "{-" "-}" (encodeOne printer s)) == Right s -- Sometimes we generate really huge test cases, which can take a really -- long time to process---especially when we're modifying the whitespace -- to produce weird anomalous S-expressions. So, we make the size parameter -- a bit smaller for good measure. reallyQuickCheck :: Testable prop => prop -> IO () reallyQuickCheck = quickCheckWith stdArgs { maxSize = 25 } main :: IO () main = do putStrLn "The SExpr <--> Rich translation should be isomorphic" quickCheck richIso quickCheck richIsoBk putStrLn "The SExpr <--> WF translation should be near-isomorphic" quickCheck wfIso quickCheck wfIsoBk putStrLn "This should be true when parsing, as well" quickCheck encDec reallyQuickCheck decEnc quickCheck encDecRich reallyQuickCheck decEncRich quickCheck encDecWF reallyQuickCheck decEncWF putStrLn "And it should be true if pretty-printed" reallyQuickCheck encDecPretty reallyQuickCheck encDecRichPretty reallyQuickCheck encDecWFPretty putStrLn "And it should be true if pretty-printed using the wide-format printer" reallyQuickCheck encDecWide reallyQuickCheck encDecRichWide reallyQuickCheck encDecWFWide putStrLn "Comments should not affect parsing" reallyQuickCheck encDecLineComments reallyQuickCheck encDecBlockComments s-cargot-0.1.6.0/test/big-sample.sexp0000644000000000000000000002667107346545000015500 0ustar0000000000000000((operands ((rD . 'GPR) (setcc . 'Cc_out) (predBits . 'Pred) (mimm . 'Mod_imm) (rN . 'GPR))) (in (mimm setcc rN 'CPSR 'PC)) (defs (('PC (ite ((_ call "arm.is_r15") rD) (ite (bveq #b0 ((_ extract 0 0) ((_ extract 31 0) (bvadd (bvadd ((_ zero_extend 1) rN) ((_ zero_extend 1) (ite (bveq (bvshl #x00000001 ((_ zero_extend 28) ((_ call "a32.modimm_rot") mimm))) #x00000000) ((_ zero_extend 24) ((_ call "a32.modimm_imm") mimm)) (bvor (bvshl (bvshl #x00000001 ((_ zero_extend 28) ((_ call "a32.modimm_rot") mimm))) (bvsub #x00000020 (bvurem ((_ zero_extend 24) ((_ call "a32.modimm_imm") mimm)) #x00000020))) (bvlshr (bvshl #x00000001 ((_ zero_extend 28) ((_ call "a32.modimm_rot") mimm))) (bvurem ((_ zero_extend 24) ((_ call "a32.modimm_imm") mimm)) #x00000020)))))) ((_ zero_extend 1) #x00000000))))) (bvand #xfffffffe ((_ extract 31 0) (bvadd (bvadd ((_ zero_extend 1) rN) ((_ zero_extend 1) (ite (bveq (bvshl #x00000001 ((_ zero_extend 28) ((_ call "a32.modimm_rot") mimm))) #x00000000) ((_ zero_extend 24) ((_ call "a32.modimm_imm") mimm)) (bvor (bvshl (bvshl #x00000001 ((_ zero_extend 28) ((_ call "a32.modimm_rot") mimm))) (bvsub #x00000020 (bvurem ((_ zero_extend 24) ((_ call "a32.modimm_imm") mimm)) #x00000020))) (bvlshr (bvshl #x00000001 ((_ zero_extend 28) ((_ call "a32.modimm_rot") mimm))) (bvurem ((_ zero_extend 24) ((_ call "a32.modimm_imm") mimm)) #x00000020)))))) ((_ zero_extend 1) #x00000000)))) (ite (bveq #b0 ((_ extract 1 1) ((_ extract 31 0) (bvadd (bvadd ((_ zero_extend 1) rN) ((_ zero_extend 1) (ite (bveq (bvshl #x00000001 ((_ zero_extend 28) ((_ call "a32.modimm_rot") mimm))) #x00000000) ((_ zero_extend 24) ((_ call "a32.modimm_imm") mimm)) (bvor (bvshl (bvshl #x00000001 ((_ zero_extend 28) ((_ call "a32.modimm_rot") mimm))) (bvsub #x00000020 (bvurem ((_ zero_extend 24) ((_ call "a32.modimm_imm") mimm)) #x00000020))) (bvlshr (bvshl #x00000001 ((_ zero_extend 28) ((_ call "a32.modimm_rot") mimm))) (bvurem ((_ zero_extend 24) ((_ call "a32.modimm_imm") mimm)) #x00000020)))))) ((_ zero_extend 1) #x00000000))))) (bvand #xfffffffd ((_ extract 31 0) (bvadd (bvadd ((_ zero_extend 1) rN) ((_ zero_extend 1) (ite (bveq (bvshl #x00000001 ((_ zero_extend 28) ((_ call "a32.modimm_rot") mimm))) #x00000000) ((_ zero_extend 24) ((_ call "a32.modimm_imm") mimm)) (bvor (bvshl (bvshl #x00000001 ((_ zero_extend 28) ((_ call "a32.modimm_rot") mimm))) (bvsub #x00000020 (bvurem ((_ zero_extend 24) ((_ call "a32.modimm_imm") mimm)) #x00000020))) (bvlshr (bvshl #x00000001 ((_ zero_extend 28) ((_ call "a32.modimm_rot") mimm))) (bvurem ((_ zero_extend 24) ((_ call "a32.modimm_imm") mimm)) #x00000020)))))) ((_ zero_extend 1) #x00000000)))) ((_ extract 31 0) (bvadd (bvadd ((_ zero_extend 1) rN) ((_ zero_extend 1) (ite (bveq (bvshl #x00000001 ((_ zero_extend 28) ((_ call "a32.modimm_rot") mimm))) #x00000000) ((_ zero_extend 24) ((_ call "a32.modimm_imm") mimm)) (bvor (bvshl (bvshl #x00000001 ((_ zero_extend 28) ((_ call "a32.modimm_rot") mimm))) (bvsub #x00000020 (bvurem ((_ zero_extend 24) ((_ call "a32.modimm_imm") mimm)) #x00000020))) (bvlshr (bvshl #x00000001 ((_ zero_extend 28) ((_ call "a32.modimm_rot") mimm))) (bvurem ((_ zero_extend 24) ((_ call "a32.modimm_imm") mimm)) #x00000020)))))) ((_ zero_extend 1) #x00000000))))) (bvadd 'PC #x00000004))) ('CPSR (ite (ite (andp (bveq #b1 ((_ extract 0 0) predBits)) (bvne predBits #xf)) (notp (ite (bveq ((_ extract 3 1) predBits) #b000) (bveq #b1 ((_ extract 30 30) 'CPSR)) (ite (bveq ((_ extract 3 1) predBits) #b001) (bveq #b1 ((_ extract 29 29) 'CPSR)) (ite (bveq ((_ extract 3 1) predBits) #b010) (bveq #b1 ((_ extract 31 31) 'CPSR)) (ite (bveq ((_ extract 3 1) predBits) #b011) (bveq #b1 ((_ extract 28 28) 'CPSR)) (ite (bveq ((_ extract 3 1) predBits) #b100) (andp (bveq #b1 ((_ extract 29 29) 'CPSR)) (notp (bveq #b1 ((_ extract 30 30) 'CPSR)))) (ite (bveq ((_ extract 3 1) predBits) #b101) (bveq ((_ extract 31 31) 'CPSR) ((_ extract 28 28) 'CPSR)) (ite (bveq ((_ extract 3 1) predBits) #b110) (andp (bveq ((_ extract 31 31) 'CPSR) ((_ extract 28 28) 'CPSR)) (notp (bveq #b1 ((_ extract 30 30) 'CPSR)))) (bveq #b0 #b0))))))))) (ite (bveq ((_ extract 3 1) predBits) #b000) (bveq #b1 ((_ extract 30 30) 'CPSR)) (ite (bveq ((_ extract 3 1) predBits) #b001) (bveq #b1 ((_ extract 29 29) 'CPSR)) (ite (bveq ((_ extract 3 1) predBits) #b010) (bveq #b1 ((_ extract 31 31) 'CPSR)) (ite (bveq ((_ extract 3 1) predBits) #b011) (bveq #b1 ((_ extract 28 28) 'CPSR)) (ite (bveq ((_ extract 3 1) predBits) #b100) (andp (bveq #b1 ((_ extract 29 29) 'CPSR)) (notp (bveq #b1 ((_ extract 30 30) 'CPSR)))) (ite (bveq ((_ extract 3 1) predBits) #b101) (bveq ((_ extract 31 31) 'CPSR) ((_ extract 28 28) 'CPSR)) (ite (bveq ((_ extract 3 1) predBits) #b110) (andp (bveq ((_ extract 31 31) 'CPSR) ((_ extract 28 28) 'CPSR)) (notp (bveq #b1 ((_ extract 30 30) 'CPSR)))) (bveq #b0 #b0))))))))) (ite (andp (bveq setcc #b1) (notp ((_ call "arm.is_r15") rD))) (concat (concat ((_ extract 31 31) ((_ extract 31 0) (bvadd (bvadd ((_ zero_extend 1) rN) ((_ zero_extend 1) (ite (bveq (bvshl #x00000001 ((_ zero_extend 28) ((_ call "a32.modimm_rot") mimm))) #x00000000) ((_ zero_extend 24) ((_ call "a32.modimm_imm") mimm)) (bvor (bvshl (bvshl #x00000001 ((_ zero_extend 28) ((_ call "a32.modimm_rot") mimm))) (bvsub #x00000020 (bvurem ((_ zero_extend 24) ((_ call "a32.modimm_imm") mimm)) #x00000020))) (bvlshr (bvshl #x00000001 ((_ zero_extend 28) ((_ call "a32.modimm_rot") mimm))) (bvurem ((_ zero_extend 24) ((_ call "a32.modimm_imm") mimm)) #x00000020)))))) ((_ zero_extend 1) #x00000000)))) (concat (ite (bveq ((_ extract 31 0) (bvadd (bvadd ((_ zero_extend 1) rN) ((_ zero_extend 1) (ite (bveq (bvshl #x00000001 ((_ zero_extend 28) ((_ call "a32.modimm_rot") mimm))) #x00000000) ((_ zero_extend 24) ((_ call "a32.modimm_imm") mimm)) (bvor (bvshl (bvshl #x00000001 ((_ zero_extend 28) ((_ call "a32.modimm_rot") mimm))) (bvsub #x00000020 (bvurem ((_ zero_extend 24) ((_ call "a32.modimm_imm") mimm)) #x00000020))) (bvlshr (bvshl #x00000001 ((_ zero_extend 28) ((_ call "a32.modimm_rot") mimm))) (bvurem ((_ zero_extend 24) ((_ call "a32.modimm_imm") mimm)) #x00000020)))))) ((_ zero_extend 1) #x00000000))) #x00000000) #b1 #b0) (concat ((_ extract 32 32) (bvadd (bvadd ((_ zero_extend 1) rN) ((_ zero_extend 1) (ite (bveq (bvshl #x00000001 ((_ zero_extend 28) ((_ call "a32.modimm_rot") mimm))) #x00000000) ((_ zero_extend 24) ((_ call "a32.modimm_imm") mimm)) (bvor (bvshl (bvshl #x00000001 ((_ zero_extend 28) ((_ call "a32.modimm_rot") mimm))) (bvsub #x00000020 (bvurem ((_ zero_extend 24) ((_ call "a32.modimm_imm") mimm)) #x00000020))) (bvlshr (bvshl #x00000001 ((_ zero_extend 28) ((_ call "a32.modimm_rot") mimm))) (bvurem ((_ zero_extend 24) ((_ call "a32.modimm_imm") mimm)) #x00000020)))))) ((_ zero_extend 1) #x00000000))) (bvand ((_ extract 31 31) ((_ extract 31 0) (bvadd (bvadd ((_ zero_extend 1) rN) ((_ zero_extend 1) (ite (bveq (bvshl #x00000001 ((_ zero_extend 28) ((_ call "a32.modimm_rot") mimm))) #x00000000) ((_ zero_extend 24) ((_ call "a32.modimm_imm") mimm)) (bvor (bvshl (bvshl #x00000001 ((_ zero_extend 28) ((_ call "a32.modimm_rot") mimm))) (bvsub #x00000020 (bvurem ((_ zero_extend 24) ((_ call "a32.modimm_imm") mimm)) #x00000020))) (bvlshr (bvshl #x00000001 ((_ zero_extend 28) ((_ call "a32.modimm_rot") mimm))) (bvurem ((_ zero_extend 24) ((_ call "a32.modimm_imm") mimm)) #x00000020)))))) ((_ zero_extend 1) #x00000000)))) ((_ extract 32 32) (bvadd (bvadd ((_ zero_extend 1) rN) ((_ zero_extend 1) (ite (bveq (bvshl #x00000001 ((_ zero_extend 28) ((_ call "a32.modimm_rot") mimm))) #x00000000) ((_ zero_extend 24) ((_ call "a32.modimm_imm") mimm)) (bvor (bvshl (bvshl #x00000001 ((_ zero_extend 28) ((_ call "a32.modimm_rot") mimm))) (bvsub #x00000020 (bvurem ((_ zero_extend 24) ((_ call "a32.modimm_imm") mimm)) #x00000020))) (bvlshr (bvshl #x00000001 ((_ zero_extend 28) ((_ call "a32.modimm_rot") mimm))) (bvurem ((_ zero_extend 24) ((_ call "a32.modimm_imm") mimm)) #x00000020)))))) ((_ zero_extend 1) #x00000000))))))) ((_ extract 27 0) (ite ((_ call "arm.is_r15") rD) (ite (bveq #b0 ((_ extract 0 0) ((_ extract 31 0) (bvadd (bvadd ((_ zero_extend 1) rN) ((_ zero_extend 1) (ite (bveq (bvshl #x00000001 ((_ zero_extend 28) ((_ call "a32.modimm_rot") mimm))) #x00000000) ((_ zero_extend 24) ((_ call "a32.modimm_imm") mimm)) (bvor (bvshl (bvshl #x00000001 ((_ zero_extend 28) ((_ call "a32.modimm_rot") mimm))) (bvsub #x00000020 (bvurem ((_ zero_extend 24) ((_ call "a32.modimm_imm") mimm)) #x00000020))) (bvlshr (bvshl #x00000001 ((_ zero_extend 28) ((_ call "a32.modimm_rot") mimm))) (bvurem ((_ zero_extend 24) ((_ call "a32.modimm_imm") mimm)) #x00000020)))))) ((_ zero_extend 1) #x00000000))))) (bvand #xfeffffff (bvor #x00000020 'CPSR)) 'CPSR) 'CPSR))) (ite ((_ call "arm.is_r15") rD) (ite (bveq #b0 ((_ extract 0 0) ((_ extract 31 0) (bvadd (bvadd ((_ zero_extend 1) rN) ((_ zero_extend 1) (ite (bveq (bvshl #x00000001 ((_ zero_extend 28) ((_ call "a32.modimm_rot") mimm))) #x00000000) ((_ zero_extend 24) ((_ call "a32.modimm_imm") mimm)) (bvor (bvshl (bvshl #x00000001 ((_ zero_extend 28) ((_ call "a32.modimm_rot") mimm))) (bvsub #x00000020 (bvurem ((_ zero_extend 24) ((_ call "a32.modimm_imm") mimm)) #x00000020))) (bvlshr (bvshl #x00000001 ((_ zero_extend 28) ((_ call "a32.modimm_rot") mimm))) (bvurem ((_ zero_extend 24) ((_ call "a32.modimm_imm") mimm)) #x00000020)))))) ((_ zero_extend 1) #x00000000))))) (bvand #xfeffffff (bvor #x00000020 'CPSR)) 'CPSR) 'CPSR)) 'CPSR)) (rD (ite (ite (andp (bveq #b1 ((_ extract 0 0) predBits)) (bvne predBits #xf)) (notp (ite (bveq ((_ extract 3 1) predBits) #b000) (bveq #b1 ((_ extract 30 30) 'CPSR)) (ite (bveq ((_ extract 3 1) predBits) #b001) (bveq #b1 ((_ extract 29 29) 'CPSR)) (ite (bveq ((_ extract 3 1) predBits) #b010) (bveq #b1 ((_ extract 31 31) 'CPSR)) (ite (bveq ((_ extract 3 1) predBits) #b011) (bveq #b1 ((_ extract 28 28) 'CPSR)) (ite (bveq ((_ extract 3 1) predBits) #b100) (andp (bveq #b1 ((_ extract 29 29) 'CPSR)) (notp (bveq #b1 ((_ extract 30 30) 'CPSR)))) (ite (bveq ((_ extract 3 1) predBits) #b101) (bveq ((_ extract 31 31) 'CPSR) ((_ extract 28 28) 'CPSR)) (ite (bveq ((_ extract 3 1) predBits) #b110) (andp (bveq ((_ extract 31 31) 'CPSR) ((_ extract 28 28) 'CPSR)) (notp (bveq #b1 ((_ extract 30 30) 'CPSR)))) (bveq #b0 #b0))))))))) (ite (bveq ((_ extract 3 1) predBits) #b000) (bveq #b1 ((_ extract 30 30) 'CPSR)) (ite (bveq ((_ extract 3 1) predBits) #b001) (bveq #b1 ((_ extract 29 29) 'CPSR)) (ite (bveq ((_ extract 3 1) predBits) #b010) (bveq #b1 ((_ extract 31 31) 'CPSR)) (ite (bveq ((_ extract 3 1) predBits) #b011) (bveq #b1 ((_ extract 28 28) 'CPSR)) (ite (bveq ((_ extract 3 1) predBits) #b100) (andp (bveq #b1 ((_ extract 29 29) 'CPSR)) (notp (bveq #b1 ((_ extract 30 30) 'CPSR)))) (ite (bveq ((_ extract 3 1) predBits) #b101) (bveq ((_ extract 31 31) 'CPSR) ((_ extract 28 28) 'CPSR)) (ite (bveq ((_ extract 3 1) predBits) #b110) (andp (bveq ((_ extract 31 31) 'CPSR) ((_ extract 28 28) 'CPSR)) (notp (bveq #b1 ((_ extract 30 30) 'CPSR)))) (bveq #b0 #b0))))))))) (ite ((_ call "arm.is_r15") rD) rD ((_ extract 31 0) (bvadd (bvadd ((_ zero_extend 1) rN) ((_ zero_extend 1) (ite (bveq (bvshl #x00000001 ((_ zero_extend 28) ((_ call "a32.modimm_rot") mimm))) #x00000000) ((_ zero_extend 24) ((_ call "a32.modimm_imm") mimm)) (bvor (bvshl (bvshl #x00000001 ((_ zero_extend 28) ((_ call "a32.modimm_rot") mimm))) (bvsub #x00000020 (bvurem ((_ zero_extend 24) ((_ call "a32.modimm_imm") mimm)) #x00000020))) (bvlshr (bvshl #x00000001 ((_ zero_extend 28) ((_ call "a32.modimm_rot") mimm))) (bvurem ((_ zero_extend 24) ((_ call "a32.modimm_imm") mimm)) #x00000020)))))) ((_ zero_extend 1) #x00000000)))) rD)))))s-cargot-0.1.6.0/test/med-sample.sexp0000644000000000000000000000115107346545000015466 0ustar0000000000000000((operands ((rA . 'Gprc) (rS . 'Gprc) (rB . 'Gprc))) (in ('XER 'CR rB rS 'IP)) (defs (('CR (bvor (bvand 'CR (bvnot (bvshl #x0000000f (bvmul ((_ zero_extend 29) #b000) #x00000004)))) (bvshl ((_ zero_extend 28) (concat (ite (bvslt (bvxor rS rB) #x00000000) #b100 (ite (bvsgt (bvxor rS rB) #x00000000) #b010 #b001)) ((_ extract 0 0) 'XER))) (bvmul ((_ zero_extend 29) #b000) #x00000004)))) (rA (bvxor rS rB)) ('IP (bvadd 'IP #x00000004)))))s-cargot-0.1.6.0/test/med2-sample.sexp0000644000000000000000000001647407346545000015566 0ustar0000000000000000((operands ((rD . 'GPR) (setcc . 'Cc_out) (predBits . 'Pred) (rM . 'GPR) (rN . 'GPR))) (in (setcc rN rM 'CPSR 'PC)) (defs (('PC (ite ((_ call "arm.is_r15") rD) (ite (bveq #b0 ((_ extract 0 0) ((_ extract 31 0) (bvadd (bvadd ((_ zero_extend 1) rN) ((_ zero_extend 1) (bvnot rM))) ((_ zero_extend 1) #x00000001))))) (bvand #xfffffffe ((_ extract 31 0) (bvadd (bvadd ((_ zero_extend 1) rN) ((_ zero_extend 1) (bvnot rM))) ((_ zero_extend 1) #x00000001)))) (ite (bveq #b0 ((_ extract 1 1) ((_ extract 31 0) (bvadd (bvadd ((_ zero_extend 1) rN) ((_ zero_extend 1) (bvnot rM))) ((_ zero_extend 1) #x00000001))))) (bvand #xfffffffd ((_ extract 31 0) (bvadd (bvadd ((_ zero_extend 1) rN) ((_ zero_extend 1) (bvnot rM))) ((_ zero_extend 1) #x00000001)))) ((_ extract 31 0) (bvadd (bvadd ((_ zero_extend 1) rN) ((_ zero_extend 1) (bvnot rM))) ((_ zero_extend 1) #x00000001))))) (bvadd 'PC #x00000004))) ('CPSR (ite (ite (andp (bveq #b1 ((_ extract 0 0) predBits)) (bvne predBits #xf)) (notp (ite (bveq ((_ extract 3 1) predBits) #b000) (bveq #b1 ((_ extract 30 30) 'CPSR)) (ite (bveq ((_ extract 3 1) predBits) #b001) (bveq #b1 ((_ extract 29 29) 'CPSR)) (ite (bveq ((_ extract 3 1) predBits) #b010) (bveq #b1 ((_ extract 31 31) 'CPSR)) (ite (bveq ((_ extract 3 1) predBits) #b011) (bveq #b1 ((_ extract 28 28) 'CPSR)) (ite (bveq ((_ extract 3 1) predBits) #b100) (andp (bveq #b1 ((_ extract 29 29) 'CPSR)) (notp (bveq #b1 ((_ extract 30 30) 'CPSR)))) (ite (bveq ((_ extract 3 1) predBits) #b101) (bveq ((_ extract 31 31) 'CPSR) ((_ extract 28 28) 'CPSR)) (ite (bveq ((_ extract 3 1) predBits) #b110) (andp (bveq ((_ extract 31 31) 'CPSR) ((_ extract 28 28) 'CPSR)) (notp (bveq #b1 ((_ extract 30 30) 'CPSR)))) (bveq #b0 #b0))))))))) (ite (bveq ((_ extract 3 1) predBits) #b000) (bveq #b1 ((_ extract 30 30) 'CPSR)) (ite (bveq ((_ extract 3 1) predBits) #b001) (bveq #b1 ((_ extract 29 29) 'CPSR)) (ite (bveq ((_ extract 3 1) predBits) #b010) (bveq #b1 ((_ extract 31 31) 'CPSR)) (ite (bveq ((_ extract 3 1) predBits) #b011) (bveq #b1 ((_ extract 28 28) 'CPSR)) (ite (bveq ((_ extract 3 1) predBits) #b100) (andp (bveq #b1 ((_ extract 29 29) 'CPSR)) (notp (bveq #b1 ((_ extract 30 30) 'CPSR)))) (ite (bveq ((_ extract 3 1) predBits) #b101) (bveq ((_ extract 31 31) 'CPSR) ((_ extract 28 28) 'CPSR)) (ite (bveq ((_ extract 3 1) predBits) #b110) (andp (bveq ((_ extract 31 31) 'CPSR) ((_ extract 28 28) 'CPSR)) (notp (bveq #b1 ((_ extract 30 30) 'CPSR)))) (bveq #b0 #b0))))))))) (ite (andp (bveq setcc #b1) (notp ((_ call "arm.is_r15") rD))) (concat (concat ((_ extract 31 31) ((_ extract 31 0) (bvadd (bvadd ((_ zero_extend 1) rN) ((_ zero_extend 1) (bvnot rM))) ((_ zero_extend 1) #x00000001)))) (concat (ite (bveq ((_ extract 31 0) (bvadd (bvadd ((_ zero_extend 1) rN) ((_ zero_extend 1) (bvnot rM))) ((_ zero_extend 1) #x00000001))) #x00000000) #b1 #b0) (concat ((_ extract 32 32) (bvadd (bvadd ((_ zero_extend 1) rN) ((_ zero_extend 1) (bvnot rM))) ((_ zero_extend 1) #x00000001))) (bvand ((_ extract 31 31) ((_ extract 31 0) (bvadd (bvadd ((_ zero_extend 1) rN) ((_ zero_extend 1) (bvnot rM))) ((_ zero_extend 1) #x00000001)))) ((_ extract 32 32) (bvadd (bvadd ((_ zero_extend 1) rN) ((_ zero_extend 1) (bvnot rM))) ((_ zero_extend 1) #x00000001))))))) ((_ extract 27 0) (ite ((_ call "arm.is_r15") rD) (ite (bveq #b0 ((_ extract 0 0) ((_ extract 31 0) (bvadd (bvadd ((_ zero_extend 1) rN) ((_ zero_extend 1) (bvnot rM))) ((_ zero_extend 1) #x00000001))))) (bvand #xfeffffff (bvor #x00000020 'CPSR)) 'CPSR) 'CPSR))) (ite ((_ call "arm.is_r15") rD) (ite (bveq #b0 ((_ extract 0 0) ((_ extract 31 0) (bvadd (bvadd ((_ zero_extend 1) rN) ((_ zero_extend 1) (bvnot rM))) ((_ zero_extend 1) #x00000001))))) (bvand #xfeffffff (bvor #x00000020 'CPSR)) 'CPSR) 'CPSR)) 'CPSR)) (rD (ite (ite (andp (bveq #b1 ((_ extract 0 0) predBits)) (bvne predBits #xf)) (notp (ite (bveq ((_ extract 3 1) predBits) #b000) (bveq #b1 ((_ extract 30 30) 'CPSR)) (ite (bveq ((_ extract 3 1) predBits) #b001) (bveq #b1 ((_ extract 29 29) 'CPSR)) (ite (bveq ((_ extract 3 1) predBits) #b010) (bveq #b1 ((_ extract 31 31) 'CPSR)) (ite (bveq ((_ extract 3 1) predBits) #b011) (bveq #b1 ((_ extract 28 28) 'CPSR)) (ite (bveq ((_ extract 3 1) predBits) #b100) (andp (bveq #b1 ((_ extract 29 29) 'CPSR)) (notp (bveq #b1 ((_ extract 30 30) 'CPSR)))) (ite (bveq ((_ extract 3 1) predBits) #b101) (bveq ((_ extract 31 31) 'CPSR) ((_ extract 28 28) 'CPSR)) (ite (bveq ((_ extract 3 1) predBits) #b110) (andp (bveq ((_ extract 31 31) 'CPSR) ((_ extract 28 28) 'CPSR)) (notp (bveq #b1 ((_ extract 30 30) 'CPSR)))) (bveq #b0 #b0))))))))) (ite (bveq ((_ extract 3 1) predBits) #b000) (bveq #b1 ((_ extract 30 30) 'CPSR)) (ite (bveq ((_ extract 3 1) predBits) #b001) (bveq #b1 ((_ extract 29 29) 'CPSR)) (ite (bveq ((_ extract 3 1) predBits) #b010) (bveq #b1 ((_ extract 31 31) 'CPSR)) (ite (bveq ((_ extract 3 1) predBits) #b011) (bveq #b1 ((_ extract 28 28) 'CPSR)) (ite (bveq ((_ extract 3 1) predBits) #b100) (andp (bveq #b1 ((_ extract 29 29) 'CPSR)) (notp (bveq #b1 ((_ extract 30 30) 'CPSR)))) (ite (bveq ((_ extract 3 1) predBits) #b101) (bveq ((_ extract 31 31) 'CPSR) ((_ extract 28 28) 'CPSR)) (ite (bveq ((_ extract 3 1) predBits) #b110) (andp (bveq ((_ extract 31 31) 'CPSR) ((_ extract 28 28) 'CPSR)) (notp (bveq #b1 ((_ extract 30 30) 'CPSR)))) (bveq #b0 #b0))))))))) (ite ((_ call "arm.is_r15") rD) rD ((_ extract 31 0) (bvadd (bvadd ((_ zero_extend 1) rN) ((_ zero_extend 1) (bvnot rM))) ((_ zero_extend 1) #x00000001)))) rD))))) s-cargot-0.1.6.0/test/small-sample.sexp0000644000000000000000000000014407346545000016032 0ustar0000000000000000((operands ((rT . 'Gprc) (rA . 'Gprc))) (in (rA 'IP)) (defs ((rT rA) ('IP (bvadd 'IP #x00000004)))))