hsemail-1.7.2/0000755000000000000000000000000011764377107011346 5ustar0000000000000000hsemail-1.7.2/LICENSE0000644000000000000000000000260111764377107012352 0ustar0000000000000000Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * The names of its contributors may not be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. hsemail-1.7.2/hsemail.cabal0000644000000000000000000000236611764377107013763 0ustar0000000000000000Name: hsemail Version: 1.7.2 Copyright: (c) 2012 Peter Simons License: BSD3 License-File: LICENSE Author: Peter Simons , Gero Kriependorf , Marty Pauley Maintainer: Peter Simons Homepage: http://gitorious.org/hsemail Category: Parsing Synopsis: Internet Message Parsers Description: Parsers for the syntax defined in RFC2821 and 2822 Cabal-Version: >= 1.6 Build-Type: Simple Tested-With: GHC == 7.0.4, GHC == 7.4.1 Extra-Source-Files: example/message-test.hs example/message-test.input example/smtp-test.hs example/smtp-test.input Source-Repository head Type: git Location: git://gitorious.org/hsemail/mainline.git Library Build-Depends: base >= 3 && < 5, mtl, parsec, old-time Exposed-Modules: Text.ParserCombinators.Parsec.Rfc2234 Text.ParserCombinators.Parsec.Rfc2821 Text.ParserCombinators.Parsec.Rfc2822 Ghc-Options: -Wall hsemail-1.7.2/Setup.lhs0000644000000000000000000000017411764377107013160 0ustar0000000000000000#!/usr/bin/env runhaskell > module Main (main) where > > import Distribution.Simple > > main :: IO () > main = defaultMain hsemail-1.7.2/Text/0000755000000000000000000000000011764377107012272 5ustar0000000000000000hsemail-1.7.2/Text/ParserCombinators/0000755000000000000000000000000011764377107015727 5ustar0000000000000000hsemail-1.7.2/Text/ParserCombinators/Parsec/0000755000000000000000000000000011764377107017144 5ustar0000000000000000hsemail-1.7.2/Text/ParserCombinators/Parsec/Rfc2821.hs0000644000000000000000000003764411764377107020545 0ustar0000000000000000{- | Module : Text.ParserCombinators.Parsec.Rfc2821 Copyright : (c) 2012 Peter Simons License : BSD3 Maintainer : simons@cryp.to Stability : provisional Portability : portable This module exports parser combinators for the grammar described in RFC2821, \"Simple Mail Transfer Protocol\", . -} module Text.ParserCombinators.Parsec.Rfc2821 where import Control.Exception ( assert ) import Control.Monad.State import Text.ParserCombinators.Parsec import Data.List ( intercalate ) import Data.Char ( toLower ) import Text.ParserCombinators.Parsec.Rfc2234 ---------------------------------------------------------------------- -- * ESMTP State Machine ---------------------------------------------------------------------- data SessionState = Unknown | HaveHelo | HaveMailFrom | HaveRcptTo | HaveData | HaveQuit deriving (Enum, Bounded, Eq, Ord, Show) data Event = Greeting -- ^ reserved for the user | SayHelo String | SayHeloAgain String | SayEhlo String | SayEhloAgain String | SetMailFrom Mailbox | AddRcptTo Mailbox | StartData | Deliver -- ^ reserved for the user | NeedHeloFirst | NeedMailFromFirst | NeedRcptToFirst | NotImplemened -- ^ 'Turn', 'Send', 'Soml', 'Saml', 'Vrfy', and 'Expn'. | ResetState | SayOK -- ^ Triggered in case of 'Noop' or when 'Rset' is -- used before we even have a state. | SeeksHelp String -- ^ The parameter may be @[]@. | Shutdown | SyntaxErrorIn String | Unrecognized String deriving (Eq, Show) type SmtpdFSM = Control.Monad.State.State SessionState Event -- |Parse a line of SMTP dialogue and run 'handleSmtpCmd' to -- determine the 'Event'. In case of syntax errors, -- 'SyntaxErrorIn' or 'Unrecognized' will be returned. -- Inputs must be terminated with 'crlf'. See 'fixCRLF'. smtpdFSM :: String -> SmtpdFSM smtpdFSM str = either (\_ -> return (Unrecognized str)) (handleSmtpCmd) (parse smtpCmd "" str) -- |For those who want to parse the 'SmtpCmd' themselves. -- Calling this function in 'HaveQuit' or 'HaveData' will -- fail an assertion. If 'assert' is disabled, it will -- return respectively 'Shutdown' and 'StartData' again. handleSmtpCmd :: SmtpCmd -> SmtpdFSM handleSmtpCmd cmd = get >>= \st -> match st cmd where match :: SessionState -> SmtpCmd -> SmtpdFSM match HaveQuit _ = assert False (event Shutdown) match HaveData _ = assert False (trans (HaveData, StartData)) match _ (WrongArg c _) = event (SyntaxErrorIn c) match _ Quit = trans (HaveQuit, Shutdown) match _ Noop = event SayOK match _ Turn = event NotImplemened match _ (Send _) = event NotImplemened match _ (Soml _) = event NotImplemened match _ (Saml _) = event NotImplemened match _ (Vrfy _) = event NotImplemened match _ (Expn _) = event NotImplemened match _ (Help x) = event (SeeksHelp x) match Unknown Rset = event SayOK match HaveHelo Rset = event SayOK match _ Rset = trans (HaveHelo, ResetState) match Unknown (Helo x) = trans (HaveHelo, SayHelo x) match _ (Helo x) = trans (HaveHelo, SayHeloAgain x) match Unknown (Ehlo x) = trans (HaveHelo, SayEhlo x) match _ (Ehlo x) = trans (HaveHelo, SayEhloAgain x) match Unknown (MailFrom _) = event NeedHeloFirst match _ (MailFrom x) = trans (HaveMailFrom, SetMailFrom x) match Unknown (RcptTo _) = event NeedHeloFirst match HaveHelo (RcptTo _) = event NeedMailFromFirst match _ (RcptTo x) = trans (HaveRcptTo, AddRcptTo x) match Unknown Data = event NeedHeloFirst match HaveHelo Data = event NeedMailFromFirst match HaveMailFrom Data = event NeedRcptToFirst match HaveRcptTo Data = trans (HaveData, StartData) event :: Event -> SmtpdFSM event = return trans :: (SessionState, Event) -> SmtpdFSM trans (st,e) = put st >> event e ---------------------------------------------------------------------- -- * Data Types for SMTP Commands ---------------------------------------------------------------------- -- |The 'smtpCmd' parser will create this data type from a -- string. Note that /all/ command parsers expect their -- input to be terminated with 'crlf'. data SmtpCmd = Helo String | Ehlo String | MailFrom Mailbox -- ^ Might be 'nullPath'. | RcptTo Mailbox -- ^ Might be 'postmaster'. | Data | Rset | Send Mailbox | Soml Mailbox | Saml Mailbox | Vrfy String | Expn String | Help String -- ^ Might be @[]@. | Noop -- ^ Optional argument ignored. | Quit | Turn | WrongArg String ParseError -- ^ When a valid command has been recognized, but the -- argument parser fails, then this type will be -- returned. The 'String' contains the name of the -- command (in all upper-case) and the 'ParseError' -- is, obviously, the error description. instance Show SmtpCmd where show (Helo str) = "HELO " ++ str show (Ehlo str) = "EHLO " ++ str show (MailFrom mbox) = "MAIL FROM:" ++ show mbox show (RcptTo mbox) = "RCPT TO:" ++ show mbox show (Data) = "DATA" show (Rset) = "RSET" show (Send mbox) = "SEND " ++ show mbox show (Soml mbox) = "SOML " ++ show mbox show (Saml mbox) = "SAML " ++ show mbox show (Vrfy str) = "VRFY " ++ str show (Expn str) = "EXPN " ++ str show (Noop) = "NOOP" show (Quit) = "QUIT" show (Turn) = "TURN" show (Help t) | t == [] = "HELP" | otherwise = "HELP " ++ t show (WrongArg str _) = "Syntax error in argument of " ++ str ++ "." -- |The most general e-mail address has the form: -- @\<[\@route,...:]user\@domain\>@. This type, too, -- supports 'show' and 'read'. Note that a \"shown\" address -- is /always/ enclosed in angular brackets. When comparing -- two mailboxes for equality, the hostname is case-insensitive. data Mailbox = Mailbox [String] String String instance Eq Mailbox where lhs == rhs = norm lhs == norm rhs where norm (Mailbox rt lp hp) = (rt, lp, map toLower hp) instance Show Mailbox where show (Mailbox [] [] []) = "<>" show (Mailbox [] "postmaster" []) = "" show (Mailbox p u d) = let route = intercalate "," . map ((:) '@') $ p mbox = u ++ "@" ++ d in if null route then "<" ++ mbox ++ ">" else "<" ++ route ++ ":" ++ mbox ++ ">" instance Read Mailbox where readsPrec _ = parsec2read (path <|> mailbox) readList = error "reading [Mailbox] is not supported" -- |@nullPath@ @=@ @'Mailbox' [] \"\" \"\" = \"\<\>\"@ nullPath :: Mailbox nullPath = Mailbox [] [] [] -- |@postmaster@ @=@ @'Mailbox' [] \"postmaster\" \"\" = \"\\"@ postmaster :: Mailbox postmaster = Mailbox [] "postmaster" [] ---------------------------------------------------------------------- -- * Data Types for SMTP Replies ---------------------------------------------------------------------- -- |An SMTP reply is a three-digit return code plus some -- waste of bandwidth called \"comments\". This is what the -- list of strings is for; one string per line in the reply. -- 'show' will append an \"@\\r\\n@\" end-of-line marker to -- each entry in that list, so that the resulting string is -- ready to be sent back to the peer. -- -- Here is an example: -- -- > *Rfc2821> print $ Reply (Code Success MailSystem 0) -- > ["worked", "like", "a charm" ] -- > 250-worked -- > 250-like -- > 250 a charm -- -- If the message is @[]@, you'll get a really helpful -- default text. data SmtpReply = Reply SmtpCode [String] data SmtpCode = Code SuccessCode Category Int data SuccessCode = Unused0 | PreliminarySuccess | Success | IntermediateSuccess | TransientFailure | PermanentFailure deriving (Enum, Bounded, Eq, Ord, Show) data Category = Syntax | Information | Connection | Unspecified3 | Unspecified4 | MailSystem deriving (Enum, Bounded, Eq, Ord, Show) instance Show SmtpReply where show (Reply c@(Code suc cat _) []) = let msg = show suc ++ " in category " ++ show cat in show $ Reply c [msg] show (Reply code msg) = let prefixCon = show code ++ "-" prefixEnd = show code ++ " " fmt p l = p ++ l ++ "\r\n" (x:xs) = reverse msg msgCon = map (fmt prefixCon) xs msgEnd = fmt prefixEnd x msg' = reverse (msgEnd:msgCon) in concat msg' instance Show SmtpCode where show (Code suc cat n) = assert (n >= 0 && n <= 9) $ (show . fromEnum) suc ++ (show . fromEnum) cat ++ show n -- |Construct a 'Reply'. Fails 'assert' if invalid numbers -- are given. reply :: Int -> Int -> Int -> [String] -> SmtpReply reply suc c n msg = assert (suc >= 0 && suc <= 5) $ assert (c >= 0 && c <= 5) $ assert (n >= 0 && n <= 9) $ Reply (Code (toEnum suc) (toEnum c) n) msg -- |A reply constitutes \"success\" if the status code is -- any of 'PreliminarySuccess', 'Success', or -- 'IntermediateSuccess'. isSuccess :: SmtpReply -> Bool isSuccess (Reply (Code PreliminarySuccess _ _) _) = True isSuccess (Reply (Code Success _ _) _) = True isSuccess (Reply (Code IntermediateSuccess _ _) _) = True isSuccess _ = False -- |A reply constitutes \"failure\" if the status code is -- either 'PermanentFailure' or 'TransientFailure'. isFailure :: SmtpReply -> Bool isFailure (Reply (Code PermanentFailure _ _) _) = True isFailure (Reply (Code TransientFailure _ _) _) = True isFailure _ = False -- |The replies @221@ and @421@ signify 'Shutdown'. isShutdown :: SmtpReply -> Bool isShutdown (Reply (Code Success Connection 1) _) = True isShutdown (Reply (Code TransientFailure Connection 1) _) = True isShutdown _ = False ---------------------------------------------------------------------- -- * Command Parsers ---------------------------------------------------------------------- -- |The SMTP parsers defined here correspond to the commands -- specified in RFC2821, so I won't document them -- individually. type SmtpParser st = CharParser st SmtpCmd -- |This parser recognizes any of the SMTP commands defined -- below. Note that /all/ command parsers expect their input -- to be terminated with 'crlf'. smtpCmd :: SmtpParser st smtpCmd = choice [ smtpData, rset, noop, quit, turn , helo, mail, rcpt, send, soml, saml , vrfy, expn, help, ehlo ] -- |The parser name \"data\" was taken. smtpData :: SmtpParser st rset, quit, turn, helo, ehlo, mail :: SmtpParser st rcpt, send, soml, saml, vrfy, expn :: SmtpParser st help :: SmtpParser st -- |May have an optional 'word' argument, but it is ignored. noop :: SmtpParser st smtpData = mkCmd0 "DATA" Data rset = mkCmd0 "RSET" Rset quit = mkCmd0 "QUIT" Quit turn = mkCmd0 "TURN" Turn helo = mkCmd1 "HELO" Helo domain ehlo = mkCmd1 "EHLO" Ehlo domain mail = mkCmd1 "MAIL" MailFrom from_path rcpt = mkCmd1 "RCPT" RcptTo to_path send = mkCmd1 "SEND" Send from_path soml = mkCmd1 "SOML" Soml from_path saml = mkCmd1 "SAML" Saml from_path vrfy = mkCmd1 "VRFY" Vrfy word expn = mkCmd1 "EXPN" Expn word help = try (mkCmd0 "HELP" (Help [])) <|> mkCmd1 "HELP" Help (option [] word) noop = try (mkCmd0 "NOOP" Noop) <|> mkCmd1 "NOOP" (\_ -> Noop) (option [] word) ---------------------------------------------------------------------- -- * Argument Parsers ---------------------------------------------------------------------- from_path :: CharParser st Mailbox from_path = do caseString "from:" (try (string "<>" >> return nullPath) <|> path) "from-path" to_path :: CharParser st Mailbox to_path = do caseString "to:" (try (caseString "" >> return postmaster) <|> path) "to-path" path :: CharParser st Mailbox path = between (char '<') (char '>') (p "path") where p = do r1 <- option [] (a_d_l >>= \r -> char ':' >> return r) (Mailbox _ l d) <- mailbox return (Mailbox r1 l d) mailbox :: CharParser st Mailbox mailbox = p "mailbox" where p = do r1 <- local_part _ <- char '@' r2 <- domain return (Mailbox [] r1 r2) local_part :: CharParser st String local_part = (dot_string <|> quoted_string) "local-part" domain :: CharParser st String domain = choice [ tokenList subdomain '.' "domain" , address_literal "address literal" ] a_d_l :: CharParser st [String] a_d_l = sepBy1 at_domain (char ',') "route-list" at_domain :: CharParser st String at_domain = (char '@' >> domain) "at-domain" -- |/TODO/: Add IPv6 address and general literals address_literal :: CharParser st String address_literal = ipv4_literal "IPv4 address literal" ipv4_literal :: CharParser st String ipv4_literal = do rs <- between (char '[') (char ']') ipv4addr return ('[': reverse (']': reverse rs)) ipv4addr :: CharParser st String ipv4addr = p "IPv4 address literal" where p = do r1 <- snum r2 <- char '.' >> snum r3 <- char '.' >> snum r4 <- char '.' >> snum return (r1 ++ "." ++ r2 ++ "." ++ r3 ++ "." ++ r4) subdomain :: CharParser st String subdomain = p "domain name" where p = do r <- many1 (alpha <|> digit <|> char '-') if last r == '-' then fail "subdomain must not end with hyphen" else return r dot_string :: CharParser st String dot_string = tokenList atom '.' "dot_string" atom :: CharParser a String atom = many1 atext "atom" where atext = alpha <|> digit <|> oneOf "!#$%&'*+-/=?^_`{|}~" snum :: CharParser st String snum = do r <- manyNtoM 1 3 digit if (read r :: Int) > 255 then fail "IP address parts must be 0 <= x <= 255" else return r number :: CharParser st String number = many1 digit -- |This is a useful addition: The parser accepts an 'atom' -- or a 'quoted_string'. word :: CharParser st String word = (atom <|> fmap show quoted_string) "word or quoted-string" ---------------------------------------------------------------------- -- * Helper Functions ---------------------------------------------------------------------- -- |Make the string 'crlf' terminated no matter what. -- \'@\\n@\' is expanded, otherwise 'crlf' is appended. Note -- that if the string was terminated incorrectly before, it -- still is. This function is useful when reading input with -- 'System.IO.hGetLine' which removes the end-of-line -- delimiter. fixCRLF :: String -> String fixCRLF ('\r' :'\n':[]) = fixCRLF [] fixCRLF ( x :'\n':[]) = x : fixCRLF [] fixCRLF ( x : xs ) = x : fixCRLF xs fixCRLF [ ] = "\r\n" -- |Construct a parser for a command without arguments. -- Expects 'crlf'! mkCmd0 :: String -> a -> CharParser st a mkCmd0 str cons = (do try (caseString str) _ <- skipMany wsp >> crlf return cons) str -- |Construct a parser for a command with an argument, which -- the given parser will handle. The result of the argument -- parser will be applied to the type constructor before it -- is returned. Expects 'crlf'! mkCmd1 :: String -> (a -> SmtpCmd) -> CharParser st a -> CharParser st SmtpCmd mkCmd1 str cons p = do try (caseString str) _ <- wsp input <- getInput st <- getState let eol = skipMany wsp >> crlf p' = between (many wsp) eol p str r = runParser p' st "" input case r of Left e -> return (WrongArg str e) Right a -> return (cons a) -- @tokenList p '.'@ will parse a token of the form -- \"@p.p@\", or \"@p.p.p@\", and so on. Used in 'domain' -- and 'dot_string', for example. tokenList :: CharParser st String -> Char -> CharParser st String tokenList p c = fmap (intercalate [c]) (sepBy1 p (char c)) hsemail-1.7.2/Text/ParserCombinators/Parsec/Rfc2234.hs0000644000000000000000000001340611764377107020531 0ustar0000000000000000{- | Module : Text.ParserCombinators.Parsec.Rfc2234 Copyright : (c) 2012 Peter Simons License : BSD3 Maintainer : simons@cryp.to Stability : provisional Portability : portable This module provides parsers for the grammar defined in RFC2234, \"Augmented BNF for Syntax Specifications: ABNF\", . The terminal called @char@ in the RFC is called 'character' here to avoid conflicts with Parsec's 'char' function. -} module Text.ParserCombinators.Parsec.Rfc2234 where import Text.ParserCombinators.Parsec import Data.Char ( toUpper, chr, ord ) import Control.Monad ( liftM2 ) ---------------------------------------------------------------------- -- * Parser Combinators ---------------------------------------------------------------------- -- |Case-insensitive variant of Parsec's 'char' function. caseChar :: Char -> CharParser st Char caseChar c = satisfy (\x -> toUpper x == toUpper c) -- |Case-insensitive variant of Parsec's 'string' function. caseString :: String -> CharParser st () caseString cs = mapM_ caseChar cs cs -- |Match a parser at least @n@ times. manyN :: Int -> GenParser a b c -> GenParser a b [c] manyN n p | n <= 0 = return [] | otherwise = liftM2 (++) (count n p) (many p) -- |Match a parser at least @n@ times, but no more than @m@ times. manyNtoM :: Int -> Int -> GenParser a b c -> GenParser a b [c] manyNtoM n m p | n < 0 = return [] | n > m = return [] | n == m = count n p | n == 0 = foldr (<|>) (return []) (map (\x -> try $ count x p) (reverse [1..m])) | otherwise = liftM2 (++) (count n p) (manyNtoM 0 (m-n) p) -- |Helper function to generate 'Parser'-based instances for -- the 'Read' class. parsec2read :: Parser a -> String -> [(a, String)] parsec2read f x = either (error . show) id (parse f' "" x) where f' = do { a <- f; res <- getInput; return [(a,res)] } ---------------------------------------------------------------------- -- * Primitive Parsers ---------------------------------------------------------------------- -- |Match any character of the alphabet. alpha :: CharParser st Char alpha = satisfy (\c -> c `elem` (['A'..'Z'] ++ ['a'..'z'])) "alphabetic character" -- |Match either \"1\" or \"0\". bit :: CharParser st Char bit = oneOf "01" "bit ('0' or '1')" -- |Match any 7-bit US-ASCII character except for NUL (ASCII value 0, that is). character :: CharParser st Char character = satisfy (\c -> (c >= chr 1) && (c <= chr 127)) "7-bit character excluding NUL" -- |Match the carriage return character @\\r@. cr :: CharParser st Char cr = char '\r' "carriage return" -- |Match returns the linefeed character @\\n@. lf :: CharParser st Char lf = char '\n' "linefeed" -- |Match the Internet newline @\\r\\n@. crlf :: CharParser st String crlf = do c <- cr l <- lf return [c,l] "carriage return followed by linefeed" -- |Match any US-ASCII control character. That is -- any character with a decimal value in the range of [0..31,127]. ctl :: CharParser st Char ctl = satisfy (\c -> ord c `elem` ([0..31] ++ [127])) "control character" -- |Match the double quote character \"@\"@\". dquote :: CharParser st Char dquote = char (chr 34) "double quote" -- |Match any character that is valid in a hexadecimal number; -- [\'0\'..\'9\'] and [\'A\'..\'F\',\'a\'..\'f\'] that is. hexdig :: CharParser st Char hexdig = hexDigit "hexadecimal digit" -- |Match the tab (\"@\\t@\") character. htab :: CharParser st Char htab = char '\t' "horizontal tab" -- |Match \"linear white-space\". That is any number of consecutive -- 'wsp', optionally followed by a 'crlf' and (at least) one more -- 'wsp'. lwsp :: CharParser st String lwsp = do r <- choice [ many1 wsp , try (liftM2 (++) crlf (many1 wsp)) ] rs <- option [] lwsp return (r ++ rs) "linear white-space" -- |Match /any/ character. octet :: CharParser st Char octet = anyChar "any 8-bit character" -- |Match the space. sp :: CharParser st Char sp = char ' ' "space" -- |Match any printable ASCII character. (The \"v\" stands for -- \"visible\".) That is any character in the decimal range of -- [33..126]. vchar :: CharParser st Char vchar = satisfy (\c -> (c >= chr 33) && (c <= chr 126)) "printable character" -- |Match either 'sp' or 'htab'. wsp :: CharParser st Char wsp = sp <|> htab "white-space" -- ** Useful additions -- |Match a \"quoted pair\". Any characters (excluding CR and -- LF) may be quoted. quoted_pair :: CharParser st String quoted_pair = do _ <- char '\\' r <- noneOf "\r\n" return ['\\',r] "quoted pair" -- |Match a quoted string. The specials \"@\\@\" and -- \"@\"@\" must be escaped inside a quoted string; CR and -- LF are not allowed at all. quoted_string :: CharParser st String quoted_string = do _ <- dquote r <- many qcont _ <- dquote return ("\"" ++ concat r ++ "\"") "quoted string" where qtext = noneOf "\\\"\r\n" qcont = many1 qtext <|> quoted_pair hsemail-1.7.2/Text/ParserCombinators/Parsec/Rfc2822.hs0000644000000000000000000015123011764377107020532 0ustar0000000000000000{- | Module : Text.ParserCombinators.Parsec.Rfc2822 Copyright : (c) 2012 Peter Simons License : BSD3 Maintainer : simons@cryp.to Stability : provisional Portability : portable This module provides parsers for the grammar defined in RFC2822, \"Internet Message Format\", . /Please note:/ The module is not particularly well tested. -} module Text.ParserCombinators.Parsec.Rfc2822 where import System.Time import Data.Char ( ord ) import Data.List ( intercalate ) import Control.Monad ( liftM ) import Text.ParserCombinators.Parsec import Text.ParserCombinators.Parsec.Rfc2234 hiding ( quoted_pair, quoted_string ) -- * Useful parser combinators -- |Return @Nothing@ if the given parser doesn't match. This -- combinator is included in the latest parsec distribution as -- @optionMaybe@, but ghc-6.6.1 apparently doesn't have it. maybeOption :: GenParser tok st a -> GenParser tok st (Maybe a) maybeOption p = option Nothing (liftM Just p) -- |@unfold@ @=@ @between (optional cfws) (optional cfws)@ unfold :: CharParser a b -> CharParser a b unfold = between (optional cfws) (optional cfws) -- |Construct a parser for a message header line from the -- header's name and a parser for the body. header :: String -> CharParser a b -> CharParser a b header n p = let nameString = caseString (n ++ ":") in between nameString crlf p (n ++ " header line") -- |Like 'header', but allows the obsolete white-space rules. obs_header :: String -> CharParser a b -> CharParser a b obs_header n p = let nameString = caseString n >> many wsp >> char ':' in between nameString crlf p ("obsolete " ++ n ++ " header line") -- ** Primitive Tokens (section 3.2.1) -- |Match any US-ASCII non-whitespace control character. no_ws_ctl :: CharParser a Char no_ws_ctl = satisfy (\c -> ord c `elem` ([1..8] ++ [11,12] ++ [14..31] ++ [127])) "US-ASCII non-whitespace control character" -- |Match any US-ASCII character except for @\r@, @\n@. text :: CharParser a Char text = satisfy (\c -> ord c `elem` ([1..9] ++ [11,12] ++ [14..127])) "US-ASCII character (excluding CR and LF)" -- |Match any of the RFC's \"special\" characters: @()\<\>[]:;\@,.\\\"@. specials :: CharParser a Char specials = oneOf "()<>[]:;@,.\\\"" "one of ()<>[]:;@,.\\\"" -- ** Quoted characters (section 3.2.2) -- |Match a \"quoted pair\". All characters matched by 'text' may be -- quoted. Note that the parsers returns /both/ characters, the -- backslash and the actual content. quoted_pair :: CharParser a String quoted_pair = do { _ <- char '\\'; r <- text; return ['\\',r] } "quoted pair" -- ** Folding white space and comments (section 3.2.3) -- |Match \"folding whitespace\". That is any combination of 'wsp' and -- 'crlf' followed by 'wsp'. fws :: CharParser a String fws = do r <- many1 $ choice [ blanks, linebreak] return (concat r) where blanks = many1 wsp linebreak = try $ do { r1 <- crlf; r2 <- blanks; return (r1 ++ r2) } -- |Match any non-whitespace, non-control character except for \"@(@\", -- \"@)@\", and \"@\\@\". This is used to describe the legal content of -- 'comment's. -- -- /Note/: This parser accepts 8-bit characters, even though this is -- not legal according to the RFC. Unfortunately, 8-bit content in -- comments has become fairly common in the real world, so we'll just -- accept the fact. ctext :: CharParser a Char ctext = no_ws_ctl <|> satisfy (\c -> ord c `elem` ([33..39] ++ [42..91] ++ [93..126] ++ [128..255])) "any regular character (excluding '(', ')', and '\\')" -- |Match a \"comments\". That is any combination of 'ctext', -- 'quoted_pair's, and 'fws' between brackets. Comments may nest. comment :: CharParser a String comment = do _ <- char '(' r1 <- many ccontent r2 <- option [] fws _ <- char ')' return ("(" ++ concat r1 ++ r2 ++ ")") "comment" where ccontent = try $ do r1 <- option [] fws r2 <- choice [many1 ctext, quoted_pair, comment] return (r1 ++ r2) -- |Match any combination of 'fws' and 'comments'. cfws :: CharParser a String cfws = do r <- many1 $ choice [ fws, comment ] return (concat r) -- ** Atom (section 3.2.4) -- |Match any US-ASCII character except for control characters, -- 'specials', or space. 'atom' and 'dot_atom' are made up of this. atext :: CharParser a Char atext = alpha <|> digit <|> oneOf "!#$%&'*+-/=?^_`{|}~" "US-ASCII character (excluding controls, space, and specials)" -- |Match one or more 'atext' characters and skip any preceeding or -- trailing 'cfws'. atom :: CharParser a String atom = unfold (many1 atext "atom") -- |Match 'dot_atom_text' and skip any preceeding or trailing 'cfws'. dot_atom :: CharParser a String dot_atom = unfold (dot_atom_text "dot atom") -- |Match two or more 'atext's interspersed by dots. dot_atom_text :: CharParser a String dot_atom_text = fmap (intercalate ".") (sepBy1 (many1 atext) (char '.')) "dot atom content" -- ** Quoted strings (section 3.2.5) -- |Match any non-whitespace, non-control US-ASCII character except -- for \"@\\@\" and \"@\"@\". qtext :: CharParser a Char qtext = no_ws_ctl <|> satisfy (\c -> ord c `elem` ([33] ++ [35..91] ++ [93..126])) "US-ASCII character (excluding '\\', and '\"')" -- |Match either 'qtext' or 'quoted_pair'. qcontent :: CharParser a String qcontent = many1 qtext <|> quoted_pair "quoted string content" -- |Match any number of 'qcontent' between double quotes. Any 'cfws' -- preceeding or following the \"atom\" is skipped automatically. quoted_string :: CharParser a String quoted_string = unfold (do _ <- dquote r1 <- many (do r1 <- option [] fws r2 <- qcontent return (r1 ++ r2)) r2 <- option [] fws _ <- dquote return ("\"" ++ concat r1 ++ r2 ++ "\"")) "quoted string" -- * Miscellaneous tokens (section 3.2.6) -- |Match either 'atom' or 'quoted_string'. word :: CharParser a String word = unfold (atom <|> quoted_string) "word" -- |Match either one or more 'word's or an 'obs_phrase'. phrase :: CharParser a [String] phrase = {- many1 word "phrase" <|> -} obs_phrase -- |Match any non-whitespace, non-control US-ASCII character except -- for \"@\\@\" and \"@\"@\". utext :: CharParser a Char utext = no_ws_ctl <|> satisfy (\c -> ord c `elem` [33..126]) "regular US-ASCII character (excluding '\\', and '\"')" -- |Match any number of 'utext' tokens. -- -- \"Unstructured text\" is used in free text fields such as 'subject'. -- Please note that any comments or whitespace that prefaces or -- follows the actual 'utext' is /included/ in the returned string. unstructured :: CharParser a String unstructured = do r1 <- option [] fws r2 <- many (do r3 <- utext r4 <- option [] fws return (r3 : r4)) return (r1 ++ concat r2) "unstructured text" -- * Date and Time Specification (section 3.3) -- |Parse a date and time specification of the form -- -- > Thu, 19 Dec 2002 20:35:46 +0200 -- -- where the weekday specification \"@Thu,@\" is optional. The parser -- returns a 'CalendarTime', which is set to the appropriate values. -- Note, though, that not all fields of 'CalendarTime' will -- necessarily be set correctly! Obviously, when no weekday has been -- provided, the parser will set this field to 'Monday' - regardless -- of whether the day actually is a monday or not. Similarly, the day -- of the year will always be returned as @0@. The timezone name will -- always be empty: @\"\"@. -- -- Nor will the 'date_time' parser perform /any/ consistency checking. -- It will accept -- -- > 40 Apr 2002 13:12 +0100 -- -- as a perfectly valid date. -- -- In order to get all fields set to meaningful values, and in order -- to verify the date's consistency, you will have to feed it into any -- of the conversion routines provided in "System.Time", such as -- 'toClockTime'. (When doing this, keep in mind that most functions -- return /local time/. This will not necessarily be the time you're -- expecting.) date_time :: CharParser a CalendarTime date_time = do wd <- option Monday (try (do wd <- day_of_week _ <- char ',' return wd)) (y,m,d) <- date _ <- fws (td,z) <- time optional cfws return (CalendarTime y m d (tdHour td) (tdMin td) (tdSec td) 0 wd 0 "" z False) "date/time specification" -- |This parser will match a 'day_name', optionally wrapped in folding -- whitespace, or an 'obs_day_of_week' and return it's 'Day' value. day_of_week :: CharParser a Day day_of_week = try (between (optional fws) (optional fws) day_name "name of a day-of-the-week") <|> obs_day_of_week -- |This parser will the abbreviated weekday names (\"@Mon@\", \"@Tue@\", ...) -- and return the appropriate 'Day' value. day_name :: CharParser a Day day_name = do { caseString "Mon"; return Monday } <|> do { try (caseString "Tue"); return Tuesday } <|> do { caseString "Wed"; return Wednesday } <|> do { caseString "Thu"; return Thursday } <|> do { caseString "Fri"; return Friday } <|> do { try (caseString "Sat"); return Saturday } <|> do { caseString "Sun"; return Sunday } "name of a day-of-the-week" -- |This parser will match a date of the form \"@dd:mm:yyyy@\" and return -- a tripple of the form (Int,Month,Int) - corresponding to -- (year,month,day). date :: CharParser a (Int,Month,Int) date = do d <- day m <- month y <- year return (y,m,d) "date specification" -- |This parser will match a four digit number and return it's integer -- value. No range checking is performed. year :: CharParser a Int year = do y <- manyN 4 digit return (read y :: Int) "year" -- |This parser will match a 'month_name', optionally wrapped in -- folding whitespace, or an 'obs_month' and return it's 'Month' -- value. month :: CharParser a Month month = try (between (optional fws) (optional fws) month_name "month name") <|> obs_month -- |This parser will the abbreviated month names (\"@Jan@\", \"@Feb@\", ...) -- and return the appropriate 'Month' value. month_name :: CharParser a Month month_name = do { try (caseString "Jan"); return January } <|> do { caseString "Feb"; return February } <|> do { try (caseString "Mar"); return March } <|> do { try (caseString "Apr"); return April } <|> do { caseString "May"; return May } <|> do { try (caseString "Jun"); return June } <|> do { caseString "Jul"; return July } <|> do { caseString "Aug"; return August } <|> do { caseString "Sep"; return September } <|> do { caseString "Oct"; return October } <|> do { caseString "Nov"; return November } <|> do { caseString "Dec"; return December } "month name" -- |Match either an 'obs_day', or a one or two digit number and return it. day :: CharParser a Int day = try (do { optional fws; r <- manyNtoM 1 2 digit; return (read r :: Int) }) <|> obs_day "day" -- |This parser will match a 'time_of_day' specification followed by a -- 'zone'. It returns the tuple (TimeDiff,Int) corresponding to the -- return values of either parser. time :: CharParser a (TimeDiff,Int) time = do t <- time_of_day _ <- fws z <- zone return (t,z) "time and zone specification" -- |This parser will match a time-of-day specification of \"@hh:mm@\" or -- \"@hh:mm:ss@\" and return the corrsponding time as a 'TimeDiff'. time_of_day :: CharParser a TimeDiff time_of_day = do h <- hour _ <- char ':' m <- minute s <- option 0 (do { _ <- char ':'; second } ) return (TimeDiff 0 0 0 h m s 0) "time specification" -- |This parser will match a two-digit number and return it's integer -- value. No range checking is performed. hour :: CharParser a Int hour = do r <- count 2 digit return (read r :: Int) "hour" -- |This parser will match a two-digit number and return it's integer -- value. No range checking is performed. minute :: CharParser a Int minute = do r <- count 2 digit return (read r :: Int) "minute" -- |This parser will match a two-digit number and return it's integer -- value. No range checking takes place. second :: CharParser a Int second = do r <- count 2 digit return (read r :: Int) "second" -- |This parser will match a timezone specification of the form -- \"@+hhmm@\" or \"@-hhmm@\" and return the zone's offset to UTC in -- seconds as an integer. 'obs_zone' is matched as well. zone :: CharParser a Int zone = ( do _ <- char '+' h <- hour m <- minute return (((h*60)+m)*60) <|> do _ <- char '-' h <- hour m <- minute return (-((h*60)+m)*60) "time zone" ) <|> obs_zone -- * Address Specification (section 3.4) -- |A NameAddr is composed of an optional realname a mandatory -- e-mail 'address'. data NameAddr = NameAddr { nameAddr_name :: Maybe String , nameAddr_addr :: String } deriving (Show,Eq) -- |Parse a single 'mailbox' or an address 'group' and return the -- address(es). address :: CharParser a [NameAddr] address = try (do { r <- mailbox; return [r] }) <|> group "address" -- |Parse a 'name_addr' or an 'addr_spec' and return the -- address. mailbox :: CharParser a NameAddr mailbox = try name_addr <|> fmap (NameAddr Nothing) addr_spec "mailbox" -- |Parse an 'angle_addr', optionally prefaced with a 'display_name', -- and return the address. name_addr :: CharParser a NameAddr name_addr = do name <- maybeOption display_name addr <- angle_addr return (NameAddr name addr) "name address" -- |Parse an 'angle_addr' or an 'obs_angle_addr' and return the address. angle_addr :: CharParser a String angle_addr = try (unfold (do _ <- char '<' r <- addr_spec _ <- char '>' return r) "angle address" ) <|> obs_angle_addr -- |Parse a \"group\" of addresses. That is a 'display_name', followed -- by a colon, optionally followed by a 'mailbox_list', followed by a -- semicolon. The found address(es) are returned - what may be none. -- Here is an example: -- -- > parse group "" "my group: user1@example.org, user2@example.org;" -- -- This input comes out as: -- -- > Right ["user1@example.org","user2@example.org"] group :: CharParser a [NameAddr] group = do _ <- display_name _ <- char ':' r <- option [] mailbox_list _ <- unfold $ char ';' return r "address group" -- |Parse and return a 'phrase'. display_name :: CharParser a String display_name = fmap unwords phrase "display name" -- |Parse a list of 'mailbox' addresses, every two addresses being -- separated by a comma, and return the list of found address(es). mailbox_list :: CharParser a [NameAddr] mailbox_list = sepBy mailbox (char ',') "mailbox list" -- |Parse a list of 'address' addresses, every two addresses being -- separated by a comma, and return the list of found address(es). address_list :: CharParser a [NameAddr] address_list = do { r <-sepBy address (char ','); return (concat r) } "address list" -- ** Addr-spec specification (section 3.4.1) -- |Parse an \"address specification\". That is a 'local_part', followed -- by an \"@\@@\" character, followed by a 'domain'. Return the complete -- address as 'String', ignoring any whitespace or any comments. addr_spec :: CharParser a String addr_spec = do r1 <- local_part _ <- char '@' r2 <- domain return (r1 ++ "@" ++ r2) "address specification" -- |Parse and return a \"local part\" of an 'addr_spec'. That is either -- a 'dot_atom' or a 'quoted_string'. local_part :: CharParser a String local_part = dot_atom <|> quoted_string "address' local part" -- |Parse and return a \"domain part\" of an 'addr_spec'. That is either -- a 'dot_atom' or a 'domain_literal'. domain :: CharParser a String domain = dot_atom <|> domain_literal "address' domain part" -- |Parse a \"domain literal\". That is a \"@[@\" character, followed by -- any amount of 'dcontent', followed by a terminating \"@]@\" -- character. The complete string is returned verbatim. domain_literal :: CharParser a String domain_literal = unfold (do _ <- char '[' r <- many $ do { optional fws; dcontent } optional fws _ <- char ']' return ("[" ++ concat r ++ "]")) "domain literal" -- |Parse and return any characters that are legal in a -- 'domain_literal'. That is 'dtext' or a 'quoted_pair'. dcontent :: CharParser a String dcontent = many1 dtext <|> quoted_pair "domain literal content" -- |Parse and return any ASCII characters except \"@[@\", \"@]@\", and -- \"@\\@\". dtext :: CharParser a Char dtext = no_ws_ctl <|> satisfy (\c -> ord c `elem` ([33..90] ++ [94,127])) "character (excluding '[', ']', and '\\')" -- * Overall message syntax (section 3.5) -- |This data type repesents a parsed Internet Message as defined in -- this RFC. It consists of an arbitrary number of header lines, -- represented in the 'Field' data type, and a message body, which may -- be empty. data GenericMessage a = Message [Field] a deriving Show type Message = GenericMessage String -- |Parse a complete message as defined by this RFC and it broken down -- into the separate header fields and the message body. Header lines, -- which contain syntax errors, will not cause the parser to abort. -- Rather, these headers will appear as 'OptionalField's (which are -- unparsed) in the resulting 'Message'. A message must be really, -- really badly broken for this parser to fail. -- -- This behaviour was chosen because it is impossible to predict what -- the user of this module considers to be a fatal error; -- traditionally, parsers are very forgiving when it comes to Internet -- messages. -- -- If you want to implement a really strict parser, you'll have to put -- the appropriate parser together yourself. You'll find that this is -- rather easy to do. Refer to the 'fields' parser for further details. message :: CharParser a Message message = do f <- fields b <- option [] (do _ <- crlf; body) return (Message f b) -- |This parser will return a message body as specified by this RFC; -- that is basically any number of 'text' characters, which may be -- divided into separate lines by 'crlf'. body :: CharParser a String body = do r1 <- many (try (do line <- many text eol <- crlf return (line ++ eol))) r2 <- many text return (concat r1 ++ r2) -- * Field definitions (section 3.6) -- |This data type represents any of the header fields defined in this -- RFC. Each of the various instances contains with the return value -- of the corresponding parser. data Field = OptionalField String String | From [NameAddr] | Sender NameAddr | ReturnPath String | ReplyTo [NameAddr] | To [NameAddr] | Cc [NameAddr] | Bcc [NameAddr] | MessageID String | InReplyTo [String] | References [String] | Subject String | Comments String | Keywords [[String]] | Date CalendarTime | ResentDate CalendarTime | ResentFrom [NameAddr] | ResentSender NameAddr | ResentTo [NameAddr] | ResentCc [NameAddr] | ResentBcc [NameAddr] | ResentMessageID String | ResentReplyTo [NameAddr] | Received ([(String,String)], CalendarTime) | ObsReceived [(String,String)] deriving (Show) -- |This parser will parse an arbitrary number of header fields as -- defined in this RFC. For each field, an appropriate 'Field' value -- is created, all of them making up the 'Field' list that this parser -- returns. -- -- If you look at the implementation of this parser, you will find -- that it uses Parsec's 'try' modifier around /all/ of the fields. -- The idea behind this is that fields, which contain syntax errors, -- fall back to the catch-all 'optional_field'. Thus, this parser will -- hardly ever return a syntax error -- what conforms with the idea -- that any message that can possibly be accepted /should/ be. fields :: CharParser a [Field] fields = many ( try (do { r <- from; return (From r) }) <|> try (do { r <- sender; return (Sender r) }) <|> try (do { r <- return_path; return (ReturnPath r) }) <|> try (do { r <- reply_to; return (ReplyTo r) }) <|> try (do { r <- to; return (To r) }) <|> try (do { r <- cc; return (Cc r) }) <|> try (do { r <- bcc; return (Bcc r) }) <|> try (do { r <- message_id; return (MessageID r) }) <|> try (do { r <- in_reply_to; return (InReplyTo r) }) <|> try (do { r <- references; return (References r) }) <|> try (do { r <- subject; return (Subject r) }) <|> try (do { r <- comments; return (Comments r) }) <|> try (do { r <- keywords; return (Keywords r) }) <|> try (do { r <- orig_date; return (Date r) }) <|> try (do { r <- resent_date; return (ResentDate r) }) <|> try (do { r <- resent_from; return (ResentFrom r) }) <|> try (do { r <- resent_sender; return (ResentSender r) }) <|> try (do { r <- resent_to; return (ResentTo r) }) <|> try (do { r <- resent_cc; return (ResentCc r) }) <|> try (do { r <- resent_bcc; return (ResentBcc r) }) <|> try (do { r <- resent_msg_id; return (ResentMessageID r) }) <|> try (do { r <- received; return (Received r) }) -- catch all <|> (do { (name,cont) <- optional_field; return (OptionalField name cont) }) ) -- ** The origination date field (section 3.6.1) -- |Parse a \"@Date:@\" header line and return the date it contains a -- 'CalendarTime'. orig_date :: CharParser a CalendarTime orig_date = header "Date" date_time -- ** Originator fields (section 3.6.2) -- |Parse a \"@From:@\" header line and return the 'mailbox_list' -- address(es) contained in it. from :: CharParser a [NameAddr] from = header "From" mailbox_list -- |Parse a \"@Sender:@\" header line and return the 'mailbox' address -- contained in it. sender :: CharParser a NameAddr sender = header "Sender" mailbox -- |Parse a \"@Reply-To:@\" header line and return the 'address_list' -- address(es) contained in it. reply_to :: CharParser a [NameAddr] reply_to = header "Reply-To" address_list -- ** Destination address fields (section 3.6.3) -- |Parse a \"@To:@\" header line and return the 'address_list' -- address(es) contained in it. to :: CharParser a [NameAddr] to = header "To" address_list -- |Parse a \"@Cc:@\" header line and return the 'address_list' -- address(es) contained in it. cc :: CharParser a [NameAddr] cc = header "Cc" address_list -- |Parse a \"@Bcc:@\" header line and return the 'address_list' -- address(es) contained in it. bcc :: CharParser a [NameAddr] bcc = header "Bcc" (try address_list <|> do { optional cfws; return [] }) -- ** Identification fields (section 3.6.4) -- |Parse a \"@Message-Id:@\" header line and return the 'msg_id' -- contained in it. message_id :: CharParser a String message_id = header "Message-ID" msg_id -- |Parse a \"@In-Reply-To:@\" header line and return the list of -- 'msg_id's contained in it. in_reply_to :: CharParser a [String] in_reply_to = header "In-Reply-To" (many1 msg_id) -- |Parse a \"@References:@\" header line and return the list of -- 'msg_id's contained in it. references :: CharParser a [String] references = header "References" (many1 msg_id) -- |Parse a \"@message ID:@\" and return it. A message ID is almost -- identical to an 'angle_addr', but with stricter rules about folding -- and whitespace. msg_id :: CharParser a String msg_id = unfold (do _ <- char '<' idl <- id_left _ <- char '@' idr <- id_right _ <- char '>' return ("<" ++ idl ++ "@" ++ idr ++ ">")) "message ID" -- |Parse a \"left ID\" part of a 'msg_id'. This is almost identical to -- the 'local_part' of an e-mail address, but with stricter rules -- about folding and whitespace. id_left :: CharParser a String id_left = dot_atom_text <|> no_fold_quote "left part of an message ID" -- |Parse a \"right ID\" part of a 'msg_id'. This is almost identical to -- the 'domain' of an e-mail address, but with stricter rules about -- folding and whitespace. id_right :: CharParser a String id_right = dot_atom_text <|> no_fold_literal "right part of an message ID" -- |Parse one or more occurences of 'qtext' or 'quoted_pair' and -- return the concatenated string. This makes up the 'id_left' of a -- 'msg_id'. no_fold_quote :: CharParser a String no_fold_quote = do _ <- dquote r <- many (many1 qtext <|> quoted_pair) _ <- dquote return ("\"" ++ concat r ++ "\"") "non-folding quoted string" -- |Parse one or more occurences of 'dtext' or 'quoted_pair' and -- return the concatenated string. This makes up the 'id_right' of a -- 'msg_id'. no_fold_literal :: CharParser a String no_fold_literal = do _ <- char '[' r <- many (many1 dtext <|> quoted_pair) _ <- char ']' return ("[" ++ concat r ++ "]") "non-folding domain literal" -- ** Informational fields (section 3.6.5) -- |Parse a \"@Subject:@\" header line and return it's contents verbatim. subject :: CharParser a String subject = header "Subject" unstructured -- |Parse a \"@Comments:@\" header line and return it's contents verbatim. comments :: CharParser a String comments = header "Comments" unstructured -- |Parse a \"@Keywords:@\" header line and return the list of 'phrase's -- found. Please not that each phrase is again a list of 'atom's, as -- returned by the 'phrase' parser. keywords :: CharParser a [[String]] keywords = header "Keywords" (do r1 <- phrase r2 <- many (do _ <- char ','; phrase) return (r1:r2)) -- ** Resent fields (section 3.6.6) -- |Parse a \"@Resent-Date:@\" header line and return the date it -- contains as 'CalendarTime'. resent_date :: CharParser a CalendarTime resent_date = header "Resent-Date" date_time -- |Parse a \"@Resent-From:@\" header line and return the 'mailbox_list' -- address(es) contained in it. resent_from :: CharParser a [NameAddr] resent_from = header "Resent-From" mailbox_list -- |Parse a \"@Resent-Sender:@\" header line and return the 'mailbox_list' -- address(es) contained in it. resent_sender :: CharParser a NameAddr resent_sender = header "Resent-Sender" mailbox -- |Parse a \"@Resent-To:@\" header line and return the 'mailbox' -- address contained in it. resent_to :: CharParser a [NameAddr] resent_to = header "Resent-To" address_list -- |Parse a \"@Resent-Cc:@\" header line and return the 'address_list' -- address(es) contained in it. resent_cc :: CharParser a [NameAddr] resent_cc = header "Resent-Cc" address_list -- |Parse a \"@Resent-Bcc:@\" header line and return the 'address_list' -- address(es) contained in it. (This list may be empty.) resent_bcc :: CharParser a [NameAddr] resent_bcc = header "Resent-Bcc" ( try address_list <|> do optional cfws return [] ) "Resent-Bcc: header line" -- |Parse a \"@Resent-Message-ID:@\" header line and return the 'msg_id' -- contained in it. resent_msg_id :: CharParser a String resent_msg_id = header "Resent-Message-ID" msg_id -- ** Trace fields (section 3.6.7) return_path :: CharParser a String return_path = header "Return-Path:" path path :: CharParser a String path = unfold ( do _ <- char '<' r <- choice [ try addr_spec, do { _ <- cfws; return [] } ] _ <- char '>' return ("<" ++ r ++ ">") <|> obs_path ) "return path spec" received :: CharParser a ([(String,String)], CalendarTime) received = header "Received" (do r1 <- name_val_list _ <- char ';' r2 <- date_time return (r1,r2)) name_val_list :: CharParser a [(String,String)] name_val_list = do optional cfws many1 name_val_pair "list of name/value pairs" name_val_pair :: CharParser a (String,String) name_val_pair = do r1 <- item_name _ <- cfws r2 <- item_value return (r1,r2) "a name/value pair" item_name :: CharParser a String item_name = do r1 <- alpha r2 <- many $ choice [ char '-', alpha, digit ] return (r1 : r2) "name of a name/value pair" item_value :: CharParser a String item_value = choice [ try (do { r <- many1 angle_addr; return (concat r) }) , try addr_spec , try domain , msg_id , try atom ] "value of a name/value pair" -- ** Optional fields (section 3.6.8) -- |Parse an arbitrary header field and return a tuple containing the -- 'field_name' and 'unstructured' text of the header. The name will -- /not/ contain the terminating colon. optional_field :: CharParser a (String,String) optional_field = do n <- field_name _ <- char ':' b <- unstructured _ <- crlf return (n,b) "optional (unspecified) header line" -- |Parse and return an arbitrary header field name. That is one or -- more 'ftext' characters. field_name :: CharParser a String field_name = many1 ftext "header line name" -- |Match and return any ASCII character except for control -- characters, whitespace, and \"@:@\". ftext :: CharParser a Char ftext = satisfy (\c -> ord c `elem` ([33..57] ++ [59..126])) "character (excluding controls, space, and ':')" -- * Miscellaneous obsolete tokens (section 4.1) -- |Match the obsolete \"quoted pair\" syntax, which - unlike -- 'quoted_pair' - allowed /any/ ASCII character to be specified when -- quoted. The parser will return both, the backslash and the actual -- character. obs_qp :: CharParser a String obs_qp = do _ <- char '\\' c <- satisfy (\c -> ord c `elem` [0..127]) return ['\\',c] "any quoted US-ASCII character" -- |Match the obsolete \"text\" syntax, which - unlike 'text' - allowed -- \"carriage returns\" and \"linefeeds\". This is really weird; you -- better consult the RFC for details. The parser will return the -- complete string, including those special characters. obs_text :: CharParser a String obs_text = do r1 <- many lf r2 <- many cr r3 <- many (do r4 <- obs_char r5 <- many lf r6 <- many cr return (r4 : (r5 ++ r6))) return (r1 ++ r2 ++ concat r3) -- |Match and return the obsolete \"char\" syntax, which - unlike -- 'character' - did not allow \"carriage return\" and \"linefeed\". obs_char :: CharParser a Char obs_char = satisfy (\c -> ord c `elem` ([0..9] ++ [11,12] ++ [14..127])) "any ASCII character except CR and LF" -- |Match and return the obsolete \"utext\" syntax, which is identical -- to 'obs_text'. obs_utext :: CharParser a String obs_utext = obs_text -- |Match the obsolete \"phrase\" syntax, which - unlike 'phrase' - -- allows dots between tokens. obs_phrase :: CharParser a [String] obs_phrase = do r1 <- word r2 <- many $ choice [ word , string "." , do { _ <- cfws; return [] } ] return (r1 : filter (/=[]) r2) -- |Match a \"phrase list\" syntax and return the list of 'String's -- that make up the phrase. In contrast to a 'phrase', the -- 'obs_phrase_list' separates the individual words by commas. This -- syntax is - as you will have guessed - obsolete. obs_phrase_list :: CharParser a [String] obs_phrase_list = do r1 <- many1 (do r <- option [] phrase _ <- unfold $ char ',' return (filter (/=[]) r)) r2 <- option [] phrase return (concat r1 ++ r2) <|> phrase -- * Obsolete folding white space (section 4.2) -- |Parse and return an \"obsolete fws\" token. That is at least one -- 'wsp' character, followed by an arbitrary number (including zero) -- of 'crlf' followed by at least one more 'wsp' character. obs_fws :: CharParser a String obs_fws = do r1 <- many1 wsp r2 <- many (do r3 <- crlf r4 <- many1 wsp return (r3 ++ r4)) return (r1 ++ concat r2) -- * Obsolete Date and Time (section 4.3) -- |Parse a 'day_name' but allow for the obsolete folding syntax. obs_day_of_week :: CharParser a Day obs_day_of_week = unfold day_name "day-of-the-week name" -- |Parse a 'year' but allow for a two-digit number (obsolete) and the -- obsolete folding syntax. obs_year :: CharParser a Int obs_year = unfold (do r <- manyN 2 digit return (normalize (read r :: Int))) "year" where normalize n | n <= 49 = 2000 + n | n <= 999 = 1900 + n | otherwise = n -- |Parse a 'month_name' but allow for the obsolete folding syntax. obs_month :: CharParser a Month obs_month = between cfws cfws month_name "month name" -- |Parse a 'day' but allow for the obsolete folding syntax. obs_day :: CharParser a Int obs_day = unfold day "day" -- |Parse a 'hour' but allow for the obsolete folding syntax. obs_hour :: CharParser a Int obs_hour = unfold hour "hour" -- |Parse a 'minute' but allow for the obsolete folding syntax. obs_minute :: CharParser a Int obs_minute = unfold minute "minute" -- |Parse a 'second' but allow for the obsolete folding syntax. obs_second :: CharParser a Int obs_second = unfold second "second" -- |Match the obsolete zone names and return the appropriate offset. obs_zone :: CharParser a Int obs_zone = choice [ mkZone "UT" 0 , mkZone "GMT" 0 , mkZone "EST" (-5) , mkZone "EDT" (-4) , mkZone "CST" (-6) , mkZone "CDT" (-5) , mkZone "MST" (-7) , mkZone "MDT" (-6) , mkZone "PST" (-8) , mkZone "PDT" (-7) , do { r <- oneOf ['A'..'I']; return $ (ord r - 64) * 60*60 } "military zone spec" , do { r <- oneOf ['K'..'M']; return $ (ord r - 65) * 60*60 } "military zone spec" , do { r <- oneOf ['N'..'Y']; return $ -(ord r - 77) * 60*60 } "military zone spec" , do { _ <- char 'Z'; return 0 } "military zone spec" ] where mkZone n o = try $ do { _ <- string n; return (o*60*60) } -- * Obsolete Addressing (section 4.4) -- |This parser will match the \"obsolete angle address\" syntax. This -- construct used to be known as a \"route address\" in earlier RFCs. -- There are two differences between this construct and the -- 'angle_addr': For one - as usual -, the obsolete form allows for -- more liberal insertion of folding whitespace and comments. -- -- Secondly, and more importantly, angle addresses used to allow the -- (optional) specification of a \"route\". The newer version does not. -- Such a routing address looks like this: -- -- > <@example1.org,@example2.org:simons@example.org> -- -- The parser will return a tuple that - in case of the above address - -- looks like this: -- -- > (["example1.org","example2.org"],"simons@example.org") -- -- The first part contains a list of hosts that constitute the route -- part. This list may be empty! The second part of the tuple is the -- actual 'addr_spec' address. obs_angle_addr :: CharParser a String obs_angle_addr = unfold (do _ <- char '<' _ <- option [] obs_route addr <- addr_spec _ <- char '>' return addr) -- TODO: route is lost here. "obsolete angle address" -- |This parser parses the \"route\" part of 'obs_angle_addr' and -- returns the list of 'String's that make up this route. Relies on -- 'obs_domain_list' for the actual parsing. obs_route :: CharParser a [String] obs_route = unfold (do { r <- obs_domain_list; _ <- char ':'; return r }) "route of an obsolete angle address" -- |This parser parses a list of domain names, each of them prefaced -- with an \"at\". Multiple names are separated by a comma. The list of -- 'domain's is returned - and may be empty. obs_domain_list :: CharParser a [String] obs_domain_list = do _ <- char '@' r1 <- domain r2 <- many (do _ <- cfws <|> string "," optional cfws _ <- char '@' r <- domain return r) return (r1 : r2) "route of an obsolete angle address" -- |Parse the obsolete syntax of a 'local_part', which allowed for -- more liberal insertion of folding whitespace and comments. The -- actual string is returned. obs_local_part :: CharParser a String obs_local_part = do r1 <- word r2 <- many (do _ <- string "." r <- word return ('.' : r)) return (r1 ++ concat r2) "local part of an address" -- |Parse the obsolete syntax of a 'domain', which allowed for more -- liberal insertion of folding whitespace and comments. The actual -- string is returned. obs_domain :: CharParser a String obs_domain = do r1 <- atom r2 <- many (do _ <- string "." r <- atom return ('.' : r)) return (r1 ++ concat r2) "domain part of an address" -- |This parser will match the obsolete syntax for a 'mailbox_list'. -- This one is quite weird: An 'obs_mbox_list' contains an arbitrary -- number of 'mailbox'es - including none -, which are separated by -- commas. But you may have multiple consecutive commas without giving -- a 'mailbox'. You may also have a valid 'obs_mbox_list' that -- contains /no/ 'mailbox' at all. On the other hand, you /must/ have -- at least one comma. -- -- So, this input is perfectly valid: -- -- > "," -- -- But this one is - contrary to all intuition - not: -- -- > "simons@example.org" -- -- Strange, isn't it? obs_mbox_list :: CharParser a [NameAddr] obs_mbox_list = do r1 <- many1 (try (do r <- maybeOption mailbox _ <- unfold $ char ',' return r)) r2 <- maybeOption mailbox return [x | Just x <- r1 ++ [r2]] "obsolete syntax for a list of mailboxes" -- |This parser is identical to 'obs_mbox_list' but parses a list of -- 'address'es rather than 'mailbox'es. The main difference is that an -- 'address' may contain 'group's. Please note that as of now, the -- parser will return a simple list of addresses; the grouping -- information is lost. obs_addr_list :: CharParser a [NameAddr] obs_addr_list = do r1 <- many1 (try (do r <- maybeOption address optional cfws _ <- char ',' optional cfws return r)) r2 <- maybeOption address return (concat [x | Just x <- r1 ++ [r2]]) "obsolete syntax for a list of addresses" -- * Obsolete header fields (section 4.5) obs_fields :: GenParser Char a [Field] obs_fields = many ( try (do { r <- obs_from; return (From r) }) <|> try (do { r <- obs_sender; return (Sender r) }) <|> try (do { r <- obs_return; return (ReturnPath r) }) <|> try (do { r <- obs_reply_to; return (ReplyTo r) }) <|> try (do { r <- obs_to; return (To r) }) <|> try (do { r <- obs_cc; return (Cc r) }) <|> try (do { r <- obs_bcc; return (Bcc r) }) <|> try (do { r <- obs_message_id; return (MessageID r) }) <|> try (do { r <- obs_in_reply_to; return (InReplyTo r) }) <|> try (do { r <- obs_references; return (References r) }) <|> try (do { r <- obs_subject; return (Subject r) }) <|> try (do { r <- obs_comments; return (Comments r) }) <|> try (do { r <- obs_keywords; return (Keywords [r]) }) <|> try (do { r <- obs_orig_date; return (Date r) }) <|> try (do { r <- obs_resent_date; return (ResentDate r) }) <|> try (do { r <- obs_resent_from; return (ResentFrom r) }) <|> try (do { r <- obs_resent_send; return (ResentSender r) }) <|> try (do { r <- obs_resent_to; return (ResentTo r) }) <|> try (do { r <- obs_resent_cc; return (ResentCc r) }) <|> try (do { r <- obs_resent_bcc; return (ResentBcc r) }) <|> try (do { r <- obs_resent_mid; return (ResentMessageID r) }) <|> try (do { r <- obs_resent_reply; return (ResentReplyTo r) }) <|> try (do { r <- obs_received; return (ObsReceived r) }) -- catch all <|> (do { (name,cont) <- obs_optional; return (OptionalField name cont) }) ) -- ** Obsolete origination date field (section 4.5.1) -- |Parse a 'date' header line but allow for the obsolete -- folding syntax. obs_orig_date :: CharParser a CalendarTime obs_orig_date = obs_header "Date" date_time -- ** Obsolete originator fields (section 4.5.2) -- |Parse a 'from' header line but allow for the obsolete -- folding syntax. obs_from :: CharParser a [NameAddr] obs_from = obs_header "From" mailbox_list -- |Parse a 'sender' header line but allow for the obsolete -- folding syntax. obs_sender :: CharParser a NameAddr obs_sender = obs_header "Sender" mailbox -- |Parse a 'reply_to' header line but allow for the obsolete -- folding syntax. obs_reply_to :: CharParser a [NameAddr] obs_reply_to = obs_header "Reply-To" mailbox_list -- ** Obsolete destination address fields (section 4.5.3) -- |Parse a 'to' header line but allow for the obsolete -- folding syntax. obs_to :: CharParser a [NameAddr] obs_to = obs_header "To" address_list -- |Parse a 'cc' header line but allow for the obsolete -- folding syntax. obs_cc :: CharParser a [NameAddr] obs_cc = obs_header "Cc" address_list -- |Parse a 'bcc' header line but allow for the obsolete -- folding syntax. obs_bcc :: CharParser a [NameAddr] obs_bcc = header "Bcc" ( try address_list <|> do { optional cfws; return [] } ) -- ** Obsolete identification fields (section 4.5.4) -- |Parse a 'message_id' header line but allow for the obsolete -- folding syntax. obs_message_id :: CharParser a String obs_message_id = obs_header "Message-ID" msg_id -- |Parse an 'in_reply_to' header line but allow for the obsolete -- folding and the obsolete phrase syntax. obs_in_reply_to :: CharParser a [String] obs_in_reply_to = obs_header "In-Reply-To" (do r <- many ( do {_ <- phrase; return [] } <|> msg_id ) return (filter (/=[]) r)) -- |Parse a 'references' header line but allow for the obsolete -- folding and the obsolete phrase syntax. obs_references :: CharParser a [String] obs_references = obs_header "References" (do r <- many ( do { _ <- phrase; return [] } <|> msg_id ) return (filter (/=[]) r)) -- |Parses the \"left part\" of a message ID, but allows the obsolete -- syntax, which is identical to a 'local_part'. obs_id_left :: CharParser a String obs_id_left = local_part "left part of an message ID" -- |Parses the \"right part\" of a message ID, but allows the obsolete -- syntax, which is identical to a 'domain'. obs_id_right :: CharParser a String obs_id_right = domain "right part of an message ID" -- ** Obsolete informational fields (section 4.5.5) -- |Parse a 'subject' header line but allow for the obsolete -- folding syntax. obs_subject :: CharParser a String obs_subject = obs_header "Subject" unstructured -- |Parse a 'comments' header line but allow for the obsolete -- folding syntax. obs_comments :: CharParser a String obs_comments = obs_header "Comments" unstructured -- |Parse a 'keywords' header line but allow for the obsolete -- folding syntax. Also, this parser accepts 'obs_phrase_list'. obs_keywords :: CharParser a [String] obs_keywords = obs_header "Keywords" obs_phrase_list -- ** Obsolete resent fields (section 4.5.6) -- |Parse a 'resent_from' header line but allow for the obsolete -- folding syntax. obs_resent_from :: CharParser a [NameAddr] obs_resent_from = obs_header "Resent-From" mailbox_list -- |Parse a 'resent_sender' header line but allow for the obsolete -- folding syntax. obs_resent_send :: CharParser a NameAddr obs_resent_send = obs_header "Resent-Sender" mailbox -- |Parse a 'resent_date' header line but allow for the obsolete -- folding syntax. obs_resent_date :: CharParser a CalendarTime obs_resent_date = obs_header "Resent-Date" date_time -- |Parse a 'resent_to' header line but allow for the obsolete -- folding syntax. obs_resent_to :: CharParser a [NameAddr] obs_resent_to = obs_header "Resent-To" mailbox_list -- |Parse a 'resent_cc' header line but allow for the obsolete -- folding syntax. obs_resent_cc :: CharParser a [NameAddr] obs_resent_cc = obs_header "Resent-Cc" mailbox_list -- |Parse a 'resent_bcc' header line but allow for the obsolete -- folding syntax. obs_resent_bcc :: CharParser a [NameAddr] obs_resent_bcc = obs_header "Bcc" ( try address_list <|> do { optional cfws; return [] } ) -- |Parse a 'resent_msg_id' header line but allow for the obsolete -- folding syntax. obs_resent_mid :: CharParser a String obs_resent_mid = obs_header "Resent-Message-ID" msg_id -- |Parse a @Resent-Reply-To@ header line but allow for the -- obsolete folding syntax. obs_resent_reply :: CharParser a [NameAddr] obs_resent_reply = obs_header "Resent-Reply-To" address_list -- ** Obsolete trace fields (section 4.5.7) obs_return :: CharParser a [Char] obs_return = obs_header "Return-Path" path obs_received :: CharParser a [(String, String)] obs_received = obs_header "Received" name_val_list -- |Match 'obs_angle_addr'. obs_path :: CharParser a String obs_path = obs_angle_addr -- |This parser is identical to 'optional_field' but allows the more -- liberal line-folding syntax between the \"field_name\" and the \"field -- text\". obs_optional :: CharParser a (String,String) obs_optional = do n <- field_name _ <- many wsp _ <- char ':' b <- unstructured _ <- crlf return (n,b) "optional (unspecified) header line" hsemail-1.7.2/example/0000755000000000000000000000000011764377107013001 5ustar0000000000000000hsemail-1.7.2/example/message-test.input0000644000000000000000000000143011764377107016461 0ustar0000000000000000X-From-Line: rtMj@example.org Tue Jun 22 15:11:15 2004 Return-Path: Received: from example.org ([127.0.0.1]) by peti.cryp.to with SMTP id i5MDBAW8014197 for ; Tue, 22 Jun 2004 15:11:12 +0200 Received: (qmail 076 invoked from network); Tue, 22 Jun 2004 09:09:16 -0400 Message-ID: From: "virtual shop" To: simons@cryp.to Subject: PROTECT your Computer from tampering ! 315683 Date: Tue, 22 Jun 2004 09:09:16 -0400 Mime-Version: 1.0 Content-Type: multipart/alternative; boundary="----=_NextPart_013_7A25_1AC67A25.1AC67A25" X-Priority: 3 X-MSMail-Priority: Normal X-Mailer: Microsoft Outlook Express 6.00.2800.1409 X-MimeOLE: Produced By Microsoft MimeOLE V6.00.2800.1409 This is a spam message. hsemail-1.7.2/example/message-test.hs0000644000000000000000000000105611764377107015740 0ustar0000000000000000module Main (main) where import Text.ParserCombinators.Parsec ( parse ) import Text.ParserCombinators.Parsec.Rfc2822 -- Read an Internet message from standard input, parse it, -- and return the result. main :: IO () main = do input <- getContents print $ parse message "" (fixEol input) return () -- Make sure all lines are terminated by CRLF. fixEol :: String -> String fixEol ('\r':'\n':xs) = '\r' : '\n' : fixEol xs fixEol ('\n':xs) = '\r' : '\n' : fixEol xs fixEol (x:xs) = x : fixEol xs fixEol [] = [] hsemail-1.7.2/example/smtp-test.input0000644000000000000000000000024611764377107016024 0ustar0000000000000000helo smtp.example.org mail from: rcpt to: RCPT to:<@example.org,@example.com:joe.doe@example.net> VrFy localuser data quit hsemail-1.7.2/example/smtp-test.hs0000644000000000000000000000051611764377107015277 0ustar0000000000000000module Main (main) where import Text.ParserCombinators.Parsec ( parse ) import Text.ParserCombinators.Parsec.Rfc2821 -- Read an SMTP command from standard input, parse it, -- return the result, and loop until EOF. main :: IO () main = do input <- getContents mapM_ (print . parse smtpCmd "") [ l ++ "\r\n" | l <- lines input ]