mbox-0.3.4/0000755000000000000000000000000013145070264010653 5ustar0000000000000000mbox-0.3.4/LICENSE0000644000000000000000000000270113145070264011660 0ustar0000000000000000Copyright (c) Gershom Bazerman 2010 All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the author nor the names of his contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. mbox-0.3.4/mbox.cabal0000644000000000000000000000130113145070264012577 0ustar0000000000000000name: mbox version: 0.3.4 synopsis: Read and write standard mailbox files. description: Read and write standard mailbox (mboxrd) files. category: system, text, data license: BSD3 license-file: LICENSE author: Gershom Bazerman maintainer: gershomb@gmail.com Tested-With: GHC == 7.8 Build-Type: Simple Cabal-Version: >= 1.6 library build-depends: base >= 4, base < 6, safe, time < 1.9, time-locale-compat, text exposed-modules: Data.MBox, Data.MBox.String ghc-options: -Wall source-repository head type: darcs location: http://hub.darcs.net/gershomb/mbox mbox-0.3.4/Setup.hs0000644000000000000000000000005613145070264012310 0ustar0000000000000000import Distribution.Simple main = defaultMain mbox-0.3.4/Data/0000755000000000000000000000000013145070264011524 5ustar0000000000000000mbox-0.3.4/Data/MBox.hs0000644000000000000000000001141213145070264012724 0ustar0000000000000000{-# LANGUAGE ViewPatterns #-} ----------------------------------------------------------------------------- {- | Module : Data.MBox Copyright : (c) Gershom Bazerman, 2009; ported to Text by Alexander Jerneck, 2012 License : BSD 3 Clause Maintainer : gershomb@gmail.com Stability : experimental Reads and writes mboxrd files as per . This module uses Lazy Text pervasively, and should be able to operate as a streaming parser. That is to say, given a lazy stream of Text, and a streaming processing function, you should be able to analyze large mbox files in constant space. -} ------------------------------------------------------------------------- module Data.MBox (MBox, Message(..), Header, parseMBox, parseForward, parseDateHeader, showMessage, showMBox, getHeader, isID, isDate) where import Prelude hiding (tail, init, last, minimum, maximum, foldr1, foldl1, (!!), read) import Control.Arrow import Data.Char import Data.Maybe import Data.Time import Safe import qualified Data.Text.Lazy as T import qualified Data.Time.Locale.Compat as LC type MBox = [Message] data Message = Message {fromLine :: T.Text, headers :: [Header], body :: T.Text} deriving (Read, Show) type Header = (T.Text, T.Text) -- | Reads a date header as a UTCTime parseDateHeader :: T.Text -> Maybe UTCTime parseDateHeader txt = listToMaybe . catMaybes $ map tryParse formats where header = T.unpack txt tryParse f = parseTime LC.defaultTimeLocale f header formats = [ "%a, %_d %b %Y %T %z" , "%a, %_d %b %Y %T %Z" , "%a, %d %b %Y %T %z" , "%a, %d %b %Y %T %Z" , "%a, %_d %b %Y %T %z (%Z)" , "%a, %_d %b %Y %T %z (GMT%:-z)" , "%a, %_d %b %Y %T %z (UTC%:-z)" , "%a, %_d %b %Y %T %z (GMT%:z)" , "%a, %_d %b %Y %T %z (UTC%:z)" , "%A, %B %e, %Y %l:%M %p" , "%e %b %Y %T %z" ] -- | Attempts to retrieve the contents of a forwarded message from an enclosing message. parseForward :: Message -> Message parseForward origMsg@(Message f _ b) = case drop 1 $ dropWhile (/= T.pack "-----Original Message-----") (T.lines b) of [] -> origMsg xs -> headDef origMsg . parseMBox . T.unlines $ f:xs -- | Parses Text as an mbox file. parseMBox :: T.Text -> MBox parseMBox = go . T.lines where go [] = [] go (x:xs) = uncurry (:) . (readMsg x *** go) . break ((T.pack "From ") `T.isPrefixOf`) $ xs readMsg :: T.Text -> [T.Text] -> Message readMsg x xs = uncurry (Message x) . second (T.unlines . map unquoteFrom). readHeaders $ xs readHeaders :: [T.Text] -> ([Header], [T.Text]) readHeaders [] = ([],[]) readHeaders (x:xs) | T.null x || T.all isSpace x || not (T.any (==':') x) = ([],xs) | otherwise = first ((second (T.strip . sanHeader . (`T.append` headerCont) . T.drop 1) . T.break (==':') $ x):) $ readHeaders xs' where (headerCont, xs') = first ((T.pack " " `T.append`) . T.unlines . map T.strip) . break notCont $ xs notCont :: T.Text -> Bool notCont s = doesNotStartSpace s || allSpace s allSpace = T.all isSpace doesNotStartSpace s = case T.length s of 0 -> True _ -> not (isSpace $ T.head s) unquoteFrom :: T.Text -> T.Text unquoteFrom xs'@(T.stripPrefix (T.pack ">") -> Just suf) = if (T.pack "From ") `T.isPrefixOf` T.dropWhile (=='>') suf then suf else xs' unquoteFrom xs = xs sanHeader :: T.Text -> T.Text sanHeader = T.replace (T.pack "\n") (T.pack " ") -- | Renders an MBox into Text showMBox :: MBox -> T.Text showMBox = T.concat . map showMessage -- | Renders an individual message into Text. showMessage :: Message -> T.Text showMessage (Message f hs b) = T.unlines $ f : formatHeaders hs ++ [(T.pack "\n")] ++ formatBody b where formatHeaders = map (\(x,y) -> x `T.append` (T.pack ": ") `T.append` y) formatBody = map unFrom . T.lines unFrom x | isFrom x = '>' `T.cons` x | otherwise = x isFrom x = (T.pack "From ") `T.isPrefixOf` T.dropWhile (=='>') x -- | Return True if header is a Message-ID header. isID :: Header -> Bool isID (x, _) = x == T.pack "Message-ID" -- | Return True if header is a Date header. isDate :: Header -> Bool isDate (x, _) = x == T.pack "Date" -- | Return the values of headers for which predicate is True getHeader :: (Header -> Bool) -> Message -> [T.Text] getHeader predFunc = map snd . filter predFunc . headers mbox-0.3.4/Data/MBox/0000755000000000000000000000000013145070264012371 5ustar0000000000000000mbox-0.3.4/Data/MBox/String.hs0000644000000000000000000000775313145070264014207 0ustar0000000000000000 ----------------------------------------------------------------------------- {- | Module : Data.MBox Copyright : (c) Gershom Bazerman, 2009 License : BSD 3 Clause Maintainer : gershomb@gmail.com Stability : experimental Reads and writes mboxrd files as per . This parser is written to be a streaming parser. Given a lazy source of data and a streaming consumer, you should be able to analyze arbitrary mbox files in constant space. -} ------------------------------------------------------------------------- module Data.MBox.String (MBox, Message(..), Header, parseMBox, parseForward, parseDateHeader, showMessage, showMBox, getHeader, isID, isDate) where import Prelude hiding (tail, init, last, minimum, maximum, foldr1, foldl1, (!!), read) import Control.Arrow import Data.List (isPrefixOf) import Data.Char import Data.Maybe import Data.Time import Safe import qualified Data.Time.Locale.Compat as LC type MBox = [Message] data Message = Message {fromLine :: String, headers :: [Header], body :: String} deriving (Read, Show) type Header = (String, String) -- | Reads a date header as a UTCTime parseDateHeader :: String -> Maybe UTCTime parseDateHeader header = listToMaybe . catMaybes $ map tryParse formats where tryParse f = parseTime LC.defaultTimeLocale f header formats = [ "%a, %_d %b %Y %T %z" , "%a, %_d %b %Y %T %Z" , "%a, %d %b %Y %T %z" , "%a, %d %b %Y %T %Z" , "%a, %_d %b %Y %T %z (%Z)" , "%a, %_d %b %Y %T %z (GMT%:-z)" , "%a, %_d %b %Y %T %z (UTC%:-z)" , "%a, %_d %b %Y %T %z (GMT%:z)" , "%a, %_d %b %Y %T %z (UTC%:z)" , "%A, %B %e, %Y %l:%M %p" , "%e %b %Y %T %z" ] -- | Attempts to retrieve the contents of a forwarded message from an enclosing message. parseForward :: Message -> Message parseForward origMsg@(Message f _ b) = case drop 1 $ dropWhile (/= "-----Original Message-----") (lines b) of [] -> origMsg xs -> headDef origMsg . parseMBox . unlines $ f:xs -- | Reads a string as an mbox file. parseMBox :: String -> MBox parseMBox = go . lines where go [] = [] go (x:xs) = uncurry (:) . (readMsg x *** go) . break ("From " `isPrefixOf`) $ xs readMsg :: String -> [String] -> Message readMsg x xs = uncurry (Message x) . second (unlines . map unquoteFrom). readHeaders $ xs readHeaders :: [String] -> ([Header], [String]) readHeaders [] = ([],[]) readHeaders (x:xs) | null x || all isSpace x || not (any (==':') x) = ([],xs) | otherwise = first ((second (killSpace . sanHeader . (++ headerCont) . drop 1) . break (==':') $ x):) $ readHeaders xs' where (headerCont, xs') = first ((" " ++) . unlines . map killSpace) . break notCont $ xs notCont [] = True notCont (c:cs) = not (isSpace c) || (all isSpace cs) unquoteFrom :: String -> String unquoteFrom xs'@('>':xs) = if "From " `isPrefixOf` dropWhile (=='>') xs then xs else xs' unquoteFrom xs = xs sanHeader :: String -> String sanHeader = map (\x -> if x == '\n' then ' ' else x) -- | Renders an MBox into a String showMBox :: MBox -> String showMBox = concatMap showMessage -- | Renders an individual message into a String. showMessage :: Message -> String showMessage (Message f hs b) = unlines $ f : map (\(x,y) -> (x ++ ": " ++ y)) hs ++ ["\n"] ++ map unFrom (lines b) where unFrom x | isFrom x = '>':x | otherwise = x isFrom x = "From " `isPrefixOf` dropWhile (=='>') x killSpace :: String -> String killSpace = dropWhile isSpace . dropEndWhile isSpace dropEndWhile :: (a -> Bool) -> [a] -> [a] dropEndWhile p = foldr (\x xs -> if p x && null xs then [] else x:xs) [] -- | Header accessors isID :: Header -> Bool isID (x, _y) = x == "Message-ID" isDate :: Header -> Bool isDate (x, _y) = x == "Date" getHeader :: (Header -> Bool) -> Message -> [String] getHeader p = map snd . filter p . headers