regex-pcre-0.95.0.0/0000755000000000000000000000000007346545000012175 5ustar0000000000000000regex-pcre-0.95.0.0/ChangeLog.md0000755000000000000000000000024307346545000014350 0ustar0000000000000000See also http://pvp.haskell.org/faq ## 0.95.0.0 - Update to `regex-0.94.0.0` API - Compatibility with `base-4.13.0` - Use `pkg-config` for locating `pcre` ---- regex-pcre-0.95.0.0/LICENSE0000644000000000000000000000274707346545000013214 0ustar0000000000000000This modile is under this "3 clause" BSD license: Copyright (c) 2007, Christopher Kuklewicz All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * The names of the contributors may not be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. regex-pcre-0.95.0.0/Setup.hs0000644000000000000000000000013107346545000013624 0ustar0000000000000000import Distribution.Simple main = defaultMain regex-pcre-0.95.0.0/regex-pcre.cabal0000644000000000000000000000362107346545000015224 0ustar0000000000000000Cabal-Version: 1.12 Name: regex-pcre Version: 0.95.0.0 build-type: Simple license: BSD3 license-file: LICENSE copyright: Copyright (c) 2006, Christopher Kuklewicz author: Christopher Kuklewicz maintainer: hvr@gnu.org bug-reports: https://github.com/hvr/regex-pcre/issues category: Text synopsis: PCRE Backend for "Text.Regex" (regex-base) description: This package provides a backend for the API. . See also for more information. extra-source-files: ChangeLog.md flag pkg-config default: True manual: True description: Use @pkg-config(1)@ to locate foreign @zlib@ library. source-repository head type: git location: https://github.com/hvr/regex-pcre.git library hs-source-dirs: src exposed-modules: Text.Regex.PCRE Text.Regex.PCRE.Wrap Text.Regex.PCRE.String Text.Regex.PCRE.Sequence Text.Regex.PCRE.ByteString Text.Regex.PCRE.ByteString.Lazy other-modules: Paths_regex_pcre default-language: Haskell2010 default-extensions: MultiParamTypeClasses FunctionalDependencies ForeignFunctionInterface ScopedTypeVariables GeneralizedNewtypeDeriving FlexibleContexts TypeSynonymInstances FlexibleInstances build-depends: regex-base == 0.94.* , base >= 4.3 && < 4.14 , containers >= 0.4 && < 0.7 , bytestring >= 0.9 && < 0.11 , array >= 0.3 && < 0.6 if !impl(ghc >= 8) build-depends: fail == 4.9.* if flag(pkg-config) pkgconfig-depends: libpcre else extra-libraries: pcre ghc-options: -O2 -Wall -fno-warn-unused-imports regex-pcre-0.95.0.0/src/Text/Regex/0000755000000000000000000000000007346545000014762 5ustar0000000000000000regex-pcre-0.95.0.0/src/Text/Regex/PCRE.hs0000644000000000000000000000424407346545000016053 0ustar0000000000000000{-| The "Text.Regex.PCRE" module provides a backend for regular expressions. If you import this along with other backends, then you should do so with qualified imports, perhaps renamed for convenience. Using the provided 'CompOption' and 'ExecOption' values and if 'configUTF8' is True, then you might be able to send UTF8 encoded ByteStrings to PCRE and get sensible results. This is currently untested. The regular expression can be provided as a 'ByteString', but it will be copied and a NUL byte appended to make a 'CString' unless such a byte is already present. Thus the regular expression cannot contain an explicit NUL byte. The search string is passed as a 'CStringLen' and may contain NUL bytes and does not need to end in a NUL byte. 'ByteString's are searched in place (via unsafeUseAsCStringLen). A 'String' will be converted into a 'CString' or 'CStringLen' for processing. Doing this repeatedly will be very inefficient. The "Text.Regex.PCRE.String", "Text.Regex.PCRE.ByteString", and "Text.Regex.PCRE.Wrap" modules provides both the high level interface exported by this module and medium- and low-level interfaces that returns error using Either structures. -} {- Copyright : (c) Chris Kuklewicz 2007 -} module Text.Regex.PCRE(getVersion_Text_Regex_PCRE ,module Text.Regex.Base -- ** Wrap, for '=~' and '=~~', types and constants ,module Text.Regex.PCRE.Wrap) where import Prelude hiding (fail) import Text.Regex.PCRE.Wrap(Regex, CompOption(CompOption), ExecOption(ExecOption), (=~), (=~~), unusedOffset, getNumSubs, configUTF8, getVersion, compBlank, compAnchored, compAutoCallout, compCaseless, compDollarEndOnly, compDotAll, compExtended, compExtra, compFirstLine, compMultiline, compNoAutoCapture, compUngreedy, compUTF8, compNoUTF8Check, execBlank, execAnchored, execNotBOL, execNotEOL, execNotEmpty, execNoUTF8Check, execPartial) import Text.Regex.PCRE.String() import Text.Regex.PCRE.Sequence() import Text.Regex.PCRE.ByteString() import Text.Regex.PCRE.ByteString.Lazy() import Data.Version(Version(..)) import Text.Regex.Base import qualified Paths_regex_pcre getVersion_Text_Regex_PCRE :: Version getVersion_Text_Regex_PCRE = Paths_regex_pcre.version regex-pcre-0.95.0.0/src/Text/Regex/PCRE/0000755000000000000000000000000007346545000015513 5ustar0000000000000000regex-pcre-0.95.0.0/src/Text/Regex/PCRE/ByteString.hs0000644000000000000000000001222007346545000020136 0ustar0000000000000000{-# OPTIONS_GHC -fno-warn-orphans #-} {-| This exports instances of the high level API and the medium level API of 'compile','execute', and 'regexec'. -} {- Copyright : (c) Chris Kuklewicz 2007 -} module Text.Regex.PCRE.ByteString( -- ** Types Regex, MatchOffset, MatchLength, CompOption(CompOption), ExecOption(ExecOption), ReturnCode, WrapError, -- ** Miscellaneous unusedOffset, getVersion, -- ** Medium level API functions compile, execute, regexec, -- ** CompOption flags compBlank, compAnchored, compAutoCallout, compCaseless, compDollarEndOnly, compDotAll, compExtended, compExtra, compFirstLine, compMultiline, compNoAutoCapture, compUngreedy, compUTF8, compNoUTF8Check, -- ** ExecOption flags execBlank, execAnchored, execNotBOL, execNotEOL, execNotEmpty, execNoUTF8Check, execPartial ) where import Prelude hiding (fail) import Control.Monad.Fail (MonadFail(fail)) import Text.Regex.PCRE.Wrap -- all import Data.Array(Array,listArray) import Data.ByteString(ByteString) import qualified Data.ByteString as B(empty,useAsCString,last,take,drop,null,pack) import qualified Data.ByteString.Unsafe as B(unsafeUseAsCString,unsafeUseAsCStringLen) import System.IO.Unsafe(unsafePerformIO) import Text.Regex.Base.RegexLike(RegexContext(..),RegexMaker(..),RegexLike(..),MatchOffset,MatchLength) import Text.Regex.Base.Impl(polymatch,polymatchM) import Foreign.C.String(CStringLen) import Foreign(nullPtr) instance RegexContext Regex ByteString ByteString where match = polymatch matchM = polymatchM unwrap :: (Show e) => Either e v -> IO v unwrap x = case x of Left err -> fail ("Text.Regex.PCRE.ByteString died: "++ show err) Right v -> return v {-# INLINE asCStringLen #-} asCStringLen :: ByteString -> (CStringLen -> IO a) -> IO a asCStringLen s op = B.unsafeUseAsCStringLen s checked where checked cs@(ptr,_) | ptr == nullPtr = B.unsafeUseAsCStringLen myEmpty (op . trim) | otherwise = op cs myEmpty = B.pack [0] trim (ptr,_) = (ptr,0) instance RegexMaker Regex CompOption ExecOption ByteString where makeRegexOpts c e pattern = unsafePerformIO $ compile c e pattern >>= unwrap makeRegexOptsM c e pattern = either (fail.show) return $ unsafePerformIO $ compile c e pattern instance RegexLike Regex ByteString where matchTest regex bs = unsafePerformIO $ asCStringLen bs (wrapTest 0 regex) >>= unwrap matchOnce regex bs = unsafePerformIO $ execute regex bs >>= unwrap matchAll regex bs = unsafePerformIO $ asCStringLen bs (wrapMatchAll regex) >>= unwrap matchCount regex bs = unsafePerformIO $ asCStringLen bs (wrapCount regex) >>= unwrap -- --------------------------------------------------------------------- -- | Compiles a regular expression -- compile :: CompOption -- ^ (summed together) -> ExecOption -- ^ (summed together) -> ByteString -- ^ The regular expression to compile -> IO (Either (MatchOffset,String) Regex) -- ^ Returns: the compiled regular expression compile c e pattern = do -- PCRE does not allow one to specify a length for the regular expression, it must by 0 terminated let asCString bs = if (not (B.null bs)) && (0==B.last bs) then B.unsafeUseAsCString bs else B.useAsCString bs asCString pattern (wrapCompile c e) -- --------------------------------------------------------------------- -- | Matches a regular expression against a buffer, returning the buffer -- indicies of the match, and any submatches -- -- | Matches a regular expression against a string execute :: Regex -- ^ Compiled regular expression -> ByteString -- ^ String to match against -> IO (Either WrapError (Maybe (Array Int (MatchOffset,MatchLength)))) -- ^ Returns: 'Nothing' if the regex did not match the -- string, or: -- 'Just' an array of (offset,length) pairs where index 0 is whole match, and the rest are the captured subexpressions. execute regex bs = do maybeStartEnd <- asCStringLen bs (wrapMatch 0 regex) case maybeStartEnd of Right Nothing -> return (Right Nothing) Right (Just parts) -> return . Right . Just . listArray (0,pred (length parts)) . map (\(s,e)->(fromIntegral s, fromIntegral (e-s))) $ parts Left err -> return (Left err) regexec :: Regex -- ^ Compiled regular expression -> ByteString -- ^ String to match against -> IO (Either WrapError (Maybe (ByteString, ByteString, ByteString, [ByteString]))) regexec regex bs = do let getSub (start,stop) | start == unusedOffset = B.empty | otherwise = B.take (stop-start) . B.drop start $ bs matchedParts [] = (B.empty,B.empty,bs,[]) -- no information matchedParts (matchedStartStop@(start,stop):subStartStop) = (B.take start bs ,getSub matchedStartStop ,B.drop stop bs ,map getSub subStartStop) maybeStartEnd <- asCStringLen bs (wrapMatch 0 regex) case maybeStartEnd of Right Nothing -> return (Right Nothing) Right (Just parts) -> return . Right . Just . matchedParts $ parts Left err -> return (Left err) regex-pcre-0.95.0.0/src/Text/Regex/PCRE/ByteString/0000755000000000000000000000000007346545000017605 5ustar0000000000000000regex-pcre-0.95.0.0/src/Text/Regex/PCRE/ByteString/Lazy.hs0000644000000000000000000001147307346545000021066 0ustar0000000000000000{-# OPTIONS_GHC -fno-warn-orphans #-} {-| This exports instances of the high level API and the medium level API of 'compile','execute', and 'regexec'. -} {- Copyright : (c) Chris Kuklewicz 2007 -} module Text.Regex.PCRE.ByteString.Lazy( -- ** Types Regex, MatchOffset, MatchLength, CompOption(CompOption), ExecOption(ExecOption), ReturnCode, WrapError, -- ** Miscellaneous unusedOffset, getVersion, -- ** Medium level API functions compile, execute, regexec, -- ** CompOption flags compBlank, compAnchored, compAutoCallout, compCaseless, compDollarEndOnly, compDotAll, compExtended, compExtra, compFirstLine, compMultiline, compNoAutoCapture, compUngreedy, compUTF8, compNoUTF8Check, -- ** ExecOption flags execBlank, execAnchored, execNotBOL, execNotEOL, execNotEmpty, execNoUTF8Check, execPartial ) where import Prelude hiding (fail) import Control.Monad.Fail (MonadFail(fail)) import Text.Regex.PCRE.Wrap -- all import Data.Array(Array) import qualified Data.ByteString.Lazy as L(ByteString,toChunks,fromChunks,last,null,snoc) import qualified Data.ByteString as B(ByteString,concat,pack) import qualified Data.ByteString.Unsafe as B(unsafeUseAsCString,unsafeUseAsCStringLen) import System.IO.Unsafe(unsafePerformIO) import Text.Regex.Base.RegexLike(RegexContext(..),RegexMaker(..),RegexLike(..),MatchOffset,MatchLength) import Text.Regex.Base.Impl(polymatch,polymatchM) import qualified Text.Regex.PCRE.ByteString as BS(execute,regexec) import Foreign.C.String(CString,CStringLen) import Foreign(nullPtr) instance RegexContext Regex L.ByteString L.ByteString where match = polymatch matchM = polymatchM {-# INLINE fromLazy #-} fromLazy :: L.ByteString -> B.ByteString fromLazy = B.concat . L.toChunks {-# INLINE toLazy #-} toLazy :: B.ByteString -> L.ByteString toLazy = L.fromChunks . return unwrap :: (Show e) => Either e v -> IO v unwrap x = case x of Left err -> fail ("Text.Regex.PCRE.ByteString.Lazy died: "++ show err) Right v -> return v {-# INLINE asCString #-} asCString :: L.ByteString -> (CString -> IO a) -> IO a asCString s = if (not (L.null s)) && (0==L.last s) then B.unsafeUseAsCString (fromLazy s) else B.unsafeUseAsCString (fromLazy (L.snoc s 0)) {-# INLINE asCStringLen #-} asCStringLen :: L.ByteString -> (CStringLen -> IO a) -> IO a asCStringLen ls op = B.unsafeUseAsCStringLen (fromLazy ls) checked where checked cs@(ptr,_) | ptr == nullPtr = B.unsafeUseAsCStringLen myEmpty (op . trim) | otherwise = op cs myEmpty = B.pack [0] trim (ptr,_) = (ptr,0) instance RegexMaker Regex CompOption ExecOption L.ByteString where makeRegexOpts c e pattern = unsafePerformIO $ compile c e pattern >>= unwrap makeRegexOptsM c e pattern = either (fail.show) return $ unsafePerformIO $ compile c e pattern instance RegexLike Regex L.ByteString where matchTest regex bs = unsafePerformIO $ asCStringLen bs (wrapTest 0 regex) >>= unwrap matchOnce regex bs = unsafePerformIO $ execute regex bs >>= unwrap matchAll regex bs = unsafePerformIO $ asCStringLen bs (wrapMatchAll regex) >>= unwrap matchCount regex bs = unsafePerformIO $ asCStringLen bs (wrapCount regex) >>= unwrap -- --------------------------------------------------------------------- -- | Compiles a regular expression -- compile :: CompOption -- ^ (summed together) -> ExecOption -- ^ (summed together) -> L.ByteString -- ^ The regular expression to compile -> IO (Either (MatchOffset,String) Regex) -- ^ Returns: the compiled regular expression compile c e pattern = do asCString pattern (wrapCompile c e) -- --------------------------------------------------------------------- -- | Matches a regular expression against a buffer, returning the buffer -- indicies of the match, and any submatches -- -- | Matches a regular expression against a string execute :: Regex -- ^ Compiled regular expression -> L.ByteString -- ^ String to match against -> IO (Either WrapError (Maybe (Array Int (MatchOffset,MatchLength)))) -- ^ Returns: 'Nothing' if the regex did not match the -- string, or: -- 'Just' an array of (offset,length) pairs where index 0 is whole match, and the rest are the captured subexpressions. execute regex bs = BS.execute regex (fromLazy bs) regexec :: Regex -- ^ Compiled regular expression -> L.ByteString -- ^ String to match against -> IO (Either WrapError (Maybe (L.ByteString, L.ByteString, L.ByteString, [L.ByteString]))) regexec regex bs = do x <- BS.regexec regex (fromLazy bs) return $ case x of Left e -> Left e Right Nothing -> Right Nothing Right (Just (a,b,c,ds)) -> Right (Just (toLazy a,toLazy b,toLazy c,map toLazy ds)) regex-pcre-0.95.0.0/src/Text/Regex/PCRE/Sequence.hs0000644000000000000000000001317007346545000017621 0ustar0000000000000000{-# OPTIONS_GHC -fno-warn-orphans #-} {-| This exports instances of the high level API and the medium level API of 'compile','execute', and 'regexec'. -} {- Copyright : (c) Chris Kuklewicz 2007 -} module Text.Regex.PCRE.Sequence( -- ** Types Regex, MatchOffset, MatchLength, CompOption(CompOption), ExecOption(ExecOption), ReturnCode, WrapError, -- ** Miscellaneous unusedOffset, getVersion, -- ** Medium level API functions compile, execute, regexec, -- ** Constants for CompOption compBlank, compAnchored, compAutoCallout, compCaseless, compDollarEndOnly, compDotAll, compExtended, compExtra, compFirstLine, compMultiline, compNoAutoCapture, compUngreedy, compUTF8, compNoUTF8Check, -- ** Constants for ExecOption execBlank, execAnchored, execNotBOL, execNotEOL, execNotEmpty, execNoUTF8Check, execPartial ) where import Prelude hiding (fail) import Control.Monad.Fail (MonadFail(fail)) import Text.Regex.PCRE.Wrap -- all --import Foreign.C.String(withCStringLen,withCString) import Data.Array(Array,listArray) import System.IO.Unsafe(unsafePerformIO) import Text.Regex.Base.RegexLike(RegexMaker(..),RegexLike(..),RegexContext(..),MatchLength,MatchOffset,Extract(..)) import Text.Regex.Base.Impl(polymatch,polymatchM) import Data.Sequence as S hiding (length) import qualified Data.Sequence as S (length) import Foreign.C.String import Foreign.Marshal.Array import Foreign.Marshal.Alloc import Foreign.Storable instance RegexContext Regex (Seq Char) (Seq Char) where match = polymatch matchM = polymatchM unwrap :: (Show e) => Either e v -> IO v unwrap x = case x of Left err -> fail ("Text.Regex.PCRE.Sequence died: "++ show err) Right v -> return v instance RegexMaker Regex CompOption ExecOption (Seq Char) where makeRegexOpts c e pattern = unsafePerformIO $ compile c e pattern >>= unwrap makeRegexOptsM c e pattern = either (fail.show) return $ unsafePerformIO $ compile c e pattern instance RegexLike Regex (Seq Char) where matchTest regex str = unsafePerformIO $ withSeq str (wrapTest 0 regex) >>= unwrap matchOnce regex str = unsafePerformIO $ execute regex str >>= unwrap matchAll regex str = unsafePerformIO $ withSeq str (wrapMatchAll regex) >>= unwrap matchCount regex str = unsafePerformIO $ withSeq str (wrapCount regex) >>= unwrap -- | Compiles a regular expression compile :: CompOption -- ^ Flags (summed together) -> ExecOption -- ^ Flags (summed together) -> (Seq Char) -- ^ The regular expression to compile -> IO (Either (MatchOffset,String) Regex) -- ^ Returns: an error string and offset or the compiled regular expression compile c e pattern = withSeq0 pattern (wrapCompile c e) -- | Matches a regular expression against a string execute :: Regex -- ^ Compiled regular expression -> (Seq Char) -- ^ (Seq Char) to match against -> IO (Either WrapError (Maybe (Array Int (MatchOffset,MatchLength)))) -- ^ Returns: 'Nothing' if the regex did not match the -- string, or: -- 'Just' an array of (offset,length) pairs where index 0 is whole match, and the rest are the captured subexpressions. execute regex str = do maybeStartEnd <- withSeq str (wrapMatch 0 regex) case maybeStartEnd of Right Nothing -> return (Right Nothing) -- Right (Just []) -> fail "got [] back!" -- should never happen Right (Just parts) -> return . Right . Just . listArray (0,pred (length parts)) . map (\(s,e)->(fromIntegral s, fromIntegral (e-s))) $ parts Left err -> return (Left err) -- | execute match and extract substrings rather than just offsets regexec :: Regex -- ^ compiled regular expression -> (Seq Char) -- ^ string to match -> IO (Either WrapError (Maybe ((Seq Char), (Seq Char),(Seq Char), [(Seq Char)]))) -- ^ Returns: Nothing if no match, else -- (text before match, text after match, array of matches with 0 being the whole match) regexec regex str = do let getSub (start,stop) | start == unusedOffset = S.empty | otherwise = extract (start,stop-start) str matchedParts [] = (S.empty,S.empty,str,[]) -- no information matchedParts (matchedStartStop@(start,stop):subStartStop) = (before start str ,getSub matchedStartStop ,after stop str ,map getSub subStartStop) maybeStartEnd <- withSeq str (wrapMatch 0 regex) case maybeStartEnd of Right Nothing -> return (Right Nothing) -- Right (Just []) -> fail "got [] back!" -- should never happen Right (Just parts) -> return . Right . Just . matchedParts $ parts Left err -> return (Left err) withSeq :: Seq Char -> (CStringLen -> IO a) -> IO a withSeq s f = let -- Ensure null at end of s len = S.length s pokes p a | seq p (seq a False) = undefined | otherwise = case viewl a of EmptyL -> return () c :< a' -> poke p (castCharToCChar c) >> pokes (advancePtr p 1) a' in allocaBytes (S.length s) (\ptr -> pokes ptr s >> f (ptr,len)) withSeq0 :: Seq Char -> (CString -> IO a) -> IO a withSeq0 s f = let -- Ensure null at end of s s' = case viewr s of -- bang !s' EmptyR -> singleton '\0' _ :> '\0' -> s _ -> s |> '\0' pokes p a | seq p (seq a False) = undefined | otherwise = case viewl a of -- bang pokes !p !a EmptyL -> return () c :< a' -> poke p (castCharToCChar c) >> pokes (advancePtr p 1) a' in allocaBytes (S.length s') (\ptr -> pokes ptr s' >> f ptr) regex-pcre-0.95.0.0/src/Text/Regex/PCRE/String.hs0000644000000000000000000001073107346545000017317 0ustar0000000000000000{-# OPTIONS_GHC -fno-warn-orphans #-} {-| This exports instances of the high level API and the medium level API of 'compile','execute', and 'regexec'. -} {- Copyright : (c) Chris Kuklewicz 2007 -} module Text.Regex.PCRE.String( -- ** Types Regex, MatchOffset, MatchLength, CompOption(CompOption), ExecOption(ExecOption), ReturnCode, WrapError, -- ** Miscellaneous unusedOffset, getVersion, -- ** Medium level API functions compile, execute, regexec, -- ** Constants for CompOption compBlank, compAnchored, compAutoCallout, compCaseless, compDollarEndOnly, compDotAll, compExtended, compExtra, compFirstLine, compMultiline, compNoAutoCapture, compUngreedy, compUTF8, compNoUTF8Check, -- ** Constants for ExecOption execBlank, execAnchored, execNotBOL, execNotEOL, execNotEmpty, execNoUTF8Check, execPartial ) where import Prelude hiding (fail) import Control.Monad.Fail (MonadFail(fail)) import Text.Regex.PCRE.Wrap -- all import Foreign.C.String(withCStringLen,withCString) import Data.Array(Array,listArray) import System.IO.Unsafe(unsafePerformIO) import Text.Regex.Base.RegexLike(RegexMaker(..),RegexLike(..),RegexContext(..),MatchLength,MatchOffset) import Text.Regex.Base.Impl(polymatch,polymatchM) instance RegexContext Regex String String where match = polymatch matchM = polymatchM unwrap :: (Show e) => Either e v -> IO v unwrap x = case x of Left err -> fail ("Text.Regex.PCRE.String died: "++ show err) Right v -> return v instance RegexMaker Regex CompOption ExecOption String where makeRegexOpts c e pattern = unsafePerformIO $ compile c e pattern >>= unwrap makeRegexOptsM c e pattern = either (fail.show) return $ unsafePerformIO $ compile c e pattern instance RegexLike Regex String where matchTest regex str = unsafePerformIO $ withCStringLen str (wrapTest 0 regex) >>= unwrap matchOnce regex str = unsafePerformIO $ execute regex str >>= unwrap matchAll regex str = unsafePerformIO $ withCStringLen str (wrapMatchAll regex) >>= unwrap matchCount regex str = unsafePerformIO $ withCStringLen str (wrapCount regex) >>= unwrap -- | Compiles a regular expression compile :: CompOption -- ^ Flags (summed together) -> ExecOption -- ^ Flags (summed together) -> String -- ^ The regular expression to compile -> IO (Either (MatchOffset,String) Regex) -- ^ Returns: an error string and offset or the compiled regular expression compile c e pattern = withCString pattern (wrapCompile c e) -- | Matches a regular expression against a string execute :: Regex -- ^ Compiled regular expression -> String -- ^ String to match against -> IO (Either WrapError (Maybe (Array Int (MatchOffset,MatchLength)))) -- ^ Returns: 'Nothing' if the regex did not match the -- string, or: -- 'Just' an array of (offset,length) pairs where index 0 is whole match, and the rest are the captured subexpressions. execute regex str = do maybeStartEnd <- withCStringLen str (wrapMatch 0 regex) case maybeStartEnd of Right Nothing -> return (Right Nothing) -- Right (Just []) -> fail "got [] back!" -- should never happen Right (Just parts) -> return . Right . Just . listArray (0,pred (length parts)) . map (\(s,e)->(fromIntegral s, fromIntegral (e-s))) $ parts Left err -> return (Left err) -- | execute match and extract substrings rather than just offsets regexec :: Regex -- ^ compiled regular expression -> String -- ^ string to match -> IO (Either WrapError (Maybe (String, String,String, [String]))) -- ^ Returns: Nothing if no match, else -- (text before match, text after match, array of matches with 0 being the whole match) regexec regex str = do let getSub (start,stop) | start == unusedOffset = "" | otherwise = take (stop-start) . drop start $ str matchedParts [] = ("","",str,[]) -- no information matchedParts (matchedStartStop@(start,stop):subStartStop) = (take start str ,getSub matchedStartStop ,drop stop str ,map getSub subStartStop) maybeStartEnd <- withCStringLen str (wrapMatch 0 regex) case maybeStartEnd of Right Nothing -> return (Right Nothing) -- Right (Just []) -> fail "got [] back!" -- should never happen Right (Just parts) -> return . Right . Just . matchedParts $ parts Left err -> return (Left err) regex-pcre-0.95.0.0/src/Text/Regex/PCRE/Wrap.hsc0000644000000000000000000003604507346545000017133 0ustar0000000000000000-- The exported symbols are the same whether HAVE_PCRE_H is defined, -- but when if it is not defined then 'getVersion == Nothing' and all -- other exported values will call error or fail. -- | This will fail or error only if allocation fails or a nullPtr is passed in. -- TODO :: Consider wrapMatchAll using list of start/end offsets and not MatchArray -- {- Copyright : (c) Chris Kuklewicz 2007 -} module Text.Regex.PCRE.Wrap( -- ** High-level interface Regex, CompOption(CompOption), ExecOption(ExecOption), (=~), (=~~), -- ** Low-level interface StartOffset, EndOffset, ReturnCode(ReturnCode), WrapError, wrapCompile, wrapTest, wrapMatch, wrapMatchAll, wrapCount, -- ** Miscellaneous getVersion, configUTF8, getNumSubs, unusedOffset, -- ** CompOption values compBlank, compAnchored, compAutoCallout, compCaseless, compDollarEndOnly, compDotAll, compExtended, compExtra, compFirstLine, compMultiline, compNoAutoCapture, compUngreedy, compUTF8, compNoUTF8Check, -- ** ExecOption values execBlank, execAnchored, execNotBOL, execNotEOL, execNotEmpty, execNoUTF8Check, execPartial, -- ** ReturnCode values retOk, retNoMatch, retNull, retBadOption, retBadMagic, retUnknownNode, retNoMemory, retNoSubstring ) where import Prelude hiding (fail) import Control.Monad.Fail (MonadFail(fail)) import Control.Monad(when) import Data.Array(Array,accumArray) import Data.Bits(Bits((.|.))) -- ((.&.),(.|.),complement)) import System.IO.Unsafe(unsafePerformIO) import Foreign(Ptr,ForeignPtr,FinalizerPtr -- ,FunPtr ,alloca,allocaBytes,nullPtr ,peek,peekElemOff ,newForeignPtr,withForeignPtr) import Foreign.C(CChar) #if __GLASGOW_HASKELL__ >= 703 import Foreign.C(CInt(CInt)) #else import Foreign.C(CInt) #endif import Foreign.C.String(CString,CStringLen,peekCString) import Text.Regex.Base.RegexLike(RegexOptions(..),RegexMaker(..),RegexContext(..),MatchArray,MatchOffset) -- | Version string of PCRE library -- -- __NOTE__: The 'Maybe' type is used for historic reasons; practically, 'getVersion' is never 'Nothing'. {-# NOINLINE getVersion #-} getVersion :: Maybe String type PCRE = () type StartOffset = MatchOffset type EndOffset = MatchOffset type WrapError = (ReturnCode,String) newtype CompOption = CompOption CInt deriving (Eq,Show,Num,Bits) newtype ExecOption = ExecOption CInt deriving (Eq,Show,Num,Bits) newtype ReturnCode = ReturnCode CInt deriving (Eq,Show) -- | A compiled regular expression data Regex = Regex (ForeignPtr PCRE) CompOption ExecOption compBlank :: CompOption execBlank :: ExecOption unusedOffset :: MatchOffset retOk :: ReturnCode wrapCompile :: CompOption -- ^ Flags (summed together) -> ExecOption -- ^ Flags (summed together) -> CString -- ^ The regular expression to compile -> IO (Either (MatchOffset,String) Regex) -- ^ Returns: an error offset and string or the compiled regular expression wrapTest :: StartOffset -- ^ Starting index in CStringLen -> Regex -- ^ Compiled regular expression -> CStringLen -- ^ String to match against and length in bytes -> IO (Either WrapError Bool) wrapMatch :: StartOffset -- ^ Starting index in CStringLen -> Regex -- ^ Compiled regular expression -> CStringLen -- ^ String to match against and length in bytes -> IO (Either WrapError (Maybe [(StartOffset,EndOffset)])) -- ^ Returns: 'Right Nothing' if the regex did not match the -- string, or: -- 'Right Just' an array of (offset,length) pairs where index 0 is whole match, and the rest are the captured subexpressions, or: -- 'Left ReturnCode' if there is some strange error wrapMatchAll :: Regex -> CStringLen -> IO (Either WrapError [ MatchArray ]) wrapCount :: Regex -> CStringLen -> IO (Either WrapError Int) getNumSubs :: Regex -> Int {-# NOINLINE configUTF8 #-} configUTF8 :: Bool (=~) :: (RegexMaker Regex CompOption ExecOption source,RegexContext Regex source1 target) => source1 -> source -> target (=~~) :: (RegexMaker Regex CompOption ExecOption source,RegexContext Regex source1 target,MonadFail m) => source1 -> source -> m target #include #include instance RegexOptions Regex CompOption ExecOption where blankCompOpt = compBlank blankExecOpt = execBlank defaultCompOpt = compMultiline defaultExecOpt = execBlank setExecOpts e' (Regex r c _) = Regex r c e' getExecOpts (Regex _ _ e) = e -- (=~) :: (RegexMaker Regex CompOption ExecOption source,RegexContext Regex source1 target) => source1 -> source -> target (=~) x r = let q :: Regex q = makeRegex r in match q x -- (=~~) ::(RegexMaker Regex CompOption ExecOption source,RegexContext Regex source1 target,MonadFail m) => source1 -> source -> m target (=~~) x r = do (q :: Regex) <- makeRegexM r matchM q x type PCRE_Extra = () fi :: (Integral i,Num n ) => i -> n fi x = fromIntegral x compBlank = CompOption 0 execBlank = ExecOption 0 unusedOffset = (-1) retOk = ReturnCode 0 retNeededMoreSpace :: ReturnCode retNeededMoreSpace = ReturnCode 0 newtype InfoWhat = InfoWhat CInt deriving (Eq,Show) newtype ConfigWhat = ConfigWhat CInt deriving (Eq,Show) nullTest' :: Ptr a -> String -> IO (Either (MatchOffset,String) b) -> IO (Either (MatchOffset,String) b) {-# INLINE nullTest' #-} nullTest' ptr msg io = do if nullPtr == ptr then return (Left (0,"Ptr parameter was nullPtr in Text.Regex.PCRE.Wrap."++msg)) else io nullTest :: Ptr a -> String -> IO (Either WrapError b) -> IO (Either WrapError b) {-# INLINE nullTest #-} nullTest ptr msg io = do if nullPtr == ptr then return (Left (retOk,"Ptr parameter was nullPtr in Text.Regex.PCRE.Wrap."++msg)) else io wrapRC :: ReturnCode -> IO (Either WrapError b) {-# INLINE wrapRC #-} wrapRC r = return (Left (r,"Error in Text.Regex.PCRE.Wrap: "++show r)) -- | Compiles a regular expression wrapCompile flags e pattern = do nullTest' pattern "wrapCompile pattern" $ do alloca $ \errOffset -> alloca $ \errPtr -> do nullTest' errPtr "wrapCompile errPtr" $ do pcre_ptr <- c_pcre_compile pattern flags errPtr errOffset nullPtr if pcre_ptr == nullPtr then do -- No need to use c_ptr_free in the error case (e.g. pcredemo.c) offset <- peek errOffset string <- peekCString =<< peek errPtr return (Left (fi offset,string)) else do regex <- newForeignPtr c_ptr_free pcre_ptr return . Right $ Regex regex flags e getNumSubs (Regex pcre_fptr _ _) = fi . unsafePerformIO $ withForeignPtr pcre_fptr getNumSubs' getNumSubs' :: Ptr PCRE -> IO CInt {-# INLINE getNumSubs' #-} getNumSubs' pcre_ptr = alloca $ \st -> do -- (st :: Ptr CInt) when (st == nullPtr) (fail "Text.Regex.PCRE.Wrap.getNumSubs' could not allocate a CInt!!!") ok0 <- c_pcre_fullinfo pcre_ptr nullPtr pcreInfoCapturecount st when (ok0 /= 0) (fail $ "Impossible/fatal: Haskell package regex-pcre error in Text.Posix.PCRE.Wrap.getNumSubs' of ok0 /= 0. ok0 is from pcre_fullinfo c-function which returned "++show ok0) peek st wrapTest startOffset (Regex pcre_fptr _ flags) (cstr,len) = do nullTest cstr "wrapTest cstr" $ do withForeignPtr pcre_fptr $ \pcre_ptr -> do r@(ReturnCode r') <- c_pcre_exec pcre_ptr nullPtr cstr (fi len) (fi startOffset) flags nullPtr 0 if r == retNoMatch then return (Right False) else if r' < 0 then wrapRC r else return (Right True) -- | Matches a regular expression against a string -- -- Should never return (Right (Just [])) wrapMatch startOffset (Regex pcre_fptr _ flags) (cstr,len) = do nullTest cstr "wrapMatch cstr" $ do withForeignPtr pcre_fptr $ \pcre_ptr -> do nsub <- getNumSubs' pcre_ptr let nsub_int :: Int nsub_int = fi nsub ovec_size :: CInt ovec_size = ((nsub + 1) * 3) -- "man pcreapi" for explanation ovec_bytes :: Int ovec_bytes = (fi ovec_size) * (#const sizeof(int)) allocaBytes ovec_bytes $ \ovec -> do nullTest ovec "wrapMatch ovec" $ do r@(ReturnCode r') <- c_pcre_exec pcre_ptr nullPtr cstr (fi len) (fi startOffset) flags ovec ovec_size if r == retNoMatch then return (Right Nothing) else if r' < 0 then wrapRC r else do let pairsSet :: Int pairsSet = if r == retNeededMoreSpace -- if r == ReturnCode 0 then nsub_int + 1 -- should not happen else fi r' -- implies pairsSet > 0 extraPairs :: [(Int,Int)] extraPairs = replicate (nsub_int + 1 - pairsSet) (unusedOffset,unusedOffset) pairs <- return . toPairs =<< mapM (peekElemOff ovec) [0 .. ((pairsSet*2)-1)] return . Right . Just $ (pairs ++ extraPairs) -- | wrapMatchAll is an improvement over wrapMatch since it only -- allocates memory with allocaBytes once at the start. -- -- wrapMatchAll (Regex pcre_fptr _ flags) (cstr,len) = do nullTest cstr "wrapMatchAll cstr" $ do withForeignPtr pcre_fptr $ \regex -> do nsub <- getNumSubs' regex let nsub_int :: Int nsub_int = fi nsub ovec_size :: CInt ovec_size = ((nsub + 1) * 3) -- "man pcreapi" for explanation ovec_bytes :: Int ovec_bytes = (fi ovec_size) * (#const sizeof(int)) clen = fi len flags' = (execNotEmpty .|. execAnchored .|. flags) allocaBytes ovec_bytes $ \ovec -> nullTest ovec "wrapMatchAll ovec" $ let loop acc flags_in_use pos = do r@(ReturnCode r') <- c_pcre_exec regex nullPtr cstr clen (fi pos) flags_in_use ovec ovec_size if r == retNoMatch then return (Right (acc [])) else if r' < 0 then wrapRC r else do let pairsSet = if r == retNeededMoreSpace then nsub_int+1 else fi r' pairs <- return . toPairs =<< mapM (peekElemOff ovec) [0 .. ((pairsSet*2)-1)] let acc' = acc . (toMatchArray nsub_int pairs:) case pairs of [] -> return (Right (acc' [])) ((s,e):_) | s==e -> if s == len then return (Right (acc' [])) else loop acc' flags' e | otherwise -> loop acc' flags e in loop id flags 0 toMatchArray :: Int -> [(Int,Int)] -> Array Int (Int,Int) toMatchArray n pairs = accumArray (\_ (s,e) -> (s,(e-s))) (-1,0) (0,n) (zip [0..] pairs) toPairs :: [CInt] -> [(Int,Int)] toPairs [] = [] toPairs (a:b:rest) = (fi a,fi b):toPairs rest toPairs [_] = error "Should not have just one element in WrapPCRE.wrapMatchAll.toPairs" wrapCount (Regex pcre_fptr _ flags) (cstr,len) = do nullTest cstr "wrapCount cstr" $ do withForeignPtr pcre_fptr $ \pcre_ptr -> do nsub <- getNumSubs' pcre_ptr let ovec_size :: CInt ovec_size = ((nsub + 1) * 3) -- "man pcreapi" for explanation ovec_bytes :: Int ovec_bytes = (fi ovec_size) * (#const sizeof(int)) clen = fi len allocaBytes ovec_bytes $ \ovec -> nullTest ovec "wrapCount ovec" $ let act pos = c_pcre_exec pcre_ptr nullPtr cstr clen (fi pos) flags ovec ovec_size loop acc pos | acc `seq` pos `seq` False = undefined | otherwise = do r@(ReturnCode r') <- act pos if r == retNoMatch then return (Right acc) else if r' < 0 then wrapRC r else do pairs <- return . toPairs =<< mapM (peekElemOff ovec) [0,1] case pairs of [] -> return (Right (succ acc)) ((s,e):_) | s==e -> return (Right (succ acc)) | otherwise -> loop (succ acc) e in loop 0 0 getVersion = unsafePerformIO $ do version <- c_pcre_version if version == nullPtr then return (Just "pcre_version was null") else return . Just =<< peekCString version configUTF8 = unsafePerformIO $ alloca $ \ptrVal -> do -- (ptrVal :: Ptr CInt) when (ptrVal == nullPtr) (fail "Text.Regex.PCRE.Wrap.configUTF8 could not alloca CInt!!!") _unicodeSupported <- c_pcre_config pcreConfigUtf8 ptrVal {- pcre_config: The output is an integer that is set to one if UTF-8 support is available; otherwise it is set to zero. -} val <- peek ptrVal case val of (1 :: CInt) -> return True 0 -> return False _ -> return False -- should not happen foreign import ccall unsafe "pcre.h pcre_compile" c_pcre_compile :: CString -> CompOption -> Ptr CString -> Ptr CInt -> CString -> IO (Ptr PCRE) foreign import ccall unsafe "&free" c_ptr_free :: FinalizerPtr a -- FunPtr (Ptr a -> IO ()) foreign import ccall unsafe "pcre.h pcre_exec" c_pcre_exec :: Ptr PCRE -> Ptr PCRE_Extra -> CString -> CInt -> CInt -> ExecOption -> Ptr CInt -> CInt -> IO ReturnCode foreign import ccall unsafe "pcre.h pcre_fullinfo" c_pcre_fullinfo :: Ptr PCRE -> Ptr PCRE_Extra -> InfoWhat -> Ptr a -> IO CInt foreign import ccall unsafe "pcre.h pcre_version" c_pcre_version :: IO (Ptr CChar) foreign import ccall unsafe "pcre.h pcre_config" c_pcre_config :: ConfigWhat -> Ptr a -> IO CInt #enum CompOption,CompOption, \ compAnchored = PCRE_ANCHORED, \ compAutoCallout = PCRE_AUTO_CALLOUT, \ compCaseless = PCRE_CASELESS, \ compDollarEndOnly = PCRE_DOLLAR_ENDONLY, \ compDotAll = PCRE_DOTALL, \ compExtended = PCRE_EXTENDED, \ compExtra = PCRE_EXTRA, \ compFirstLine = PCRE_FIRSTLINE, \ compMultiline = PCRE_MULTILINE, \ compNoAutoCapture = PCRE_NO_AUTO_CAPTURE, \ compUngreedy = PCRE_UNGREEDY, \ compUTF8 = PCRE_UTF8, \ compNoUTF8Check = PCRE_NO_UTF8_CHECK #enum ExecOption,ExecOption, \ execAnchored = PCRE_ANCHORED, \ execNotBOL = PCRE_NOTBOL, \ execNotEOL = PCRE_NOTEOL, \ execNotEmpty = PCRE_NOTEMPTY, \ execNoUTF8Check = PCRE_NO_UTF8_CHECK, \ execPartial = PCRE_PARTIAL #enum ReturnCode,ReturnCode, \ retNoMatch = PCRE_ERROR_NOMATCH, \ retNull = PCRE_ERROR_NULL, \ retBadOption = PCRE_ERROR_BADOPTION, \ retBadMagic = PCRE_ERROR_BADMAGIC, \ retUnknownNode = PCRE_ERROR_UNKNOWN_NODE, \ retNoMemory = PCRE_ERROR_NOMEMORY, \ retNoSubstring = PCRE_ERROR_NOSUBSTRING -- Comment out most of these to avoid unused binding warnings -- PCRE_INFO_FIRSTCHAR is deprecated, use PCRE_INFO_FIRSTBYTE instead. #enum InfoWhat,InfoWhat, \ PCRE_INFO_CAPTURECOUNT {- PCRE_INFO_BACKREFMAX, \ PCRE_INFO_DEFAULT_TABLES, \ PCRE_INFO_FIRSTBYTE, \ PCRE_INFO_FIRSTCHAR, \ PCRE_INFO_FIRSTTABLE, \ PCRE_INFO_LASTLITERAL, \ PCRE_INFO_NAMECOUNT, \ PCRE_INFO_NAMEENTRYSIZE, \ PCRE_INFO_NAMETABLE, \ PCRE_INFO_OPTIONS, \ PCRE_INFO_SIZE, \ PCRE_INFO_STUDYSIZE -} #enum ConfigWhat,ConfigWhat, \ PCRE_CONFIG_UTF8 {- PCRE_CONFIG_UNICODE_PROPERTIES, \ PCRE_CONFIG_NEWLINE, \ PCRE_CONFIG_LINK_SIZE, \ PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, \ PCRE_CONFIG_MATCH_LIMIT, \ PCRE_CONFIG_MATCH_LIMIT_RECURSION, \ PCRE_CONFIG_STACKRECURSE -}