regex-posix-0.96.0.0/0000755000000000000000000000000007346545000012407 5ustar0000000000000000regex-posix-0.96.0.0/ChangeLog.md0000755000000000000000000000025207346545000014562 0ustar0000000000000000See also http://pvp.haskell.org/faq ## 0.96.0.0 - Update to `regex-base-0.94.0.0` API - Compatibility with `base-4.13.0` - Remove internal regex C implementation ---- regex-posix-0.96.0.0/LICENSE0000644000000000000000000000274407346545000013423 0ustar0000000000000000This modile is under this "3 clause" BSD license: Copyright (c) 2007, Christopher Kuklewicz All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * The names of the contributors may not be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. regex-posix-0.96.0.0/Setup.hs0000644000000000000000000000005607346545000014044 0ustar0000000000000000import Distribution.Simple main = defaultMain regex-posix-0.96.0.0/cbits/0000755000000000000000000000000007346545000013513 5ustar0000000000000000regex-posix-0.96.0.0/cbits/myfree.c0000644000000000000000000000027507346545000015152 0ustar0000000000000000#include #include #include "myfree.h" /* void free(void *ptr); void regfree(regex_t *preg); */ void hs_regex_regfree(void *preg) { regfree(preg); free(preg); } regex-posix-0.96.0.0/cbits/myfree.h0000755000000000000000000000003707346545000015156 0ustar0000000000000000void hs_regex_regfree(void *); regex-posix-0.96.0.0/regex-posix.cabal0000644000000000000000000000460607346545000015653 0ustar0000000000000000cabal-version: 1.12 name: regex-posix version: 0.96.0.0 build-type: Simple license: BSD3 license-file: LICENSE copyright: Copyright (c) 2007-2010, Christopher Kuklewicz author: Christopher Kuklewicz maintainer: hvr@gnu.org bug-reports: https://github.com/hvr/regex-posix synopsis: POSIX Backend for "Text.Regex" (regex-base) category: Text description: The POSIX regex backend for . . The main appeal of this backend is that it's very lightweight due to its reliance on the ubiquitous facility that is provided by the standard C library on most POSIX platforms. . See also for more information. extra-source-files: ChangeLog.md cbits/myfree.h source-repository head type: git location: https://github.com/hvr/regex-posix.git flag _regex-posix-clib manual: False default: False description: Use package library hs-source-dirs: src exposed-modules: Text.Regex.Posix Text.Regex.Posix.Wrap Text.Regex.Posix.String Text.Regex.Posix.Sequence Text.Regex.Posix.ByteString Text.Regex.Posix.ByteString.Lazy other-modules: Paths_regex_posix c-sources: cbits/myfree.c include-dirs: cbits if flag(_regex-posix-clib) build-depends: regex-posix-clib == 2.7.* else -- use POSIX.2 regex implementation from @libc@ -- However, Windows/msys2 doesn't provide a POSIX.2 regex impl in its @libc@ if os(windows) build-depends: base<0 default-language: Haskell2010 default-extensions: MultiParamTypeClasses FunctionalDependencies ForeignFunctionInterface GeneralizedNewtypeDeriving FlexibleContexts TypeSynonymInstances FlexibleInstances build-depends: regex-base == 0.94.* , base >= 4.3 && < 4.14 , containers >= 0.4 && < 0.7 , bytestring >= 0.9 && < 0.11 , array >= 0.3 && < 0.6 if !impl(ghc >= 8) build-depends: fail == 4.9.* ghc-options: -O2 -Wall -fno-warn-unused-imports regex-posix-0.96.0.0/src/Text/Regex/0000755000000000000000000000000007346545000015174 5ustar0000000000000000regex-posix-0.96.0.0/src/Text/Regex/Posix.hs0000644000000000000000000000656207346545000016643 0ustar0000000000000000----------------------------------------------------------------------------- -- | -- -- Module : Text.Regex.Posix -- Copyright : (c) Chris Kuklewicz 2006 -- SPDX-License-Identifier: BSD-3-Clause -- -- Maintainer : hvr@gnu.org -- Stability : experimental -- Portability : non-portable (regex-base needs MPTC+FD) -- -- Module that provides the Regex backend that wraps the -- . -- This is the backend being used by the package to replace -- "Text.Regex". -- -- The "Text.Regex.Posix" module provides a backend for regular -- expressions. If you import this along with other backends, then -- you should do so with qualified imports, perhaps renamed for -- convenience. -- -- If the '=~' and '=~~' functions are too high level, you can use the -- compile, regexec, and execute functions from importing either -- "Text.Regex.Posix.String" or "Text.Regex.Posix.ByteString". If you -- want to use a low-level 'Foreign.C.CString' interface to the library, -- then import "Text.Regex.Posix.Wrap" and use the wrap* functions. -- -- This module is only efficient with 'Data.ByteString.ByteString' only -- if it is null terminated, i.e. @(Bytestring.last bs)==0@. Otherwise the -- library must make a temporary copy of the 'Data.ByteString.ByteString' -- and append the @NUL@ byte. -- -- A 'String' will be converted into a 'Foreign.C.CString' for processing. -- Doing this repeatedly will be very inefficient. -- -- Note that the posix library works with single byte characters, and -- does not understand Unicode. If you need Unicode support you will -- have to use a different backend. -- -- When offsets are reported for subexpression captures, a subexpression -- that did not match anything (as opposed to matching an empty string) -- will have its offset set to the 'unusedRegOffset' value, which is @(-1)@. -- -- Benchmarking shows the default regex library on many platforms is very -- inefficient. You might increase performace by an order of magnitude -- by obtaining @libpcre@ and -- or @libtre@ and . If you -- do not need the captured substrings then you can also get great -- performance from . If you do need the capture substrings -- then you may be able to use to improve performance. ----------------------------------------------------------------------------- module Text.Regex.Posix(getVersion_Text_Regex_Posix ,module Text.Regex.Base -- ** Wrap, for '=~' and '=~~', types and constants ,module Text.Regex.Posix.Wrap) where import Prelude hiding (fail) import Control.Monad.Fail (MonadFail) import Text.Regex.Posix.Wrap(Regex, CompOption(CompOption), ExecOption(ExecOption), (=~), (=~~), unusedRegOffset, compBlank, compExtended, compIgnoreCase, compNoSub, compNewline, execBlank, execNotBOL, execNotEOL) import Text.Regex.Posix.String() import Text.Regex.Posix.Sequence() import Text.Regex.Posix.ByteString() import Text.Regex.Posix.ByteString.Lazy() import Data.Version(Version(..)) import Text.Regex.Base import qualified Paths_regex_posix getVersion_Text_Regex_Posix :: Version getVersion_Text_Regex_Posix = Paths_regex_posix.version regex-posix-0.96.0.0/src/Text/Regex/Posix/0000755000000000000000000000000007346545000016276 5ustar0000000000000000regex-posix-0.96.0.0/src/Text/Regex/Posix/ByteString.hs0000644000000000000000000001333307346545000020727 0ustar0000000000000000{-# OPTIONS_GHC -fno-warn-orphans #-} ----------------------------------------------------------------------------- -- | -- Module : Text.Regex.Posix.ByteString -- Copyright : (c) Chris Kuklewicz 2006 -- SPDX-License-Identifier: BSD-3-Clause -- -- Maintainer : hvr@gnu.org -- Stability : experimental -- Portability : non-portable (regex-base needs MPTC+FD) -- -- This provides 'ByteString' instances for RegexMaker and RegexLike -- based on "Text.Regex.Posix.Wrap", and a (RegexContext Regex -- ByteString ByteString) instance. -- -- To use these instance, you would normally import -- "Text.Regex.Posix". You only need to import this module to use -- the medium level API of the compile, regexec, and execute -- functions. All of these report error by returning Left values -- instead of undefined or error or fail. -- -- The ByteString will only be passed to the library efficiently (as a -- pointer) if it ends in a NUL byte. Otherwise a temporary copy must -- be made with the 0 byte appended. ----------------------------------------------------------------------------- module Text.Regex.Posix.ByteString( -- ** Types Regex, MatchOffset, MatchLength, ReturnCode, WrapError, -- ** Miscellaneous unusedOffset, -- ** Medium level API functions compile, execute, regexec, -- ** Compilation options CompOption(CompOption), compBlank, compExtended, -- use extended regex syntax compIgnoreCase, -- ignore case when matching compNoSub, -- no substring matching needed compNewline, -- '.' doesn't match newline -- ** Execution options ExecOption(ExecOption), execBlank, execNotBOL, -- not at begining of line execNotEOL -- not at end of line ) where import Prelude hiding (fail) import Control.Monad.Fail (MonadFail(fail)) import Data.Array(Array,listArray) import Data.ByteString(ByteString) import qualified Data.ByteString as B(empty,useAsCString,last,take,drop,null) import qualified Data.ByteString.Unsafe as B(unsafeUseAsCString) import System.IO.Unsafe(unsafePerformIO) import Text.Regex.Base.RegexLike(RegexMaker(..),RegexContext(..),RegexLike(..),MatchOffset,MatchLength) import Text.Regex.Posix.Wrap -- all import Text.Regex.Base.Impl(polymatch,polymatchM) import Foreign.C.String(CString) instance RegexContext Regex ByteString ByteString where match = polymatch matchM = polymatchM unwrap :: (Show e) => Either e v -> IO v unwrap x = case x of Left err -> fail ("Text.Regex.Posix.ByteString died: "++ show err) Right v -> return v instance RegexMaker Regex CompOption ExecOption ByteString where makeRegexOpts c e pattern = unsafePerformIO $ compile c e pattern >>= unwrap makeRegexOptsM c e pattern = either (fail.show) return $ unsafePerformIO $ compile c e pattern instance RegexLike Regex ByteString where matchTest regex bs = unsafePerformIO $ asCString bs (wrapTest regex) >>= unwrap matchOnce regex bs = unsafePerformIO $ execute regex bs >>= unwrap matchAll regex bs = unsafePerformIO $ asCString bs (wrapMatchAll regex) >>= unwrap matchCount regex bs = unsafePerformIO $ asCString bs (wrapCount regex) >>= unwrap -- --------------------------------------------------------------------- -- | Compiles a regular expression -- compile :: CompOption -- ^ Flags (summed together) -> ExecOption -- ^ Flags (summed together) -> ByteString -- ^ The regular expression to compile -> IO (Either WrapError Regex) -- ^ Returns: the compiled regular expression compile c e pattern = asCString pattern (wrapCompile c e) -- --------------------------------------------------------------------- -- | Matches a regular expression against a buffer, returning the buffer -- indicies of the match, and any submatches -- -- | Matches a regular expression against a string execute :: Regex -- ^ Compiled regular expression -> ByteString -- ^ String to match against -> IO (Either WrapError (Maybe (Array Int (MatchOffset,MatchLength)))) -- ^ Returns: 'Nothing' if the regex did not match the -- string, or: -- 'Just' an array of (offset,length) pairs where index 0 is whole match, and the rest are the captured subexpressions. execute regex bs = do maybeStartEnd <- asCString bs (wrapMatch regex) case maybeStartEnd of Right Nothing -> return (Right Nothing) -- Right (Just []) -> ... Right (Just parts) -> return . Right . Just . listArray (0,pred (length parts)) . map (\(s,e)->(fromIntegral s, fromIntegral (e-s))) $ parts Left err -> return (Left err) regexec :: Regex -- ^ Compiled regular expression -> ByteString -- ^ String to match against -> IO (Either WrapError (Maybe (ByteString, ByteString, ByteString, [ByteString]))) regexec regex bs = do let getSub (start,stop) | start == unusedRegOffset = B.empty | otherwise = B.take (fi (stop-start)) . B.drop (fi start) $ bs matchedParts [] = (B.empty,B.empty,bs,[]) -- no information matchedParts (matchedStartStop@(start,stop):subStartStop) = (B.take (fi start) bs ,getSub matchedStartStop ,B.drop (fi stop) bs ,map getSub subStartStop) maybeStartEnd <- asCString bs (wrapMatch regex) case maybeStartEnd of Right Nothing -> return (Right Nothing) -- Right (Just []) -> ... Right (Just parts) -> return . Right . Just . matchedParts $ parts Left err -> return (Left err) unusedOffset :: Int unusedOffset = fromIntegral unusedRegOffset fi :: (Integral i,Num n) => i->n fi = fromIntegral asCString :: ByteString -> (CString -> IO a) -> IO a asCString bs = if (not (B.null bs)) && (0==B.last bs) then B.unsafeUseAsCString bs else B.useAsCString bs regex-posix-0.96.0.0/src/Text/Regex/Posix/ByteString/0000755000000000000000000000000007346545000020370 5ustar0000000000000000regex-posix-0.96.0.0/src/Text/Regex/Posix/ByteString/Lazy.hs0000644000000000000000000001306307346545000021646 0ustar0000000000000000{-# OPTIONS_GHC -fno-warn-orphans #-} ----------------------------------------------------------------------------- -- | -- Module : Text.Regex.Posix.ByteString.Lazy -- Copyright : (c) Chris Kuklewicz 2007 -- SPDX-License-Identifier: BSD-3-Clause -- -- Maintainer : hvr@gnu.org -- Stability : experimental -- Portability : non-portable (regex-base needs MPTC+FD) -- -- This provides 'ByteString.Lazy' instances for RegexMaker and RegexLike -- based on "Text.Regex.Posix.Wrap", and a (RegexContext Regex -- ByteString ByteString) instance. -- -- To use these instance, you would normally import -- "Text.Regex.Posix". You only need to import this module to use -- the medium level API of the compile, regexec, and execute -- functions. All of these report error by returning Left values -- instead of undefined or error or fail. -- -- A Lazy ByteString with more than one chunk cannot be be passed to -- the library efficiently (as a pointer). It will have to converted -- via a full copy to a temporary normal bytestring (with a null byte -- appended if necessary). ----------------------------------------------------------------------------- module Text.Regex.Posix.ByteString.Lazy( -- ** Types Regex, MatchOffset, MatchLength, ReturnCode, WrapError, -- ** Miscellaneous unusedOffset, -- ** Medium level API functions compile, execute, regexec, -- ** Compilation options CompOption(CompOption), compBlank, compExtended, -- use extended regex syntax compIgnoreCase, -- ignore case when matching compNoSub, -- no substring matching needed compNewline, -- '.' doesn't match newline -- ** Execution options ExecOption(ExecOption), execBlank, execNotBOL, -- not at begining of line execNotEOL -- not at end of line ) where import Prelude hiding (fail) import Control.Monad.Fail (MonadFail(fail)) import Data.Array(Array) import qualified Data.ByteString.Lazy as L (ByteString,null,toChunks,fromChunks,last,snoc) import qualified Data.ByteString as B(ByteString,concat) import qualified Data.ByteString.Unsafe as B(unsafeUseAsCString) import System.IO.Unsafe(unsafePerformIO) import Text.Regex.Base.RegexLike(RegexMaker(..),RegexContext(..),RegexLike(..),MatchOffset,MatchLength) import Text.Regex.Posix.Wrap -- all import qualified Text.Regex.Posix.ByteString as BS(execute,regexec) import Text.Regex.Base.Impl(polymatch,polymatchM) import Foreign.C.String(CString) instance RegexContext Regex L.ByteString L.ByteString where match = polymatch matchM = polymatchM fromLazy :: L.ByteString -> B.ByteString fromLazy = B.concat . L.toChunks toLazy :: B.ByteString -> L.ByteString toLazy = L.fromChunks . return unwrap :: (Show e) => Either e v -> IO v unwrap x = case x of Left err -> fail ("Text.Regex.Posix.ByteString.Lazy died: "++ show err) Right v -> return v {-# INLINE asCString #-} asCString :: L.ByteString -> (CString -> IO a) -> IO a asCString s = if (not (L.null s)) && (0==L.last s) then B.unsafeUseAsCString (fromLazy s) else B.unsafeUseAsCString (fromLazy (L.snoc s 0)) instance RegexMaker Regex CompOption ExecOption L.ByteString where makeRegexOpts c e pattern = unsafePerformIO $ compile c e pattern >>= unwrap makeRegexOptsM c e pattern = either (fail.show) return $ unsafePerformIO $ compile c e pattern instance RegexLike Regex L.ByteString where matchTest regex bs = unsafePerformIO $ asCString bs (wrapTest regex) >>= unwrap matchOnce regex bs = unsafePerformIO $ execute regex bs >>= unwrap matchAll regex bs = unsafePerformIO $ asCString bs (wrapMatchAll regex) >>= unwrap matchCount regex bs = unsafePerformIO $ asCString bs (wrapCount regex) >>= unwrap -- --------------------------------------------------------------------- -- | Compiles a regular expression -- compile :: CompOption -- ^ Flags (summed together) -> ExecOption -- ^ Flags (summed together) -> L.ByteString -- ^ The regular expression to compile -> IO (Either WrapError Regex) -- ^ Returns: the compiled regular expression compile c e pattern = asCString pattern (wrapCompile c e) -- --------------------------------------------------------------------- -- | Matches a regular expression against a buffer, returning the buffer -- indicies of the match, and any submatches -- -- | Matches a regular expression against a string execute :: Regex -- ^ Compiled regular expression -> L.ByteString -- ^ String to match against -> IO (Either WrapError (Maybe (Array Int (MatchOffset,MatchLength)))) -- ^ Returns: 'Nothing' if the regex did not match the -- string, or: -- 'Just' an array of (offset,length) pairs where index 0 is whole match, and the rest are the captured subexpressions. execute regex bs = if (not (L.null bs)) && (0==L.last bs) then BS.execute regex (fromLazy bs) else BS.execute regex (fromLazy (L.snoc bs 0)) regexec :: Regex -- ^ Compiled regular expression -> L.ByteString -- ^ String to match against -> IO (Either WrapError (Maybe (L.ByteString, L.ByteString, L.ByteString, [L.ByteString]))) regexec regex bs = do x <- if (not (L.null bs)) && (0==L.last bs) then BS.regexec regex (fromLazy bs) else BS.regexec regex (fromLazy (L.snoc bs 0)) return $ case x of Left e -> Left e Right Nothing -> Right Nothing Right (Just (a,b,c,ds)) -> Right (Just (toLazy a,toLazy b,toLazy c,map toLazy ds)) unusedOffset :: Int unusedOffset = fromIntegral unusedRegOffset regex-posix-0.96.0.0/src/Text/Regex/Posix/Sequence.hs0000644000000000000000000001442607346545000020411 0ustar0000000000000000{-# OPTIONS_GHC -fno-warn-orphans #-} ----------------------------------------------------------------------------- -- | -- Module : Text.Regex.Posix.Sequence -- Copyright : (c) Chris Kuklewicz 2006 -- SPDX-License-Identifier: BSD-3-Clause -- -- Maintainer : hvr@gnu.org -- Stability : experimental -- Portability : non-portable (regex-base needs MPTC+FD) -- -- This provides 'String' instances for 'RegexMaker' and 'RegexLike' based -- on "Text.Regex.Posix.Wrap", and a ('RegexContext' 'Regex' 'String' 'String') -- instance. -- -- To use these instance, you would normally import -- "Text.Regex.Posix". You only need to import this module to use -- the medium level API of the compile, regexec, and execute -- functions. All of these report error by returning Left values -- instead of undefined or error or fail. -- ----------------------------------------------------------------------------- module Text.Regex.Posix.Sequence( -- ** Types Regex, MatchOffset, MatchLength, ReturnCode, WrapError, -- ** Miscellaneous unusedOffset, -- ** Medium level API functions compile, regexec, execute, -- ** Compilation options CompOption(CompOption), compBlank, compExtended, -- use extended regex syntax compIgnoreCase, -- ignore case when matching compNoSub, -- no substring matching needed compNewline, -- '.' doesn't match newline ExecOption(ExecOption), execBlank, execNotBOL, -- not at begining of line execNotEOL -- not at end of line ) where import Prelude hiding (fail) import Control.Monad.Fail (MonadFail(fail)) import Data.Array(listArray, Array) import System.IO.Unsafe(unsafePerformIO) import Text.Regex.Base.RegexLike(RegexContext(..),RegexMaker(..),RegexLike(..),MatchOffset,MatchLength,Extract(..)) import Text.Regex.Posix.Wrap import Text.Regex.Base.Impl(polymatch,polymatchM) import Data.Sequence as S hiding (length) import qualified Data.Sequence as S (length) import Foreign.C.String import Foreign.Marshal.Array import Foreign.Marshal.Alloc import Foreign.Storable instance RegexContext Regex (Seq Char) (Seq Char) where match = polymatch matchM = polymatchM unusedOffset :: Int unusedOffset = fromIntegral unusedRegOffset unwrap :: (Show e) => Either e v -> IO v unwrap x = case x of Left err -> fail ("Text.Regex.Posix.Sequence died: "++ show err) Right v -> return v instance RegexMaker Regex CompOption ExecOption (Seq Char) where makeRegexOpts c e pattern = unsafePerformIO $ (compile c e pattern >>= unwrap) makeRegexOptsM c e pattern = either (fail.show) return $ unsafePerformIO $ (compile c e pattern) instance RegexLike Regex (Seq Char) where matchTest regex str = unsafePerformIO $ do withSeq str (wrapTest regex) >>= unwrap matchOnce regex str = unsafePerformIO $ execute regex str >>= unwrap matchAll regex str = unsafePerformIO $ withSeq str (wrapMatchAll regex) >>= unwrap matchCount regex str = unsafePerformIO $ withSeq str (wrapCount regex) >>= unwrap -- compile compile :: CompOption -- ^ Flags (summed together) -> ExecOption -- ^ Flags (summed together) -> (Seq Char) -- ^ The regular expression to compile (ASCII only, no null bytes) -> IO (Either WrapError Regex) -- ^ Returns: the compiled regular expression compile flags e pattern = withSeq pattern (wrapCompile flags e) -- ----------------------------------------------------------------------------- -- regexec -- | Matches a regular expression against a string execute :: Regex -- ^ Compiled regular expression -> (Seq Char) -- ^ (Seq Char) to match against -> IO (Either WrapError (Maybe (Array Int (MatchOffset,MatchLength)))) -- ^ Returns: 'Nothing' if the regex did not match the -- string, or: -- -- @ -- 'Just' (array of offset length pairs) -- @ execute regex str = do maybeStartEnd <- withSeq str (wrapMatch regex) case maybeStartEnd of Right Nothing -> return (Right Nothing) -- Right (Just []) -> fail "got [] back!" -- return wierd array instead Right (Just parts) -> return . Right . Just . listArray (0,pred (length parts)) . map (\(s,e)->(fromIntegral s, fromIntegral (e-s))) $ parts Left err -> return (Left err) -- ----------------------------------------------------------------------------- -- regexec -- | Matches a regular expression against a string regexec :: Regex -- ^ Compiled regular expression -> (Seq Char) -- ^ (Seq Char) to match against -> IO (Either WrapError (Maybe ((Seq Char), (Seq Char), (Seq Char), [(Seq Char)]))) -- ^ Returns: 'Nothing' if the regex did not match the -- string, or: -- -- @ -- 'Just' (everything before match, -- matched portion, -- everything after match, -- subexpression matches) -- @ regexec regex str = do let getSub :: (RegOffset,RegOffset) -> (Seq Char) getSub (start,stop) | start == unusedRegOffset = S.empty | otherwise = extract (fromEnum start,fromEnum $ stop-start) $ str matchedParts :: [(RegOffset,RegOffset)] -> ((Seq Char), (Seq Char), (Seq Char), [(Seq Char)]) matchedParts [] = (str,S.empty,S.empty,[]) -- no information matchedParts (matchedStartStop@(start,stop):subStartStop) = (before (fromEnum start) str ,getSub matchedStartStop ,after (fromEnum stop) str ,map getSub subStartStop) maybeStartEnd <- withSeq str (wrapMatch regex) case maybeStartEnd of Right Nothing -> return (Right Nothing) Right (Just parts) -> return . Right . Just . matchedParts $ parts Left err -> return (Left err) withSeq :: Seq Char -> (CString -> IO a) -> IO a withSeq s f = let -- Ensure null at end of s s' = case viewr s of -- bang !s EmptyR -> singleton '\0' _ :> '\0' -> s _ -> s |> '\0' pokes p a = case viewl a of -- bang pokes !p !a EmptyL -> return () c :< a' -> poke p (castCharToCChar c) >> pokes (advancePtr p 1) a' in allocaBytes (S.length s') (\ptr -> pokes ptr s' >> f ptr) regex-posix-0.96.0.0/src/Text/Regex/Posix/String.hs0000644000000000000000000001276207346545000020110 0ustar0000000000000000{-# OPTIONS_GHC -fno-warn-orphans #-} ----------------------------------------------------------------------------- -- | -- Module : Text.Regex.Posix.String -- Copyright : (c) Chris Kuklewicz 2006 -- SPDX-License-Identifier: BSD-3-Clause -- -- Maintainer : hvr@gnu.org -- Stability : experimental -- Portability : non-portable (regex-base needs MPTC+FD) -- -- This provides 'String' instances for 'RegexMaker' and 'RegexLike' based -- on "Text.Regex.Posix.Wrap", and a ('RegexContext' 'Regex' 'String' 'String') -- instance. -- -- To use these instance, you would normally import -- "Text.Regex.Posix". You only need to import this module to use -- the medium level API of the compile, regexec, and execute -- functions. All of these report error by returning Left values -- instead of undefined or error or fail. -- ----------------------------------------------------------------------------- module Text.Regex.Posix.String( -- ** Types Regex, MatchOffset, MatchLength, ReturnCode, WrapError, -- ** Miscellaneous unusedOffset, -- ** Medium level API functions compile, regexec, execute, -- ** Compilation options CompOption(CompOption), compBlank, compExtended, -- use extended regex syntax compIgnoreCase, -- ignore case when matching compNoSub, -- no substring matching needed compNewline, -- '.' doesn't match newline -- ** Execution options ExecOption(ExecOption), execBlank, execNotBOL, -- not at begining of line execNotEOL -- not at end of line ) where import Prelude hiding (fail) import Control.Monad.Fail (MonadFail(fail)) import Data.Array(listArray, Array) import Data.List(genericDrop, genericTake) import Foreign.C.String(withCAString) import System.IO.Unsafe(unsafePerformIO) import Text.Regex.Base.RegexLike(RegexContext(..),RegexMaker(..),RegexLike(..),MatchOffset,MatchLength) import Text.Regex.Posix.Wrap import Text.Regex.Base.Impl(polymatch,polymatchM) instance RegexContext Regex String String where match = polymatch matchM = polymatchM unusedOffset :: Int unusedOffset = fromIntegral unusedRegOffset unwrap :: (Show e) => Either e v -> IO v unwrap x = case x of Left err -> fail ("Text.Regex.Posix.String died: "++ show err) Right v -> return v instance RegexMaker Regex CompOption ExecOption String where makeRegexOpts c e pattern = unsafePerformIO $ (compile c e pattern >>= unwrap) makeRegexOptsM c e pattern = either (fail.show) return $ unsafePerformIO $ (compile c e pattern) instance RegexLike Regex String where matchTest regex str = unsafePerformIO $ do withCAString str (wrapTest regex) >>= unwrap matchOnce regex str = unsafePerformIO $ execute regex str >>= unwrap matchAll regex str = unsafePerformIO $ withCAString str (wrapMatchAll regex) >>= unwrap matchCount regex str = unsafePerformIO $ withCAString str (wrapCount regex) >>= unwrap -- compile compile :: CompOption -- ^ Flags (summed together) -> ExecOption -- ^ Flags (summed together) -> String -- ^ The regular expression to compile (ASCII only, no null bytes) -> IO (Either WrapError Regex) -- ^ Returns: the compiled regular expression compile flags e pattern = withCAString pattern (wrapCompile flags e) -- ----------------------------------------------------------------------------- -- regexec -- | Matches a regular expression against a string execute :: Regex -- ^ Compiled regular expression -> String -- ^ String to match against -> IO (Either WrapError (Maybe (Array Int (MatchOffset,MatchLength)))) -- ^ Returns: 'Nothing' if the regex did not match the -- string, or: -- -- @ -- 'Just' (array of offset length pairs) -- @ execute regex str = do maybeStartEnd <- withCAString str (wrapMatch regex) case maybeStartEnd of Right Nothing -> return (Right Nothing) -- Right (Just []) -> fail "got [] back!" -- return wierd array instead Right (Just parts) -> return . Right . Just . listArray (0,pred (length parts)) . map (\(s,e)->(fromIntegral s, fromIntegral (e-s))) $ parts Left err -> return (Left err) -- ----------------------------------------------------------------------------- -- regexec -- | Matches a regular expression against a string regexec :: Regex -- ^ Compiled regular expression -> String -- ^ String to match against -> IO (Either WrapError (Maybe (String, String, String, [String]))) -- ^ Returns: 'Nothing' if the regex did not match the -- string, or: -- -- @ -- 'Just' (everything before match, -- matched portion, -- everything after match, -- subexpression matches) -- @ regexec regex str = do let getSub (start,stop) | start == unusedRegOffset = "" | otherwise = genericTake (stop-start) . genericDrop start $ str matchedParts [] = (str,"","",[]) -- no information matchedParts (matchedStartStop@(start,stop):subStartStop) = (genericTake start str ,getSub matchedStartStop ,genericDrop stop str ,map getSub subStartStop) maybeStartEnd <- withCAString str (wrapMatch regex) case maybeStartEnd of Right Nothing -> return (Right Nothing) Right (Just parts) -> return . Right . Just . matchedParts $ parts Left err -> return (Left err) regex-posix-0.96.0.0/src/Text/Regex/Posix/Wrap.hsc0000644000000000000000000005363107346545000017716 0ustar0000000000000000{-# OPTIONS_GHC -Wall -fno-warn-unused-imports #-} ----------------------------------------------------------------------------- -- | -- Module : Text.Regex.Posix.Wrap -- Copyright : (c) Chris Kuklewicz 2006,2007,2008 derived from (c) The University of Glasgow 2002 -- SPDX-License-Identifier: BSD-3-Clause -- -- Maintainer : hvr@gnu.org -- Stability : experimental -- Portability : non-portable (regex-base needs MPTC+FD) -- -- WrapPosix.hsc exports a wrapped version of the ffi imports. To -- increase type safety, the flags are newtype'd. The other important -- export is a 'Regex' type that is specific to the Posix library -- backend. The flags are documented in "Text.Regex.Posix". The -- 'defaultCompOpt' is @(compExtended .|. compNewline)@. -- -- The 'Regex', 'CompOption', and 'ExecOption' types and their 'RegexOptions' -- instance is declared. The '=~' and '=~~' convenience functions are -- defined. -- -- This module will fail or error only if allocation fails or a nullPtr -- is passed in. -- -- 2009-January : wrapMatchAll and wrapCount now adjust the execution -- option execNotBOL after the first result to take into account '\n' -- in the text immediately before the next matches. (version 0.93.3) -- -- 2009-January : wrapMatchAll and wrapCount have been changed to -- return all non-overlapping matches, including empty matches even if -- they coincide with the end of the previous non-empty match. The -- change is that the first non-empty match no longer terminates the -- search. One can filter the results to obtain the old behavior or -- to obtain the behavior of "sed", where "sed" eliminates the empty -- matches which coincide with the end of non-empty matches. (version -- 0.94.0) ----------------------------------------------------------------------------- module Text.Regex.Posix.Wrap( -- ** High-level API Regex, RegOffset, RegOffsetT, (=~), (=~~), -- ** Low-level API WrapError, wrapCompile, wrapTest, wrapMatch, wrapMatchAll, wrapCount, -- ** Miscellaneous unusedRegOffset, -- ** Compilation options CompOption(CompOption), compBlank, compExtended, -- use extended regex syntax compIgnoreCase, -- ignore case when matching compNoSub, -- no substring matching needed compNewline, -- '.' doesn't match newline -- ** Execution options ExecOption(ExecOption), execBlank, execNotBOL, -- not at begining of line execNotEOL, -- not at end of line -- ** Return codes ReturnCode(ReturnCode), retBadbr, retBadpat, retBadrpt, retEcollate, retEctype, retEescape, retEsubreg, retEbrack, retEparen, retEbrace, retErange, retEspace ) where #include #include #ifndef _POSIX_C_SOURCE #define _POSIX_C_SOURCE 1 #endif #include #include "myfree.h" import Prelude hiding (fail) import Control.Monad.Fail (MonadFail) import Control.Monad(liftM) import Data.Array(Array,listArray) import Data.Bits(Bits(..)) import Data.Int(Int32,Int64) -- need whatever RegeOffset or #regoff_t type will be import Data.Word(Word32,Word64) -- need whatever RegeOffset or #regoff_t type will be import Foreign(Ptr, FunPtr, nullPtr, newForeignPtr, addForeignPtrFinalizer, Storable(peekByteOff), allocaArray, allocaBytes, withForeignPtr,ForeignPtr,plusPtr,peekElemOff) import Foreign.Marshal.Alloc(mallocBytes) import Foreign.C(CChar) #if __GLASGOW_HASKELL__ >= 703 import Foreign.C(CSize(CSize),CInt(CInt)) #else import Foreign.C(CSize,CInt) #endif import Foreign.C.String(peekCAString, CString) import Text.Regex.Base.RegexLike(RegexOptions(..),RegexMaker(..),RegexContext(..),MatchArray) -- deprecated: import qualified System.IO.Error as IOERROR(try) import qualified Control.Exception(try,IOException) try :: IO a -> IO (Either Control.Exception.IOException a) try = Control.Exception.try data CRegex -- pointer tag for regex_t C type -- | RegOffset is "typedef int regoff_t" on Linux and ultimately "typedef -- long long __int64_t" on Max OS X. So rather than saying -- 2,147,483,647 is all the length you need, I'll take the larger: -- 9,223,372,036,854,775,807 should be enough bytes for anyone, no -- need for Integer. The alternative is to compile to different sizes -- in a platform dependent manner with "type RegOffset = (#type -- regoff_t)", which I do not want to do. -- -- There is also a special value 'unusedRegOffset' :: 'RegOffset' which is -- (-1) and as a starting index means that the subgroup capture was -- unused. Otherwise the RegOffset indicates a character boundary that -- is before the character at that index offset, with the first -- character at index offset 0. So starting at 1 and ending at 2 means -- to take only the second character. type RegOffset = Int64 --debugging 64-bit ubuntu type RegOffsetT = (#type regoff_t) -- | A bitmapped 'CInt' containing options for compilation of regular -- expressions. Option values (and their man 3 regcomp names) are -- -- * 'compBlank' which is a completely zero value for all the flags. -- This is also the 'blankCompOpt' value. -- -- * 'compExtended' (REG_EXTENDED) which can be set to use extended instead -- of basic regular expressions. -- This is set in the 'defaultCompOpt' value. -- -- * 'compNewline' (REG_NEWLINE) turns on newline sensitivity: The dot (.) -- and inverted set @[^ ]@ never match newline, and ^ and $ anchors do -- match after and before newlines. -- This is set in the 'defaultCompOpt' value. -- -- * 'compIgnoreCase' (REG_ICASE) which can be set to match ignoring upper -- and lower distinctions. -- -- * 'compNoSub' (REG_NOSUB) which turns off all information from matching -- except whether a match exists. newtype CompOption = CompOption CInt deriving (Eq,Show,Num,Bits) -- | A bitmapped 'CInt' containing options for execution of compiled -- regular expressions. Option values (and their man 3 regexec names) are -- -- * 'execBlank' which is a complete zero value for all the flags. This is -- the blankExecOpt value. -- -- * 'execNotBOL' (REG_NOTBOL) can be set to prevent ^ from matching at the -- start of the input. -- -- * 'execNotEOL' (REG_NOTEOL) can be set to prevent $ from matching at the -- end of the input (before the terminating NUL). newtype ExecOption = ExecOption CInt deriving (Eq,Show,Num,Bits) -- | ReturnCode is an enumerated 'CInt', corresponding to the error codes -- from @man 3 regex@: -- -- * 'retBadbr' (@REG_BADBR@) invalid repetition count(s) in @{ }@ -- -- * 'retBadpat' (@REG_BADPAT@) invalid regular expression -- -- * 'retBadrpt' (@REG_BADRPT@) @?@, @*@, or @+@ operand invalid -- -- * 'retEcollate' (@REG_ECOLLATE@) invalid collating element -- -- * 'retEctype' (@REG_ECTYPE@) invalid character class -- -- * 'retEescape' (@REG_EESCAPE@) @\\@ applied to unescapable character -- -- * 'retEsubreg' (@REG_ESUBREG@) invalid backreference number -- -- * 'retEbrack' (@REG_EBRACK@) brackets @[ ]@ not balanced -- -- * 'retEparen' (@REG_EPAREN@) parentheses @( )@ not balanced -- -- * 'retEbrace' (@REG_EBRACE@) braces @{ }@ not balanced -- -- * 'retErange' (@REG_ERANGE@) invalid character range in @[ ]@ -- -- * 'retEspace' (@REG_ESPACE@) ran out of memory -- -- * 'retNoMatch' (@REG_NOMATCH@) The regexec() function failed to match -- newtype ReturnCode = ReturnCode CInt deriving (Eq,Show) -- | A compiled regular expression. data Regex = Regex (ForeignPtr CRegex) CompOption ExecOption -- | A completely zero value for all the flags. -- This is also the 'blankCompOpt' value. compBlank :: CompOption compBlank = CompOption 0 -- | A completely zero value for all the flags. -- This is also the 'blankExecOpt' value. execBlank :: ExecOption execBlank = ExecOption 0 unusedRegOffset :: RegOffset unusedRegOffset = (-1) -- | The return code will be retOk when it is the Haskell wrapper and -- not the underlying library generating the error message. type WrapError = (ReturnCode,String) wrapCompile :: CompOption -- ^ Flags (bitmapped) -> ExecOption -- ^ Flags (bitmapped) -> CString -- ^ The regular expression to compile (ASCII only, no null bytes) -> IO (Either WrapError Regex) -- ^ Returns: the compiled regular expression wrapTest :: Regex -> CString -> IO (Either WrapError Bool) -- | wrapMatch returns offsets for the begin and end of each capture. -- Unused captures have offsets of unusedRegOffset which is (-1) wrapMatch :: Regex -> CString -> IO (Either WrapError (Maybe [(RegOffset,RegOffset)])) -- | wrapMatchAll returns the offset and length of each capture. -- Unused captures have an offset of unusedRegOffset which is (-1) and -- length of 0. wrapMatchAll :: Regex -> CString -> IO (Either WrapError [MatchArray]) wrapCount :: Regex -> CString -> IO (Either WrapError Int) (=~) :: (RegexMaker Regex CompOption ExecOption source,RegexContext Regex source1 target) => source1 -> source -> target (=~~) :: (RegexMaker Regex CompOption ExecOption source,RegexContext Regex source1 target,MonadFail m) => source1 -> source -> m target instance RegexOptions Regex CompOption ExecOption where blankCompOpt = compBlank blankExecOpt = execBlank defaultCompOpt = compExtended .|. compNewline defaultExecOpt = execBlank setExecOpts e' (Regex r c _) = Regex r c e' getExecOpts (Regex _ _ e) = e -- (=~) ::(RegexMaker Regex CompOption ExecOption source,RegexContext Regex source1 target) => source1 -> source -> target (=~) x r = let make :: RegexMaker Regex CompOption ExecOption a => a -> Regex make = makeRegex in match (make r) x -- (=~~) ::(RegexMaker Regex CompOption ExecOption source,RegexContext Regex source1 target,MonadFail m) => source1 -> source -> m target (=~~) x r = let make :: RegexMaker Regex CompOption ExecOption a => a -> Regex make = makeRegex in matchM (make r) x type CRegMatch = () -- dummy regmatch_t used below to read out so and eo values -- ----------------------------------------------------------------------------- -- The POSIX regex C interface -- string.h foreign import ccall unsafe "memset" c_memset :: Ptr CRegex -> CInt -> CSize -> IO (Ptr CRegex) -- cbits/myfree.h and cbits/myfree.c foreign import ccall unsafe "&hs_regex_regfree" c_myregfree :: FunPtr (Ptr CRegex -> IO ()) foreign import ccall unsafe "regex.h regcomp" c_regcomp :: Ptr CRegex -> CString -> CompOption -> IO ReturnCode {- currently unused foreign import ccall unsafe "regex.h ®free" c_regfree :: FunPtr (Ptr CRegex -> IO ()) -} foreign import ccall unsafe "regex.h regexec" c_regexec :: Ptr CRegex -> CString -> CSize -> Ptr CRegMatch -> ExecOption -> IO ReturnCode foreign import ccall unsafe "regex.h regerror" c_regerror :: ReturnCode -> Ptr CRegex -> CString -> CSize -> IO CSize retOk :: ReturnCode retOk = ReturnCode 0 -- Flags for regexec #enum ExecOption,ExecOption, \ execNotBOL = REG_NOTBOL, \ execNotEOL = REG_NOTEOL -- Flags for regcomp #enum CompOption,CompOption, \ compExtended = REG_EXTENDED, \ compIgnoreCase = REG_ICASE, \ compNoSub = REG_NOSUB, \ compNewline = REG_NEWLINE -- Return values from regexec (REG_NOMATCH, REG_ESPACE,...) -- Error codes from regcomp (not REG_NOMATCH) -- Though calling retNoMatch an error is rather missing the point... #enum ReturnCode,ReturnCode, \ retNoMatch = REG_NOMATCH, \ retBadbr = REG_BADBR, \ retBadpat = REG_BADPAT, \ retBadrpt = REG_BADRPT, \ retEcollate = REG_ECOLLATE, \ retEctype = REG_ECTYPE, \ retEescape = REG_EESCAPE, \ retEsubreg = REG_ESUBREG, \ retEbrack = REG_EBRACK, \ retEparen = REG_EPAREN, \ retEbrace = REG_EBRACE, \ retErange = REG_ERANGE, \ retEspace = REG_ESPACE ---- -- error helpers nullTest :: Ptr a -> String -> IO (Either WrapError b) -> IO (Either WrapError b) {-# INLINE nullTest #-} nullTest ptr msg io = do if nullPtr == ptr then return (Left (retOk,"Ptr parameter was nullPtr in Text.Regex.TRE.Wrap."++msg)) else io isNewline,isNull :: Ptr CChar -> Int -> IO Bool isNewline cstr pos = liftM (newline ==) (peekElemOff cstr pos) where newline = toEnum 10 isNull cstr pos = liftM (nullChar ==) (peekElemOff cstr pos) where nullChar = toEnum 0 {- wrapRC :: ReturnCode -> IO (Either WrapError b) {-# INLINE wrapRC #-} wrapRC r = return (Left (r,"Error in Text.Regex.Posix.Wrap: "++show r)) -} wrapError :: ReturnCode -> Ptr CRegex -> IO (Either WrapError b) wrapError errCode regex_ptr = do -- Call once to compute the error message buffer size errBufSize <- c_regerror errCode regex_ptr nullPtr 0 -- Allocate a temporary buffer to hold the error message allocaArray (fromIntegral errBufSize) $ \errBuf -> do nullTest errBuf "wrapError errBuf" $ do _ <- c_regerror errCode regex_ptr errBuf errBufSize msg <- peekCAString errBuf :: IO String return (Left (errCode, msg)) ---------- wrapCompile flags e pattern = do nullTest pattern "wrapCompile pattern" $ do e_regex_ptr <- try $ mallocBytes (#const sizeof(regex_t)) -- ioError called if nullPtr case e_regex_ptr of Left ioerror -> return (Left (retOk,"Text.Regex.Posix.Wrap.wrapCompile: IOError from mallocBytes(regex_t) : "++show ioerror)) Right raw_regex_ptr -> do zero_regex_ptr <- c_memset raw_regex_ptr 0 (#const sizeof(regex_t)) -- no calloc, so clear the new area to zero regex_fptr <- newForeignPtr c_myregfree zero_regex_ptr -- once pointed-to area is clear it should be safe to add finalizer withForeignPtr regex_fptr $ \regex_ptr -> do -- withForeignPtr is best hygiene here errCode <- c_regcomp regex_ptr pattern flags if (errCode == retOk) then return . Right $ Regex regex_fptr flags e else wrapError errCode regex_ptr --------- wrapTest (Regex regex_fptr _ flags) cstr = do nullTest cstr "wrapTest" $ do withForeignPtr regex_fptr $ \regex_ptr -> do r <- c_regexec regex_ptr cstr 0 nullPtr flags if r == retOk then return (Right True) else if r == retNoMatch then return (Right False) else wrapError r regex_ptr --------- wrapMatch regex@(Regex regex_fptr compileOptions flags) cstr = do nullTest cstr "wrapMatch cstr" $ do if (0 /= compNoSub .&. compileOptions) then do r <- wrapTest regex cstr case r of Right True -> return (Right (Just [])) -- Source of much "wtf?" crap Right False -> return (Right Nothing) Left err -> return (Left err) else do withForeignPtr regex_fptr $ \regex_ptr -> do nsub <- (#peek regex_t, re_nsub) regex_ptr :: IO CSize let nsub_int,nsub_bytes :: Int nsub_int = fromIntegral nsub nsub_bytes = ((1 + nsub_int) * (#const sizeof(regmatch_t))) -- add one because index zero covers the whole match allocaBytes nsub_bytes $ \p_match -> do nullTest p_match "wrapMatch allocaBytes" $ do doMatch regex_ptr cstr nsub p_match flags -- Very very thin wrapper -- Requires, but does not check, that nsub>=0 -- Cannot return (Right (Just [])) doMatch :: Ptr CRegex -> CString -> CSize -> Ptr CRegMatch -> ExecOption -> IO (Either WrapError (Maybe [(RegOffset,RegOffset)])) {-# INLINE doMatch #-} doMatch regex_ptr cstr nsub p_match flags = do r <- c_regexec regex_ptr cstr (1 + nsub) p_match flags if r == retOk then do regions <- mapM getOffsets . take (1+fromIntegral nsub) . iterate (`plusPtr` (#const sizeof(regmatch_t))) $ p_match return (Right (Just regions)) -- regions will not be [] else if r == retNoMatch then return (Right Nothing) else wrapError r regex_ptr where getOffsets :: Ptr CRegMatch -> IO (RegOffset,RegOffset) {-# INLINE getOffsets #-} getOffsets pmatch' = do start <- (#peek regmatch_t, rm_so) pmatch' :: IO (#type regoff_t) end <- (#peek regmatch_t, rm_eo) pmatch' :: IO (#type regoff_t) return (fromIntegral start,fromIntegral end) wrapMatchAll regex@(Regex regex_fptr compileOptions flags) cstr = do nullTest cstr "wrapMatchAll cstr" $ do if (0 /= compNoSub .&. compileOptions) then do r <- wrapTest regex cstr case r of Right True -> return (Right [(toMA 0 [])]) -- Source of much "wtf?" crap Right False -> return (Right []) Left err -> return (Left err) else do withForeignPtr regex_fptr $ \regex_ptr -> do nsub <- (#peek regex_t, re_nsub) regex_ptr :: IO CSize let nsub_int,nsub_bytes :: Int nsub_int = fromIntegral nsub nsub_bytes = ((1 + nsub_int) * (#const sizeof(regmatch_t))) -- add one because index zero covers the whole match allocaBytes nsub_bytes $ \p_match -> do nullTest p_match "wrapMatchAll p_match" $ do let flagsBOL = (complement execNotBOL) .&. flags flagsMIDDLE = execNotBOL .|. flags atBOL pos = doMatch regex_ptr (plusPtr cstr pos) nsub p_match flagsBOL atMIDDLE pos = doMatch regex_ptr (plusPtr cstr pos) nsub p_match flagsMIDDLE loop acc old (s,e) | acc `seq` old `seq` False = undefined | s == e = do let pos = old + fromIntegral e -- pos may be 0 atEnd <- isNull cstr pos if atEnd then return (Right (acc [])) else loop acc old (s,succ e) | otherwise = do let pos = old + fromIntegral e -- pos must be greater than 0 (tricky but true) prev'newline <- isNewline cstr (pred pos) -- safe result <- if prev'newline then atBOL pos else atMIDDLE pos case result of Right Nothing -> return (Right (acc [])) Right (Just parts@(whole:_)) -> let ma = toMA pos parts in loop (acc.(ma:)) pos whole Left err -> return (Left err) Right (Just []) -> return (Right (acc [(toMA pos [])])) -- should never happen result <- doMatch regex_ptr cstr nsub p_match flags case result of Right Nothing -> return (Right []) Right (Just parts@(whole:_)) -> let ma = toMA 0 parts in loop (ma:) 0 whole Left err -> return (Left err) Right (Just []) -> return (Right [(toMA 0 [])]) -- should never happen where toMA :: Int -> [(RegOffset,RegOffset)] -> Array Int (Int,Int) toMA pos [] = listArray (0,0) [(pos,0)] -- wtf? toMA pos parts = listArray (0,pred (length parts)) . map (\(s,e)-> if s>=0 then (pos+fromIntegral s, fromIntegral (e-s)) else (-1,0)) $ parts --------- wrapCount regex@(Regex regex_fptr compileOptions flags) cstr = do nullTest cstr "wrapCount cstr" $ do if (0 /= compNoSub .&. compileOptions) then do r <- wrapTest regex cstr case r of Right True -> return (Right 1) Right False -> return (Right 0) Left err -> return (Left err) else do withForeignPtr regex_fptr $ \regex_ptr -> do let nsub_bytes = (#size regmatch_t) allocaBytes nsub_bytes $ \p_match -> do nullTest p_match "wrapCount p_match" $ do let flagsBOL = (complement execNotBOL) .&. flags flagsMIDDLE = execNotBOL .|. flags atBOL pos = doMatch regex_ptr (plusPtr cstr pos) 0 p_match flagsBOL atMIDDLE pos = doMatch regex_ptr (plusPtr cstr pos) 0 p_match flagsMIDDLE loop acc old (s,e) | acc `seq` old `seq` False = undefined | s == e = do let pos = old + fromIntegral e -- 0 <= pos atEnd <- isNull cstr pos if atEnd then return (Right acc) else loop acc old (s,succ e) | otherwise = do let pos = old + fromIntegral e -- 0 < pos prev'newline <- isNewline cstr (pred pos) -- safe result <- if prev'newline then atBOL pos else atMIDDLE pos case result of Right Nothing -> return (Right acc) Right (Just (whole:_)) -> loop (succ acc) pos whole Left err -> return (Left err) Right (Just []) -> return (Right acc) -- should never happen result <- doMatch regex_ptr cstr 0 p_match flags case result of Right Nothing -> return (Right 0) Right (Just (whole:_)) -> loop 1 0 whole Left err -> return (Left err) Right (Just []) -> return (Right 0) -- should never happen {- -- This is the slower 0.66 version of the code (91s instead of 79s on 10^6 bytes) wrapMatchAll regex cstr = do let regex' = setExecOpts (execNotBOL .|. (getExecOpts regex)) regex at pos = wrapMatch regex' (plusPtr cstr pos) loop old (s,e) | s == e = return [] | otherwise = do let pos = old + fromIntegral e result <- at pos case unwrap result of Nothing -> return [] Just [] -> return ((toMA pos []):[]) -- wtf? Just parts@(whole:_) -> do rest <- loop pos whole return ((toMA pos parts) : rest) result <- wrapMatch regex cstr case unwrap result of Nothing -> return [] Just [] -> return ((toMA 0 []):[]) -- wtf? Just parts@(whole:_) -> do rest <- loop 0 whole return ((toMA 0 parts) : rest) --------- -- This was also changed to match wrapMatchAll after 0.66 wrapCount regex cstr = do let regex' = setExecOpts (execNotBOL .|. (getExecOpts regex)) regex at pos = wrapMatch regex' (plusPtr cstr pos) loop acc old (s,e) | acc `seq` old `seq` False = undefined | s == e = return acc | otherwise = do let pos = old + fromIntegral e result <- at pos case unwrap result of Nothing -> return acc Just [] -> return (succ acc) -- wtf? Just (whole:_) -> loop (succ acc) pos whole result <- wrapMatch regex cstr case unwrap result of Nothing -> return 0 Just [] -> return 1 -- wtf? Just (whole:_) -> loop 1 0 whole -}