regex-1.1.0.2/Text/0000755000000000000000000000000014254065176012107 5ustar0000000000000000regex-1.1.0.2/Text/RE/0000755000000000000000000000000014254065176012415 5ustar0000000000000000regex-1.1.0.2/Text/RE/TDFA/0000755000000000000000000000000014254065176013133 5ustar0000000000000000regex-1.1.0.2/Text/RE/TDFA/ByteString/0000755000000000000000000000000014254065176015225 5ustar0000000000000000regex-1.1.0.2/Text/RE/TDFA/Text/0000755000000000000000000000000014254065176014057 5ustar0000000000000000regex-1.1.0.2/Text/RE/TestBench/0000755000000000000000000000000014254065176014274 5ustar0000000000000000regex-1.1.0.2/Text/RE/Tools/0000755000000000000000000000000014254065176013515 5ustar0000000000000000regex-1.1.0.2/Text/RE/ZeInternals/0000755000000000000000000000000014254065176014653 5ustar0000000000000000regex-1.1.0.2/Text/RE/ZeInternals/SearchReplace/0000755000000000000000000000000014254065176017354 5ustar0000000000000000regex-1.1.0.2/Text/RE/ZeInternals/SearchReplace/TDFA/0000755000000000000000000000000014254065176020072 5ustar0000000000000000regex-1.1.0.2/Text/RE/ZeInternals/SearchReplace/TDFA/ByteString/0000755000000000000000000000000014254065176022164 5ustar0000000000000000regex-1.1.0.2/Text/RE/ZeInternals/SearchReplace/TDFA/Text/0000755000000000000000000000000014254065176021016 5ustar0000000000000000regex-1.1.0.2/Text/RE/ZeInternals/TestBench/0000755000000000000000000000000014254065176016532 5ustar0000000000000000regex-1.1.0.2/Text/RE/ZeInternals/Tools/0000755000000000000000000000000014254065176015753 5ustar0000000000000000regex-1.1.0.2/Text/RE/ZeInternals/Types/0000755000000000000000000000000014254065176015757 5ustar0000000000000000regex-1.1.0.2/Text/RE.hs0000644000000000000000000000405114254065176012751 0ustar0000000000000000{-# OPTIONS_GHC -fno-warn-dodgy-exports #-} -- | -- Module : Text.RE -- Copyright : (C) 2016-17 Chris Dornan -- License : BSD3 (see the LICENSE file) -- Maintainer : Chris Dornan -- Stability : RFC -- Portability : portable module Text.RE ( -- 
* The Tutorial -- $tutorial -- * How to use this library -- $use -- * Further Use -- $further ) where -- $tutorial -- -- We have a regex tutorial at . -- $use -- -- This module just provides a brief overview of the regex package. You -- will need to import one of the API modules of which there is a choice -- which will depend upon two factors: -- -- * Which flavour of regular expression do you want to use? If you need -- Posix flavour REs then you will want the TDFA modules, otherwise its -- PCRE for Perl-style REs. -- -- * What type of text do you want to match: (slow) @String@s, @ByteString@, -- @ByteString.Lazy@, @Text@, @Text.Lazy@ or the anachronistic @Seq Char@ -- or indeed some good old-fashioned polymorphic operators? -- -- While we aim to provide all combinations of these choices, some of them -- are currently not available. In the regex package we have: -- -- * "Text.RE.TDFA.ByteString" -- * "Text.RE.TDFA.ByteString.Lazy" -- * "Text.RE.ZeInternals.TDFA" -- * "Text.RE.TDFA.Sequence" -- * "Text.RE.TDFA.String" -- * "Text.RE.TDFA.Text" -- * "Text.RE.TDFA.Text.Lazy" -- * "Text.RE.TDFA" -- -- The PCRE modules are contained in the separate @regex-with-pcre@ -- package: -- -- * Text.RE.PCRE.ByteString -- * Text.RE.PCRE.ByteString.Lazy -- * Text.RE.ZeInternals.PCRE -- * Text.RE.PCRE.Sequence -- * Text.RE.PCRE.String -- * Text.RE.PCRE -- $further -- For more specialist applications we have the following: -- -- * "Text.RE.REOptions" for specifying back-end specific options; -- * "Text.RE.Replace" for the full text-replacement toolkit; -- * "Text.RE.TestBench" for building up, testing and documenting; -- macro environments for use in REs; -- * "Text.RE.Tools" for an AWK-like text-processing toolkit. 
regex-1.1.0.2/Text/RE/REOptions.lhs0000644000000000000000000000650414254065176015014 0ustar0000000000000000\begin{code} {-# LANGUAGE GeneralizedNewtypeDeriving #-} {-# LANGUAGE MultiParamTypeClasses #-} {-# LANGUAGE FunctionalDependencies #-} {-# LANGUAGE CPP #-} #if __GLASGOW_HASKELL__ >= 800 {-# LANGUAGE TemplateHaskellQuotes #-} {-# LANGUAGE DeriveLift #-} {-# LANGUAGE StandaloneDeriving #-} #else {-# LANGUAGE QuasiQuotes #-} {-# LANGUAGE TemplateHaskell #-} #endif module Text.RE.REOptions ( -- * The Options Tutorial -- $tutorial -- * 'SimpleREOptions' SimpleREOptions(..) -- * 'REOptions_' , REOptions_(..) -- * The Macro Tables , Macros , MacroID(..) , emptyMacros ) where import qualified Data.HashMap.Strict as HM import Data.Hashable import Data.String import Language.Haskell.TH.Syntax \end{code} The RE Options -------------- \begin{code} -- | the default API uses these simple, universal RE options, -- which get auto-converted into the appropriate back-end 'REOptions_' data SimpleREOptions = MultilineSensitive -- ^ case-sensitive with ^ and $ matching the start and end of a line | MultilineInsensitive -- ^ case-insensitive with ^ and $ matsh the start and end of a line | BlockSensitive -- ^ case-sensitive with ^ and $ matching the start and end of the input text | BlockInsensitive -- ^ case-insensitive with ^ and $ matching the start and end of the input text deriving (Bounded,Enum,Eq,Ord,Show) \end{code} \begin{code} -- | we need to use this in the quasi quoters to specify @SimpleREOptions@ -- selected by the quasi quoter deriving instance Lift SimpleREOptions \end{code} \begin{code} -- | the general options for an RE are dependent on which back end is -- being used and are parameterised over the @RE@ type for the back end, -- and its @CompOption@ and @ExecOption@ types (the compile-time and -- execution time options, respectively); each back end will define an -- @REOptions@ type that fills out these three type parameters with the -- appropriate types (see, 
for example, "Text.RE.TDFA") data REOptions_ r c e = REOptions { optionsMacs :: !(Macros r) -- ^ the available TestBench RE macros , optionsComp :: !c -- ^ the back end compile-time options , optionsExec :: !e -- ^ the back end execution-time options } deriving (Show) \end{code} The Macro Tables ---------------- \begin{code} -- | our macro tables are parameterised over the back end @RE@ type and -- and just associate each @MacroID@ with an @RE@ (which may in turn -- contain macros to be expanded) type Macros r = HM.HashMap MacroID r \end{code} \begin{code} -- | @MacroID@ is just a wrapped @String@ type with an @IsString@ -- instance newtype MacroID = MacroID { getMacroID :: String } deriving (IsString,Ord,Eq,Show) \end{code} \begin{code} -- | @MacroID@ is used with @HM.HashMap@ to build macro lookup tables instance Hashable MacroID where hashWithSalt i = hashWithSalt i . getMacroID \end{code} \begin{code} -- | a macro table containing no entries emptyMacros :: Macros r emptyMacros = HM.empty \end{code} \begin{code} -- $tutorial -- This API module provides the generic types used to specify the options -- when compiling REs for each of the backl ends. -- -- See the tutorials at http://re-tutorial-options.regex.uk \end{code} regex-1.1.0.2/Text/RE/Replace.hs0000644000000000000000000000312314254065176014323 0ustar0000000000000000module Text.RE.Replace ( -- * The Replacing Tutorial -- $tutorial -- * replaceAll replaceAll , replaceAllCaptures , replaceAllCaptures_ , replaceAllCapturesM -- * replace , replace , replaceCaptures , replaceCaptures_ , replaceCapturesM -- * REContext and RELocation , REContext(..) , RELocation(..) , isTopLocation -- * Matches , Matches(..) , anyMatches , countMatches , matches , mainCaptures -- * Match , Match(..) , noMatch , emptyMatchArray , matched , matchedText , matchCapture , matchCaptures , (!$$) , captureText , (!$$?) , captureTextMaybe , (!$) , capture , (!$?) , captureMaybe , convertMatchText -- * Capture , Capture(..) 
, hasCaptured , capturePrefix , captureSuffix -- * CaptureID , CaptureID(..) , CaptureNames , noCaptureNames , CaptureName(..) , CaptureOrdinal(..) , findCaptureID -- * Replace and ReplaceMethods , Replace(..) , ReplaceMethods(..) , replaceMethods ) where import Text.RE.ZeInternals.Replace import Text.RE.ZeInternals.Types.Capture import Text.RE.ZeInternals.Types.CaptureID import Text.RE.ZeInternals.Types.Match import Text.RE.ZeInternals.Types.Matches -- $tutorial -- This API module covers the specialised regex tools for doing general -- editing on text, including the internal details of the 'Matches' and -- 'Match' types and the associated functions for extracting captures -- and applying functions to them to transform the subject text. -- -- See the tutorials at http://re-tutorial-replacing.regex.uk regex-1.1.0.2/Text/RE/Summa.hs0000644000000000000000000000115114254065176014031 0ustar0000000000000000module Text.RE.Summa ( -- $collection module Text.RE.REOptions , module Text.RE.Replace , module Text.RE.TestBench , module Text.RE.Tools ) where import Text.RE.REOptions import Text.RE.Replace import Text.RE.TestBench import Text.RE.Tools -- $collection -- -- This module collects together all of the generic regex APIs not -- exported by the principal API modules, specialised for each back end -- and text type. The regex API is modular with only the most common types -- and functions being exported by these modules but the remaining modules -- may be imported en masse by importing this module. 
regex-1.1.0.2/Text/RE/TDFA.hs0000644000000000000000000001623514254065176013476 0ustar0000000000000000{-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE CPP #-} {-# OPTIONS_GHC -fno-warn-duplicate-exports #-} #if __GLASGOW_HASKELL__ >= 800 {-# OPTIONS_GHC -fno-warn-redundant-constraints #-} #endif {-# OPTIONS_GHC -fno-warn-dodgy-exports #-} {-# OPTIONS_GHC -fno-warn-unused-imports #-} module Text.RE.TDFA ( -- * Tutorial -- $tutorial -- * About this Module -- $about -- * The 'Matches' and 'Match' Operators (*=~) , (?=~) -- * The 'SearchReplace' Operators , (*=~/) , (?=~/) -- * The 'Matches' Type , Matches , matchesSource , allMatches , anyMatches , countMatches , matches -- * The 'Match' Type , Match , matchSource , matched , matchedText -- * The Macros and Parsers -- $macros , module Text.RE.TestBench.Parsers -- * The 'RE' Type , RE , regexType , reOptions , reSource , reCaptureNames , reRegex -- * Options -- $options , SimpleREOptions(..) , IsOption(..) , REOptions , defaultREOptions , noPreludeREOptions , unpackSimpleREOptions -- * Compiling and Escaping REs , SearchReplace(..) 
, compileRegex , compileRegexWith , compileRegexWithOptions , compileSearchReplace , compileSearchReplaceWith , compileSearchReplaceWithOptions , escape , escapeWith , escapeWithOptions , escapeREString -- * The Classic regex-base Match Operators , (=~) , (=~~) -- * The re Quasi Quoters -- $re , re , reMultilineSensitive , reMultilineInsensitive , reBlockSensitive , reBlockInsensitive , reMS , reMI , reBS , reBI , re_ -- * The Ed Quasi Quoters -- $ed , ed , edMultilineSensitive , edMultilineInsensitive , edBlockSensitive , edBlockInsensitive , edMS , edMI , edBS , edBI , ed_ -- * The cp Quasi Quoters , cp -- * RE Macros Standard Environment -- $prelude , prelude , preludeEnv , preludeTestsFailing , preludeTable , preludeSummary , preludeSources , preludeSource -- * IsRegex -- $isregex , module Text.RE.Tools.IsRegex -- * The IsRegex Instances -- $instances , module Text.RE.TDFA.ByteString , module Text.RE.TDFA.ByteString.Lazy , module Text.RE.TDFA.Sequence , module Text.RE.TDFA.String , module Text.RE.TDFA.Text , module Text.RE.TDFA.Text.Lazy ) where import Control.Monad.Fail import Text.RE.REOptions import Text.RE.Replace import Text.RE.TDFA.ByteString() import Text.RE.TDFA.ByteString.Lazy() import Text.RE.TDFA.Sequence() import Text.RE.TDFA.String() import Text.RE.TDFA.Text() import Text.RE.TDFA.Text.Lazy() import Text.RE.TestBench.Parsers import Text.RE.Tools.IsRegex import Text.RE.ZeInternals import Text.RE.ZeInternals.SearchReplace.TDFA import Text.RE.ZeInternals.TDFA import qualified Text.Regex.Base as B import qualified Text.Regex.TDFA as TDFA -- | find all the matches in the argument text; e.g., to count the number -- of naturals in s: -- -- @countMatches $ s *=~ [re|[0-9]+|]@ -- (*=~) :: IsRegex RE s => s -> RE -> Matches s (*=~) bs rex = addCaptureNamesToMatches (reCaptureNames rex) $ matchMany rex bs -- | find the first match in the argument text; e.g., to test if there -- is a natural number in the input text: -- -- @matched $ s ?=~ [re|[0-9]+|]@ -- 
(?=~) :: IsRegex RE s => s -> RE -> Match s (?=~) bs rex = addCaptureNamesToMatch (reCaptureNames rex) $ matchOnce rex bs -- | search and replace all matches in the argument text; e.g., this section -- will convert every YYYY-MM-DD format date in its argument text into a -- DD\/MM\/YYYY date: -- -- @(*=~\/ [ed|${y}([0-9]{4})-0*${m}([0-9]{2})-0*${d}([0-9]{2})\/\/\/${d}\/${m}\/${y}|])@ -- (*=~/) :: IsRegex RE s => s -> SearchReplace RE s -> s (*=~/) = flip searchReplaceAll -- | search and replace the first occurrence only (?=~/) :: IsRegex RE s => s -> SearchReplace RE s -> s (?=~/) = flip searchReplaceFirst -- | the regex-base polymorphic match operator (=~) :: ( B.RegexContext TDFA.Regex s a , B.RegexMaker TDFA.Regex TDFA.CompOption TDFA.ExecOption s ) => s -> RE -> a (=~) bs rex = B.match (reRegex rex) bs -- | the regex-base monadic, polymorphic match operator (=~~) :: ( Monad m, MonadFail m , B.RegexContext TDFA.Regex s a , B.RegexMaker TDFA.Regex TDFA.CompOption TDFA.ExecOption s ) => s -> RE -> m a (=~~) bs rex = B.matchM (reRegex rex) bs -- $tutorial -- We have a regex tutorial at . -- $about -- This module provides access to the back end through polymorphic functions -- that operate over all of the String\/Text\/ByteString types supported by the -- back end. The module also provides all of the specialised back-end functionality -- that will not be needed by most regex clients. If you don't need this generality -- then you might want to consider using one of the simpler modules that have been -- specialised for each of these types: -- -- * "Text.RE.TDFA.ByteString" -- * "Text.RE.TDFA.ByteString.Lazy" -- * "Text.RE.TDFA.Sequence" -- * "Text.RE.TDFA.String" -- * "Text.RE.TDFA.Text" -- * "Text.RE.TDFA.Text.Lazy" -- $macros -- There are a number of RE macros and corresponding Haskell parsers -- for parsing the matched text into appropriate Haskell types. See -- the [Macros Tables](http://regex.uk/macros) for details. 
-- $options -- You can specify different compilation options by appending a -- to the name of an [re| ... |] or [ed| ... \/\/\/ ... |] quasi quoter -- to select the corresponding compilation option. For example, the -- section, -- -- @(?=~/ [edBlockInsensitive|foo$\/\/\/bar|])@ -- -- will replace a @foo@ suffix of the argument text, of any -- capitalisation, with a (lower case) @bar@. If you need to specify the -- options dynamically, use the @[re_| ... |]@ and @[ed_| ... \/\/\/ ... |]@ -- quasi quoters, which generate functions that take an 'IsOption' option -- (e.g., a 'SimpleReOptions' value) and yields a 'RE' or 'SearchReplace' -- as appropriate. For example if you have a 'SimpleReOptions' value in -- @sro@ then -- -- @(?=~/ [ed_|foo$\/\/\/bar|] sro)@ -- -- will compile the @foo$@ RE according to the value of @sro@. For more -- on specifying RE options see "Text.RE.REOptions". -- $re -- The @[re|.*|]@ quasi quoters, with variants for specifying different -- options to the RE compiler (see "Text.RE.REOptions"), and the -- specialised back-end types and functions. -- $ed -- The -- | the @[ed| ... \/\/\/ ... |]@ quasi quoters; for example, -- -- @[ed|${y}([0-9]{4})-0*${m}([0-9]{2})-0*${d}([0-9]{2})\/\/\/${d}\/${m}\/${y}|])@ -- -- represents a @SearchReplace@ that will convert a YYYY-MM-DD format date -- into a DD\/MM\/YYYY format date. -- -- The only difference between these quasi quoters is the RE options that are set, -- using the same conventions as the @[re| ... |]@ quasi quoters. -- $isregex -- The 'IsRegex' class is used to abstract over the different regex back ends and -- the text types they work with -- see "Text.RE.Tools.IsRegex" for details. -- $instances -- -- These module exports merely provide the 'IsRegex' instances. 
regex-1.1.0.2/Text/RE/TDFA/ByteString.hs0000644000000000000000000001501614254065176015564 0ustar0000000000000000{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE MultiParamTypeClasses #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE FlexibleInstances #-} {-# OPTIONS_GHC -fno-warn-orphans #-} {-# OPTIONS_GHC -fno-warn-duplicate-exports #-} {-# LANGUAGE CPP #-} #if __GLASGOW_HASKELL__ >= 800 {-# OPTIONS_GHC -fno-warn-redundant-constraints #-} {-# OPTIONS_GHC -fno-warn-unused-imports #-} #endif module Text.RE.TDFA.ByteString ( -- * Tutorial -- $tutorial -- * The 'Matches' and 'Match' Operators (*=~) , (?=~) -- * The 'SearchReplace' Operators , (*=~/) , (?=~/) -- * The 'Matches' Type , Matches , matchesSource , allMatches , anyMatches , countMatches , matches -- * The 'Match' Type , Match , matchSource , matched , matchedText -- * The Macros and Parsers -- $macros , module Text.RE.TestBench.Parsers -- * The 'RE' Type , RE , reSource -- * Options -- $options , SimpleREOptions(..) -- * Compiling and Escaping REs , SearchReplace(..) 
, compileRegex , compileRegexWith , compileSearchReplace , compileSearchReplaceWith , escape , escapeWith , escapeREString -- * The Classic regex-base Match Operators , (=~) , (=~~) -- * The re Quasi Quoters -- $re , re , reMultilineSensitive , reMultilineInsensitive , reBlockSensitive , reBlockInsensitive , reMS , reMI , reBS , reBI , re_ -- * The Ed Quasi Quoters -- $ed , edMultilineSensitive , edMultilineInsensitive , edBlockSensitive , edBlockInsensitive , ed , edMS , edMI , edBS , edBI , ed_ -- * The cp Quasi Quoters , cp -- * IsRegex -- $isregex , module Text.RE.Tools.IsRegex ) where import Control.Monad.Fail import qualified Data.ByteString as B import Data.Typeable import Prelude.Compat import Text.RE.REOptions import Text.RE.Replace import Text.RE.TestBench.Parsers import Text.RE.Tools.IsRegex import Text.RE.ZeInternals import Text.RE.ZeInternals.SearchReplace.TDFA.ByteString import Text.RE.ZeInternals.TDFA import Text.Regex.Base import qualified Text.Regex.TDFA as TDFA -- NB regex-base instance imports maybe be needed for for some API modules -- | find all the matches in the argument text; e.g., to count the number -- of naturals in s: -- -- @countMatches $ s *=~ [re|[0-9]+|]@ -- (*=~) :: B.ByteString -> RE -> Matches B.ByteString (*=~) bs rex = addCaptureNamesToMatches (reCaptureNames rex) $ match (reRegex rex) bs -- | find the first match in the argument text; e.g., to test if there -- is a natural number in the input text: -- -- @matched $ s ?=~ [re|[0-9]+|]@ -- (?=~) :: B.ByteString -> RE -> Match B.ByteString (?=~) bs rex = addCaptureNamesToMatch (reCaptureNames rex) $ match (reRegex rex) bs -- | search and replace all matches in the argument text; e.g., this section -- will convert every YYYY-MM-DD format date in its argument text into a -- DD\/MM\/YYYY date: -- -- @(*=~\/ [ed|${y}([0-9]{4})-0*${m}([0-9]{2})-0*${d}([0-9]{2})\/\/\/${d}\/${m}\/${y}|])@ -- (*=~/) :: B.ByteString -> SearchReplace RE B.ByteString -> B.ByteString (*=~/) = flip 
searchReplaceAll -- | search and replace the first occurrence only (if any) in the input text -- e.g., to prefix the first string of four hex digits in the input text, -- if any, with @0x@: -- -- @(?=~\/ [ed|[0-9A-Fa-f]{4}\/\/\/0x$0|])@ -- (?=~/) :: B.ByteString -> SearchReplace RE B.ByteString -> B.ByteString (?=~/) = flip searchReplaceFirst -- | the `regex-base` polymorphic match operator (=~) :: ( Typeable a , RegexContext TDFA.Regex B.ByteString a ) => B.ByteString -> RE -> a (=~) bs rex = addCaptureNames (reCaptureNames rex) $ match (reRegex rex) bs -- | the `regex-base` monadic, polymorphic match operator (=~~) :: ( Monad m, MonadFail m , Functor m , Typeable a , RegexContext TDFA.Regex B.ByteString a ) => B.ByteString -> RE -> m a (=~~) bs rex = addCaptureNames (reCaptureNames rex) <$> matchM (reRegex rex) bs instance IsRegex RE B.ByteString where matchOnce = flip (?=~) matchMany = flip (*=~) makeRegexWith = \o -> compileRegexWith o . unpackR makeSearchReplaceWith = \o r t -> compileSearchReplaceWith o (unpackR r) (unpackR t) regexSource = packR . reSource -- $tutorial -- We have a regex tutorial at . -- $macros -- There are a number of RE macros and corresponding Haskell parsers -- for parsing the matched text into appropriate Haskell types. See -- the [Macros Tables](http://regex.uk/macros) for details. -- $options -- You can specify different compilation options by appending a -- to the name of an [re| ... |] or [ed| ... \/\/\/ ... |] quasi quoter -- to select the corresponding compilation option. For example, the -- section, -- -- @(?=~/ [edBlockInsensitive|foo$\/\/\/bar|])@ -- -- will replace a @foo@ suffix of the argument text, of any -- capitalisation, with a (lower case) @bar@. If you need to specify the -- options dynamically, use the @[re_| ... |]@ and @[ed_| ... \/\/\/ ... |]@ -- quasi quoters, which generate functions that take an 'IsOption' option -- (e.g., a 'SimpleReOptions' value) and yields a 'RE' or 'SearchReplace' -- as appropriate. 
For example if you have a 'SimpleReOptions' value in -- @sro@ then -- -- @(?=~/ [ed_|foo$\/\/\/bar|] sro)@ -- -- will compile the @foo$@ RE according to the value of @sro@. For more -- on specifying RE options see "Text.RE.REOptions". -- $re -- The @[re|.*|]@ quasi quoters, with variants for specifying different -- options to the RE compiler (see "Text.RE.REOptions"), and the -- specialised back-end types and functions. -- $ed -- The @[ed|.*\/\/\/foo|]@ quasi quoters, with variants for specifying different -- options to the RE compiler (see "Text.RE.REOptions"). -- $ed -- The -- | the @[ed| ... \/\/\/ ... |]@ quasi quoters; for example, -- -- @[ed|${y}([0-9]{4})-0*${m}([0-9]{2})-0*${d}([0-9]{2})\/\/\/${d}\/${m}\/${y}|])@ -- -- represents a @SearchReplace@ that will convert a YYYY-MM-DD format date -- into a DD\/MM\/YYYY format date. -- -- The only difference between these quasi quoters is the RE options that are set, -- using the same conventions as the @[re| ... |]@ quasi quoters. -- $isregex -- The 'IsRegex' class is used to abstract over the different regex back ends and -- the text types they work with -- see "Text.RE.Tools.IsRegex" for details. 
regex-1.1.0.2/Text/RE/TDFA/ByteString/Lazy.hs0000644000000000000000000001507014254065176016503 0ustar0000000000000000{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE MultiParamTypeClasses #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE FlexibleInstances #-} {-# OPTIONS_GHC -fno-warn-orphans #-} {-# OPTIONS_GHC -fno-warn-duplicate-exports #-} {-# LANGUAGE CPP #-} #if __GLASGOW_HASKELL__ >= 800 {-# OPTIONS_GHC -fno-warn-redundant-constraints #-} {-# OPTIONS_GHC -fno-warn-unused-imports #-} #endif module Text.RE.TDFA.ByteString.Lazy ( -- * Tutorial -- $tutorial -- * The 'Matches' and 'Match' Operators (*=~) , (?=~) -- * The 'SearchReplace' Operators , (*=~/) , (?=~/) -- * The 'Matches' Type , Matches , matchesSource , allMatches , anyMatches , countMatches , matches -- * The 'Match' Type , Match , matchSource , matched , matchedText -- * The Macros and Parsers -- $macros , module Text.RE.TestBench.Parsers -- * The 'RE' Type , RE , reSource -- * Options -- $options , SimpleREOptions(..) -- * Compiling and Escaping REs , SearchReplace(..) 
, compileRegex , compileRegexWith , compileSearchReplace , compileSearchReplaceWith , escape , escapeWith , escapeREString -- * The Classic regex-base Match Operators , (=~) , (=~~) -- * The re Quasi Quoters -- $re , re , reMultilineSensitive , reMultilineInsensitive , reBlockSensitive , reBlockInsensitive , reMS , reMI , reBS , reBI , re_ -- * The Ed Quasi Quoters -- $ed , edMultilineSensitive , edMultilineInsensitive , edBlockSensitive , edBlockInsensitive , ed , edMS , edMI , edBS , edBI , ed_ -- * The cp Quasi Quoters , cp -- * IsRegex -- $isregex , module Text.RE.Tools.IsRegex ) where import Control.Monad.Fail import qualified Data.ByteString.Lazy.Char8 as LBS import Data.Typeable import Prelude.Compat import Text.RE.REOptions import Text.RE.Replace import Text.RE.TestBench.Parsers import Text.RE.Tools.IsRegex import Text.RE.ZeInternals import Text.RE.ZeInternals.SearchReplace.TDFA.ByteString.Lazy import Text.RE.ZeInternals.TDFA import Text.Regex.Base import qualified Text.Regex.TDFA as TDFA -- NB regex-base instance imports maybe be needed for for some API modules -- | find all the matches in the argument text; e.g., to count the number -- of naturals in s: -- -- @countMatches $ s *=~ [re|[0-9]+|]@ -- (*=~) :: LBS.ByteString -> RE -> Matches LBS.ByteString (*=~) bs rex = addCaptureNamesToMatches (reCaptureNames rex) $ match (reRegex rex) bs -- | find the first match in the argument text; e.g., to test if there -- is a natural number in the input text: -- -- @matched $ s ?=~ [re|[0-9]+|]@ -- (?=~) :: LBS.ByteString -> RE -> Match LBS.ByteString (?=~) bs rex = addCaptureNamesToMatch (reCaptureNames rex) $ match (reRegex rex) bs -- | search and replace all matches in the argument text; e.g., this section -- will convert every YYYY-MM-DD format date in its argument text into a -- DD\/MM\/YYYY date: -- -- @(*=~\/ [ed|${y}([0-9]{4})-0*${m}([0-9]{2})-0*${d}([0-9]{2})\/\/\/${d}\/${m}\/${y}|])@ -- (*=~/) :: LBS.ByteString -> SearchReplace RE LBS.ByteString -> 
LBS.ByteString (*=~/) = flip searchReplaceAll -- | search and replace the first occurrence only (if any) in the input text -- e.g., to prefix the first string of four hex digits in the input text, -- if any, with @0x@: -- -- @(?=~\/ [ed|[0-9A-Fa-f]{4}\/\/\/0x$0|])@ -- (?=~/) :: LBS.ByteString -> SearchReplace RE LBS.ByteString -> LBS.ByteString (?=~/) = flip searchReplaceFirst -- | the `regex-base` polymorphic match operator (=~) :: ( Typeable a , RegexContext TDFA.Regex LBS.ByteString a ) => LBS.ByteString -> RE -> a (=~) bs rex = addCaptureNames (reCaptureNames rex) $ match (reRegex rex) bs -- | the `regex-base` monadic, polymorphic match operator (=~~) :: ( Monad m, MonadFail m , Functor m , Typeable a , RegexContext TDFA.Regex LBS.ByteString a ) => LBS.ByteString -> RE -> m a (=~~) bs rex = addCaptureNames (reCaptureNames rex) <$> matchM (reRegex rex) bs instance IsRegex RE LBS.ByteString where matchOnce = flip (?=~) matchMany = flip (*=~) makeRegexWith = \o -> compileRegexWith o . unpackR makeSearchReplaceWith = \o r t -> compileSearchReplaceWith o (unpackR r) (unpackR t) regexSource = packR . reSource -- $tutorial -- We have a regex tutorial at . -- $macros -- There are a number of RE macros and corresponding Haskell parsers -- for parsing the matched text into appropriate Haskell types. See -- the [Macros Tables](http://regex.uk/macros) for details. -- $options -- You can specify different compilation options by appending a -- to the name of an [re| ... |] or [ed| ... \/\/\/ ... |] quasi quoter -- to select the corresponding compilation option. For example, the -- section, -- -- @(?=~/ [edBlockInsensitive|foo$\/\/\/bar|])@ -- -- will replace a @foo@ suffix of the argument text, of any -- capitalisation, with a (lower case) @bar@. If you need to specify the -- options dynamically, use the @[re_| ... |]@ and @[ed_| ... \/\/\/ ... 
|]@ -- quasi quoters, which generate functions that take an 'IsOption' option -- (e.g., a 'SimpleReOptions' value) and yields a 'RE' or 'SearchReplace' -- as appropriate. For example if you have a 'SimpleReOptions' value in -- @sro@ then -- -- @(?=~/ [ed_|foo$\/\/\/bar|] sro)@ -- -- will compile the @foo$@ RE according to the value of @sro@. For more -- on specifying RE options see "Text.RE.REOptions". -- $re -- The @[re|.*|]@ quasi quoters, with variants for specifying different -- options to the RE compiler (see "Text.RE.REOptions"), and the -- specialised back-end types and functions. -- $ed -- The @[ed|.*\/\/\/foo|]@ quasi quoters, with variants for specifying different -- options to the RE compiler (see "Text.RE.REOptions"). -- $ed -- The -- | the @[ed| ... \/\/\/ ... |]@ quasi quoters; for example, -- -- @[ed|${y}([0-9]{4})-0*${m}([0-9]{2})-0*${d}([0-9]{2})\/\/\/${d}\/${m}\/${y}|])@ -- -- represents a @SearchReplace@ that will convert a YYYY-MM-DD format date -- into a DD\/MM\/YYYY format date. -- -- The only difference between these quasi quoters is the RE options that are set, -- using the same conventions as the @[re| ... |]@ quasi quoters. -- $isregex -- The 'IsRegex' class is used to abstract over the different regex back ends and -- the text types they work with -- see "Text.RE.Tools.IsRegex" for details. 
regex-1.1.0.2/Text/RE/TDFA/Sequence.hs0000644000000000000000000001501214254065176015236 0ustar0000000000000000{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE MultiParamTypeClasses #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE FlexibleInstances #-} {-# OPTIONS_GHC -fno-warn-orphans #-} {-# OPTIONS_GHC -fno-warn-duplicate-exports #-} {-# LANGUAGE CPP #-} #if __GLASGOW_HASKELL__ >= 800 {-# OPTIONS_GHC -fno-warn-redundant-constraints #-} {-# OPTIONS_GHC -fno-warn-unused-imports #-} #endif module Text.RE.TDFA.Sequence ( -- * Tutorial -- $tutorial -- * The 'Matches' and 'Match' Operators (*=~) , (?=~) -- * The 'SearchReplace' Operators , (*=~/) , (?=~/) -- * The 'Matches' Type , Matches , matchesSource , allMatches , anyMatches , countMatches , matches -- * The 'Match' Type , Match , matchSource , matched , matchedText -- * The Macros and Parsers -- $macros , module Text.RE.TestBench.Parsers -- * The 'RE' Type , RE , reSource -- * Options -- $options , SimpleREOptions(..) -- * Compiling and Escaping REs , SearchReplace(..) 
, compileRegex
  , compileRegexWith
  , compileSearchReplace
  , compileSearchReplaceWith
  , escape
  , escapeWith
  , escapeREString
  -- * The Classic regex-base Match Operators
  , (=~)
  , (=~~)
  -- * The re Quasi Quoters
  -- $re
  , re
  , reMultilineSensitive
  , reMultilineInsensitive
  , reBlockSensitive
  , reBlockInsensitive
  , reMS
  , reMI
  , reBS
  , reBI
  , re_
  -- * The Ed Quasi Quoters
  -- $ed
  , edMultilineSensitive
  , edMultilineInsensitive
  , edBlockSensitive
  , edBlockInsensitive
  , ed
  , edMS
  , edMI
  , edBS
  , edBI
  , ed_
  -- * The cp Quasi Quoters
  , cp
  -- * IsRegex
  -- $isregex
  , module Text.RE.Tools.IsRegex
  ) where

import           Control.Monad.Fail
import qualified Data.Sequence                 as S
import           Data.Typeable
import           Prelude.Compat
import           Text.RE.REOptions
import           Text.RE.Replace
import           Text.RE.TestBench.Parsers
import           Text.RE.Tools.IsRegex
import           Text.RE.ZeInternals
import           Text.RE.ZeInternals.SearchReplace.TDFA.Sequence
import           Text.RE.ZeInternals.TDFA
import           Text.Regex.Base
import qualified Text.Regex.TDFA               as TDFA
-- NB regex-base instance imports may be needed for some API modules

-- | find all the matches in the argument text; e.g., to count the number
-- of naturals in s:
--
--   @countMatches $ s *=~ [re|[0-9]+|]@
--
(*=~) :: S.Seq Char
      -> RE
      -> Matches (S.Seq Char)
txt *=~ rex = addCaptureNamesToMatches names (match (reRegex rex) txt)
  where
    names = reCaptureNames rex

-- | find the first match in the argument text; e.g., to test if there
-- is a natural number in the input text:
--
--   @matched $ s ?=~ [re|[0-9]+|]@
--
(?=~) :: S.Seq Char
      -> RE
      -> Match (S.Seq Char)
txt ?=~ rex = addCaptureNamesToMatch names (match (reRegex rex) txt)
  where
    names = reCaptureNames rex

-- | search and replace all matches in the argument text; e.g., this section
-- will convert every YYYY-MM-DD format date in its argument text into a
-- DD\/MM\/YYYY date:
--
--   @(*=~\/ [ed|${y}([0-9]{4})-0*${m}([0-9]{2})-0*${d}([0-9]{2})\/\/\/${d}\/${m}\/${y}|])@
--
(*=~/) :: S.Seq Char -> SearchReplace RE (S.Seq Char) -> S.Seq Char
txt *=~/ sr = searchReplaceAll sr txt

-- | search and replace the first occurrence only (if any) in the input text
-- e.g., to prefix the first string of four hex digits in the input text,
-- if any, with @0x@:
--
--  @(?=~\/ [ed|[0-9A-Fa-f]{4}\/\/\/0x$0|])@
--
(?=~/) :: S.Seq Char -> SearchReplace RE (S.Seq Char) -> S.Seq Char
txt ?=~/ sr = searchReplaceFirst sr txt

-- | the `regex-base` polymorphic match operator
(=~) :: ( Typeable a
        , RegexContext TDFA.Regex (S.Seq Char) a
        )
     => S.Seq Char
     -> RE
     -> a
txt =~ rex = addCaptureNames (reCaptureNames rex) (match (reRegex rex) txt)

-- | the `regex-base` monadic, polymorphic match operator
(=~~) :: ( Monad m, MonadFail m
         , Functor m
         , Typeable a
         , RegexContext TDFA.Regex (S.Seq Char) a
         )
      => S.Seq Char
      -> RE
      -> m a
txt =~~ rex = fmap (addCaptureNames (reCaptureNames rex)) (matchM (reRegex rex) txt)

-- | the 'IsRegex' methods are the operators above with the regex
-- argument first, plus the TDFA compilers applied to the unpacked source
instance IsRegex RE (S.Seq Char) where
  matchOnce rex txt                  = txt ?=~ rex
  matchMany rex txt                  = txt *=~ rex
  makeRegexWith opts src             = compileRegexWith opts (unpackR src)
  makeSearchReplaceWith opts src tpl = compileSearchReplaceWith opts (unpackR src) (unpackR tpl)
  regexSource rex                    = packR (reSource rex)

-- $tutorial
-- We have a regex tutorial at http://tutorial.regex.uk.

-- $macros
-- There are a number of RE macros and corresponding Haskell parsers
-- for parsing the matched text into appropriate Haskell types. See
-- the [Macros Tables](http://regex.uk/macros) for details.

-- $options
-- You can specify different compilation options by appending a suffix
-- to the name of an @[re| ... |]@ or @[ed| ... \/\/\/ ... |]@ quasi quoter
-- to select the corresponding compilation option. For example, the
-- section,
--
--  @(?=~\/ [edBlockInsensitive|foo$\/\/\/bar|])@
--
-- will replace a @foo@ suffix of the argument text, of any
-- capitalisation, with a (lower case) @bar@. If you need to specify the
-- options dynamically, use the @[re_| ... |]@ and @[ed_| ... \/\/\/ ... |]@
-- quasi quoters, which generate functions that take an 'IsOption' option
-- (e.g., a 'SimpleREOptions' value) and yield a 'RE' or 'SearchReplace'
-- as appropriate. For example if you have a 'SimpleREOptions' value in
-- @sro@ then
--
--  @(?=~\/ [ed_|foo$\/\/\/bar|] sro)@
--
-- will compile the @foo$@ RE according to the value of @sro@. For more
-- on specifying RE options see "Text.RE.REOptions".

-- $re
-- The @[re|.*|]@ quasi quoters, with variants for specifying different
-- options to the RE compiler (see "Text.RE.REOptions"), and the
-- specialised back-end types and functions.

-- $ed
-- The @[ed|.*\/\/\/foo|]@ quasi quoters, with variants for specifying different
-- options to the RE compiler (see "Text.RE.REOptions"); for example,
--
--  @[ed|${y}([0-9]{4})-0*${m}([0-9]{2})-0*${d}([0-9]{2})\/\/\/${d}\/${m}\/${y}|]@
--
-- represents a @SearchReplace@ that will convert a YYYY-MM-DD format date
-- into a DD\/MM\/YYYY format date.
--
-- The only difference between these quasi quoters is the RE options that are set,
-- using the same conventions as the @[re| ... |]@ quasi quoters.

-- $isregex
-- The 'IsRegex' class is used to abstract over the different regex back ends and
-- the text types they work with -- see "Text.RE.Tools.IsRegex" for details.
regex-1.1.0.2/Text/RE/TDFA/String.hs0000644000000000000000000001457114254065176014745 0ustar0000000000000000
{-# LANGUAGE NoImplicitPrelude          #-}
{-# LANGUAGE MultiParamTypeClasses      #-}
{-# LANGUAGE FlexibleContexts           #-}
{-# LANGUAGE FlexibleInstances          #-}
{-# OPTIONS_GHC -fno-warn-orphans       #-}
{-# OPTIONS_GHC -fno-warn-duplicate-exports #-}
{-# LANGUAGE CPP                        #-}
#if __GLASGOW_HASKELL__ >= 800
{-# OPTIONS_GHC -fno-warn-redundant-constraints #-}
{-# OPTIONS_GHC -fno-warn-unused-imports #-}
#endif

module Text.RE.TDFA.String
  (
  -- * Tutorial
  -- $tutorial

  -- * The 'Matches' and 'Match' Operators
    (*=~)
  , (?=~)
  -- * The 'SearchReplace' Operators
  , (*=~/)
  , (?=~/)
  -- * The 'Matches' Type
  , Matches
  , matchesSource
  , allMatches
  , anyMatches
  , countMatches
  , matches
  -- * The 'Match' Type
  , Match
  , matchSource
  , matched
  , matchedText
  -- * The Macros and Parsers
  -- $macros
  , module Text.RE.TestBench.Parsers
  -- * The 'RE' Type
  , RE
  , reSource
  -- * Options
  -- $options
  , SimpleREOptions(..)
  -- * Compiling and Escaping REs
  , SearchReplace(..)
  , compileRegex
  , compileRegexWith
  , compileSearchReplace
  , compileSearchReplaceWith
  , escape
  , escapeWith
  , escapeREString
  -- * The Classic regex-base Match Operators
  , (=~)
  , (=~~)
  -- * The re Quasi Quoters
  -- $re
  , re
  , reMultilineSensitive
  , reMultilineInsensitive
  , reBlockSensitive
  , reBlockInsensitive
  , reMS
  , reMI
  , reBS
  , reBI
  , re_
  -- * The Ed Quasi Quoters
  -- $ed
  , edMultilineSensitive
  , edMultilineInsensitive
  , edBlockSensitive
  , edBlockInsensitive
  , ed
  , edMS
  , edMI
  , edBS
  , edBI
  , ed_
  -- * The cp Quasi Quoters
  , cp
  -- * IsRegex
  -- $isregex
  , module Text.RE.Tools.IsRegex
  ) where

import           Control.Monad.Fail
import           Data.Typeable
import           Prelude.Compat
import           Text.RE.REOptions
import           Text.RE.Replace
import           Text.RE.TestBench.Parsers
import           Text.RE.Tools.IsRegex
import           Text.RE.ZeInternals
import           Text.RE.ZeInternals.SearchReplace.TDFA.String
import           Text.RE.ZeInternals.TDFA
import           Text.Regex.Base
import qualified Text.Regex.TDFA               as TDFA
-- NB regex-base instance imports may be needed for some API modules

-- | find all the matches in the argument text; e.g., to count the number
-- of naturals in s:
--
--   @countMatches $ s *=~ [re|[0-9]+|]@
--
(*=~) :: String
      -> RE
      -> Matches String
txt *=~ rex = addCaptureNamesToMatches names (match (reRegex rex) txt)
  where
    names = reCaptureNames rex

-- | find the first match in the argument text; e.g., to test if there
-- is a natural number in the input text:
--
--   @matched $ s ?=~ [re|[0-9]+|]@
--
(?=~) :: String
      -> RE
      -> Match String
txt ?=~ rex = addCaptureNamesToMatch names (match (reRegex rex) txt)
  where
    names = reCaptureNames rex

-- | search and replace all matches in the argument text; e.g., this section
-- will convert every YYYY-MM-DD format date in its argument text into a
-- DD\/MM\/YYYY date:
--
--   @(*=~\/ [ed|${y}([0-9]{4})-0*${m}([0-9]{2})-0*${d}([0-9]{2})\/\/\/${d}\/${m}\/${y}|])@
--
(*=~/) :: String -> SearchReplace RE String -> String
txt *=~/ sr = searchReplaceAll sr txt

-- | search and replace the first occurrence only (if any) in the input text
-- e.g., to prefix the first string of four hex digits in the input text,
-- if any, with @0x@:
--
--  @(?=~\/ [ed|[0-9A-Fa-f]{4}\/\/\/0x$0|])@
--
(?=~/) :: String -> SearchReplace RE String -> String
txt ?=~/ sr = searchReplaceFirst sr txt

-- | the `regex-base` polymorphic match operator
(=~) :: ( Typeable a
        , RegexContext TDFA.Regex String a
        )
     => String
     -> RE
     -> a
txt =~ rex = addCaptureNames (reCaptureNames rex) (match (reRegex rex) txt)

-- | the `regex-base` monadic, polymorphic match operator
(=~~) :: ( Monad m, MonadFail m
         , Functor m
         , Typeable a
         , RegexContext TDFA.Regex String a
         )
      => String
      -> RE
      -> m a
txt =~~ rex = fmap (addCaptureNames (reCaptureNames rex)) (matchM (reRegex rex) txt)

-- | the 'IsRegex' methods are the operators above with the regex
-- argument first, plus the TDFA compilers applied to the unpacked source
instance IsRegex RE String where
  matchOnce rex txt                  = txt ?=~ rex
  matchMany rex txt                  = txt *=~ rex
  makeRegexWith opts src             = compileRegexWith opts (unpackR src)
  makeSearchReplaceWith opts src tpl = compileSearchReplaceWith opts (unpackR src) (unpackR tpl)
  regexSource rex                    = packR (reSource rex)

-- $tutorial
-- We have a regex tutorial at http://tutorial.regex.uk.

-- $macros
-- There are a number of RE macros and corresponding Haskell parsers
-- for parsing the matched text into appropriate Haskell types. See
-- the [Macros Tables](http://regex.uk/macros) for details.

-- $options
-- You can specify different compilation options by appending a suffix
-- to the name of an @[re| ... |]@ or @[ed| ... \/\/\/ ... |]@ quasi quoter
-- to select the corresponding compilation option. For example, the
-- section,
--
--  @(?=~\/ [edBlockInsensitive|foo$\/\/\/bar|])@
--
-- will replace a @foo@ suffix of the argument text, of any
-- capitalisation, with a (lower case) @bar@. If you need to specify the
-- options dynamically, use the @[re_| ... |]@ and @[ed_| ... \/\/\/ ... |]@
-- quasi quoters, which generate functions that take an 'IsOption' option
-- (e.g., a 'SimpleREOptions' value) and yield a 'RE' or 'SearchReplace'
-- as appropriate. For example if you have a 'SimpleREOptions' value in
-- @sro@ then
--
--  @(?=~\/ [ed_|foo$\/\/\/bar|] sro)@
--
-- will compile the @foo$@ RE according to the value of @sro@. For more
-- on specifying RE options see "Text.RE.REOptions".

-- $re
-- The @[re|.*|]@ quasi quoters, with variants for specifying different
-- options to the RE compiler (see "Text.RE.REOptions"), and the
-- specialised back-end types and functions.

-- $ed
-- The @[ed|.*\/\/\/foo|]@ quasi quoters, with variants for specifying different
-- options to the RE compiler (see "Text.RE.REOptions"); for example,
--
--  @[ed|${y}([0-9]{4})-0*${m}([0-9]{2})-0*${d}([0-9]{2})\/\/\/${d}\/${m}\/${y}|]@
--
-- represents a @SearchReplace@ that will convert a YYYY-MM-DD format date
-- into a DD\/MM\/YYYY format date.
--
-- The only difference between these quasi quoters is the RE options that are set,
-- using the same conventions as the @[re| ... |]@ quasi quoters.

-- $isregex
-- The 'IsRegex' class is used to abstract over the different regex back ends and
-- the text types they work with -- see "Text.RE.Tools.IsRegex" for details.
regex-1.1.0.2/Text/RE/TDFA/Text.hs0000644000000000000000000001465014254065176014421 0ustar0000000000000000
{-# LANGUAGE NoImplicitPrelude          #-}
{-# LANGUAGE MultiParamTypeClasses      #-}
{-# LANGUAGE FlexibleContexts           #-}
{-# LANGUAGE FlexibleInstances          #-}
{-# OPTIONS_GHC -fno-warn-orphans       #-}
{-# OPTIONS_GHC -fno-warn-duplicate-exports #-}
{-# LANGUAGE CPP                        #-}
#if __GLASGOW_HASKELL__ >= 800
{-# OPTIONS_GHC -fno-warn-redundant-constraints #-}
{-# OPTIONS_GHC -fno-warn-unused-imports #-}
#endif

module Text.RE.TDFA.Text
  (
  -- * Tutorial
  -- $tutorial

  -- * The 'Matches' and 'Match' Operators
    (*=~)
  , (?=~)
  -- * The 'SearchReplace' Operators
  , (*=~/)
  , (?=~/)
  -- * The 'Matches' Type
  , Matches
  , matchesSource
  , allMatches
  , anyMatches
  , countMatches
  , matches
  -- * The 'Match' Type
  , Match
  , matchSource
  , matched
  , matchedText
  -- * The Macros and Parsers
  -- $macros
  , module Text.RE.TestBench.Parsers
  -- * The 'RE' Type
  , RE
  , reSource
  -- * Options
  -- $options
  , SimpleREOptions(..)
  -- * Compiling and Escaping REs
  , SearchReplace(..)
  , compileRegex
  , compileRegexWith
  , compileSearchReplace
  , compileSearchReplaceWith
  , escape
  , escapeWith
  , escapeREString
  -- * The Classic regex-base Match Operators
  , (=~)
  , (=~~)
  -- * The re Quasi Quoters
  -- $re
  , re
  , reMultilineSensitive
  , reMultilineInsensitive
  , reBlockSensitive
  , reBlockInsensitive
  , reMS
  , reMI
  , reBS
  , reBI
  , re_
  -- * The Ed Quasi Quoters
  -- $ed
  , edMultilineSensitive
  , edMultilineInsensitive
  , edBlockSensitive
  , edBlockInsensitive
  , ed
  , edMS
  , edMI
  , edBS
  , edBI
  , ed_
  -- * The cp Quasi Quoters
  , cp
  -- * IsRegex
  -- $isregex
  , module Text.RE.Tools.IsRegex
  ) where

import           Control.Monad.Fail
import qualified Data.Text                     as T
import           Data.Typeable
import           Prelude.Compat
import           Text.RE.REOptions
import           Text.RE.Replace
import           Text.RE.TestBench.Parsers
import           Text.RE.Tools.IsRegex
import           Text.RE.ZeInternals
import           Text.RE.ZeInternals.SearchReplace.TDFA.Text
import           Text.RE.ZeInternals.TDFA
import           Text.Regex.Base
import qualified Text.Regex.TDFA               as TDFA
-- NB regex-base instance imports may be needed for some API modules

-- | find all the matches in the argument text; e.g., to count the number
-- of naturals in s:
--
--   @countMatches $ s *=~ [re|[0-9]+|]@
--
(*=~) :: T.Text
      -> RE
      -> Matches T.Text
txt *=~ rex = addCaptureNamesToMatches names (match (reRegex rex) txt)
  where
    names = reCaptureNames rex

-- | find the first match in the argument text; e.g., to test if there
-- is a natural number in the input text:
--
--   @matched $ s ?=~ [re|[0-9]+|]@
--
(?=~) :: T.Text
      -> RE
      -> Match T.Text
txt ?=~ rex = addCaptureNamesToMatch names (match (reRegex rex) txt)
  where
    names = reCaptureNames rex

-- | search and replace all matches in the argument text; e.g., this section
-- will convert every YYYY-MM-DD format date in its argument text into a
-- DD\/MM\/YYYY date:
--
--   @(*=~\/ [ed|${y}([0-9]{4})-0*${m}([0-9]{2})-0*${d}([0-9]{2})\/\/\/${d}\/${m}\/${y}|])@
--
(*=~/) :: T.Text -> SearchReplace RE T.Text -> T.Text
txt *=~/ sr = searchReplaceAll sr txt

-- | search and replace the first occurrence only (if any) in the input text
-- e.g., to prefix the first string of four hex digits in the input text,
-- if any, with @0x@:
--
--  @(?=~\/ [ed|[0-9A-Fa-f]{4}\/\/\/0x$0|])@
--
(?=~/) :: T.Text -> SearchReplace RE T.Text -> T.Text
txt ?=~/ sr = searchReplaceFirst sr txt

-- | the `regex-base` polymorphic match operator
(=~) :: ( Typeable a
        , RegexContext TDFA.Regex T.Text a
        )
     => T.Text
     -> RE
     -> a
txt =~ rex = addCaptureNames (reCaptureNames rex) (match (reRegex rex) txt)

-- | the `regex-base` monadic, polymorphic match operator
(=~~) :: ( Monad m, MonadFail m
         , Functor m
         , Typeable a
         , RegexContext TDFA.Regex T.Text a
         )
      => T.Text
      -> RE
      -> m a
txt =~~ rex = fmap (addCaptureNames (reCaptureNames rex)) (matchM (reRegex rex) txt)

-- | the 'IsRegex' methods are the operators above with the regex
-- argument first, plus the TDFA compilers applied to the unpacked source
instance IsRegex RE T.Text where
  matchOnce rex txt                  = txt ?=~ rex
  matchMany rex txt                  = txt *=~ rex
  makeRegexWith opts src             = compileRegexWith opts (unpackR src)
  makeSearchReplaceWith opts src tpl = compileSearchReplaceWith opts (unpackR src) (unpackR tpl)
  regexSource rex                    = packR (reSource rex)

-- $tutorial
-- We have a regex tutorial at http://tutorial.regex.uk.

-- $macros
-- There are a number of RE macros and corresponding Haskell parsers
-- for parsing the matched text into appropriate Haskell types. See
-- the [Macros Tables](http://regex.uk/macros) for details.

-- $options
-- You can specify different compilation options by appending a suffix
-- to the name of an @[re| ... |]@ or @[ed| ... \/\/\/ ... |]@ quasi quoter
-- to select the corresponding compilation option. For example, the
-- section,
--
--  @(?=~\/ [edBlockInsensitive|foo$\/\/\/bar|])@
--
-- will replace a @foo@ suffix of the argument text, of any
-- capitalisation, with a (lower case) @bar@. If you need to specify the
-- options dynamically, use the @[re_| ... |]@ and @[ed_| ... \/\/\/ ... |]@
-- quasi quoters, which generate functions that take an 'IsOption' option
-- (e.g., a 'SimpleREOptions' value) and yield a 'RE' or 'SearchReplace'
-- as appropriate. For example if you have a 'SimpleREOptions' value in
-- @sro@ then
--
--  @(?=~\/ [ed_|foo$\/\/\/bar|] sro)@
--
-- will compile the @foo$@ RE according to the value of @sro@. For more
-- on specifying RE options see "Text.RE.REOptions".

-- $re
-- The @[re|.*|]@ quasi quoters, with variants for specifying different
-- options to the RE compiler (see "Text.RE.REOptions"), and the
-- specialised back-end types and functions.

-- $ed
-- The @[ed|.*\/\/\/foo|]@ quasi quoters, with variants for specifying different
-- options to the RE compiler (see "Text.RE.REOptions"); for example,
--
--  @[ed|${y}([0-9]{4})-0*${m}([0-9]{2})-0*${d}([0-9]{2})\/\/\/${d}\/${m}\/${y}|]@
--
-- represents a @SearchReplace@ that will convert a YYYY-MM-DD format date
-- into a DD\/MM\/YYYY format date.
--
-- The only difference between these quasi quoters is the RE options that are set,
-- using the same conventions as the @[re| ... |]@ quasi quoters.

-- $isregex
-- The 'IsRegex' class is used to abstract over the different regex back ends and
-- the text types they work with -- see "Text.RE.Tools.IsRegex" for details.
regex-1.1.0.2/Text/RE/TDFA/Text/Lazy.hs0000644000000000000000000001470214254065176015336 0ustar0000000000000000
{-# LANGUAGE NoImplicitPrelude          #-}
{-# LANGUAGE MultiParamTypeClasses      #-}
{-# LANGUAGE FlexibleContexts           #-}
{-# LANGUAGE FlexibleInstances          #-}
{-# OPTIONS_GHC -fno-warn-orphans       #-}
{-# OPTIONS_GHC -fno-warn-duplicate-exports #-}
{-# LANGUAGE CPP                        #-}
#if __GLASGOW_HASKELL__ >= 800
{-# OPTIONS_GHC -fno-warn-redundant-constraints #-}
{-# OPTIONS_GHC -fno-warn-unused-imports #-}
#endif

module Text.RE.TDFA.Text.Lazy
  (
  -- * Tutorial
  -- $tutorial

  -- * The 'Matches' and 'Match' Operators
    (*=~)
  , (?=~)
  -- * The 'SearchReplace' Operators
  , (*=~/)
  , (?=~/)
  -- * The 'Matches' Type
  , Matches
  , matchesSource
  , allMatches
  , anyMatches
  , countMatches
  , matches
  -- * The 'Match' Type
  , Match
  , matchSource
  , matched
  , matchedText
  -- * The Macros and Parsers
  -- $macros
  , module Text.RE.TestBench.Parsers
  -- * The 'RE' Type
  , RE
  , reSource
  -- * Options
  -- $options
  , SimpleREOptions(..)
  -- * Compiling and Escaping REs
  , SearchReplace(..)
  , compileRegex
  , compileRegexWith
  , compileSearchReplace
  , compileSearchReplaceWith
  , escape
  , escapeWith
  , escapeREString
  -- * The Classic regex-base Match Operators
  , (=~)
  , (=~~)
  -- * The re Quasi Quoters
  -- $re
  , re
  , reMultilineSensitive
  , reMultilineInsensitive
  , reBlockSensitive
  , reBlockInsensitive
  , reMS
  , reMI
  , reBS
  , reBI
  , re_
  -- * The Ed Quasi Quoters
  -- $ed
  , edMultilineSensitive
  , edMultilineInsensitive
  , edBlockSensitive
  , edBlockInsensitive
  , ed
  , edMS
  , edMI
  , edBS
  , edBI
  , ed_
  -- * The cp Quasi Quoters
  , cp
  -- * IsRegex
  -- $isregex
  , module Text.RE.Tools.IsRegex
  ) where

import           Control.Monad.Fail
import qualified Data.Text.Lazy                as TL
import           Data.Typeable
import           Prelude.Compat
import           Text.RE.REOptions
import           Text.RE.Replace
import           Text.RE.TestBench.Parsers
import           Text.RE.Tools.IsRegex
import           Text.RE.ZeInternals
import           Text.RE.ZeInternals.SearchReplace.TDFA.Text.Lazy
import           Text.RE.ZeInternals.TDFA
import           Text.Regex.Base
import qualified Text.Regex.TDFA               as TDFA
-- NB regex-base instance imports may be needed for some API modules

-- | find all the matches in the argument text; e.g., to count the number
-- of naturals in s:
--
--   @countMatches $ s *=~ [re|[0-9]+|]@
--
(*=~) :: TL.Text
      -> RE
      -> Matches TL.Text
txt *=~ rex = addCaptureNamesToMatches names (match (reRegex rex) txt)
  where
    names = reCaptureNames rex

-- | find the first match in the argument text; e.g., to test if there
-- is a natural number in the input text:
--
--   @matched $ s ?=~ [re|[0-9]+|]@
--
(?=~) :: TL.Text
      -> RE
      -> Match TL.Text
txt ?=~ rex = addCaptureNamesToMatch names (match (reRegex rex) txt)
  where
    names = reCaptureNames rex

-- | search and replace all matches in the argument text; e.g., this section
-- will convert every YYYY-MM-DD format date in its argument text into a
-- DD\/MM\/YYYY date:
--
--   @(*=~\/ [ed|${y}([0-9]{4})-0*${m}([0-9]{2})-0*${d}([0-9]{2})\/\/\/${d}\/${m}\/${y}|])@
--
(*=~/) :: TL.Text -> SearchReplace RE TL.Text -> TL.Text
txt *=~/ sr = searchReplaceAll sr txt

-- | search and replace the first occurrence only (if any) in the input text
-- e.g., to prefix the first string of four hex digits in the input text,
-- if any, with @0x@:
--
--  @(?=~\/ [ed|[0-9A-Fa-f]{4}\/\/\/0x$0|])@
--
(?=~/) :: TL.Text -> SearchReplace RE TL.Text -> TL.Text
txt ?=~/ sr = searchReplaceFirst sr txt

-- | the `regex-base` polymorphic match operator
(=~) :: ( Typeable a
        , RegexContext TDFA.Regex TL.Text a
        )
     => TL.Text
     -> RE
     -> a
txt =~ rex = addCaptureNames (reCaptureNames rex) (match (reRegex rex) txt)

-- | the `regex-base` monadic, polymorphic match operator
(=~~) :: ( Monad m, MonadFail m
         , Functor m
         , Typeable a
         , RegexContext TDFA.Regex TL.Text a
         )
      => TL.Text
      -> RE
      -> m a
txt =~~ rex = fmap (addCaptureNames (reCaptureNames rex)) (matchM (reRegex rex) txt)

-- | the 'IsRegex' methods are the operators above with the regex
-- argument first, plus the TDFA compilers applied to the unpacked source
instance IsRegex RE TL.Text where
  matchOnce rex txt                  = txt ?=~ rex
  matchMany rex txt                  = txt *=~ rex
  makeRegexWith opts src             = compileRegexWith opts (unpackR src)
  makeSearchReplaceWith opts src tpl = compileSearchReplaceWith opts (unpackR src) (unpackR tpl)
  regexSource rex                    = packR (reSource rex)

-- $tutorial
-- We have a regex tutorial at http://tutorial.regex.uk.

-- $macros
-- There are a number of RE macros and corresponding Haskell parsers
-- for parsing the matched text into appropriate Haskell types. See
-- the [Macros Tables](http://regex.uk/macros) for details.

-- $options
-- You can specify different compilation options by appending a suffix
-- to the name of an @[re| ... |]@ or @[ed| ... \/\/\/ ... |]@ quasi quoter
-- to select the corresponding compilation option. For example, the
-- section,
--
--  @(?=~\/ [edBlockInsensitive|foo$\/\/\/bar|])@
--
-- will replace a @foo@ suffix of the argument text, of any
-- capitalisation, with a (lower case) @bar@. If you need to specify the
-- options dynamically, use the @[re_| ... |]@ and @[ed_| ... \/\/\/ ... |]@
-- quasi quoters, which generate functions that take an 'IsOption' option
-- (e.g., a 'SimpleREOptions' value) and yield a 'RE' or 'SearchReplace'
-- as appropriate. For example if you have a 'SimpleREOptions' value in
-- @sro@ then
--
--  @(?=~\/ [ed_|foo$\/\/\/bar|] sro)@
--
-- will compile the @foo$@ RE according to the value of @sro@. For more
-- on specifying RE options see "Text.RE.REOptions".

-- $re
-- The @[re|.*|]@ quasi quoters, with variants for specifying different
-- options to the RE compiler (see "Text.RE.REOptions"), and the
-- specialised back-end types and functions.

-- $ed
-- The @[ed|.*\/\/\/foo|]@ quasi quoters, with variants for specifying different
-- options to the RE compiler (see "Text.RE.REOptions"); for example,
--
--  @[ed|${y}([0-9]{4})-0*${m}([0-9]{2})-0*${d}([0-9]{2})\/\/\/${d}\/${m}\/${y}|]@
--
-- represents a @SearchReplace@ that will convert a YYYY-MM-DD format date
-- into a DD\/MM\/YYYY format date.
--
-- The only difference between these quasi quoters is the RE options that are set,
-- using the same conventions as the @[re| ... |]@ quasi quoters.

-- $isregex
-- The 'IsRegex' class is used to abstract over the different regex back ends and
-- the text types they work with -- see "Text.RE.Tools.IsRegex" for details.
regex-1.1.0.2/Text/RE/TestBench.hs0000644000000000000000000000175514254065176014640 0ustar0000000000000000
{-# OPTIONS_GHC -fno-warn-warnings-deprecations #-}
{-# LANGUAGE OverloadedStrings #-}

module Text.RE.TestBench
  (
  -- * The Test Bench Tutorial
  -- $tutorial

  -- * The Test Bench
    MacroEnv
  , MacroDescriptor(..)
  , RegexSource(..)
  , WithCaptures(..)
, RegexType , isTDFA , isPCRE , presentRegexType -- ** Constructing a MacrosEnv , mkMacros -- ** Formatting Macros , formatMacroTable , formatMacroSummary , formatMacroSources , formatMacroSource , mdRegexSource -- ** Formatting Macros , testMacroEnv , runTests , runTests' -- * The Parsers , module Text.RE.TestBench.Parsers -- * The Match Type , Match ) where import Text.RE.TestBench.Parsers import Text.RE.ZeInternals.TestBench import Text.RE.ZeInternals.Types.Match -- $tutorial -- This API module provides a test bench for developing, documenting and -- testing regex RE macros. -- -- See the tutorials at http://re-tutorial-testbench.regex.uk regex-1.1.0.2/Text/RE/TestBench/Parsers.hs0000644000000000000000000000070114254065176016245 0ustar0000000000000000module Text.RE.TestBench.Parsers ( parseInteger , parseHex , parseDouble , parseString , parseSimpleString , parseDate , parseSlashesDate , parseTimeOfDay , parseTimeZone , parseDateTime , parseDateTime8601 , parseDateTimeCLF , parseShortMonth , shortMonthArray , IPV4Address , parseIPv4Address , Severity(..) , parseSeverity , severityKeywords ) where import Text.RE.ZeInternals.TestBench.Parsers regex-1.1.0.2/Text/RE/Tools.hs0000644000000000000000000000212014254065176014044 0ustar0000000000000000{-# OPTIONS_GHC -fno-warn-duplicate-exports #-} module Text.RE.Tools ( -- * The Tools Tutorial -- $tutorial -- * Sed sed , sed' -- * Grep , grep , Verbosity(..) , Line(..) , grepLines , grepFilter , GrepScript , grepWithScript , report , linesMatched -- * Lex , alex , alex' -- * Find , FindMethods(..) , findMatches_ , findMatches_' -- * IsRegex , IsRegex(..) , SearchReplace(..) , searchReplaceAll , searchReplaceFirst -- * Edit , Edits(..) , Edit(..) , LineEdit(..) , applyEdits , applyEdit , applyLineEdit -- * LineNo , LineNo(..) 
, firstLine
  , getLineNo
  , lineNo
  -- * Replace
  , module Text.RE.Replace
  ) where

import           Text.RE.Replace
import           Text.RE.Tools.Edit
import           Text.RE.Tools.Find
import           Text.RE.Tools.Grep
import           Text.RE.Tools.Lex
import           Text.RE.Tools.Sed

-- $tutorial
-- This API module provides some familiar RE tools on top of the core
-- package functions and types.
--
-- See the Regex Tools tutorial at http://re-tutorial-tools.regex.uk
regex-1.1.0.2/Text/RE/Tools/Edit.lhs0000644000000000000000000001133114254065176015111 0ustar0000000000000000
\begin{code}
{-# LANGUAGE NoImplicitPrelude          #-}
{-# LANGUAGE RecordWildCards            #-}
{-# LANGUAGE DeriveFunctor              #-}
{-# LANGUAGE CPP                        #-}
#if __GLASGOW_HASKELL__ >= 800
{-# OPTIONS_GHC -fno-warn-redundant-constraints #-}
#endif

module Text.RE.Tools.Edit
  (
  -- * Editing
  -- $tutorial
    Edits(..)
  , Edit(..)
  , LineEdit(..)
  , applyEdits
  , applyEdit
  , applyLineEdit
  -- * IsRegex
  , IsRegex(..)
  , SearchReplace(..)
  , searchReplaceAll
  , searchReplaceFirst
  -- * LineNo
  , LineNo(..)
  , firstLine
  , getLineNo
  , lineNo
  -- * Replace
  , module Text.RE.Replace
  ) where

import           Data.Maybe
import           Prelude.Compat
import           Text.RE.Replace
import           Text.RE.Tools.IsRegex
import           Text.RE.ZeInternals.Types.LineNo
\end{code}


\begin{code}
-- | an 'Edits' script will, for each line in the file, either perform
-- the action selected by the first RE in the list to match, or perform
-- all of the actions on the line, arranged as a pipeline
data Edits m re s
  = Select ![Edit m re s]   -- ^ for each line select the first @Edit@ to match each line and edit the line with it
  | Pipe   ![Edit m re s]   -- ^ for each line apply every edit that matches in turn to the line

-- | each Edit action specifies how the match should be processed
data Edit m re s
  = Template !(SearchReplace re s)
        -- ^ replace the match with this template text, substituting ${capture} as appropriate
  | Function !re REContext !(LineNo->Match s->RELocation->Capture s->m (Maybe s))
        -- ^ use this function to replace the 'REContext' specified captures in each line matched
  | LineEdit !re           !(LineNo->Matches s->m (LineEdit s))
        -- ^ use this function to edit each line matched

-- | a LineEdit is the most general action that can be performed on a line
-- and is the only means of deleting a line
data LineEdit s
  = NoEdit                  -- ^ do not edit this line but leave as is
  | ReplaceWith !s          -- ^ replace the line with this text (terminating newline should not be included)
  | Delete                  -- ^ delete this line altogether
  deriving (Functor,Show)
\end{code}


\begin{code}
-- | apply an 'Edits' script to a single line: a 'Select' script runs
-- the first edit whose RE matches, a 'Pipe' script runs every edit in
-- list order, each one seeing the output of the previous one
applyEdits :: (IsRegex re s,Monad m,Functor m)
           => LineNo
           -> Edits m re s
           -> s
           -> m s
applyEdits lno ez0 s0 = case ez0 of
  Select ez -> select_edit_scripts lno ez s0
  Pipe   ez -> pipe_edit_scripts   lno ez s0

-- | apply a single edit action to a line, the function in the first argument
-- being used to add a new line onto the end of the line where appropriate;
-- the function returns @Nothing@ if no edit is to be performed on the line,
-- @Just mempty@ to delete the line
applyEdit :: (IsRegex re s,Monad m,Functor m)
          => (s->s)
          -> LineNo
          -> Edit m re s
          -> s
          -> m (Maybe s)
applyEdit anl lno edit s =
  case allMatches acs of
    -- the RE did not match anywhere on the line: report "no edit"
    [] -> return Nothing
    _  -> fmap Just $ case edit of
      Template srch_rpl -> return $ anl $ replaceAll (getTemplate srch_rpl) acs
      Function _ ctx f  -> anl <$> replaceAllCapturesM replaceMethods ctx (f lno) acs
      -- a 'NoEdit' result on a matching line yields the line unchanged
      -- (with @anl@ applied) rather than @Nothing@
      LineEdit _     g  -> fromMaybe (anl s) . applyLineEdit anl <$> g lno acs
  where
    -- every match of this edit's RE against the line
    acs = matchMany rex s
    rex = case edit of
      Template srch_rpl -> getSearch srch_rpl
      Function rex_ _ _ -> rex_
      LineEdit rex_   _ -> rex_

-- | apply a 'LineEdit' to a line, using the function in the first
-- argument to append a new line to the result; Nothing should be
-- returned if no edit is to be performed,  @Just mempty@ to
-- delete the line
applyLineEdit :: Monoid s => (s->s) -> LineEdit s -> Maybe s
applyLineEdit _    NoEdit         = Nothing
applyLineEdit anl (ReplaceWith s) = Just $ anl s
applyLineEdit _    Delete         = Just mempty

-- | run the first edit in the list whose RE matches the line; if none
-- match, the line is returned with 'appendNewlineR' applied
select_edit_scripts :: (IsRegex re s,Monad m,Functor m)
                    => LineNo
                    -> [Edit m re s]
                    -> s
                    -> m s
select_edit_scripts lno ps0 s = select ps0
  where
    select []           = return $ appendNewlineR s
    select (edit:edits) =
      applyEdit appendNewlineR lno edit s >>= maybe (select edits) return

-- | thread the line through every edit in list order (first edit
-- first), each edit seeing the output of the one before it; edits whose
-- RE does not match leave the text unchanged, and 'appendNewlineR' is
-- applied once to the final result.
--
-- This was previously written as a @foldr@ over the edits, which ran
-- the /last/ edit first, i.e. the pipeline executed back to front,
-- contradicting the documentation of 'Pipe'.
pipe_edit_scripts :: (IsRegex re s,Monad m,Functor m)
                  => LineNo
                  -> [Edit m re s]
                  -> s
                  -> m s
pipe_edit_scripts lno edits0 s0 = appendNewlineR <$> pipe s0 edits0
  where
    pipe s []           = return s
    pipe s (edit:edits) = do
      -- @Nothing@ means this edit's RE did not match: keep the text as it is
      s' <- fromMaybe s <$> applyEdit id lno edit s
      pipe s' edits
\end{code}


\begin{code}
-- $tutorial
-- The Edit toolkit looks for REs that match a text and runs the
-- associated actions.
--
-- See the Regex Tools tutorial at http://re-tutorial-tools.regex.uk
\end{code}
regex-1.1.0.2/Text/RE/Tools/Find.lhs0000644000000000000000000000570014254065176015107 0ustar0000000000000000
\begin{code}
{-# LANGUAGE NoImplicitPrelude          #-}
{-# LANGUAGE RecordWildCards            #-}
{-# LANGUAGE CPP                        #-}
#if __GLASGOW_HASKELL__ >= 800
{-# OPTIONS_GHC -fno-warn-redundant-constraints #-}
#endif

module Text.RE.Tools.Find
  (
  -- * Find
  -- $tutorial
    FindMethods(..)
  , findMatches_
  , findMatches_'
  -- * IsRegex
  , IsRegex(..)
  , SearchReplace(..)
, searchReplaceAll
  , searchReplaceFirst
  -- * Replace
  , module Text.RE.Replace
  ) where

import qualified Data.List                      as L
import           Prelude.Compat
import           Text.RE.Replace
import           Text.RE.Tools.IsRegex
\end{code}


\begin{code}
-- | as we don't want the @directory@ and FilePath dependencies
-- we will abstract the three calls we need into this record type
data FindMethods s =
  FindMethods
    { doesDirectoryExistDM :: s -> IO Bool    -- ^ doesDirectoryExist from
                                              -- System.Directory
    , listDirectoryDM      :: s -> IO [s]     -- ^ either getDirectoryContents
                                              -- or listDirectory from
                                              -- System.Directory
    , combineDM            :: s -> s -> s     -- ^ </> from System.FilePath
    }
\end{code}


\begin{code}
-- | recursively list all files whose filename matches given RE,
-- sorting the list into ascending order.
--
-- NOTE(review): this comment used to add that a trailing \'/\' on the
-- argument path is removed; nothing in this module does that, so any
-- such normalisation would have to come from the supplied 'combineDM'
-- -- confirm before relying on it.
findMatches_ :: IsRegex re s => FindMethods s -> re -> s -> IO [s]
findMatches_ methods = findMatches_' methods L.sort matched

-- | recursively list all files whose filename matches given RE,
-- using the given function to determine which matches to accept
findMatches_' :: IsRegex re s
              => FindMethods s         -- ^ the directory and filepath methods
              -> ([s]->[s])            -- ^ result post-processing function
              -> (Match s->Bool)       -- ^ filtering function
              -> re                    -- ^ re to be matched against the leaf filename
              -> s                     -- ^ root directory of the search
              -> IO [s]
findMatches_' methods post accept rex root =
  -- the root has no leaf name of its own, so it is tested against ""
  fmap post (find_ methods accept rex (packR "") root)

-- walk the tree below @path@; @leaf@ is the last component of @path@
-- (the name the RE is tested against) and @path@ the full path returned
find_ :: IsRegex re s
      => FindMethods s
      -> (Match s->Bool)
      -> re
      -> s
      -> s
      -> IO [s]
find_ methods@FindMethods{..} accept rex leaf path = do
    is_dir <- doesDirectoryExistDM path
    if is_dir
      then do
        entries <- listDirectoryDM path
        found   <- mapM descend (filter ordinary entries)
        return (concat found)
      else
        -- a file is reported when its path is non-empty and the match
        -- of the RE against its leaf name passes the filter
        return [ path | lengthR path /= 0, accept (matchOnce rex leaf) ]
  where
    descend entry  = find_ methods accept rex entry (path `combineDM` entry)
    -- skip the self and parent entries that getDirectoryContents reports
    ordinary entry = entry `notElem` [packR ".",packR ".."]
\end{code}


\begin{code}
-- $tutorial
-- The Find toolkit traverses directory trees invoking
-- actions for each
-- file that matches a RE.
--
-- See the Regex Tools tutorial at http://re-tutorial-tools.regex.uk
\end{code}
regex-1.1.0.2/Text/RE/Tools/Grep.lhs0000644000000000000000000000674714254065176015140 0ustar0000000000000000
\begin{code}
{-# LANGUAGE NoImplicitPrelude          #-}
{-# LANGUAGE RecordWildCards            #-}
{-# LANGUAGE FlexibleContexts           #-}
{-# LANGUAGE CPP                        #-}

module Text.RE.Tools.Grep
  (
  -- * Grep
  -- $tutorial
    grep
  , Verbosity(..)
  , Line(..)
  , grepLines
  , grepFilter
  , GrepScript
  , grepWithScript
  , report
  , linesMatched
  -- * IsRegex
  , IsRegex(..)
  , SearchReplace(..)
  , searchReplaceAll
  , searchReplaceFirst
  -- * LineNo
  , LineNo(..)
  , firstLine
  , getLineNo
  , lineNo
  -- * Replace
  , module Text.RE.Replace
  ) where

import qualified Data.ByteString.Lazy.Char8               as LBS
import           Prelude.Compat
import           Text.Printf
import           Text.RE.Replace
import           Text.RE.Tools.IsRegex
import           Text.RE.ZeInternals.Types.LineNo
\end{code}


\begin{code}
-- | operates a bit like classic @grep@: prints the 'report' for the
-- lines of the file selected by the 'Verbosity' flag
grep :: IsRegex re LBS.ByteString
     => Verbosity
     -> re
     -> FilePath
     -> IO ()
grep v rex fp = do
  lns <- grepLines rex fp
  putStr (report v lns)
\end{code}

\begin{code}
-- | specifies whether to return the lines matched or missed
data Verbosity
  = LinesMatched
  | LinesNotMatched
  deriving (Show,Eq,Ord)
\end{code}

\begin{code}
-- | 'grepLines' returns a 'Line' for each line in the file, listing all
-- of the 'Matches' for that line
data Line s =
  Line
    { getLineNumber  :: LineNo    -- ^ the 'LineNo' for this line
    , getLineMatches :: Matches s -- ^ all the 'Matches' of the RE on this line
    }
  deriving (Show)
\end{code}

\begin{code}
-- | returns a 'Line' for each line in the file, enumerating all of the
-- matches for that line
grepLines :: IsRegex re LBS.ByteString
          => re
          -> FilePath
          -> IO [Line LBS.ByteString]
grepLines rex fp = fmap (grepFilter rex) (LBS.readFile fp)
\end{code}

\begin{code}
-- | returns a 'Line' for each line in the argument text, enumerating
-- all of the matches for that line
grepFilter :: IsRegex re s => re -> s -> [Line s]
grepFilter rex txt =
  -- a one-entry script whose action always succeeds, so every line is kept
  grepWithScript [(rex, \lno mtchs -> Just (Line lno mtchs))] (linesR txt)
\end{code}

\begin{code}
-- | a GrepScript lists RE-action associations, with the first RE to match
-- a line selecting the action to be executed on each line in the file
type GrepScript re s t = [(re,LineNo -> Matches s -> Maybe t)]

-- | given a list of lines, apply the 'GrepScript' to each line of the file;
-- for each line the entries are tried in order and the first action to
-- return a @Just@ contributes its result, a line for which every action
-- returns @Nothing@ contributing nothing
grepWithScript :: IsRegex re s => GrepScript re s t -> [s] -> [t]
grepWithScript scr = go firstLine
  where
    go _   []         = []
    -- the line number is forced at every step, as in the original loop
    go lno (txt:rest) = lno `seq` foldr attempt remaining scr
      where
        remaining = go (succ lno) rest

        attempt (rex,action) fallback =
          case action lno (matchMany rex txt) of
            Just out -> out : remaining
            Nothing  -> fallback

-- | generate a grep report from a list of 'Line'
report :: Verbosity -> [Line LBS.ByteString] -> String
report v lns = unlines [ render ln | ln <- linesMatched v lns ]
  where
    render :: Line LBS.ByteString -> String
    render (Line lno mtchs) =
      printf "%05d %s" (getLineNo lno) (LBS.unpack (matchesSource mtchs))

-- | given a 'Verbosity' flag filter out either the lines matched or not
-- matched
linesMatched :: Verbosity -> [Line s] -> [Line s]
linesMatched v = filter wanted
  where
    wanted ln = polarity (anyMatches (getLineMatches ln))

    polarity = case v of
      LinesMatched    -> id
      LinesNotMatched -> not
\end{code}


\begin{code}
-- $tutorial
-- The Grep toolkit matches REs against each line of a text.
--
-- See the Regex Tools tutorial at http://re-tutorial-tools.regex.uk
\end{code}
regex-1.1.0.2/Text/RE/Tools/IsRegex.hs0000644000000000000000000000064514254065176015424 0ustar0000000000000000
module Text.RE.Tools.IsRegex
  (
  -- * IsRegex
  -- $tutorial
    IsRegex(..)
  , SearchReplace(..)
  , searchReplaceAll
  , searchReplaceFirst
  ) where

import           Text.RE.ZeInternals.Types.IsRegex

-- $tutorial
-- The @IsRegex@ class abstracts over each regex back end and the
-- text types they support allowing general regex tools to be constructed.
--
-- See the Regex Tools tutorial at http://re-tutorial-tools.regex.uk
regex-1.1.0.2/Text/RE/Tools/Lex.hs0000644000000000000000000000100714254065176014577 0ustar0000000000000000
module Text.RE.Tools.Lex
  (
  -- * Lex
  -- $tutorial
    alex
  , alex'
  -- * IsRegex
  , IsRegex(..)
  , SearchReplace(..)
  , searchReplaceAll
  , searchReplaceFirst
  -- * Replace
  , module Text.RE.Replace
  ) where

import           Text.RE.Replace
import           Text.RE.Tools.IsRegex
import           Text.RE.ZeInternals.Tools.Lex

-- $tutorial
-- The Lex toolkit uses REs to identify tokens in a file, returning a
-- list of tokens.
-- -- See the Regex Tools tutorial at http://re-tutorial-tools.regex.uk regex-1.1.0.2/Text/RE/Tools/Sed.lhs0000644000000000000000000000412614254065176014743 0ustar0000000000000000\begin{code} {-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE RecordWildCards #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE CPP #-} #if __GLASGOW_HASKELL__ >= 800 {-# OPTIONS_GHC -fno-warn-redundant-constraints #-} #endif module Text.RE.Tools.Sed ( -- * Sed -- $tutorial sed , sed' -- * Edit , Edits(..) , Edit(..) , LineEdit(..) , applyEdits , applyEdit , applyLineEdit -- * IsRegex , IsRegex(..) , SearchReplace(..) , searchReplaceAll , searchReplaceFirst -- * LineNo , LineNo(..) , firstLine , getLineNo , lineNo -- * Replace , module Text.RE.Replace ) where import qualified Data.ByteString.Lazy.Char8 as LBS import Prelude.Compat import Text.RE.Replace import Text.RE.Tools.Edit \end{code} \begin{code} -- | read a file, apply an 'Edits' script to each line of it and -- write the file out again; "-" is used to indicate standard input -- or standard output as appropriate sed :: IsRegex re LBS.ByteString => Edits IO re LBS.ByteString -> FilePath -> FilePath -> IO () sed escr i_fp o_fp = do lns <- LBS.lines <$> read_file i_fp lns' <- sequence [ applyEdits lno escr s | (lno,s)<-zip [firstLine..] lns ] write_file o_fp $ LBS.concat lns' \end{code} \begin{code} -- | apply an 'Edits' script to each line of the argument text sed' :: (IsRegex re a,Monad m,Functor m) => Edits m re a -> a -> m a sed' escr t = do mconcat <$> sequence [ applyEdits lno escr s | (lno,s)<-zip [firstLine..] $ linesR t ] \end{code} \begin{code} read_file :: FilePath -> IO LBS.ByteString read_file "-" = LBS.getContents read_file fp = LBS.readFile fp write_file :: FilePath -> LBS.ByteString ->IO () write_file "-" = LBS.putStr write_file fp = LBS.writeFile fp \end{code} \begin{code} -- $tutorial -- The Sed toolkit applies @Edits@ scripts to each line -- of a text, running the actions and adjusting each line -- accordingly. 
-- -- See the Regex Tools tutorial at http://re-tutorial-tools.regex.uk \end{code} regex-1.1.0.2/Text/RE/ZeInternals.hs0000644000000000000000000000310014254065176015201 0ustar0000000000000000module Text.RE.ZeInternals ( -- * The regex Internal Modules -- $internals -- * Text.RE.ZeInternals.AddCaptureNames addCaptureNames , addCaptureNamesToMatches , addCaptureNamesToMatch -- * Text.RE.ZeInternals.EscapeREString , escapeREString -- * Text.RE.ZeInternals.NamedCaptures , cp , extractNamedCaptures , idFormatTokenREOptions , Token , validToken , formatTokens , formatTokens' , formatTokens0 , scan -- * Text.RE.ZeInternals.Replace , expandMacros -- * Text.RE.ZeInternals.PreludeMacros , PreludeMacro(..) , presentPreludeMacro , preludeMacros , preludeMacroTable , preludeMacroSummary , preludeMacroSources , preludeMacroSource , preludeMacroEnv -- * Text.RE.ZeInternals.SearchReplace , unsafeCompileSearchReplace_ , compileSearchReplace_ , compileSearchAndReplace_ -- * Text.RE.ZeInternals.QQ , QQFailure(..) 
, qq0 -- * Text.RE.ZeInternals.TestBench , mkTDFA , mkPCRE , badMacros ) where import Text.RE.ZeInternals.AddCaptureNames import Text.RE.ZeInternals.EscapeREString import Text.RE.ZeInternals.NamedCaptures import Text.RE.ZeInternals.PreludeMacros import Text.RE.ZeInternals.QQ import Text.RE.ZeInternals.Replace import Text.RE.ZeInternals.SearchReplace import Text.RE.ZeInternals.TestBench -- $internals -- This module contains just what the test suite (re-tests) in regex-examples -- needs from the package internals to do its job and the ZeInternals -- types and functions needed by the regex-with-pcre package regex-1.1.0.2/Text/RE/ZeInternals/Types/Poss.hs0000644000000000000000000000143614254065176017243 0ustar0000000000000000{-# OPTIONS_GHC -fno-warn-unused-imports #-} module Text.RE.ZeInternals.Types.Poss where import Control.Monad.Fail data Poss a = Eek String | Yup a deriving (Eq,Ord,Show) instance Functor Poss where fmap f p = case p of Eek m -> Eek m Yup x -> Yup $ f x instance Applicative Poss where pure = Yup (<*>) p1 p2 = case p1 of Eek m -> Eek m Yup f -> case p2 of Eek n -> Eek n Yup x -> Yup $ f x instance Monad Poss where return = pure (>>=) p f = case p of Eek m -> Eek m Yup x -> f x instance MonadFail Poss where fail = Eek poss :: (String->b) -> (a->b) -> Poss a -> b poss f _ (Eek s) = f s poss _ g (Yup x) = g x poss2either :: Poss a -> Either String a poss2either (Eek m) = Left m poss2either (Yup x) = Right x regex-1.1.0.2/Text/RE/ZeInternals/AddCaptureNames.hs0000644000000000000000000000567314254065176020222 0ustar0000000000000000{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE DeriveDataTypeable #-} {-# LANGUAGE ExistentialQuantification #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE OverloadedStrings #-} module Text.RE.ZeInternals.AddCaptureNames where import qualified Data.ByteString.Char8 as B import qualified Data.ByteString.Lazy.Char8 as LBS import Data.Dynamic import Data.Maybe import qualified Data.Sequence as S import qualified Data.Text 
as T import qualified Data.Text.Lazy as TL import Prelude.Compat import Text.RE.ZeInternals.Types.CaptureID import Text.RE.ZeInternals.Types.Match import Text.RE.ZeInternals.Types.Matches import Unsafe.Coerce -- | a convenience function used by the API modules to insert -- capture names extracted from the parsed RE into the (*=~) result addCaptureNamesToMatches :: CaptureNames -> Matches a -> Matches a addCaptureNamesToMatches cnms mtchs = mtchs { allMatches = map (addCaptureNamesToMatch cnms) $ allMatches mtchs } -- | a convenience function used by the API modules to insert -- capture names extracted from the parsed RE into the (?=~) result addCaptureNamesToMatch :: CaptureNames -> Match a -> Match a addCaptureNamesToMatch cnms mtch = mtch { captureNames = cnms } -- | a hairy dynamically-typed function used with the legacy (=~) and (=~~) -- to see if it can/should add the capture names extracted from the RE -- into the polymorphic result of the operator (it does for any Match -- or Matches type, provided it is parameterised over a recognised type). -- The test suite is all over this one, testing all of these cases. 
addCaptureNames :: Typeable a => CaptureNames -> a -> a addCaptureNames cnms x = fromMaybe x $ listToMaybe $ catMaybes [ test_match x ( proxy :: String ) , test_matches x ( proxy :: String ) , test_match x ( proxy :: B.ByteString ) , test_matches x ( proxy :: B.ByteString ) , test_match x ( proxy :: LBS.ByteString ) , test_matches x ( proxy :: LBS.ByteString ) , test_match x ( proxy :: T.Text ) , test_matches x ( proxy :: T.Text ) , test_match x ( proxy :: TL.Text ) , test_matches x ( proxy :: TL.Text ) , test_match x ( proxy :: S.Seq Char ) , test_matches x ( proxy :: S.Seq Char ) ] where test_match :: Typeable t => r -> t -> Maybe r test_match r t = f r t $ addCaptureNamesToMatch cnms <$> fromDynamic dyn where f :: r' -> t' -> Maybe (Match t') -> Maybe r' f _ _ = unsafeCoerce test_matches :: Typeable t => r -> t -> Maybe r test_matches r t = f r t $ addCaptureNamesToMatches cnms <$> fromDynamic dyn where f :: r' -> t' -> Maybe (Matches t') -> Maybe r' f _ _ = unsafeCoerce dyn :: Dynamic dyn = toDyn x proxy :: a proxy = error "addCaptureNames" regex-1.1.0.2/Text/RE/ZeInternals/EscapeREString.hs0000644000000000000000000000120214254065176020020 0ustar0000000000000000module Text.RE.ZeInternals.EscapeREString where -- | Convert a string into a regular expression that will match that -- string escapeREString :: String -> String escapeREString = foldr esc [] where esc c t | isMetaChar c = '\\' : c : t | otherwise = c : t -- | returns True iff the character is an RE meta character -- ('[', '*', '{', etc.) isMetaChar :: Char -> Bool isMetaChar c = case c of '^' -> True '\\' -> True '.' -> True '|' -> True '*' -> True '?' 
-> True '+' -> True '(' -> True ')' -> True '[' -> True ']' -> True '{' -> True '}' -> True '$' -> True _ -> False regex-1.1.0.2/Text/RE/ZeInternals/NamedCaptures.lhs0000644000000000000000000001456614254065176020132 0ustar0000000000000000\begin{code} {-# LANGUAGE DeriveGeneric #-} {-# LANGUAGE RecordWildCards #-} {-# LANGUAGE CPP #-} #if __GLASGOW_HASKELL__ >= 800 {-# LANGUAGE TemplateHaskellQuotes #-} #else {-# LANGUAGE QuasiQuotes #-} {-# LANGUAGE TemplateHaskell #-} #endif module Text.RE.ZeInternals.NamedCaptures ( cp , extractNamedCaptures , idFormatTokenREOptions , Token(..) , validToken , formatTokens , formatTokens' , formatTokens0 , scan ) where import Data.Char import qualified Data.HashMap.Strict as HM import qualified Data.Text as T import GHC.Generics import qualified Language.Haskell.TH as TH import Language.Haskell.TH.Quote import Text.RE.ZeInternals.PreludeMacros import Text.RE.ZeInternals.QQ import Text.RE.ZeInternals.TestBench import Text.RE.ZeInternals.Tools.Lex import Text.RE.ZeInternals.Types.CaptureID import Text.RE.ZeInternals.Types.Match import Text.RE.ZeInternals.Types.Poss import Text.Regex.TDFA -- | quasi quoter for CaptureID: @[cp|0|]@, @[cp|0|]@, etc., -- indexing captures by classic positional numbers, and @[cp|foo|]@, -- etc., referencing a named capture @[re| ... ${foo}( ... ) ... |]@. 
cp :: QuasiQuoter cp = (qq0 "cp") { quoteExp = parse_capture } -- | extract the CaptureNames from an RE or return an error diagnostic -- if the RE is not well formed; also returns the total number of captures -- in the RE extractNamedCaptures :: String -> Either String ((Int,CaptureNames),String) extractNamedCaptures s = Right (analyseTokens tks,formatTokens tks) where tks = scan s \end{code} Token ----- \begin{code} -- | our RE scanner returns a list of these tokens data Token = ECap (Maybe String) | PGrp | PCap | Bra | BS Char | Other Char deriving (Show,Generic,Eq) -- | check that a token is well formed validToken :: Token -> Bool validToken tkn = case tkn of ECap mb -> maybe True check_ecap mb PGrp -> True PCap -> True Bra -> True BS c -> is_dot c Other c -> is_dot c where check_ecap s = not (null s) && all not_br s is_dot c = c/='\n' not_br c = not $ c `elem` "{}\n" \end{code} Analysing [Token] -> CaptureNames --------------------------------- \begin{code} -- | analyse a token stream, returning the number of captures and the -- 'CaptureNames' analyseTokens :: [Token] -> (Int,CaptureNames) analyseTokens tks0 = case count_em 1 tks0 of (n,as) -> (n-1, HM.fromList as) where count_em n [] = (n,[]) count_em n (tk:tks) = case count_em (n `seq` n+d) tks of (n',as) -> (n',bd++as) where (d,bd) = case tk of ECap (Just nm) -> (,) 1 [(CaptureName $ T.pack nm,CaptureOrdinal n)] ECap Nothing -> (,) 1 [] PGrp -> (,) 0 [] PCap -> (,) 1 [] Bra -> (,) 1 [] BS _ -> (,) 0 [] Other _ -> (,) 0 [] \end{code} Scanning Regex Strings ---------------------- \begin{code} -- | scan a RE string into a list of RE Token scan :: String -> [Token] scan = alex' match al $ oops "top" where al :: [(Regex,Match String->Maybe Token)] al = [ mk "\\$\\{([^{}]+)\\}\\(" $ ECap . Just . x_1 , mk "\\$\\(" $ const $ ECap Nothing , mk "\\(\\?:" $ const PGrp , mk "\\(\\?" $ const PCap , mk "\\(" $ const Bra , mk "\\\\(.)" $ BS . s2c . x_1 , mk "(.|\n)" $ Other . s2c . 
x_1 ] x_1 = captureText $ IsCaptureOrdinal $ CaptureOrdinal 1 s2c [c] = c s2c _ = oops "s2c" mk s f = (poss error id $ makeRegexM s,Just . f) oops m = error $ "NamedCaptures.scan: " ++ m \end{code} Parsing captures ---------------- \begin{code}
-- | the expression quoter installed in 'cp': a quotation made up
-- entirely of digits becomes a positional 'IsCaptureOrdinal' and
-- anything else becomes an 'IsCaptureName'.
--
-- An empty quotation is rejected while the splice is being run:
-- @all isDigit ""@ is vacuously 'True', so without the guard the quoter
-- would splice in @read ""@, which type checks but only fails when the
-- capture ID is evaluated at run time.
parse_capture :: String -> TH.Q TH.Exp
parse_capture s
  | null s        = fail "cp: empty capture identifier"
  | all isDigit s = [|IsCaptureOrdinal $ CaptureOrdinal $ read s|]
  | otherwise     = [|IsCaptureName $ CaptureName $ T.pack s|]
\end{code} Formatting [Token] ------------------ \begin{code} -- | format [Token] into an RE string formatTokens :: [Token] -> String formatTokens = formatTokens' defFormatTokenREOptions -- | options for the general Token formatter below data FormatTokenREOptions = FormatTokenREOptions { _fto_regex_type :: Maybe RegexType -- ^ Posix, PCRE or indeterminate REs? , _fto_min_caps :: Bool -- ^ remove captures where possible , _fto_incl_caps :: Bool -- ^ include the captures in the output } deriving (Show) -- | the default configuration for the Token formatter defFormatTokenREOptions :: FormatTokenREOptions defFormatTokenREOptions = FormatTokenREOptions { _fto_regex_type = Nothing , _fto_min_caps = False , _fto_incl_caps = False } -- | a configuration that will preserve the parsed regular expression -- in the output idFormatTokenREOptions :: FormatTokenREOptions idFormatTokenREOptions = FormatTokenREOptions { _fto_regex_type = Nothing , _fto_min_caps = False , _fto_incl_caps = True } -- | the general Token formatter, generating REs according to the options formatTokens' :: FormatTokenREOptions -> [Token] -> String formatTokens' FormatTokenREOptions{..} = foldr f "" where f tk tl = t_s ++ tl where t_s = case tk of ECap mb -> ecap mb PGrp -> if maybe False isTDFA _fto_regex_type then "(" else "(?:" PCap -> "(?" 
Bra -> bra _fto_min_caps BS c -> "\\" ++ [c] Other c -> [c] ecap mb = case _fto_incl_caps of True -> case mb of Nothing -> "$(" Just nm -> "${"++nm++"}(" False -> bra _fto_min_caps bra mc = case mc && maybe False isPCRE _fto_regex_type of True -> "(?:" False -> "(" \end{code} \begin{code} -- this is a reference of formatTokens defFormatTokenREOptions, -- used for testing the latter formatTokens0 :: [Token] -> String formatTokens0 = foldr f "" where f tk tl = t_s ++ tl where t_s = case tk of ECap _ -> "(" PGrp -> "(?:" PCap -> "(?" Bra -> "(" BS c -> "\\" ++ [c] Other c -> [c] \end{code} regex-1.1.0.2/Text/RE/ZeInternals/PreludeMacros.hs0000644000000000000000000006361014254065176017762 0ustar0000000000000000{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE RecordWildCards #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE CPP #-} #if __GLASGOW_HASKELL__ >= 800 {-# OPTIONS_GHC -fno-warn-redundant-constraints #-} #endif module Text.RE.ZeInternals.PreludeMacros ( RegexType , WithCaptures(..) , MacroDescriptor(..) , RegexSource(..) , PreludeMacro(..) 
, presentPreludeMacro , preludeMacros , preludeMacroTable , preludeMacroSummary , preludeMacroSources , preludeMacroSource , preludeMacroEnv , preludeMacroDescriptor ) where import Data.Array import qualified Data.HashMap.Lazy as HML import Data.List import Data.Maybe import qualified Data.Text as T import Data.Time import Prelude.Compat import Text.RE.REOptions import Text.RE.ZeInternals.TestBench import Text.RE.ZeInternals.TestBench.Parsers -- | generate the standard prelude Macros used to parse REs preludeMacros :: (Monad m,Functor m) => (String->m r) -> RegexType -> WithCaptures -> m (Macros r) preludeMacros prs rty wc = mkMacros prs rty wc $ preludeMacroEnv rty -- | format the standard prelude macros in a markdown table preludeMacroTable :: RegexType -> String preludeMacroTable rty = formatMacroTable rty $ preludeMacroEnv rty -- | generate a textual summary of the prelude macros preludeMacroSummary :: RegexType -> PreludeMacro -> String preludeMacroSummary rty = formatMacroSummary rty (preludeMacroEnv rty) . prelude_macro_id -- | generate a plain text table giving the RE for each macro with all -- macros expanded (to NF) preludeMacroSources :: RegexType -> String preludeMacroSources rty = formatMacroSources rty ExclCaptures $ preludeMacroEnv rty -- | generate plain text giving the expanded RE for a single macro preludeMacroSource :: RegexType -> PreludeMacro -> String preludeMacroSource rty = formatMacroSource rty ExclCaptures (preludeMacroEnv rty) . 
prelude_macro_id -- | generate the `MacroEnv` for the standard prelude macros preludeMacroEnv :: RegexType -> MacroEnv preludeMacroEnv rty = fix $ prelude_macro_env rty prelude_macro_env :: RegexType -> MacroEnv -> MacroEnv prelude_macro_env rty env = HML.fromList $ catMaybes [ (,) (prelude_macro_id pm) <$> preludeMacroDescriptor rty env pm | pm<-[minBound..maxBound] ] -- | generate the `MacroDescriptor` for a given `PreludeMacro` preludeMacroDescriptor :: RegexType -> MacroEnv -> PreludeMacro -> Maybe MacroDescriptor preludeMacroDescriptor rty env pm = case pm of PM_nat -> natural_macro rty env pm PM_hex -> natural_hex_macro rty env pm PM_int -> integer_macro rty env pm PM_frac -> decimal_macro rty env pm PM_string -> string_macro rty env pm PM_string_simple -> string_simple_macro rty env pm PM_id -> id_macro rty env pm PM_id' -> id'_macro rty env pm PM_id_ -> id__macro rty env pm PM_date -> date_macro rty env pm PM_date_slashes -> date_slashes_macro rty env pm PM_time -> time_macro rty env pm PM_timezone -> timezone_macro rty env pm PM_datetime -> datetime_macro rty env pm PM_datetime_8601 -> datetime_8601_macro rty env pm PM_datetime_clf -> datetime_clf_macro rty env pm PM_shortmonth -> shortmonth_macro rty env pm PM_address_ipv4 -> address_ipv4_macros rty env pm PM_email_simple -> email_simple_macro rty env pm PM_url -> url_macro rty env pm PM_syslog_severity -> syslog_severity_macro rty env pm -- | an enumeration of all of the prelude macros data PreludeMacro -- numbers = PM_nat | PM_hex | PM_int | PM_frac -- strings | PM_string | PM_string_simple -- identifiers | PM_id | PM_id' | PM_id_ -- dates & times | PM_date | PM_date_slashes | PM_time | PM_timezone | PM_datetime | PM_datetime_8601 | PM_datetime_clf | PM_shortmonth -- addresses | PM_address_ipv4 | PM_email_simple | PM_url -- syslog | PM_syslog_severity deriving (Bounded,Enum,Ord,Eq,Show) -- | naming the macros presentPreludeMacro :: PreludeMacro -> String presentPreludeMacro pm = case pm of PM_id_ -> 
prelude_prefix++"id-" _ -> fmt pm where fmt = (prelude_prefix++) . map tr . drop 3 . show tr '_' = '.' tr c = c -- | all prelude macros are prefixed with this prelude_prefix :: String prelude_prefix = "%" prelude_macro_id :: PreludeMacro -> MacroID prelude_macro_id = MacroID . presentPreludeMacro natural_macro :: RegexType -> MacroEnv -> PreludeMacro -> Maybe MacroDescriptor natural_macro rty env pm = Just $ run_tests rty parseInteger samples env pm MacroDescriptor { macroSource = "[0-9]+" , macroSamples = map fst samples , macroCounterSamples = counter_samples , macroTestResults = [] , macroParser = Just "parseInteger" , macroDescription = "a string of one or more decimal digits" } where samples :: [(String,Int)] samples = [ (,) "0" 0 , (,) "1234567890" 1234567890 , (,) "00" 0 , (,) "01" 1 ] counter_samples = [ "" , "0A" , "-1" ] natural_hex_macro :: RegexType -> MacroEnv -> PreludeMacro -> Maybe MacroDescriptor natural_hex_macro rty env pm = Just $ run_tests rty parseHex samples env pm MacroDescriptor { macroSource = "[0-9a-fA-F]+" , macroSamples = map fst samples , macroCounterSamples = counter_samples , macroTestResults = [] , macroParser = Just "parseHex" , macroDescription = "a string of one or more hexadecimal digits" } where samples :: [(String,Int)] samples = [ (,) "0" 0x0 , (,) "12345678" 0x12345678 , (,) "0abcdef" 0xabcdef , (,) "0ABCDEF" 0xabcdef , (,) "00" 0x0 , (,) "010" 0x10 ] counter_samples = [ "" , "0x10" , "0z" , "-1a" ] integer_macro :: RegexType -> MacroEnv -> PreludeMacro -> Maybe MacroDescriptor integer_macro rty env pm = Just $ run_tests rty parseInteger samples env pm MacroDescriptor { macroSource = "-?[0-9]+" , macroSamples = map fst samples , macroCounterSamples = counter_samples , macroTestResults = [] , macroParser = Just "parseInteger" , macroDescription = "a decimal integer" } where samples :: [(String,Int)] samples = [ (,) "0" 0 , (,) "1234567890" 1234567890 , (,) "00" 0 , (,) "01" 1 , (,) "-1" $ -1 , (,) "-0" 0 ] counter_samples = [ 
"" , "0A" , "+0" ]
-- | the descriptor for 'PM_frac' (presented as @%frac@): an optional
-- leading @-@, one or more decimal digits and an optional fractional
-- part (@.@ followed by one or more digits); the samples below are
-- passed to @run_tests@ together with 'parseDouble'
decimal_macro :: RegexType
              -> MacroEnv
              -> PreludeMacro
              -> Maybe MacroDescriptor
decimal_macro rty env pm =
  Just $ run_tests rty parseDouble samples env pm
    MacroDescriptor
      { macroSource          = "-?[0-9]+(?:\\.[0-9]+)?"
      , macroSamples         = map fst samples
      , macroCounterSamples  = counter_samples
      , macroTestResults     = []
      -- name the parser actually used above (this used to advertise
      -- "parseInteger", copied from the integer macro)
      , macroParser          = Just "parseDouble"
      -- the RE admits a sign and a fraction, so this is not a "natural"
      , macroDescription     = "a decimal number with an optional fractional part"
      }
  where
    samples :: [(String,Double)]
    samples =
        [ (,) "0"            0
        , (,) "1234567890"   1234567890
        , (,) "00"           0
        , (,) "01"           1
        , (,) "-1"           $ -1
        , (,) "-0"           0
        , (,) "0.1234567890" 0.1234567890
        , (,) "-1.0"         $ -1.0
        ]

    counter_samples =
        [ ""
        , "0A"
        , "+0"
        , "0."
        , ".0"
        , "."
        , "-"
        , "-."
        , "-1."
        , "-.1"
        ]
string_macro :: RegexType -> MacroEnv -> PreludeMacro -> Maybe MacroDescriptor string_macro rty env pm | isPCRE rty = Nothing | otherwise = Just $ run_tests rty (fmap T.unpack . parseString) samples env pm MacroDescriptor { macroSource = "\"(?:[^\"\\]+|\\\\[\\\"])*\"" , macroSamples = map fst samples , macroCounterSamples = counter_samples , macroTestResults = [] , macroParser = Just "parseString" , macroDescription = "a double-quote string, with simple \\ escapes for \\s and \"s" } where samples :: [(String,String)] samples = [ (,) "\"\"" "" , (,) "\"foo\"" "foo" , (,) "\"\\\"\"" "\"" , (,) "\"\\\"\\\"\"" "\"\"" , (,) "\"\\\"\\\\\\\"\"" "\"\\\"" , (,) "\"\\\"foo\\\"\"" "\"foo\"" , (,) "\"\"" "" ] counter_samples = [ "\"" , "\"aa" ] string_simple_macro :: RegexType -> MacroEnv -> PreludeMacro -> Maybe MacroDescriptor string_simple_macro rty env pm = Just $ run_tests rty (fmap T.unpack . 
parseSimpleString) samples env pm MacroDescriptor { macroSource = "\"[^\"[:cntrl:]]*\"" , macroSamples = map fst samples , macroCounterSamples = counter_samples , macroTestResults = [] , macroParser = Just "parseSimpleString" , macroDescription = "a simple quoted string" } where samples :: [(String,String)] samples = [ (,) "\"\"" "" , (,) "\"foo\"" "foo" , (,) "\"\\\"" "\\" , (,) "\"\"" "" ] counter_samples = [ "" , "\"" , "\"\\\"\"" , "\"\\\"\\\"\"" , "\"\\\"\\\\\\\"\"" , "\"\\\"foo\\\"\"" , "\"aa" ] id_macro :: RegexType -> MacroEnv -> PreludeMacro -> Maybe MacroDescriptor id_macro rty env pm = Just $ run_tests rty Just samples env pm MacroDescriptor { macroSource = "_*[a-zA-Z][a-zA-Z0-9_]*" , macroSamples = map fst samples , macroCounterSamples = counter_samples , macroTestResults = [] , macroParser = Nothing , macroDescription = "a standard C-style alphanumeric identifier (with _s)" } where samples :: [(String,String)] samples = [ f "a" , f "A" , f "A1" , f "a_" , f "a1_B2" , f "_abc" , f "__abc" ] where f s = (s,s) counter_samples = [ "" , "1" , "_" , "__" , "__1" , "1a" , "a'" ] id'_macro :: RegexType -> MacroEnv -> PreludeMacro -> Maybe MacroDescriptor id'_macro rty env pm = Just $ run_tests rty Just samples env pm MacroDescriptor { macroSource = "_*[a-zA-Z][a-zA-Z0-9_']*" , macroSamples = map fst samples , macroCounterSamples = counter_samples , macroTestResults = [] , macroParser = Nothing , macroDescription = "a standard Haskell-style alphanumeric identifier (with '_'s and '''s)" } where samples :: [(String,String)] samples = [ f "a" , f "A" , f "A1" , f "a_" , f "a1_B2" , f "_abc" , f "__abc" , f "a'" , f "_a'" , f "a'b" ] where f s = (s,s) counter_samples = [ "" , "1" , "_" , "__" , "__1" , "1a" , "'" , "'a" , "_'" , "_1'" ] id__macro :: RegexType -> MacroEnv -> PreludeMacro -> Maybe MacroDescriptor id__macro rty env pm = Just $ run_tests rty Just samples env pm MacroDescriptor { macroSource = "_*[a-zA-Z][a-zA-Z0-9_'-]*" , macroSamples = map fst samples 
, macroCounterSamples = counter_samples , macroTestResults = [] , macroParser = Nothing , macroDescription = "an identifier with -s" } where samples :: [(String,String)] samples = [ f "a" , f "A" , f "A1" , f "a_" , f "a1_B2" , f "_abc" , f "__abc" , f "a'" , f "_a'" , f "a'b" , f "a-" , f "a1-B2" , f "a1-B2-" ] where f s = (s,s) counter_samples = [ "" , "1" , "_" , "__" , "__1" , "1a" , "'" , "'a" , "_'" , "_1'" ] date_macro :: RegexType -> MacroEnv -> PreludeMacro -> Maybe MacroDescriptor date_macro rty env pm = Just $ run_tests rty parseDate samples env pm MacroDescriptor { macroSource = "[0-9]{4}-[0-9]{2}-[0-9]{2}" , macroSamples = map fst samples , macroCounterSamples = counter_samples , macroTestResults = [] , macroParser = Just "parseDate" , macroDescription = "a YYYY-MM-DD format date" } where samples :: [(String,Day)] samples = [ f "2016-12-31" , f "0001-01-01" , f "1000-01-01" ] where f s = (s,read s) counter_samples = [ "" , "2016/01/31" , "2016-1-31" , "2016-01-1" , "2016-001-01" ] date_slashes_macro :: RegexType -> MacroEnv -> PreludeMacro -> Maybe MacroDescriptor date_slashes_macro rty env pm = Just $ run_tests rty parseSlashesDate samples env pm MacroDescriptor { macroSource = "[0-9]{4}/[0-9]{2}/[0-9]{2}" , macroSamples = map fst samples , macroCounterSamples = counter_samples , macroTestResults = [] , macroParser = Just "parseSlashesDate" , macroDescription = "a YYYY/MM/DD format date" } where samples :: [(String,Day)] samples = [ f "2016/12/31" , f "0001/01/01" , f "1000/01/01" ] where f s = (s,read $ map tr s) where tr '/' = '-' tr c = c counter_samples = [ "" , "2016-01-31" , "2016/1/31" , "2016/01/1" , "2016/001/01" ] time_macro :: RegexType -> MacroEnv -> PreludeMacro -> Maybe MacroDescriptor time_macro rty env pm = Just $ run_tests rty parseTimeOfDay samples env pm MacroDescriptor { macroSource = "[0-9]{2}:[0-9]{2}:[0-9]{2}(?:[.][0-9]+)?" 
, macroSamples = map fst samples , macroCounterSamples = counter_samples , macroTestResults = [] , macroParser = Just "parseTimeOfDay" , macroDescription = "a HH:MM:SS[.Q+]" } where samples :: [(String,TimeOfDay)] samples = [ f "00:00:00" 00 00 0 , f "23:59:59" 23 59 59 , f "00:00:00.1234567890" 00 00 $ 123456789 / 1000000000 ] where f s h m ps = (s,TimeOfDay h m ps) counter_samples = [ "" , "235959" , "10:20" , "A00:00:00" , "00:00:00A" , "23:59:59." ]
-- | the descriptor for 'PM_timezone' (presented as @%timezone@): either
-- a literal @Z@ or a signed four-digit offset with an optional colon
-- between the two digit pairs; the samples below are passed to
-- @run_tests@ together with 'parseTimeZone'
timezone_macro :: RegexType
               -> MacroEnv
               -> PreludeMacro
               -> Maybe MacroDescriptor
timezone_macro rty env pm =
  Just $ run_tests rty parseTimeZone samples env pm
    MacroDescriptor
      { macroSource          = "(?:Z|[+-][0-9]{2}:?[0-9]{2})"
      , macroSamples         = map fst samples
      , macroCounterSamples  = counter_samples
      , macroTestResults     = []
      , macroParser          = Just "parseTimeZone"
      -- user-facing text: this used to read "IOS-8601"
      , macroDescription     = "an ISO-8601 TZ specification"
      }
  where
    samples :: [(String,TimeZone)]
    samples =
        [ f "Z"      $ minutesToTimeZone 0
        , f "+00:00" $ minutesToTimeZone 0
        , f "+0000"  $ minutesToTimeZone 0
        , f "+0200"  $ minutesToTimeZone 120
        , f "-0100"  $ minutesToTimeZone $ -60
        ]
      where
        f = (,)

    counter_samples =
        [ ""
        , "00"
        , "A00:00"
        , "UTC"
        , "EST"
        , " EST"
        ]
datetime_macro :: RegexType -> MacroEnv -> PreludeMacro -> Maybe MacroDescriptor datetime_macro rty env pm = Just $ run_tests rty parseDateTime samples env pm MacroDescriptor { macroSource = "@{%date}[ T]@{%time}(?:@{%timezone}| UTC)?" 
, macroSamples = map fst samples , macroCounterSamples = counter_samples , macroTestResults = [] , macroParser = Just "parseDateTime" , macroDescription = "ISO-8601 format date and time + simple variants" } where samples :: [(String,UTCTime)] samples = [ f "2016-12-31 23:37:22.525343 UTC" "2016-12-31 23:37:22.525343Z" , f "2016-12-31 23:37:22.525343" "2016-12-31 23:37:22.525343Z" , f "2016-12-31 23:37:22" "2016-12-31 23:37:22Z" , f "2016-12-31T23:37:22+0100" "2016-12-31 23:37:22+0100" , f "2016-12-31T23:37:22-01:00" "2016-12-31 23:37:22-0100" , f "2016-12-31T23:37:22-23:59" "2016-12-31 23:37:22-2359" , f "2016-12-31T23:37:22Z" "2016-12-31 23:37:22Z" ] where f :: String -> String -> (String,UTCTime) f s r_s = (s,read r_s) counter_samples = [ "" , "2016-12-31 23:37:22.525343 EST" ]
-- | the descriptor for 'PM_datetime_8601': the @%date@, @%time@ and
-- @%timezone@ macros joined by a literal @T@, with the time zone
-- mandatory (there is no trailing @?@ on it in the macro source)
--
-- NOTE(review): the samples are passed to @run_tests@ with
-- 'parseDateTime', but 'macroParser' advertises "parseDateTime8601";
-- the parsers are defined in another module, so confirm which of the
-- two is intended and make the code and the label agree.
datetime_8601_macro :: RegexType
                    -> MacroEnv
                    -> PreludeMacro
                    -> Maybe MacroDescriptor
datetime_8601_macro rty env pm =
  Just $ run_tests rty parseDateTime samples env pm
    MacroDescriptor
      { macroSource          = "@{%date}T@{%time}@{%timezone}"
      , macroSamples         = map fst samples
      , macroCounterSamples  = counter_samples
      , macroTestResults     = []
      , macroParser          = Just "parseDateTime8601"
      , macroDescription     = "YYYY-MM-DDTHH:MM:SS[.Q*](Z|[+-]HHMM) format date and time"
      }
  where
    samples :: [(String,UTCTime)]
    samples =
        [ f "2016-12-31T23:37:22.343Z"   "2016-12-31 23:37:22.343Z"
        , f "2016-12-31T23:37:22-0100"   "2016-12-31 23:37:22-0100"
        , f "2016-12-31T23:37:22+23:59"  "2016-12-31 23:37:22+2359"
        ]
      where
        -- the expected value is obtained by 'read'ing the second string
        f :: String -> String -> (String,UTCTime)
        f s r_s = (s,read r_s)

    counter_samples =
        [ ""
        , "2016-12-31 23:37:22.525343 EST"
        ]
datetime_clf_macro :: RegexType -> MacroEnv -> PreludeMacro -> Maybe MacroDescriptor datetime_clf_macro rty env pm = Just $ run_tests rty parseDateTimeCLF samples env pm MacroDescriptor { macroSource = re , macroSamples = map fst samples , macroCounterSamples = counter_samples , macroTestResults = [] , macroParser = Just "parseDateTimeCLF" , macroDescription = "Common Log Format date+time: 
%d/%b/%Y:%H:%M:%S %z" } where samples :: [(String,UTCTime)] samples = [ f "10/Oct/2000:13:55:36 -0700" "2000-10-10 13:55:36-0700" , f "10/Oct/2000:13:55:36 +07:00" "2000-10-10 13:55:36+0700" ] where f :: String -> String -> (String,UTCTime) f s r_s = (s,read r_s) counter_samples = [ "" , "2016-12-31T23:37+0100" , "10/Oct/2000:13:55:36-0700" , "10/OCT/2000:13:55:36 -0700" , "10/Oct/2000:13:55 -0700" , "10/Oct/2000:13:55Z" ] re = RegexSource $ unwords [ "[0-9]{2}/@{%shortmonth}/[0-9]{4}:[0-9]{2}:[0-9]{2}:[0-9]{2}" , "[+-][0-9]{2}:?[0-9]{2}" ] shortmonth_macro :: RegexType -> MacroEnv -> PreludeMacro -> Maybe MacroDescriptor shortmonth_macro rty env pm = Just $ run_tests rty parseShortMonth samples env pm MacroDescriptor { macroSource = bracketedRegexSource $ intercalate "|" $ map T.unpack $ elems shortMonthArray , macroSamples = map fst samples , macroCounterSamples = counter_samples , macroTestResults = [] , macroParser = Just "parseShortMonth" , macroDescription = "three letter month name: Jan-Dec" } where samples :: [(String,Int)] samples = [ f "Jan" 1 , f "Feb" 2 , f "Dec" 12 ] where f = (,) counter_samples = [ "" , "jan" , "DEC" , "January" , "01" , "1" ]
-- | the descriptor for 'PM_address_ipv4': four groups of one to three
-- decimal digits separated by dots; the samples below are passed to
-- @run_tests@ together with 'parseIPv4Address'
address_ipv4_macros :: RegexType
                    -> MacroEnv
                    -> PreludeMacro
                    -> Maybe MacroDescriptor
address_ipv4_macros rty env pm =
  Just $ run_tests rty parseIPv4Address samples env pm
    MacroDescriptor
      { macroSource          = "[0-9]{1,3}[.][0-9]{1,3}[.][0-9]{1,3}[.][0-9]{1,3}"
      , macroSamples         = map fst samples
      , macroCounterSamples  = counter_samples
      , macroTestResults     = []
      -- name the parser actually used above (this used to advertise
      -- "parseSeverity", copied from the syslog severity macro)
      , macroParser          = Just "parseIPv4Address"
      , macroDescription     = "an a.b.c.d IPv4 address"
      }
  where
    samples :: [(String,IPV4Address)]
    samples =
        [ f "0.0.0.0"         (  0,  0,  0,  0)
        , f "123.45.6.78"     (123, 45,  6, 78)
        , f "9.9.9.9"         (  9,  9,  9,  9)
        , f "255.255.255.255" (255,255,255,255)
        ]
      where
        f = (,)

    counter_samples =
        [ ""
        , "foo"
        , "1234.0.0.0"
        , "1.2.3"
        , "1.2.3."
        , "1.2..4"
        , "www.example.com"
        , "2001:0db8:85a3:0000:0000:8a2e:0370:7334"
        ]
syslog_severity_macro :: RegexType -> MacroEnv -> PreludeMacro -> Maybe MacroDescriptor syslog_severity_macro rty env pm = Just $ run_tests rty parseSeverity samples env pm MacroDescriptor { macroSource = re , macroSamples = map fst samples , macroCounterSamples = counter_samples , macroTestResults = [] , macroParser = Just "parseSeverity" , macroDescription = "syslog severity keyword (debug-emerg)" } where samples :: [(String,Severity)] samples = [ f "emerg" Emerg , f "panic" Emerg , f "alert" Alert , f "crit" Crit , f "err" Err , f "error" Err , f "warn" Warning , f "warning" Warning , f "notice" Notice , f "info" Info , f "debug" Debug ] where f = (,) counter_samples = [ "" , "Emergency" , "ALERT" ] re = if isPCRE rty then re_pcre else re_tdfa re_tdfa = bracketedRegexSource $ intercalate "|" $ [ T.unpack kw | (kw0,kws) <- map severityKeywords [minBound..maxBound] , kw <- kw0:kws ] re_pcre = bracketedRegexSource $ intercalate "|" $ [ T.unpack kw | (kw0,kws) <- map severityKeywords $ filter (/=Err) [minBound..maxBound] , kw <- kw0:kws ] ++ ["err(?:or)?"] email_simple_macro :: RegexType -> MacroEnv -> PreludeMacro -> Maybe MacroDescriptor email_simple_macro rty env pm = Just $ run_tests rty Just samples env pm MacroDescriptor { macroSource = "[a-zA-Z0-9%_.+-]+@[a-zA-Z0-9-]+\\.[a-zA-Z0-9.-]+" , macroSamples = map fst samples , macroCounterSamples = counter_samples , macroTestResults = [] , macroParser = Nothing , macroDescription = "an email address" } where samples :: [(String,String)] samples = [ f "user-name%foo.bar.com@an-example.com" ] where f s = (s,s) counter_samples = [ "" , "not-an-email-address" , "@not-an-email-address" ] -- | see https://mathiasbynens.be/demo/url-regex -- (based on @stephenhay URL) url_macro :: RegexType -> MacroEnv -> PreludeMacro -> Maybe MacroDescriptor url_macro rty env pm = Just $ run_tests rty Just samples env pm MacroDescriptor { macroSource = 
"([hH][tT][tT][pP][sS]?|[fF][tT][pP])://[^[:space:]/$.?#].[^[:space:]]*" , macroSamples = map fst samples , macroCounterSamples = counter_samples , macroTestResults = [] , macroParser = Nothing , macroDescription = "a URL" } where samples :: [(String,String)] samples = [ f "https://mathiasbynens.be/demo/url-regex" , f "http://foo.com/blah_blah" , f "http://foo.com/blah_blah/" , f "http://foo.com/blah_blah_(wikipedia)" , f "http://foo.com/blah_blah_(wikipedia)_(again)" , f "http://www.example.com/wpstyle/?p=364" , f "HTTPS://foo.bar/?q=Test%20URL-encoded%20stuff" , f "HTTP://223.255.255.254" , f "ftp://223.255.255.254" , f "FTP://223.255.255.254" ] where f s = (s,s) counter_samples = [ "" , "http://" , "http://." , "http://.." , "http://../" , "http://?" , "http://??" , "http://foo.bar?q=Spaces should be encoded" , "//" , "http://##/" , "http://##" , "http://##/" ] run_tests :: (Eq a,Show a) => RegexType -> (String->Maybe a) -> [(String,a)] -> MacroEnv -> PreludeMacro -> MacroDescriptor -> MacroDescriptor run_tests rty parser vector env = runTests rty parser vector env . 
prelude_macro_id bracketedRegexSource :: String -> RegexSource bracketedRegexSource re_s = RegexSource $ "(?:" ++ re_s ++ ")" fix :: (a->a) -> a fix f = f (fix f) regex-1.1.0.2/Text/RE/ZeInternals/QQ.hs0000644000000000000000000000163214254065176015532 0ustar0000000000000000{-# LANGUAGE DeriveDataTypeable #-} module Text.RE.ZeInternals.QQ where import Control.Exception import Data.Typeable import Language.Haskell.TH.Quote -- | used to throw an exception reporting an abuse of a quasi quoter data QQFailure = QQFailure { _qqf_context :: String -- ^ in what context was the quasi quoter used , _qqf_component :: String -- ^ how was the quasi quoter being abused } deriving (Show,Typeable) instance Exception QQFailure where -- | a quasi quoter that can be used in no context (to be extended with -- the appropriate quasi quoter parser) qq0 :: String -> QuasiQuoter qq0 ctx = QuasiQuoter { quoteExp = const $ throw $ QQFailure ctx "expression" , quotePat = const $ throw $ QQFailure ctx "pattern" , quoteType = const $ throw $ QQFailure ctx "type" , quoteDec = const $ throw $ QQFailure ctx "declaration" } regex-1.1.0.2/Text/RE/ZeInternals/Replace.lhs0000644000000000000000000004017514254065176016745 0ustar0000000000000000\begin{code} {-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE QuasiQuotes #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE RecordWildCards #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE MonoLocalBinds #-} module Text.RE.ZeInternals.Replace ( -- * REContext and RELocation REContext(..) , RELocation(..) , isTopLocation -- * replaceAll , replaceAll , replaceAllCaptures , replaceAllCaptures_ , replaceAllCapturesM -- * replace , replace , replaceCaptures , replaceCaptures_ , replaceCapturesM -- * expandMacros , expandMacros , expandMacros' -- * templateCaptures , templateCaptures -- * Replace and ReplaceMethods , Replace(..) , ReplaceMethods(..) 
, replaceMethods ) where import Control.Applicative import Data.Array import qualified Data.ByteString.Char8 as B import qualified Data.ByteString.Lazy.Char8 as LBS import Data.Char import qualified Data.Foldable as F import Data.Functor.Identity import qualified Data.HashMap.Strict as HM import Data.Maybe import qualified Data.Monoid as M import qualified Data.Sequence as S import qualified Data.Text as T import qualified Data.Text.Encoding as TE import qualified Data.Text.Lazy as LT import Prelude.Compat import Text.RE.REOptions import Text.RE.ZeInternals.Types.Capture import Text.RE.ZeInternals.Types.CaptureID import Text.RE.ZeInternals.Types.Match import Text.RE.ZeInternals.Types.Matches import Text.Read import Text.Regex.TDFA import Text.Regex.TDFA.Text() import Text.Regex.TDFA.Text.Lazy() \end{code} ReContext and RELocation ------------------------ \begin{code} -- | @REContext@ specifies which contexts the substitutions should be applied data REContext = TOP -- ^ substitutions should be applied to the top-level only, -- the text that matched the whole RE | SUB -- ^ substitutions should only be applied to the text -- captured by bracketed sub-REs | ALL -- ^ the substitution function should be applied to all -- captures, the top level and the sub-expression captures deriving (Show) -- | the @RELocation@ information passed into the substitution function -- specifies which sub-expression is being substituted data RELocation = RELocation { locationMatch :: Int -- ^ the zero-based, i-th string to be matched, -- when matching all strings, zero when only the -- first string is being matched , locationCapture :: CaptureOrdinal -- ^ 0, when matching the top-level string -- matched by the whole RE, 1 for the top-most, -- left-most redex captured by bracketed -- sub-REs, etc. } deriving (Show) \end{code} \begin{code} -- | True iff the location references a complete match -- (i.e., not a bracketed capture) isTopLocation :: RELocation -> Bool isTopLocation = (==0) . 
locationCapture
\end{code}

\begin{code}
-- | replace all with a template, $0 for whole text, $1 for first
-- capture, etc.
replaceAll :: Replace a
           => a
           -> Matches a
           -> a
replaceAll tpl ac = replaceAllCaptures TOP (parseTemplateR tpl) ac
\end{code}

\begin{code}
-- | substitutes using a function that is given the full Match
-- context, the RELocation and the Capture being considered, and
-- returns the replacement text for that capture (or Nothing to leave
-- the capture as it is); the REContext selects which captures the
-- function is applied to
replaceAllCaptures :: Replace a
                   => REContext
                   -> (Match a->RELocation->Capture a->Maybe a)
                   -> Matches a
                   -> a
\end{code}

\begin{code}
replaceAllCaptures = replaceAllCaptures_ replaceMethods
\end{code}

\begin{code}
-- | replaceAllCaptures_ is like replaceAllCaptures but takes the
-- Replace methods through the ReplaceMethods argument
replaceAllCaptures_ :: Extract a
                    => ReplaceMethods a
                    -> REContext
                    -> (Match a->RELocation->Capture a->Maybe a)
                    -> Matches a
                    -> a
replaceAllCaptures_ s ctx phi ac =
    runIdentity $ replaceAllCapturesM s ctx (lift_phi phi) ac
\end{code}

\begin{code}
-- | replaceAllCapturesM is just a monadically generalised version of
-- replaceAllCaptures_; it flattens the capture arrays of all of the
-- matches into a single Match and hands that to replaceCapturesM
replaceAllCapturesM :: (Extract a,Monad m)
                    => ReplaceMethods a
                    -> REContext
                    -> (Match a->RELocation->Capture a->m (Maybe a))
                    -> Matches a
                    -> m a
replaceAllCapturesM r ctx phi_ Matches{..} =
    replaceCapturesM r ALL phi $ Match matchesSource cnms arr
  where
    -- the flattened Match is processed with ALL; the caller's context
    -- is applied here instead, through the arr_c table
    phi _ (RELocation _ i) = case arr_c!i of
      Just caps -> phi_ caps . uncurry RELocation $ arr_i ! i
      Nothing   -> const $ return Nothing

    -- for each flattened index: Just the owning Match if phi_ should
    -- be called for that capture under ctx, otherwise Nothing
    arr_c = listArray bds $
      concat $
        [ repl (rangeSize $ bounds $ matchArray cs) cs
            | cs <- allMatches
            ]

    -- for each flattened index: (match index, capture ordinal)
    arr_i = listArray bds j_ks

    -- the captures of every match, concatenated in order
    arr   = listArray bds $
        [ arr_ ! k
            | arr_ <- map matchArray allMatches
            , k    <- indices arr_
            ]

    bds   = (0,CaptureOrdinal $ length j_ks-1)

    j_ks  =
        [ (j,k)
            | (j,arr_) <- zip [0..] $ map matchArray allMatches
            ,  k       <- indices arr_
            ]

    -- n entries for a match with n captures: under TOP only the first
    -- is selected, under SUB all but the first, under ALL every one
    repl 0 _ = []
    repl n x = case ctx of
      TOP -> Just x  : replicate (n-1) Nothing
      SUB -> Nothing : replicate (n-1) (Just x)
      ALL -> replicate n $ Just x

    -- capture names are taken from the first match, if there is one
    cnms = fromMaybe noCaptureNames $ listToMaybe $ map captureNames allMatches
\end{code}

\begin{code}
-- | replace with a template containing $0 for whole text,
-- $1 for first capture, etc.
replace :: Replace a
        => a
        -> Match a
        -> a
replace tpl c = replaceCaptures TOP (parseTemplateR tpl) c
\end{code}

\begin{code}
-- | substitutes using a function that is given the full Match
-- context, the RELocation and the Capture being considered, and
-- returns the replacement text for that capture (or Nothing to leave
-- the capture as it is); the REContext selects which captures the
-- function is applied to
replaceCaptures :: Replace a
                 => REContext
                 -> (Match a->RELocation->Capture a->Maybe a)
                 -> Match a
                 -> a
replaceCaptures = replaceCaptures_ replaceMethods
\end{code}

\begin{code}
-- | replaceCaptures_ is like replaceCaptures but takes the Replace methods
-- through the ReplaceMethods argument
replaceCaptures_ :: Extract a
                 => ReplaceMethods a
                 -> REContext
                 -> (Match a->RELocation->Capture a->Maybe a)
                 -> Match a
                 -> a
replaceCaptures_ s ctx phi caps =
  runIdentity $ replaceCapturesM s ctx (lift_phi phi) caps
\end{code}

\begin{code}
-- | replaceCapturesM is just a monadically generalised version of
-- replaceCaptures_
replaceCapturesM :: (Monad m,Extract a)
                 => ReplaceMethods a
                 -> REContext
                 -> (Match a->RELocation->Capture a->m (Maybe a))
                 -> Match a
                 -> m a
replaceCapturesM ReplaceMethods{..} ctx phi_ caps@Match{..} = do
    -- the fold state is (text so far, substitutions already made);
    -- with foldr each step first runs the action for the captures to
    -- its right, so the last capture in matchArray is handled first
    (hay',_) <- foldr sc (return (matchSource,[])) $
                    zip [0..] $ elems matchArray
    return hay'
  where
    sc (i,cap0) act = do
      (hay,ds) <- act
      let ndl  = capturedText cap
          cap  = adj hay ds cap0
      mb <- phi i cap
      case mb of
        Nothing   -> return (hay,ds)
        Just ndl' ->
            return
              ( methodSubst (const ndl') cap
                -- record where the text changed and by how much
              , (captureOffset cap,len'-len) : ds
              )
          where
            len' = methodLength ndl'
            len  = methodLength ndl

    -- rebuild the capture against the current text: the offset is
    -- kept and the length is grown/shrunk by the deltas of every
    -- recorded substitution that starts before the capture's
    -- original end
    adj hay ds cap =
      Capture
        { captureSource = hay
        , capturedText  = before len $ after off0 hay
        , captureOffset = off0
        , captureLength = len
        }
      where
        len  = len0 + sum
          [ delta
            | (off,delta) <- ds
            , off < off0 + len0
            ]
        len0 = captureLength cap
        off0 = captureOffset cap

    -- apply the caller's function only where ctx selects the capture
    -- (ordinal 0 is the whole match) and the capture has captured
    phi i cap = case ctx of
      TOP | i/=0 -> return Nothing
      SUB | i==0 ->return Nothing
      _          ->
        case not $ hasCaptured cap of
          True  -> return Nothing
          False -> phi_ caps (RELocation 0 i) cap
\end{code}


expandMacros
------------

\begin{code}
-- | expand all of the @{..} macros in the RE in the argument String
-- according to the Macros argument, using the function in the first
-- argument to obtain the source String of each macro; the String is
-- returned unchanged if the Macros table is empty
expandMacros :: (r->String) -> Macros r -> String -> String
expandMacros x_src hm s =
  case HM.null hm of
    True  -> s
    False -> expandMacros' (fmap x_src .
flip HM.lookup hm) s
\end{code}

\begin{code}
-- | expand the @{..} macros in the argument string using the given
-- function; the expansion is repeated until the string stops
-- changing, @\@\@@ is replaced with a single @\@@ and a macro that the
-- function does not recognise is left as it is
expandMacros' :: (MacroID->Maybe String) -> String -> String
expandMacros' lu = fixpoint e_m
  where
    -- a single expansion pass over the RE string
    e_m re_s =
        replaceAllCaptures TOP phi $ re_s $=~ "@(@|\\{([^{}]+)\\})"
      where
        phi mtch _ cap = case txt == "@@" of
            True  -> Just   "@"
            False -> Just $ fromMaybe txt $ lu ide
          where
            txt = capturedText cap
            -- capture 2 is the macro name between the braces
            ide = MacroID $ capturedText $ capture c2 mtch
            c2  = IsCaptureOrdinal $ CaptureOrdinal 2
\end{code}

\begin{code}
-- lift a pure replacement function into the monadic form expected by
-- replaceCapturesM and replaceAllCapturesM
lift_phi :: Monad m
         => (Match a->RELocation->Capture a->Maybe a)
         -> (Match a->RELocation->Capture a->m (Maybe a))
lift_phi phi_ = phi
  where
    phi caps' loc' cap' = return $ phi_ caps' loc' cap'
\end{code}


templateCaptures
----------------

\begin{code}
-- | list all of the CaptureID references in the replace template in
-- the second argument (escaped @$$@ sequences are not listed)
templateCaptures :: ( Replace a
                    , RegexContext Regex a (Matches a)
                    , RegexMaker   Regex CompOption ExecOption String
                    )
                 => (a->String)
                 -> a
                 -> [CaptureID]
templateCaptures unpack tpl =
    [ cid
      | mtch <- allMatches $ scan_template tpl
      , Right cid <- [parse_template_capture unpack mtch]
      ]

-- | parse a Match generated by scan_template, returning @Left "$"@
-- iff the capture reference is an escaped @$@ (i.e., @$$@)
parse_template_capture :: (a->String) -> Match a -> Either a CaptureID
parse_template_capture unpack t_mtch = case t_mtch !$? c2 of
  -- ${..} form: an ordinal if the braced text reads as a number,
  -- otherwise a capture name
  Just cap -> case readMaybe stg of
      Nothing -> Right $ IsCaptureName    $ CaptureName $ T.pack stg
      Just cn -> Right $ IsCaptureOrdinal $ CaptureOrdinal cn
    where
      stg = unpack $ capturedText cap
  -- no braces: capture 1 is either "$" or the single digit that
  -- scan_template matched, so the read cannot fail on such a Match
  Nothing -> case s == "$" of
    True  -> Left t
    False -> Right $ IsCaptureOrdinal $ CaptureOrdinal $ read s
  where
    s = unpack t
    t = capturedText $ capture c1 t_mtch

    c1 = IsCaptureOrdinal $ CaptureOrdinal 1
    c2 = IsCaptureOrdinal $ CaptureOrdinal 2

-- | scan a replacement template, returning a Match for each capture
-- reference in the template (like $1, ${foo}); note that only a
-- single digit is recognised directly after the @$@
scan_template :: ( Replace a
                 , RegexContext Regex a (Matches a)
                 , RegexMaker   Regex CompOption ExecOption String
                 )
              => a
              -> Matches a
scan_template tpl = tpl $=~ "\\$(\\$|[0-9]|\\{([^{}]+)\\})"
\end{code}


Replace and ReplaceMethods
--------------------------

\begin{code}
-- | Replace provides the missing methods needed to replace the matched
-- text in a @Replace a => Match a@.
class (Show a,Eq a,Ord a,Extract a,Monoid a) => Replace a where
  -- | length function for a
  lengthR        :: a -> Int
  -- | inject String into a
  packR          :: String -> a
  -- | project a onto a String
  unpackR        :: a -> String
  -- | inject into Text
  textifyR       :: a -> T.Text
  -- | project Text onto a
  detextifyR     :: T.Text -> a
  -- | split into lines
  linesR         :: a -> [a]
  -- | concatenate a list of lines
  unlinesR       :: [a] -> a
  -- | append a newline
  appendNewlineR :: a -> a
  -- | apply a substitution function to a Capture
  substR         :: (a->a) -> Capture a -> a
  -- | convert a template containing $0, $1, etc., in the first
  -- argument, into a 'phi' replacement function for use with
  -- replaceAllCaptures and replaceCaptures
  parseTemplateR :: a -> Match a -> RELocation -> Capture a -> Maybe a

  textifyR       = T.pack . unpackR
  detextifyR     = packR  .
T.unpack appendNewlineR = (M.<> packR "\n") substR f m@Capture{..} = capturePrefix m M.<> f capturedText M.<> captureSuffix m \end{code} \begin{code} -- | a selection of the Replace methods can be encapsulated with ReplaceMethods -- for the higher-order replacement functions data ReplaceMethods a = ReplaceMethods { methodLength :: a -> Int , methodSubst :: (a->a) -> Capture a -> a } -- | replaceMethods encapsulates ReplaceMethods a from a Replace a context replaceMethods :: Replace a => ReplaceMethods a replaceMethods = ReplaceMethods { methodLength = lengthR , methodSubst = substR } \end{code} The Replace Instances --------------------- \begin{code} instance Replace [Char] where lengthR = length packR = id unpackR = id textifyR = T.pack detextifyR = T.unpack linesR = lines unlinesR = unlines appendNewlineR = (M.<>"\n") parseTemplateR = parseTemplateR' id instance Replace B.ByteString where lengthR = B.length packR = B.pack unpackR = B.unpack textifyR = TE.decodeUtf8 detextifyR = TE.encodeUtf8 linesR = B.lines unlinesR = B.unlines appendNewlineR = (M.<>"\n") parseTemplateR = parseTemplateR' B.unpack instance Replace LBS.ByteString where lengthR = fromEnum . LBS.length packR = LBS.pack unpackR = LBS.unpack textifyR = TE.decodeUtf8 . LBS.toStrict linesR = LBS.lines unlinesR = LBS.unlines detextifyR = LBS.fromStrict . TE.encodeUtf8 appendNewlineR = (M.<>"\n") parseTemplateR = parseTemplateR' LBS.unpack instance Replace (S.Seq Char) where lengthR = S.length packR = S.fromList unpackR = F.toList linesR = map packR . lines . unpackR unlinesR = packR . unlines . map unpackR parseTemplateR = parseTemplateR' F.toList instance Replace T.Text where lengthR = T.length packR = T.pack unpackR = T.unpack textifyR = id detextifyR = id linesR = T.lines unlinesR = T.unlines appendNewlineR = (M.<>"\n") parseTemplateR = parseTemplateR' T.unpack instance Replace LT.Text where lengthR = fromEnum . 
LT.length packR = LT.pack unpackR = LT.unpack textifyR = LT.toStrict detextifyR = LT.fromStrict linesR = LT.lines unlinesR = LT.unlines appendNewlineR = (M.<>"\n") parseTemplateR = parseTemplateR' LT.unpack \end{code} Parsing Replace Templates ------------------------- \begin{code} -- | parse the replacement template in second argument, substituting -- the capture references with corresponding captures from the Match -- in the third argument (the result of a single match of the RE -- against the input text to be matched); Nothing is returned if the -- inputs are not well formed (currently all inputs are well formed) parseTemplateR' :: ( Replace a , RegexContext Regex a (Matches a) , RegexMaker Regex CompOption ExecOption String ) => (a->String) -> a -> Match a -> RELocation -> Capture a -> Maybe a parseTemplateR' unpack tpl mtch _ _ = Just $ replaceAllCaptures TOP phi $ scan_template tpl where phi t_mtch _ _ = either Just this $ parse_template_capture unpack t_mtch this cid = capturedText <$> mtch !$? cid \end{code} Helpers ------- \begin{code} fixpoint :: (Eq a) => (a->a) -> a -> a fixpoint f = chk . 
iterate f
  where
    chk (x:x':_) | x==x' = x
    chk xs                = chk $ tail xs
\end{code}

\begin{code}
($=~) :: ( RegexContext Regex source target
         , RegexMaker   Regex CompOption ExecOption String
         )
      => source -> String -> target
($=~) = (=~)
\end{code}
regex-1.1.0.2/Text/RE/ZeInternals/SearchReplace.hs0000644000000000000000000000700314254065176017710 0ustar0000000000000000{-# LANGUAGE NoImplicitPrelude              #-}
{-# LANGUAGE RecordWildCards                #-}
{-# LANGUAGE CPP                            #-}
#if __GLASGOW_HASKELL__ >= 800
{-# OPTIONS_GHC -fno-warn-redundant-constraints #-}
{-# LANGUAGE TemplateHaskellQuotes          #-}
#else
{-# LANGUAGE QuasiQuotes                    #-}
{-# LANGUAGE TemplateHaskell                #-}
#endif
{-# OPTIONS_GHC -fno-warn-orphans           #-}
{-# OPTIONS_GHC -fno-warn-unused-imports    #-}

module Text.RE.ZeInternals.SearchReplace
  ( unsafeCompileSearchReplace_
  , compileSearchReplace_
  , compileSearchAndReplace_
  ) where

import           Control.Monad.Fail
import qualified Data.HashMap.Strict            as HMS
import           Prelude.Compat                 hiding (fail)
import           Text.RE.ZeInternals.NamedCaptures
import           Text.RE.ZeInternals.Replace
import           Text.RE.ZeInternals.Types.Capture
import           Text.RE.ZeInternals.Types.CaptureID
import           Text.RE.ZeInternals.Types.Matches
import           Text.RE.ZeInternals.Types.Poss
import           Text.RE.ZeInternals.Types.SearchReplace
import qualified Text.Regex.TDFA                as TDFA


-- | wrapper on 'compileSearchReplace_' that will generate an error
-- if any compilation errors are found
unsafeCompileSearchReplace_ :: (String->s)
                            -> (String->Either String re)
                            -> String
                            -> SearchReplace re s
unsafeCompileSearchReplace_ pk cf = poss err id . compileSearchReplace_ pk cf
  where
    err msg = error $ "unsafeCompileSearchReplace_: " ++ msg

-- | compile a SearchReplace template generating errors if the RE or
-- the template are not well formed, all capture references being
-- checked; the template must contain exactly one @\/\/\/@ separator,
-- with the RE before it and the replacement template after it
compileSearchReplace_ :: (Monad m,MonadFail m,Functor m)
                      => (String->s)
                      -> (String->Either String re)
                      -> String
                      -> m (SearchReplace re s)
compileSearchReplace_ pack compile_re sr_tpl = poss fail return $ do
    case mainCaptures $ sr_tpl $=~ "///" of
      [cap] ->
        compileSearchAndReplace_ pack compile_re
                      (capturePrefix cap) (captureSuffix cap)
      _ ->
        Eek $ "bad search-replace template syntax: " ++ sr_tpl

-- | compile 'SearchReplace' from two strings containing the RE
-- and the replacement template; every capture reference in the
-- template is checked against the captures extracted from the RE and
-- the first problem found is reported through 'fail'
compileSearchAndReplace_ :: (Monad m,MonadFail m,Functor m)
                         => (String->s)
                         -> (String->Either String re)
                         -> String
                         -> String
                         -> m (SearchReplace re s)
compileSearchAndReplace_ pack compile_re re_s tpl = either fail return $ do
    re <- compile_re re_s
    ((n,cnms),_) <- extractNamedCaptures re_s
    mapM_ (check n cnms) $ templateCaptures id tpl
    return $ SearchReplace re $ pack tpl
  where
    -- validate one capture reference from the template
    check :: Int -> CaptureNames -> CaptureID -> Either String ()
    check n cnms cid = case cid of
      IsCaptureOrdinal co -> check_co n    co
      IsCaptureName    cn -> check_cn cnms cn

    -- ordinals up to and including n are accepted, so the failure
    -- case is strictly i > n and the message says so
    check_co n (CaptureOrdinal i) = case i <= n of
      True  -> return ()
      False -> Left $ "capture ordinal out of range: " ++
                                      show i ++ " > " ++ show n

    -- a named reference must be one of the names found in the RE
    check_cn cnms cnm = case cnm `HMS.member` cnms of
      True  -> return ()
      False -> Left $ "capture name not defined: " ++
                                      show (getCaptureName cnm)

-- String-specialised TDFA match operator used to find the separator
($=~) :: String -> String -> Matches String
($=~) = (TDFA.=~)
regex-1.1.0.2/Text/RE/ZeInternals/SearchReplace/TDFA.hs0000644000000000000000000000265014254065176020431 0ustar0000000000000000{-# LANGUAGE NoImplicitPrelude          #-}
{-# LANGUAGE CPP                        #-}
#if __GLASGOW_HASKELL__ >= 800
{-# LANGUAGE TemplateHaskellQuotes      #-}
#else
{-# LANGUAGE QuasiQuotes                #-}
{-# LANGUAGE TemplateHaskell            #-}
#endif

module Text.RE.ZeInternals.SearchReplace.TDFA
( ed , edMS , edMI , edBS , edBI , edMultilineSensitive , edMultilineInsensitive , edBlockSensitive , edBlockInsensitive , ed_ ) where import Language.Haskell.TH import Language.Haskell.TH.Quote import Prelude.Compat import Text.RE.REOptions import Text.RE.ZeInternals.SearchReplace.TDFAEdPrime -- | the @[ed| ... /// ... |]@ quasi quoters ed , edMS , edMI , edBS , edBI , edMultilineSensitive , edMultilineInsensitive , edBlockSensitive , edBlockInsensitive , ed_ :: QuasiQuoter ed = ed' cast $ Just minBound edMS = edMultilineSensitive edMI = edMultilineInsensitive edBS = edBlockSensitive edBI = edBlockInsensitive edMultilineSensitive = ed' cast $ Just MultilineSensitive edMultilineInsensitive = ed' cast $ Just MultilineInsensitive edBlockSensitive = ed' cast $ Just BlockSensitive edBlockInsensitive = ed' cast $ Just BlockInsensitive ed_ = ed' cast Nothing cast :: Q Exp cast = [|id|] regex-1.1.0.2/Text/RE/ZeInternals/SearchReplace/TDFA/ByteString.hs0000644000000000000000000000642514254065176022527 0ustar0000000000000000{-# LANGUAGE CPP #-} #if __GLASGOW_HASKELL__ >= 800 {-# LANGUAGE TemplateHaskellQuotes #-} #else {-# LANGUAGE QuasiQuotes #-} {-# LANGUAGE TemplateHaskell #-} #endif module Text.RE.ZeInternals.SearchReplace.TDFA.ByteString ( ed , edMultilineSensitive , edMultilineInsensitive , edBlockSensitive , edBlockInsensitive , edMS , edMI , edBS , edBI , ed_ ) where import qualified Data.ByteString.Char8 as B import Language.Haskell.TH import Language.Haskell.TH.Quote import Text.RE.REOptions import Text.RE.Tools.IsRegex import Text.RE.ZeInternals.SearchReplace.TDFAEdPrime import Text.RE.ZeInternals.TDFA -- | @[ed| ... \/\/\/ ... |]@, is equivalent to @[edMultilineSensitive| ... \/\/\/ ... |]@, -- compiling a case-sensitive, multi-line 'SearchReplace' ed :: QuasiQuoter ed = ed' sr_cast $ Just minBound -- | @[edMultilineSensitive| ... \/\/\/ ... 
|]@ compiles a case-sensitive, multi-line 'SearchReplace' template edMultilineSensitive :: QuasiQuoter edMultilineSensitive = ed' sr_cast $ Just MultilineSensitive -- | @[edMultilineInsensitive| ... \/\/\/ ... |]@ compiles a case-insensitive, multi-line 'SearchReplace' template edMultilineInsensitive :: QuasiQuoter edMultilineInsensitive = ed' sr_cast $ Just MultilineInsensitive -- | @[edBlockSensitive| ... \/\/\/ ... |]@ compiles a case-sensitive, non-multi-line 'SearchReplace' template edBlockSensitive :: QuasiQuoter edBlockSensitive = ed' sr_cast $ Just BlockSensitive -- | @[edBlockInsensitive| ... \/\/\/ ... |]@ compiles a case-insensitive, non-multi-line 'SearchReplace' template edBlockInsensitive :: QuasiQuoter edBlockInsensitive = ed' sr_cast $ Just BlockInsensitive -- | @[edMS| ... \/\/\/ ... |]@ is a shorthand for @[edMultilineSensitive| ... \/\/\/ ... |]@ edMS :: QuasiQuoter edMS = edMultilineSensitive -- | @[edMI| ... \/\/\/ ... |]@ is a shorthand for @[edMultilineInsensitive| ... \/\/\/ ... |]@ edMI :: QuasiQuoter edMI = edMultilineInsensitive -- | @[edBS| ... \/\/\/ ... |]@ is a shorthand for @[edBlockSensitive| ... \/\/\/ ... |]@ edBS :: QuasiQuoter edBS = edBlockSensitive -- | @[edBI| ... \/\/\/ ... |]@ is a shorthand for @[edBlockInsensitive| ... \/\/\/ ... |]@ edBI :: QuasiQuoter edBI = edBlockInsensitive -- | @[ed_| ... \/\/\/ ... |]@ compiles a 'SearchReplace' template to produce a function that -- takes the RE options (e.g., a 'SimpleREOptions' value) and yields the -- 'SearchReplace' template compiled with those options. For example, -- -- @s *=~/ [ed_|${hex}([0-9a-f]+)\/\/\/0x${hex}|] MultilineInsensitive@ -- -- prefixes the hexadecimal digit strings in s with @0x@, allowing for -- upper- or lower-case hex digits (which is entirely equivalent -- in this example to just using @[edMultilineInsensitive|[0-9a-f]+|]@). 
ed_ :: QuasiQuoter ed_ = ed' fn_cast Nothing sr_cast :: Q Exp sr_cast = [|\x -> x :: SearchReplace RE B.ByteString|] fn_cast :: Q Exp fn_cast = [|\f x -> f x :: SearchReplace RE B.ByteString|] regex-1.1.0.2/Text/RE/ZeInternals/SearchReplace/TDFA/ByteString/Lazy.hs0000644000000000000000000000644014254065176023443 0ustar0000000000000000{-# LANGUAGE CPP #-} #if __GLASGOW_HASKELL__ >= 800 {-# LANGUAGE TemplateHaskellQuotes #-} #else {-# LANGUAGE QuasiQuotes #-} {-# LANGUAGE TemplateHaskell #-} #endif module Text.RE.ZeInternals.SearchReplace.TDFA.ByteString.Lazy ( ed , edMultilineSensitive , edMultilineInsensitive , edBlockSensitive , edBlockInsensitive , edMS , edMI , edBS , edBI , ed_ ) where import qualified Data.ByteString.Lazy.Char8 as LBS import Language.Haskell.TH import Language.Haskell.TH.Quote import Text.RE.REOptions import Text.RE.Tools.IsRegex import Text.RE.ZeInternals.SearchReplace.TDFAEdPrime import Text.RE.ZeInternals.TDFA -- | @[ed| ... \/\/\/ ... |]@, is equivalent to @[edMultilineSensitive| ... \/\/\/ ... |]@, -- compiling a case-sensitive, multi-line 'SearchReplace' ed :: QuasiQuoter ed = ed' sr_cast $ Just minBound -- | @[edMultilineSensitive| ... \/\/\/ ... |]@ compiles a case-sensitive, multi-line 'SearchReplace' template edMultilineSensitive :: QuasiQuoter edMultilineSensitive = ed' sr_cast $ Just MultilineSensitive -- | @[edMultilineInsensitive| ... \/\/\/ ... |]@ compiles a case-insensitive, multi-line 'SearchReplace' template edMultilineInsensitive :: QuasiQuoter edMultilineInsensitive = ed' sr_cast $ Just MultilineInsensitive -- | @[edBlockSensitive| ... \/\/\/ ... |]@ compiles a case-sensitive, non-multi-line 'SearchReplace' template edBlockSensitive :: QuasiQuoter edBlockSensitive = ed' sr_cast $ Just BlockSensitive -- | @[edBlockInsensitive| ... \/\/\/ ... |]@ compiles a case-insensitive, non-multi-line 'SearchReplace' template edBlockInsensitive :: QuasiQuoter edBlockInsensitive = ed' sr_cast $ Just BlockInsensitive -- | @[edMS| ... 
\/\/\/ ... |]@ is a shorthand for @[edMultilineSensitive| ... \/\/\/ ... |]@ edMS :: QuasiQuoter edMS = edMultilineSensitive -- | @[edMI| ... \/\/\/ ... |]@ is a shorthand for @[edMultilineInsensitive| ... \/\/\/ ... |]@ edMI :: QuasiQuoter edMI = edMultilineInsensitive -- | @[edBS| ... \/\/\/ ... |]@ is a shorthand for @[edBlockSensitive| ... \/\/\/ ... |]@ edBS :: QuasiQuoter edBS = edBlockSensitive -- | @[edBI| ... \/\/\/ ... |]@ is a shorthand for @[edBlockInsensitive| ... \/\/\/ ... |]@ edBI :: QuasiQuoter edBI = edBlockInsensitive -- | @[ed_| ... \/\/\/ ... |]@ compiles a 'SearchReplace' template to produce a function that -- takes the RE options (e.g., a 'SimpleREOptions' value) and yields the -- 'SearchReplace' template compiled with those options. For example, -- -- @s *=~/ [ed_|${hex}([0-9a-f]+)\/\/\/0x${hex}|] MultilineInsensitive@ -- -- prefixes the hexadecimal digit strings in s with @0x@, allowing for -- upper- or lower-case hex digits (which is entirely equivalent -- in this example to just using @[edMultilineInsensitive|[0-9a-f]+|]@). 
ed_ :: QuasiQuoter ed_ = ed' fn_cast Nothing sr_cast :: Q Exp sr_cast = [|\x -> x :: SearchReplace RE LBS.ByteString|] fn_cast :: Q Exp fn_cast = [|\f x -> f x :: SearchReplace RE LBS.ByteString|] regex-1.1.0.2/Text/RE/ZeInternals/SearchReplace/TDFA/Sequence.hs0000644000000000000000000000642314254065176022203 0ustar0000000000000000{-# LANGUAGE CPP #-} #if __GLASGOW_HASKELL__ >= 800 {-# LANGUAGE TemplateHaskellQuotes #-} #else {-# LANGUAGE QuasiQuotes #-} {-# LANGUAGE TemplateHaskell #-} #endif module Text.RE.ZeInternals.SearchReplace.TDFA.Sequence ( ed , edMultilineSensitive , edMultilineInsensitive , edBlockSensitive , edBlockInsensitive , edMS , edMI , edBS , edBI , ed_ ) where import qualified Data.Sequence as S import Language.Haskell.TH import Language.Haskell.TH.Quote import Text.RE.REOptions import Text.RE.Tools.IsRegex import Text.RE.ZeInternals.SearchReplace.TDFAEdPrime import Text.RE.ZeInternals.TDFA -- | @[ed| ... \/\/\/ ... |]@, is equivalent to @[edMultilineSensitive| ... \/\/\/ ... |]@, -- compiling a case-sensitive, multi-line 'SearchReplace' ed :: QuasiQuoter ed = ed' sr_cast $ Just minBound -- | @[edMultilineSensitive| ... \/\/\/ ... |]@ compiles a case-sensitive, multi-line 'SearchReplace' template edMultilineSensitive :: QuasiQuoter edMultilineSensitive = ed' sr_cast $ Just MultilineSensitive -- | @[edMultilineInsensitive| ... \/\/\/ ... |]@ compiles a case-insensitive, multi-line 'SearchReplace' template edMultilineInsensitive :: QuasiQuoter edMultilineInsensitive = ed' sr_cast $ Just MultilineInsensitive -- | @[edBlockSensitive| ... \/\/\/ ... |]@ compiles a case-sensitive, non-multi-line 'SearchReplace' template edBlockSensitive :: QuasiQuoter edBlockSensitive = ed' sr_cast $ Just BlockSensitive -- | @[edBlockInsensitive| ... \/\/\/ ... |]@ compiles a case-insensitive, non-multi-line 'SearchReplace' template edBlockInsensitive :: QuasiQuoter edBlockInsensitive = ed' sr_cast $ Just BlockInsensitive -- | @[edMS| ... \/\/\/ ... 
|]@ is a shorthand for @[edMultilineSensitive| ... \/\/\/ ... |]@ edMS :: QuasiQuoter edMS = edMultilineSensitive -- | @[edMI| ... \/\/\/ ... |]@ is a shorthand for @[edMultilineInsensitive| ... \/\/\/ ... |]@ edMI :: QuasiQuoter edMI = edMultilineInsensitive -- | @[edBS| ... \/\/\/ ... |]@ is a shorthand for @[edBlockSensitive| ... \/\/\/ ... |]@ edBS :: QuasiQuoter edBS = edBlockSensitive -- | @[edBI| ... \/\/\/ ... |]@ is a shorthand for @[edBlockInsensitive| ... \/\/\/ ... |]@ edBI :: QuasiQuoter edBI = edBlockInsensitive -- | @[ed_| ... \/\/\/ ... |]@ compiles a 'SearchReplace' template to produce a function that -- takes the RE options (e.g., a 'SimpleREOptions' value) and yields the -- 'SearchReplace' template compiled with those options. For example, -- -- @s *=~/ [ed_|${hex}([0-9a-f]+)\/\/\/0x${hex}|] MultilineInsensitive@ -- -- prefixes the hexadecimal digit strings in s with @0x@, allowing for -- upper- or lower-case hex digits (which is entirely equivalent -- in this example to just using @[edMultilineInsensitive|[0-9a-f]+|]@). ed_ :: QuasiQuoter ed_ = ed' fn_cast Nothing sr_cast :: Q Exp sr_cast = [|\x -> x :: SearchReplace RE (S.Seq Char)|] fn_cast :: Q Exp fn_cast = [|\f x -> f x :: SearchReplace RE (S.Seq Char)|] regex-1.1.0.2/Text/RE/ZeInternals/SearchReplace/TDFA/String.hs0000644000000000000000000000632214254065176021677 0ustar0000000000000000{-# LANGUAGE CPP #-} #if __GLASGOW_HASKELL__ >= 800 {-# LANGUAGE TemplateHaskellQuotes #-} #else {-# LANGUAGE QuasiQuotes #-} {-# LANGUAGE TemplateHaskell #-} #endif module Text.RE.ZeInternals.SearchReplace.TDFA.String ( ed , edMultilineSensitive , edMultilineInsensitive , edBlockSensitive , edBlockInsensitive , edMS , edMI , edBS , edBI , ed_ ) where import Language.Haskell.TH import Language.Haskell.TH.Quote import Text.RE.REOptions import Text.RE.Tools.IsRegex import Text.RE.ZeInternals.SearchReplace.TDFAEdPrime import Text.RE.ZeInternals.TDFA -- | @[ed| ... \/\/\/ ... 
|]@, is equivalent to @[edMultilineSensitive| ... \/\/\/ ... |]@, -- compiling a case-sensitive, multi-line 'SearchReplace' ed :: QuasiQuoter ed = ed' sr_cast $ Just minBound -- | @[edMultilineSensitive| ... \/\/\/ ... |]@ compiles a case-sensitive, multi-line 'SearchReplace' template edMultilineSensitive :: QuasiQuoter edMultilineSensitive = ed' sr_cast $ Just MultilineSensitive -- | @[edMultilineInsensitive| ... \/\/\/ ... |]@ compiles a case-insensitive, multi-line 'SearchReplace' template edMultilineInsensitive :: QuasiQuoter edMultilineInsensitive = ed' sr_cast $ Just MultilineInsensitive -- | @[edBlockSensitive| ... \/\/\/ ... |]@ compiles a case-sensitive, non-multi-line 'SearchReplace' template edBlockSensitive :: QuasiQuoter edBlockSensitive = ed' sr_cast $ Just BlockSensitive -- | @[edBlockInsensitive| ... \/\/\/ ... |]@ compiles a case-insensitive, non-multi-line 'SearchReplace' template edBlockInsensitive :: QuasiQuoter edBlockInsensitive = ed' sr_cast $ Just BlockInsensitive -- | @[edMS| ... \/\/\/ ... |]@ is a shorthand for @[edMultilineSensitive| ... \/\/\/ ... |]@ edMS :: QuasiQuoter edMS = edMultilineSensitive -- | @[edMI| ... \/\/\/ ... |]@ is a shorthand for @[edMultilineInsensitive| ... \/\/\/ ... |]@ edMI :: QuasiQuoter edMI = edMultilineInsensitive -- | @[edBS| ... \/\/\/ ... |]@ is a shorthand for @[edBlockSensitive| ... \/\/\/ ... |]@ edBS :: QuasiQuoter edBS = edBlockSensitive -- | @[edBI| ... \/\/\/ ... |]@ is a shorthand for @[edBlockInsensitive| ... \/\/\/ ... |]@ edBI :: QuasiQuoter edBI = edBlockInsensitive -- | @[ed_| ... \/\/\/ ... |]@ compiles a 'SearchReplace' template to produce a function that -- takes the RE options (e.g., a 'SimpleREOptions' value) and yields the -- 'SearchReplace' template compiled with those options. 
For example, -- -- @s *=~/ [ed_|${hex}([0-9a-f]+)\/\/\/0x${hex}|] MultilineInsensitive@ -- -- prefixes the hexadecimal digit strings in s with @0x@, allowing for -- upper- or lower-case hex digits (which is entirely equivalent -- in this example to just using @[edMultilineInsensitive|[0-9a-f]+|]@). ed_ :: QuasiQuoter ed_ = ed' fn_cast Nothing sr_cast :: Q Exp sr_cast = [|\x -> x :: SearchReplace RE String|] fn_cast :: Q Exp fn_cast = [|\f x -> f x :: SearchReplace RE String|] regex-1.1.0.2/Text/RE/ZeInternals/SearchReplace/TDFA/Text.hs0000644000000000000000000000640314254065176021355 0ustar0000000000000000{-# LANGUAGE CPP #-} #if __GLASGOW_HASKELL__ >= 800 {-# LANGUAGE TemplateHaskellQuotes #-} #else {-# LANGUAGE QuasiQuotes #-} {-# LANGUAGE TemplateHaskell #-} #endif module Text.RE.ZeInternals.SearchReplace.TDFA.Text ( ed , edMultilineSensitive , edMultilineInsensitive , edBlockSensitive , edBlockInsensitive , edMS , edMI , edBS , edBI , ed_ ) where import qualified Data.Text as T import Language.Haskell.TH import Language.Haskell.TH.Quote import Text.RE.REOptions import Text.RE.Tools.IsRegex import Text.RE.ZeInternals.SearchReplace.TDFAEdPrime import Text.RE.ZeInternals.TDFA -- | @[ed| ... \/\/\/ ... |]@, is equivalent to @[edMultilineSensitive| ... \/\/\/ ... |]@, -- compiling a case-sensitive, multi-line 'SearchReplace' ed :: QuasiQuoter ed = ed' sr_cast $ Just minBound -- | @[edMultilineSensitive| ... \/\/\/ ... |]@ compiles a case-sensitive, multi-line 'SearchReplace' template edMultilineSensitive :: QuasiQuoter edMultilineSensitive = ed' sr_cast $ Just MultilineSensitive -- | @[edMultilineInsensitive| ... \/\/\/ ... |]@ compiles a case-insensitive, multi-line 'SearchReplace' template edMultilineInsensitive :: QuasiQuoter edMultilineInsensitive = ed' sr_cast $ Just MultilineInsensitive -- | @[edBlockSensitive| ... \/\/\/ ... 
|]@ compiles a case-sensitive, non-multi-line 'SearchReplace' template edBlockSensitive :: QuasiQuoter edBlockSensitive = ed' sr_cast $ Just BlockSensitive -- | @[edBlockInsensitive| ... \/\/\/ ... |]@ compiles a case-insensitive, non-multi-line 'SearchReplace' template edBlockInsensitive :: QuasiQuoter edBlockInsensitive = ed' sr_cast $ Just BlockInsensitive -- | @[edMS| ... \/\/\/ ... |]@ is a shorthand for @[edMultilineSensitive| ... \/\/\/ ... |]@ edMS :: QuasiQuoter edMS = edMultilineSensitive -- | @[edMI| ... \/\/\/ ... |]@ is a shorthand for @[edMultilineInsensitive| ... \/\/\/ ... |]@ edMI :: QuasiQuoter edMI = edMultilineInsensitive -- | @[edBS| ... \/\/\/ ... |]@ is a shorthand for @[edBlockSensitive| ... \/\/\/ ... |]@ edBS :: QuasiQuoter edBS = edBlockSensitive -- | @[edBI| ... \/\/\/ ... |]@ is a shorthand for @[edBlockInsensitive| ... \/\/\/ ... |]@ edBI :: QuasiQuoter edBI = edBlockInsensitive -- | @[ed_| ... \/\/\/ ... |]@ compiles a 'SearchReplace' template to produce a function that -- takes the RE options (e.g., a 'SimpleREOptions' value) and yields the -- 'SearchReplace' template compiled with those options. For example, -- -- @s *=~/ [ed_|${hex}([0-9a-f]+)\/\/\/0x${hex}|] MultilineInsensitive@ -- -- prefixes the hexadecimal digit strings in s with @0x@, allowing for -- upper- or lower-case hex digits (which is entirely equivalent -- in this example to just using @[edMultilineInsensitive|${hex}([0-9a-f]+)\/\/\/0x${hex}|]@). 
ed_ :: QuasiQuoter ed_ = ed' fn_cast Nothing sr_cast :: Q Exp sr_cast = [|\x -> x :: SearchReplace RE T.Text|] fn_cast :: Q Exp fn_cast = [|\f x -> f x :: SearchReplace RE T.Text|] regex-1.1.0.2/Text/RE/ZeInternals/SearchReplace/TDFA/Text/Lazy.hs0000644000000000000000000000641314254065176022275 0ustar0000000000000000{-# LANGUAGE CPP #-} #if __GLASGOW_HASKELL__ >= 800 {-# LANGUAGE TemplateHaskellQuotes #-} #else {-# LANGUAGE QuasiQuotes #-} {-# LANGUAGE TemplateHaskell #-} #endif module Text.RE.ZeInternals.SearchReplace.TDFA.Text.Lazy ( ed , edMultilineSensitive , edMultilineInsensitive , edBlockSensitive , edBlockInsensitive , edMS , edMI , edBS , edBI , ed_ ) where import qualified Data.Text.Lazy as TL import Language.Haskell.TH import Language.Haskell.TH.Quote import Text.RE.REOptions import Text.RE.Tools.IsRegex import Text.RE.ZeInternals.SearchReplace.TDFAEdPrime import Text.RE.ZeInternals.TDFA -- | @[ed| ... \/\/\/ ... |]@, is equivalent to @[edMultilineSensitive| ... \/\/\/ ... |]@, -- compiling a case-sensitive, multi-line 'SearchReplace' ed :: QuasiQuoter ed = ed' sr_cast $ Just minBound -- | @[edMultilineSensitive| ... \/\/\/ ... |]@ compiles a case-sensitive, multi-line 'SearchReplace' template edMultilineSensitive :: QuasiQuoter edMultilineSensitive = ed' sr_cast $ Just MultilineSensitive -- | @[edMultilineInsensitive| ... \/\/\/ ... |]@ compiles a case-insensitive, multi-line 'SearchReplace' template edMultilineInsensitive :: QuasiQuoter edMultilineInsensitive = ed' sr_cast $ Just MultilineInsensitive -- | @[edBlockSensitive| ... \/\/\/ ... |]@ compiles a case-sensitive, non-multi-line 'SearchReplace' template edBlockSensitive :: QuasiQuoter edBlockSensitive = ed' sr_cast $ Just BlockSensitive -- | @[edBlockInsensitive| ... \/\/\/ ... |]@ compiles a case-insensitive, non-multi-line 'SearchReplace' template edBlockInsensitive :: QuasiQuoter edBlockInsensitive = ed' sr_cast $ Just BlockInsensitive -- | @[edMS| ... \/\/\/ ... 
|]@ is a shorthand for @[edMultilineSensitive| ... \/\/\/ ... |]@ edMS :: QuasiQuoter edMS = edMultilineSensitive -- | @[edMI| ... \/\/\/ ... |]@ is a shorthand for @[edMultilineInsensitive| ... \/\/\/ ... |]@ edMI :: QuasiQuoter edMI = edMultilineInsensitive -- | @[edBS| ... \/\/\/ ... |]@ is a shorthand for @[edBlockSensitive| ... \/\/\/ ... |]@ edBS :: QuasiQuoter edBS = edBlockSensitive -- | @[edBI| ... \/\/\/ ... |]@ is a shorthand for @[edBlockInsensitive| ... \/\/\/ ... |]@ edBI :: QuasiQuoter edBI = edBlockInsensitive -- | @[ed_| ... \/\/\/ ... |]@ compiles a 'SearchReplace' template to produce a function that -- takes the RE options (e.g., a 'SimpleREOptions' value) and yields the -- 'SearchReplace' template compiled with those options. For example, -- -- @s *=~/ [ed_|${hex}([0-9a-f]+)\/\/\/0x${hex}|] MultilineInsensitive@ -- -- prefixes the hexadecimal digit strings in s with @0x@, allowing for -- upper- or lower-case hex digits (which is entirely equivalent -- in this example to just using @[edMultilineInsensitive|${hex}([0-9a-f]+)\/\/\/0x${hex}|]@). 
ed_ :: QuasiQuoter ed_ = ed' fn_cast Nothing sr_cast :: Q Exp sr_cast = [|\x -> x :: SearchReplace RE TL.Text|] fn_cast :: Q Exp fn_cast = [|\f x -> f x :: SearchReplace RE TL.Text|] regex-1.1.0.2/Text/RE/ZeInternals/SearchReplace/TDFAEdPrime.hs0000644000000000000000000000405414254065176021677 0ustar0000000000000000{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE CPP #-} #if __GLASGOW_HASKELL__ >= 800 {-# OPTIONS_GHC -fno-warn-redundant-constraints #-} {-# LANGUAGE TemplateHaskellQuotes #-} #else {-# LANGUAGE QuasiQuotes #-} {-# LANGUAGE TemplateHaskell #-} #endif module Text.RE.ZeInternals.SearchReplace.TDFAEdPrime ( ed' ) where import Language.Haskell.TH import Language.Haskell.TH.Quote import Prelude.Compat import Text.RE.REOptions import Text.RE.Replace import Text.RE.Tools.IsRegex import Text.RE.ZeInternals import Text.RE.ZeInternals.TDFA import Text.RE.ZeInternals.Types.Poss -- | construct a quasi quoter from a casting function and @Just sro@ -- if the options are known, otherwise a function that takes the -- 'SimpleREOptions' and constructs the 'SearchReplace' template ed' :: Q Exp -> Maybe SimpleREOptions -> QuasiQuoter ed' qe mb = case mb of Nothing -> (qq0 "ed'") { quoteExp = parse minBound $ \rs -> AppE <$> qe <*> [|flip unsafe_compile_sr rs|] } Just sro -> (qq0 "ed'") { quoteExp = parse sro $ \rs -> AppE <$> qe <*> [|unsafe_compile_sr_simple sro rs|] } where parse :: SimpleREOptions -> (String->Q Exp) -> String -> Q Exp parse sro mk ts = either error (\_->mk ts) ei where ei :: Either String (SearchReplace RE String) ei = poss2either $ compileSearchReplace_ id (poss2either . compileRegexWith sro) ts unsafe_compile_sr_simple :: IsRegex RE s => SimpleREOptions -> String -> SearchReplace RE s unsafe_compile_sr_simple sro = unsafe_compile_sr $ unpackSimpleREOptions sro unsafe_compile_sr :: (IsOption o, IsRegex RE s) => o -> String -> SearchReplace RE s unsafe_compile_sr os = unsafeCompileSearchReplace_ packR $ poss2either . 
compileRegexWithOptionsForQQ os regex-1.1.0.2/Text/RE/ZeInternals/TDFA.hs0000644000000000000000000003767414254065176015746 0ustar0000000000000000{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE RecordWildCards #-} {-# LANGUAGE MultiParamTypeClasses #-} {-# LANGUAGE TypeSynonymInstances #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE CPP #-} #if __GLASGOW_HASKELL__ >= 800 {-# OPTIONS_GHC -fno-warn-redundant-constraints #-} {-# LANGUAGE TemplateHaskellQuotes #-} #else {-# LANGUAGE QuasiQuotes #-} {-# LANGUAGE TemplateHaskell #-} #endif {-# OPTIONS_GHC -fno-warn-orphans #-} {-# OPTIONS_GHC -fno-warn-unused-imports #-} module Text.RE.ZeInternals.TDFA ( -- * About -- $about -- * RE Type RE , regexType , reOptions , reSource , reCaptureNames , reRegex -- * IsOptions Class and REOptions Type , IsOption(..) , REOptions , defaultREOptions , noPreludeREOptions , unpackSimpleREOptions -- * Compiling Regular Expressions , compileRegex , compileRegexWith , compileRegexWithOptions , compileRegexWithOptionsForQQ -- * Compiling Search-Replace Templates , compileSearchReplace , compileSearchReplaceWith , compileSearchReplaceWithOptions -- * Escaping String , escape , escapeWith , escapeWithOptions , escapeREString -- * Macros Standard Environment , prelude , preludeEnv , preludeTestsFailing , preludeTable , preludeSummary , preludeSources , preludeSource -- * The Quasi Quoters , re , reMS , reMI , reBS , reBI , reMultilineSensitive , reMultilineInsensitive , reBlockSensitive , reBlockInsensitive , re_ , cp ) where import Control.Monad.Fail import Data.Functor.Identity import Language.Haskell.TH import Language.Haskell.TH.Quote import Prelude.Compat hiding (fail) import Text.RE.REOptions import Text.RE.Replace import Text.RE.TestBench import Text.RE.Tools import Text.RE.ZeInternals import Text.RE.ZeInternals.Types.Poss import Text.Regex.TDFA -- | the RE type for this back end representing a well-formed, compiled -- RE data RE = RE { _re_options 
:: !REOptions , _re_source :: !String , _re_cnames :: !CaptureNames , _re_regex :: !Regex } -- | some functions in "Text.RE.TestBench" need the back end to -- be passed dynamically as a 'RegexType' parameter: use 'regexType' -- for this regexType :: RegexType regexType = mkTDFA $ \txt env md -> txt =~ mdRegexSource regexType ExclCaptures env md -- | extract the 'REOptions' from the @RE@ reOptions :: RE -> REOptions reOptions = _re_options -- | extract the RE source string from the @RE@ reSource :: RE -> String reSource = _re_source -- | extract the 'CaptureNames' from the @RE@ reCaptureNames :: RE -> CaptureNames reCaptureNames = _re_cnames -- | extract the back end compiled 'Regex' type from the @RE@ reRegex :: RE -> Regex reRegex = _re_regex ------------------------------------------------------------------------ -- IsOption & REOptions ------------------------------------------------------------------------ -- | a number of types can be used to encode 'REOptions_', each of which -- is made a member of this class class IsOption o where -- | convert the @o@ type into an @REOptions@ makeREOptions :: o -> REOptions -- | and the REOptions for this back end (see "Text.RE.REOptions" -- for details) type REOptions = REOptions_ RE CompOption ExecOption instance IsOption SimpleREOptions where makeREOptions = unpackSimpleREOptions instance IsOption (Macros RE) where makeREOptions ms = REOptions ms def_comp_option def_exec_option instance IsOption CompOption where makeREOptions co = REOptions prelude co def_exec_option instance IsOption ExecOption where makeREOptions eo = REOptions prelude def_comp_option eo instance IsOption REOptions where makeREOptions = id instance IsOption () where makeREOptions _ = unpackSimpleREOptions minBound -- | the default 'REOptions' defaultREOptions :: REOptions defaultREOptions = makeREOptions (minBound::SimpleREOptions) -- | the default 'REOptions' but with no RE macros defined noPreludeREOptions :: REOptions noPreludeREOptions = 
defaultREOptions { optionsMacs = emptyMacros } -- | convert a universal 'SimpleREOptions' into the 'REOptions' used -- by this back end unpackSimpleREOptions :: SimpleREOptions -> REOptions unpackSimpleREOptions sro = REOptions { optionsMacs = prelude , optionsComp = comp , optionsExec = defaultExecOpt } where comp = defaultCompOpt { caseSensitive = cs , multiline = ml } (ml,cs) = case sro of MultilineSensitive -> (,) True True MultilineInsensitive -> (,) True False BlockSensitive -> (,) False True BlockInsensitive -> (,) False False ------------------------------------------------------------------------ -- Compiling Regular Expressions ------------------------------------------------------------------------ -- | compile a 'String' into a 'RE' with the default options, -- generating an error if the RE is not well formed compileRegex :: (Functor m,Monad m,MonadFail m) => String -> m RE compileRegex = compileRegexWith minBound -- | compile a 'String' into a 'RE' using the given @SimpleREOptions@, -- generating an error if the RE is not well formed compileRegexWith :: (Functor m,Monad m,MonadFail m) => SimpleREOptions -> String -> m RE compileRegexWith = compileRegexWithOptions -- | compile a 'String' into a 'RE' using the given 'IsOption' options, -- generating an error if the RE is not well formed compileRegexWithOptions :: (IsOption o, Functor m, Monad m, MonadFail m) => o -> String -> m RE compileRegexWithOptions = compileRegex_ RPM_raw . makeREOptions -- | compile a 'String' into a 'RE' for a quasi quoter, using the given -- 'IsOption' options, generating an error if the RE is not well formed compileRegexWithOptionsForQQ :: (IsOption o, Functor m, Monad m,MonadFail m) => o -> String -> m RE compileRegexWithOptionsForQQ = compileRegex_ RPM_qq . 
makeREOptions ------------------------------------------------------------------------ -- Compiling Search Replace Templates ------------------------------------------------------------------------ -- | compile a SearchReplace template generating errors if the RE or -- the template are not well formed, all capture references being checked compileSearchReplace :: (Monad m,MonadFail m,Functor m,IsRegex RE s) => String -> String -> m (SearchReplace RE s) compileSearchReplace = compileSearchReplaceWith minBound -- | compile a SearchReplace template, with simple options, generating -- errors if the RE or the template are not well formed, all capture -- references being checked compileSearchReplaceWith :: (Monad m,MonadFail m,Functor m,IsRegex RE s) => SimpleREOptions -> String -> String -> m (SearchReplace RE s) compileSearchReplaceWith sro = compileSearchAndReplace_ packR $ poss2either . compileRegexWith sro -- | compile a SearchReplace template, with general options, generating -- errors if the RE or the template are not well formed, all capture -- references being checked compileSearchReplaceWithOptions :: (Monad m,MonadFail m,Functor m,IsRegex RE s) => REOptions -> String -> String -> m (SearchReplace RE s) compileSearchReplaceWithOptions os = compileSearchAndReplace_ packR $ poss2either . compileRegexWithOptions os ------------------------------------------------------------------------ -- Escaping Strings ------------------------------------------------------------------------ -- | convert a string into a RE that matches that string, and apply it -- to an argument continuation function to make up the RE string to be -- compiled; e.g., to compile a RE that will only match the string: -- -- @maybe undefined id . escape ((\"^\"++) . 
(++\"$\"))@ -- escape :: (Functor m,Monad m,MonadFail m) => (String->String) -> String -> m RE escape = escapeWith minBound -- | a variant of 'escape' where the 'SimpleREOptions' are specified escapeWith :: (Functor m,Monad m,MonadFail m) => SimpleREOptions -> (String->String) -> String -> m RE escapeWith = escapeWithOptions -- | a variant of 'escapeWith' that allows an 'IsOption' RE option -- to be specified escapeWithOptions :: ( IsOption o, Functor m, Monad m,MonadFail m) => o -> (String->String) -> String -> m RE escapeWithOptions o f = compileRegexWithOptions o . f . escapeREString ------------------------------------------------------------------------ -- Macro Standard Environment ------------------------------------------------------------------------ -- | the standard table of 'Macros' used to compile REs (which can be -- extended or replaced: see "Text.RE.TestBench") prelude :: Macros RE prelude = runIdentity $ preludeMacros mk regexType ExclCaptures where mk = Identity . unsafeCompileRegex_ RPM_raw noPreludeREOptions -- | the standard 'MacroEnv' for this back end (see "Text.RE.TestBench") preludeEnv :: MacroEnv preludeEnv = preludeMacroEnv regexType -- | the macros in the standard environment that are failing their tests -- (checked by the test suite to be empty) preludeTestsFailing :: [MacroID] preludeTestsFailing = badMacros $ preludeMacroEnv regexType -- | a table of the standard macros in markdown format preludeTable :: String preludeTable = preludeMacroTable regexType -- | a summary of the macros in the standard environment for this back -- end in plain text preludeSummary :: PreludeMacro -> String preludeSummary = preludeMacroSummary regexType -- | a listing of the RE text for each macro in the standard environment -- with all macros expanded to normal form preludeSources :: String preludeSources = preludeMacroSources regexType -- | the prelude source of a given macro in the standard environment preludeSource :: PreludeMacro -> String preludeSource = 
preludeMacroSource regexType ------------------------------------------------------------------------ -- Quasi Quoters ------------------------------------------------------------------------ -- | @[re| ... |]@, is equivalent to @[reMultilineSensitive| ... |]@, -- compiling a case-sensitive, multi-line RE re :: QuasiQuoter re = re' $ Just minBound -- | @[reMultilineSensitive| ... |]@, compiles a case-sensitive, multi-line RE reMultilineSensitive :: QuasiQuoter reMultilineSensitive = re' $ Just MultilineSensitive -- | @[reMultilineInsensitive| ... |]@, compiles a case-insensitive, multi-line RE reMultilineInsensitive :: QuasiQuoter reMultilineInsensitive = re' $ Just MultilineInsensitive -- | @[reBlockSensitive| ... |]@, compiles a case-sensitive, non-multi-line RE reBlockSensitive :: QuasiQuoter reBlockSensitive = re' $ Just BlockSensitive -- | @[reBlockInsensitive| ... |]@, compiles a case-insensitive, non-multi-line RE reBlockInsensitive :: QuasiQuoter reBlockInsensitive = re' $ Just BlockInsensitive -- | @[reMS| ... |]@ is a shorthand for @[reMultilineSensitive| ... |]@ reMS :: QuasiQuoter reMS = reMultilineSensitive -- | @[reMI| ... |]@ is a shorthand for @[reMultilineInsensitive| ... |]@ reMI :: QuasiQuoter reMI = reMultilineInsensitive -- | @[reBS| ... |]@ is a shorthand for @[reBlockSensitive| ... |]@ reBS :: QuasiQuoter reBS = reBlockSensitive -- | @[reBI| ... |]@ is a shorthand for @[reBlockInsensitive| ... |]@ reBI :: QuasiQuoter reBI = reBlockInsensitive -- | @[re_| ... |]@ compiles a RE to produce a function that takes -- the RE options (e.g., a 'SimpleREOptions' value) and yields the -- RE compiled with those options. For example, -- -- @countMatches $ s *=~ [re_|[0-9a-f]+|] MultilineInsensitive@ -- -- counts the number of hexadecimal digit strings in 's', allowing -- for upper- or lower-case hex digits (which is entirely equivalent -- in this example to just using @[reMultilineInsensitive|[0-9a-f]+|]@). 
re_ :: QuasiQuoter re_ = re' Nothing ------------------------------------------------------------------------ -- re Helpers ------------------------------------------------------------------------ re' :: Maybe SimpleREOptions -> QuasiQuoter re' mb = case mb of Nothing -> (qq0 "re'") { quoteExp = parse minBound (\rs->[|flip unsafeCompileRegex rs|]) } Just sro -> (qq0 "re'") { quoteExp = parse sro (\rs->[|unsafeCompileRegexSimple sro rs|]) } where parse :: SimpleREOptions -> (String->Q Exp) -> String -> Q Exp parse sro mk rs = poss error (\_->mk rs) $ compileRegex_ RPM_qq os rs where os = unpackSimpleREOptions sro data RegexParseMode = RPM_qq | RPM_raw deriving (Eq,Show) unsafeCompileRegexSimple :: SimpleREOptions -> String -> RE unsafeCompileRegexSimple sro re_s = unsafeCompileRegex_ RPM_qq os re_s where os = unpackSimpleREOptions sro unsafeCompileRegex :: IsOption o => o -> String -> RE unsafeCompileRegex = unsafeCompileRegex_ RPM_qq . makeREOptions unsafeCompileRegex_ :: RegexParseMode -> REOptions -> String -> RE unsafeCompileRegex_ rpm os = poss oops id . compileRegex_ rpm os where oops = error . 
("unsafeCompileRegex: " ++) compileRegex_ :: (Functor m,MonadFail m,Monad m) => RegexParseMode -> REOptions -> String -> m RE compileRegex_ rpm os re_s = uncurry mk <$> compileRegex' rpm os re_s where mk cnms rx = RE { _re_options = os , _re_source = re_s , _re_cnames = cnms , _re_regex = rx } compileRegex' :: (Functor m,MonadFail m,Monad m) => RegexParseMode -> REOptions -> String -> m (CaptureNames,Regex) compileRegex' rpm REOptions{..} s0 = do ((_,cnms),s2) <- either fail return $ extractNamedCaptures s1 (,) cnms <$> makeRegexOptsM optionsComp optionsExec s2 where s1 = expandMacros reSource optionsMacs $ pp s0 pp = case rpm of RPM_qq -> qq_prep RPM_raw -> id ------------------------------------------------------------------------ -- Preprocessing Literal REs ------------------------------------------------------------------------ qq_prep :: String -> String qq_prep s0 = case s0 of "" -> "" c:s -> case c of '\\' -> backslash s _ -> c : qq_prep s where backslash s1 = case s1 of "" -> "\\" c:s -> case c of 'a' -> '\a' : qq_prep s 'b' -> '\b' : qq_prep s 'f' -> '\f' : qq_prep s 'n' -> '\n' : qq_prep s 'r' -> '\r' : qq_prep s 't' -> '\t' : qq_prep s 'v' -> '\v' : qq_prep s _ -> '\\': c : qq_prep s ------------------------------------------------------------------------ -- Options Helpers ------------------------------------------------------------------------ def_comp_option :: CompOption def_comp_option = optionsComp defaultREOptions def_exec_option :: ExecOption def_exec_option = optionsExec defaultREOptions ------------------------------------------------------------------------ -- Haddock Sections ------------------------------------------------------------------------ -- $about -- -- This module provides the regex TDFA back end. Most of the functions that -- you will need for day to day use are provided by the primary API modules -- (e.g., "Text.RE.TDFA.Text"). 
regex-1.1.0.2/Text/RE/ZeInternals/TestBench.lhs0000644000000000000000000003727214254065176017255 0ustar0000000000000000\begin{code} {-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE RecordWildCards #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE CPP #-} #if __GLASGOW_HASKELL__ >= 800 {-# OPTIONS_GHC -fno-warn-redundant-constraints #-} #endif module Text.RE.ZeInternals.TestBench ( MacroID(..) , RegexType , mkTDFA , mkPCRE , isTDFA , isPCRE , presentRegexType , MacroEnv , WithCaptures(..) , MacroDescriptor(..) , TestResult(..) , RegexSource(..) , FunctionID(..) , mkMacros , testMacroEnv , badMacros , runTests , runTests' , formatMacroTable , formatMacroSummary , formatMacroSources , formatMacroSource , testMacroDescriptors , mdRegexSource ) where import Data.Array import qualified Data.HashMap.Lazy as HML import qualified Data.List as L import Data.Maybe import Data.Ord import Data.String import Prelude.Compat import Text.Printf import Text.RE.REOptions import Text.RE.ZeInternals.Replace import Text.RE.ZeInternals.Types.Capture import Text.RE.ZeInternals.Types.Match import Text.RE.ZeInternals.Types.Matches \end{code} Types ----- \begin{code} type TestBenchMatcher = String -> MacroEnv -> MacroDescriptor -> Matches String -- | what flavour of regex are we dealing with data RegexType = TDFA TestBenchMatcher | PCRE TestBenchMatcher -- | test RegexType for TDFA/PCREness isTDFA, isPCRE :: RegexType -> Bool isTDFA (TDFA _) = True isTDFA (PCRE _) = False isPCRE (TDFA _) = False isPCRE (PCRE _) = True mkTDFA, mkPCRE :: TestBenchMatcher -> RegexType mkTDFA = TDFA mkPCRE = PCRE presentRegexType :: RegexType -> String presentRegexType (TDFA _) = "TDFA" presentRegexType (PCRE _) = "PCRE" instance Show RegexType where show (TDFA _) = "TDFA " show (PCRE _) = "PCRE " -- | do we need the captures in the RE or would they be stripped out -- where possible data WithCaptures = InclCaptures -- ^ include all captures | ExclCaptures -- ^ remove 
captures where possible deriving (Eq,Ord,Show) -- | each macro can reference others, the whole environment being -- required for each macro, so we use a Lazy HashMap type MacroEnv = HML.HashMap MacroID MacroDescriptor -- | describes a macro, giving the text of the RE and a summary -- description data MacroDescriptor = MacroDescriptor { macroSource :: !RegexSource -- ^ the RE , macroSamples :: ![String] -- ^ some sample matches , macroCounterSamples :: ![String] -- ^ some sample non-matches , macroTestResults :: ![TestResult] -- ^ validation test results , macroParser :: !(Maybe FunctionID) -- ^ WA, the parser function , macroDescription :: !String -- ^ summary comment } deriving (Show) -- | list of failures on a validation run newtype TestResult = TestResult { _TestResult :: String } deriving (IsString,Show) -- | a RE that should work for POSIX and PCRE with open brackets ('(') -- represented as follows: -- \( mere symbol -- (?: used for grouping only, not for captures -- (}: used for captures only, not for grouping -- (]: used for captures and grouping -- ( do not modify newtype RegexSource = RegexSource { _RegexSource :: String } deriving (IsString,Show) -- | name of the Haskell parser function for parsing the text matched -- by a macro newtype FunctionID = FunctionID { _FunctionID :: String } deriving (IsString,Show) -- | we are only interested in the open parentheses used for -- grouping and/or capturing; if neither grouping nor capturing then -- there is no '(' or '(?:' after it, just the prefix text data REToken = REToken { _ret_prefix :: String -- ^ literal text preceding the optional ( or (?: , _ret_fixed :: Bool -- ^ a '(' that is not safe to modify , _ret_grouping :: Bool -- ^ is this a grouping group , _ret_capturing :: Bool -- ^ is this a capturing group } deriving (Show) \end{code} mkMacros -------- \begin{code} -- | construct a macro table suitable for use with the RE compilers mkMacros :: (Monad m,Functor m) => (String->m r) -> RegexType -> WithCaptures -> 
MacroEnv -> m (Macros r) mkMacros prs rty wc env = HML.fromList <$> mapM (uncurry mk) (HML.toList env) where mk mid md = (,) mid <$> prs (mdRegexSource rty wc env md) \end{code} testMacroEnv, badMacros ----------------------- \begin{code} -- | test that a MacroEnv is passing all of its built-in tests testMacroEnv :: String -> RegexType -> MacroEnv -> IO Bool testMacroEnv lab rty m_env = case badMacros m_env of [] -> return True fails -> do putStrLn $ lab' ++ " has failing tests for these macros: " putStr $ unlines $ [ " "++getMacroID mid | mid<-fails ] putStrLn $ "The whole table:" putStrLn $ "========================================================" putStr $ formatMacroTable rty m_env putStrLn $ "========================================================" return False where lab' = lab ++ " [" ++ presentRegexType rty ++"]" badMacros :: MacroEnv -> [MacroID] badMacros m_env = [ mid | (mid,MacroDescriptor{..}) <- HML.toList m_env , not $ null macroTestResults ] runTests :: (Eq a,Show a) => RegexType -> (String->Maybe a) -> [(String,a)] -> MacroEnv -> MacroID -> MacroDescriptor -> MacroDescriptor runTests rty parser = runTests' rty parser' where parser' caps = fmap capturedText (matchCapture caps) >>= parser runTests' :: (Eq a,Show a) => RegexType -> (Match String->Maybe a) -> [(String,a)] -> MacroEnv -> MacroID -> MacroDescriptor -> MacroDescriptor runTests' rty parser vector env mid md@MacroDescriptor{..} = md { macroTestResults = test_results } where test_results = concat [ concat $ map test vector , concat $ map test_neg macroCounterSamples ] test (src,x) = test' mid rty parser x $ match_ src env md test_neg src = test_neg' mid rty parser $ match_ src env md match_ = case rty of TDFA tbmf -> tbmf PCRE tbmf -> tbmf \end{code} formatMacroTable, formatMacroSummary, formatMacroSources, formatMacroSource --------------------------------------------------------------------------- \begin{code} -- | format a macros table as a markdown table formatMacroTable :: RegexType -> 
MacroEnv -> String formatMacroTable rty env = unlines $ format_table macro_table_hdr [ macro_table_row rty mid md | (mid,md) <- L.sortBy (comparing fst) $ HML.toList env ] \end{code} \begin{code} -- | generate a plain text summary of a macro formatMacroSummary :: RegexType -> MacroEnv -> MacroID -> String formatMacroSummary rty env mid = maybe oops prep $ HML.lookup mid env where prep :: MacroDescriptor -> String prep md = unlines $ concat $ map (fmt md) [minBound..maxBound] fmt :: MacroDescriptor -> Col -> [String] fmt md c = [ printf "%-15s : %s" (present_col c) ini ] ++ map (" "++) lns where (ini,lns) = case macro_attribute rty mid md c of [] -> (,) "" [] [ln] -> (,) ln [] lns_ -> (,) "" lns_ oops = error $ getMacroID mid ++ ": macro not defined in this environment" \end{code} \begin{code} -- | list the source REs for each macro in plain text formatMacroSources :: RegexType -> WithCaptures -> MacroEnv -> String formatMacroSources rty wc env = unlines $ [ printf "%-20s : %s" (getMacroID mid) $ formatMacroSource rty wc env mid | mid <- L.sort $ HML.keys env ] \end{code} \begin{code} -- | list the source of a single macro in plain text formatMacroSource :: RegexType -> WithCaptures -> MacroEnv -> MacroID -> String formatMacroSource rty wc env mid = mdRegexSource rty wc env $ fromMaybe oops $ HML.lookup mid env where oops = error $ "formatMacroSource: not found: " ++ getMacroID mid \end{code} testMacroDescriptors, regexSource --------------------------------- \begin{code} testMacroDescriptors :: [MacroDescriptor] -> [TestResult] testMacroDescriptors = concat . map macroTestResults regexSource :: RegexType -> WithCaptures -> RegexSource -> String regexSource rty wc = format_tokens rty wc . 
scan_re \end{code} Formatting helpers ------------------ \begin{code} type TableRow = Array Col [String] data Col = C_name | C_caps | C_regex | C_examples | C_anti_examples | C_fails | C_parser | C_comment deriving (Ix,Bounded,Enum,Ord,Eq,Show) present_col :: Col -> String present_col = map tr . drop 2 . show where tr '_' = '-' tr c = c macro_table_hdr :: TableRow macro_table_hdr = listArray (minBound,maxBound) [ [present_col c] | c<-[minBound..maxBound] ] macro_table_row :: RegexType -> MacroID -> MacroDescriptor -> TableRow macro_table_row rty mid md = listArray (minBound,maxBound) $ map (macro_attribute rty mid md) [minBound..maxBound] macro_attribute :: RegexType -> MacroID -> MacroDescriptor -> Col -> [String] macro_attribute rty mid MacroDescriptor{..} c = case c of C_name -> [getMacroID mid] C_caps -> [show $ min_captures rty $ scan_re macroSource] C_regex -> [regexSource rty ExclCaptures macroSource] C_examples -> macroSamples C_anti_examples -> macroCounterSamples C_fails -> map _TestResult macroTestResults C_parser -> [maybe "-" _FunctionID macroParser] C_comment -> [macroDescription] format_table :: TableRow -> [TableRow] -> [String] format_table hdr rows0 = concat [ format_row cws hdr' , format_row cws dsh , concat $ map (format_row cws) rows ] where dsh = listArray (minBound,maxBound) [ [replicate n '-'] | n<-elems cws ] hdr' = hdr // [(,) C_regex $ [take n $ concat $ repeat "regex="] ] where n = min 29 $ cws!C_regex cws = widths $ hdr : rows rows = map wrap_row rows0 field_width :: Int field_width = 40 wrap_row :: TableRow -> TableRow wrap_row = fmap $ concat . 
map f where f, g :: String -> [String] f cts = (ini ++ ['\\' | not (null rst)]) : g rst where (ini,rst) = splitAt (1+field_width) cts g "" = [] g cts = ('\\' : ini ++ ['\\' | not (null rst)]) : g rst where (ini,rst) = splitAt field_width cts widths :: [TableRow] -> Array Col Int widths rows = listArray (minBound,maxBound) [ maximum $ concat [ map length $ row!c | row<-rows ] | c<-[minBound..maxBound] ] format_row :: Array Col Int -> TableRow -> [String] format_row cw_arr row = [ ("|"++) $ L.intercalate "|" [ field cw_arr row c i | c<-[minBound..maxBound] ] | i <- [0..depth-1] ] where depth = maximum [ length $ row!c | c<-[minBound..maxBound] ] field :: Array Col Int -> TableRow -> Col -> Int -> String field cws row c i = ljust (cws!c) $ sel i $ row!c sel :: Int -> [String] -> String sel i ss = case drop i ss of [] -> "" s:_ -> s ljust :: Int -> String -> String ljust w s = s ++ replicate n ' ' where n = max 0 $ w - length s min_captures :: RegexType -> [REToken] -> Int min_captures rty rets = length [ () | REToken{..}<-rets , _ret_fixed || (_ret_grouping && isTDFA rty) ] \end{code} Formatting tokens ----------------- \begin{code} format_tokens :: RegexType -> WithCaptures -> [REToken] -> String format_tokens rty wc = foldr f "" where f REToken{..} rst = _ret_prefix ++ bra ++ xket rst where bra = case _ret_fixed of True -> "(" False -> case (,) _ret_grouping (_ret_capturing && wc==InclCaptures) of (False,False) -> "" (True ,False) -> if isPCRE rty then "(?:" else "(" (False,True ) -> "(" (True ,True ) -> "(" xket = case not _ret_grouping && _ret_capturing && wc==ExclCaptures of True -> delete_ket 0 False -> id delete_ket :: Int -> String -> String delete_ket _ "" = error "delete_ket: end of input" delete_ket n (c:t) = case c of '\\' -> case t of "" -> error "delete_ket: end of input" c':t' -> c : c' : delete_ket n t' ')' -> case n of 0 -> t _ -> c : delete_ket (n-1) t '(' -> c : delete_ket (n+1) t _ -> c : delete_ket n t \end{code} scan_re ------- \begin{code} 
-- | split RE source text into tokens at each backslash escape and each
-- opening bracket. Every token carries the text preceding the special
-- character plus three flags.
-- NOTE(review): REToken is declared elsewhere; judging by the record
-- fields used in format_tokens the flags are presumably fixed, grouping
-- and capturing, in that order -- confirm against the declaration.
scan_re :: RegexSource -> [REToken]
scan_re (RegexSource src0) = loop src0
  where
    loop ""  = []
    loop src = case rst of
        -- an escape: the backslash and the character it escapes (if
        -- any) are kept in the token text, with all flags False
        '\\':t -> case t of
          ""    -> REToken (ini++['\\'])    False False False : []
          c':t' -> REToken (ini++['\\',c']) False False False : loop t'
        -- an opening bracket: "(?:", "(}:" and "(]:" select the three
        -- special flag combinations; any other "(" sets all three flags
        '(' :t -> case t of
          c:':':t'
            | c=='?' -> REToken ini False True  False : loop t'
            | c=='}' -> REToken ini False False True  : loop t'
            | c==']' -> REToken ini False True  True  : loop t'
          _          -> REToken ini True  True  True  : loop t
        -- no more special characters: the rest is a single plain token
        _ -> [REToken src False False False]
      where
        (ini,rst) = break chk src

    chk '\\' = True
    chk '('  = True
    chk _    = False
\end{code}


mdRegexSource
-------------

\begin{code}
-- | the RE text for a macro with every macro reference in it expanded,
-- each referenced macro being looked up in the environment and
-- formatted with the same regex type and capture mode
mdRegexSource :: RegexType
              -> WithCaptures
              -> MacroEnv
              -> MacroDescriptor
              -> String
mdRegexSource rty wc env md =
    expandMacros' lu $ regexSource rty wc $ macroSource md
  where
    lu = fmap (regexSource rty wc . macroSource) . flip HML.lookup env
\end{code}


test', test_neg'
----------------

\begin{code}
-- | check a positive sample: there must be exactly one match, it must
-- cover the whole of the source text and the parser must turn it into
-- the expected value; yields the empty list on success and a single
-- diagnostic otherwise
test' :: (Eq a,Show a)
      => MacroID
      -> RegexType
      -> (Match String->Maybe a)
      -> a
      -> Matches String
      -> [TestResult]
test' mid rty prs x Matches{..} = either (:[]) (const []) $ do
    cs   <- case allMatches of
              [cs] -> return cs
              _    -> oops "RE failed to parse"
    mtx  <- case matchCapture cs of
              Nothing -> oops $ "RE parse failure: " ++ show cs
              Just c  -> return $ capturedText c
    case mtx == matchesSource of
      True  -> return ()
      False -> oops "RE failed to match the whole text"
    x'   <- case prs cs of
              Nothing -> oops "matched text failed to parse"
              Just x' -> return x'
    case x'==x of
      True  -> return ()
      False -> oops "parser failed to yield the expected result"
  where
    oops = Left .
test_diagnostic mid False rty matchesSource

-- | check a counter-sample: the test fails only if some match covers the
-- whole of the source text and is also accepted by the parser
test_neg' :: MacroID
          -> RegexType
          -> (Match String->Maybe a)
          -> Matches String
          -> [TestResult]
test_neg' mid rty prs Matches{..} = either id (const []) $ do
    case allMatches of
      [] -> return ()
      cz -> case ms of
          [] -> return ()
          _  -> Left [oops "RE parse succeeded"]
        where
          -- one unit per match that spans the whole text and parses
          ms =
            [ ()
              | cs <- cz
              , Just c <- [matchCapture cs]
              , let t = capturedText c
              , t == matchesSource
              , isJust $ prs cs
              ]
  where
    oops = test_diagnostic mid True rty matchesSource

-- | build the diagnostic for a failed test from the macro id, whether it
-- was a negative (@-ve@) or positive (@+ve@) test, the regex type, the
-- text under test and the failure message
test_diagnostic :: MacroID
                -> Bool
                -> RegexType
                -> String
                -> String
                -> TestResult
test_diagnostic mid is_neg rty tst msg =
    TestResult $
      printf "%-20s [%s %s] : %s (%s)" mid_s neg_s rty_s msg tst
  where
    mid_s = getMacroID mid
    neg_s = if is_neg then "-ve" else "+ve" :: String
    rty_s = presentRegexType rty
\end{code}
regex-1.1.0.2/Text/RE/ZeInternals/TestBench/Parsers.hs0000644000000000000000000001211714254065176020507 0ustar0000000000000000{-# OPTIONS_GHC -fno-warn-warnings-deprecations #-}
{-# LANGUAGE OverloadedStrings                  #-}
{-# LANGUAGE CPP                                #-}

module Text.RE.ZeInternals.TestBench.Parsers
  (
  -- * The Test Bench
    MacroEnv
  , MacroDescriptor(..)
  , RegexSource(..)
  , WithCaptures(..)
  , RegexType
  , isTDFA
  , isPCRE
  , presentRegexType
  -- ** Constructing a MacroEnv
  , mkMacros
  -- ** Formatting Macros
  , formatMacroTable
  , formatMacroSummary
  , formatMacroSources
  , formatMacroSource
  -- ** Testing Macros
  , testMacroEnv
  , runTests
  , runTests'
  -- * Parsing
  , parseInteger
  , parseHex
  , parseDouble
  , parseString
  , parseSimpleString
  , parseDate
  , parseSlashesDate
  , parseTimeOfDay
  , parseTimeZone
  , parseDateTime
  , parseDateTime8601
  , parseDateTimeCLF
  , parseShortMonth
  , shortMonthArray
  , IPV4Address
  , parseIPv4Address
  , Severity(..)
, parseSeverity
  , severityKeywords
  ) where

import           Data.Array
import qualified Data.HashMap.Strict        as HM
import           Data.Maybe
import qualified Data.Text                  as T
import           Data.Time
import qualified Data.Time.Locale.Compat    as LC
import           Data.Word
import           Text.Printf
import           Text.RE.Replace
import           Text.RE.ZeInternals.TestBench
import           Text.Read


-- | read the unpacked text as an 'Int'
parseInteger :: Replace a => a -> Maybe Int
parseInteger = readMaybe . unpackR

-- | read the unpacked text as a hexadecimal 'Int' by putting @0x@ in
-- front of it (so the text itself must not carry a @0x@ prefix)
parseHex :: Replace a => a -> Maybe Int
parseHex = readMaybe . ("0x"++) . unpackR

-- | read the unpacked text as a 'Double'
parseDouble :: Replace a => a -> Maybe Double
parseDouble = readMaybe . unpackR

-- | read the unpacked text with the 'Read' instance for 'T.Text'
parseString :: Replace a => a -> Maybe T.Text
parseString = readMaybe . unpackR

-- | strip the first and last characters (presumably the quotes) from
-- the text; always succeeds
parseSimpleString :: Replace a => a -> Maybe T.Text
parseSimpleString = Just . T.dropEnd 1 . T.drop 1 . textifyR

-- the format templates tried, in order, by the date and time parsers
date_templates, time_templates, timezone_templates,
  date_time_8601_templates, date_time_templates :: [String]
date_templates            = ["%F"]
time_templates            = ["%H:%M:%S","%H:%M:%S%Q","%H:%M"]
timezone_templates        = ["Z","%z"]
-- every date, time and time zone template joined with a 'T'
date_time_8601_templates  =
    [ printf "%sT%s%s" dt tm tz
        | dt <- date_templates
        , tm <- time_templates
        , tz <- timezone_templates
        ]
-- as above but a space may stand in for the 'T' and the time zone may
-- also be " UTC" or missing altogether
date_time_templates       =
    [ printf "%s%c%s%s" dt sc tm tz
        | dt <- date_templates
        , sc <- ['T',' ']
        , tm <- time_templates
        , tz <- timezone_templates ++ [" UTC",""]
        ]

-- | parse a @%F@ date
parseDate :: Replace a => a -> Maybe Day
parseDate = parse_time date_templates

-- | parse a @%Y\/%m\/%d@ date
parseSlashesDate :: Replace a => a -> Maybe Day
parseSlashesDate = parse_time ["%Y/%m/%d"]

-- | parse a time of day using 'time_templates'
parseTimeOfDay :: Replace a => a -> Maybe TimeOfDay
parseTimeOfDay = parse_time time_templates

-- | parse a time zone using 'timezone_templates'
parseTimeZone :: Replace a => a -> Maybe TimeZone
parseTimeZone = parse_time timezone_templates

-- | parse a date and time using the relaxed 'date_time_templates'
parseDateTime :: Replace a => a -> Maybe UTCTime
parseDateTime = parse_time date_time_templates

-- | parse a date and time using 'date_time_8601_templates'
parseDateTime8601 :: Replace a => a -> Maybe UTCTime
parseDateTime8601 = parse_time date_time_8601_templates

-- | parse a @%d\/%b\/%Y:%H:%M:%S %z@ date and time
parseDateTimeCLF :: Replace a => a -> Maybe UTCTime
parseDateTimeCLF = parse_time ["%d/%b/%Y:%H:%M:%S %z"]

-- | look up a short month name, yielding its number (1 for @Jan@ up to
-- 12 for @Dec@)
parseShortMonth :: Replace a => a -> Maybe Int
parseShortMonth txt = HM.lookup (unpackR txt) short_month_hm

-- | try each format template in turn against the unpacked text, keeping
-- the first successful parse
parse_time :: (ParseTime t,Replace s) => [String] -> s -> Maybe t
parse_time tpls = prs . unpackR
  where
    prs s = listToMaybe $ mapMaybe (attempt s) tpls

    attempt s fmt = parseTimeM True LC.defaultTimeLocale fmt s

#if !MIN_VERSION_time(1,5,0)
    parseTimeM _ = parseTime
#endif

-- | short month name to month number (1..12)
short_month_hm :: HM.HashMap String Int
short_month_hm =
  HM.fromList
    [ (T.unpack nm,i)
      | (i,nm) <- assocs shortMonthArray
      ]

-- | the short month names indexed by month number (1..12)
shortMonthArray :: Array Int T.Text
shortMonthArray = listArray (1,12) month_names
  where
    month_names = T.words "Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec"

-- | the four octets of an IPv4 address
type IPV4Address = (Word8,Word8,Word8,Word8)

-- | split the text on dots and accept it only if that leaves exactly
-- four fields, each of which reads as a number in the range 0..255
parseIPv4Address :: Replace a => a -> Maybe IPV4Address
parseIPv4Address = prs . words_by (=='.') . unpackR
  where
    prs flds@[_,_,_,_] =
      case mapM readMaybe flds of
        Just os@[a,b,c,d]
          | all is_o os -> Just (toEnum a,toEnum b,toEnum c,toEnum d)
        _               -> Nothing
    prs _ = Nothing

    is_o x = x >= 0 && x <= 255

-- | the severity levels, most severe first
data Severity
  = Emerg
  | Alert
  | Crit
  | Err
  | Warning
  | Notice
  | Info
  | Debug
  deriving (Bounded,Enum,Ord,Eq,Show)

parseSeverity :: Replace a => a -> Maybe Severity
parseSeverity = flip HM.lookup severity_hm .
textifyR

-- | every severity keyword (primary and alternatives) mapped to its
-- 'Severity'
severity_hm :: HM.HashMap T.Text Severity
severity_hm = HM.fromList
  [ (kw,pri)
    | pri<-[minBound..maxBound]
    , let (kw0,kws) = severityKeywords pri
    , kw <- kw0:kws
    ]

-- | the primary keyword for a severity paired with any alternative
-- keywords that are also accepted for it
severityKeywords :: Severity -> (T.Text,[T.Text])
severityKeywords pri = case pri of
  Emerg     -> (,) "emerg"    ["panic"]
  Alert     -> (,) "alert"    []
  Crit      -> (,) "crit"     []
  Err       -> (,) "err"      ["error"]
  Warning   -> (,) "warning"  ["warn"]
  Notice    -> (,) "notice"   []
  Info      -> (,) "info"     []
  Debug     -> (,) "debug"    []

-- | split a string into the non-empty fields lying between characters
-- that satisfy the predicate (runs of separators yield no empty fields)
words_by :: (Char->Bool) -> String -> [String]
words_by f s = case dropWhile f s of
  "" -> []
  s' -> w : words_by f s''
    where
      (w, s'') = break f s'
regex-1.1.0.2/Text/RE/ZeInternals/Tools/Lex.lhs0000644000000000000000000000213514254065176017214 0ustar0000000000000000\begin{code}
{-# LANGUAGE NoImplicitPrelude          #-}

module Text.RE.ZeInternals.Tools.Lex
  ( alex
  , alex'
  ) where

import           Prelude.Compat
import           Text.RE.Replace
import           Text.RE.ZeInternals.Types.IsRegex
\end{code}


\begin{code}
-- | a simple regex-based scanner interpreter for prototyping
-- scanners
alex :: IsRegex re s => [(re,Match s->Maybe t)] -> t -> s -> [t]
alex = alex' matchOnce

-- | a higher order version of 'alex' parameterised over the @matchOnce@
-- function
--
-- The rules are tried in order against the remaining input. A rule fires
-- if its RE matches at the very start of the input, consumes at least
-- one character and its action accepts the match; scanning then resumes
-- with the text following the match. If no rule fires the error token is
-- emitted and the scan stops. Scanning ends normally once the input is
-- empty.
alex' :: Replace s
      => (re->s->Match s)
      -> [(re,Match s->Maybe t)]
      -> t
      -> s
      -> [t]
alex' mo al t_err = loop
  where
    loop s = case lengthR s == 0 of
      True  -> []
      False -> choose al s

    choose []           _ = [t_err]
    choose ((re,f):al') s = case mb_p of
        Just (s',t) -> t : loop s'
        _           -> choose al' s
      where
        mb_p = do
          cap <- matchCapture mtch
          -- a zero-width match would hand the unchanged input straight
          -- back to loop, which would then select the same rule again
          -- for ever, so such matches are treated as failing to match
          case captureOffset cap == 0 && captureLength cap > 0 of
            True  -> (,) (captureSuffix cap) <$> f mtch
            False -> Nothing

        mtch = mo re s
\end{code}
regex-1.1.0.2/Text/RE/ZeInternals/Types/Capture.lhs0000644000000000000000000000324414254065176020075 0ustar0000000000000000\begin{code}
{-# LANGUAGE RecordWildCards            #-}
{-# LANGUAGE FlexibleInstances          #-}
{-# LANGUAGE UndecidableInstances       #-}
{-# LANGUAGE MultiParamTypeClasses      #-}
{-# LANGUAGE DeriveDataTypeable         #-}
\end{code}

\begin{code}
module
Text.RE.ZeInternals.Types.Capture
  ( Capture(..)
  , hasCaptured
  , capturePrefix
  , captureSuffix
  ) where
\end{code}

\begin{code}
import           Text.Regex.Base
\end{code}



\begin{code}
-- | the matching of a single sub-expression against part of the source
-- text
data Capture a =
  Capture
    { captureSource  :: !a    -- ^ the whole text that was searched
    , capturedText   :: !a    -- ^ the text that was matched
    , captureOffset  :: !Int  -- ^ the number of characters preceding the
                              -- match with -1 used if no text was captured
                              -- by the RE (not even the empty string)
    , captureLength  :: !Int  -- ^ the number of characters in the captured
                              -- sub-string
    }
  deriving (Show,Eq)
\end{code}

\begin{code}
-- | the function is applied to the source text and the captured text;
-- the offset and length are left as they are
instance Functor Capture where
  fmap f c@Capture{..} =
    c
      { captureSource = f captureSource
      , capturedText = f capturedText
      }
\end{code}

\begin{code}
-- | test if the capture has matched any text (an empty string counts:
-- only the -1 offset marks a capture that did not take part in the match)
hasCaptured :: Capture a -> Bool
hasCaptured = (>=0) . captureOffset

-- | returns the text preceding the match
capturePrefix :: Extract a => Capture a -> a
capturePrefix Capture{..} = before captureOffset captureSource

-- | returns the text after the match
captureSuffix :: Extract a => Capture a -> a
captureSuffix Capture{..} = after (captureOffset+captureLength) captureSource
\end{code}
regex-1.1.0.2/Text/RE/ZeInternals/Types/CaptureID.hs0000644000000000000000000000322614254065176020136 0ustar0000000000000000{-# LANGUAGE GeneralizedNewtypeDeriving #-}

module Text.RE.ZeInternals.Types.CaptureID where

import qualified Data.HashMap.Strict            as HMS
import           Data.Hashable
import           Data.Ix
import qualified Data.Text                      as T


-- | CaptureID identifies captures, either by number
-- (e.g., [cp|1|]) or name (e.g., [cp|foo|]).
data CaptureID
  = IsCaptureOrdinal CaptureOrdinal -- [cp|3|]
  | IsCaptureName    CaptureName    -- [cp|y|]
  deriving (Show,Ord,Eq)

-- | the dictionary for named captures stored in compiled regular
-- expressions associates each capture name with its capture ordinal
type CaptureNames = HMS.HashMap CaptureName CaptureOrdinal

-- | an empty 'CaptureNames' dictionary
noCaptureNames :: CaptureNames
noCaptureNames = HMS.empty

-- | a 'CaptureName' is just the text of the name
newtype CaptureName = CaptureName { getCaptureName :: T.Text }
  deriving (Show,Ord,Eq)

-- | a 'CaptureName' is hashed as its underlying text
instance Hashable CaptureName where
  hashWithSalt i = hashWithSalt i . getCaptureName

-- | a 'CaptureOrdinal' is just the number of the capture, starting
-- with 0 for the whole of the text matched, then in leftmost,
-- outermost order
newtype CaptureOrdinal = CaptureOrdinal { getCaptureOrdinal :: Int }
  deriving (Show,Ord,Eq,Enum,Ix,Num)

-- | look up a 'CaptureID' in the 'CaptureNames' dictionary: an ordinal
-- is returned as it is (no check is made here that it is in range); a
-- name missing from the dictionary yields a 'Left' message that lists
-- the names the dictionary does contain
findCaptureID :: CaptureID -> CaptureNames -> Either String CaptureOrdinal
findCaptureID (IsCaptureOrdinal o) _   = Right o
findCaptureID (IsCaptureName    n) hms =
    maybe oops Right $ HMS.lookup n hms
  where
    -- NOTE(review): the message is prefixed "lookupCaptureID" rather
    -- than with this function's name -- presumably left over from a
    -- rename; confirm before changing as callers may match on the text
    oops = Left $ unlines $
      ("lookupCaptureID: " ++ T.unpack t ++ " not found in:") :
        [ " "++T.unpack (getCaptureName nm) | nm <- HMS.keys hms ]

    t = getCaptureName n
regex-1.1.0.2/Text/RE/ZeInternals/Types/IsRegex.lhs0000644000000000000000000000632614254065176020044 0ustar0000000000000000\begin{code}
{-# LANGUAGE RecordWildCards            #-}
{-# LANGUAGE MultiParamTypeClasses      #-}
{-# LANGUAGE AllowAmbiguousTypes        #-}
{-# LANGUAGE CPP                        #-}
#if __GLASGOW_HASKELL__ >= 800
{-# OPTIONS_GHC -fno-warn-redundant-constraints #-}
{-# OPTIONS_GHC -fno-warn-unused-imports        #-}
#endif

module Text.RE.ZeInternals.Types.IsRegex
  ( IsRegex(..)
  , SearchReplace(..)
, searchReplaceAll
  , searchReplaceFirst
  ) where

import           Control.Monad.Fail
import           Text.RE.REOptions
import           Text.RE.Replace
import           Text.RE.ZeInternals.EscapeREString
import           Text.RE.ZeInternals.Types.SearchReplace
\end{code}


\begin{code}
-- | the 'IsRegex' class allows polymorphic tools to be written that
-- will work with a variety of regex back ends and text types
class Replace s => IsRegex re s where
  -- | finding the first match
  matchOnce             :: re -> s -> Match s
  -- | finding all matches
  matchMany             :: re -> s -> Matches s
  -- | compiling an RE, failing if the RE is not well formed
  makeRegex             :: (Functor m,Monad m, MonadFail m) => s -> m re
  -- | compiling an RE, specifying the 'SimpleREOptions'
  makeRegexWith         :: (Functor m,Monad m, MonadFail m) => SimpleREOptions -> s -> m re
  -- | compiling a 'SearchReplace' template from the RE text and the
  -- template text, failing if they are not well formed
  makeSearchReplace     :: (Functor m,Monad m, MonadFail m,IsRegex re s) => s -> s -> m (SearchReplace re s)
  -- | compiling a 'SearchReplace' template specifying the
  -- 'SimpleREOptions' for the RE
  makeSearchReplaceWith :: (Functor m,Monad m, MonadFail m,IsRegex re s) => SimpleREOptions -> s -> s -> m (SearchReplace re s)
  -- | incorporate an escaped string into a compiled RE with the default
  -- options
  makeEscaped           :: (Functor m,Monad m, MonadFail m) => (s->s) -> s -> m re
  -- | incorporate an escaped string into a compiled RE with the
  -- specified 'SimpleREOptions'
  makeEscapedWith       :: (Functor m,Monad m, MonadFail m) => SimpleREOptions -> (s->s) -> s -> m re
  -- | extract the text of the RE from the RE
  regexSource           :: re -> s

  -- the default methods: the option-free variants pass 'minBound' as the
  -- options to their @With@ counterparts
  makeRegex         = makeRegexWith         minBound
  makeSearchReplace = makeSearchReplaceWith minBound
  makeEscaped       = makeEscapedWith       minBound
  -- escape the string, apply the caller's function to the escaped text
  -- and compile the result
  makeEscapedWith o f = makeRegexWith o . f . packR . escapeREString .
unpackR
\end{code}

\begin{code}
-- | search and replace all matches in the argument text; e.g., this function
-- will convert every YYYY-MM-DD format date in its argument text into a
-- DD\/MM\/YYYY date:
--
--   @searchReplaceAll [ed|${y}([0-9]{4})-0*${m}([0-9]{2})-0*${d}([0-9]{2})\/\/\/${d}\/${m}\/${y}|]@
--
searchReplaceAll :: IsRegex re s => SearchReplace re s -> s -> s
searchReplaceAll SearchReplace{..} = replaceAll getTemplate . matchMany getSearch

-- | search and replace the first occurrence only (if any) in the input text
-- e.g., to prefix the first string of four hex digits in the input text,
-- if any, with @0x@:
--
--  @searchReplaceFirst [ed|[0-9A-Fa-f]{4}\/\/\/0x$0|]@
--
searchReplaceFirst :: IsRegex re s => SearchReplace re s -> s -> s
searchReplaceFirst SearchReplace{..} = replace getTemplate . matchOnce getSearch
\end{code}
regex-1.1.0.2/Text/RE/ZeInternals/Types/LineNo.hs0000644000000000000000000000103214254065176017473 0ustar0000000000000000{-# LANGUAGE GeneralizedNewtypeDeriving #-}

module Text.RE.ZeInternals.Types.LineNo where


-- | our line numbers are of the proper zero-based kind
newtype LineNo =
    ZeroBasedLineNo { getZeroBasedLineNo :: Int }
  deriving (Show,Enum)

-- | the first line in a file
firstLine :: LineNo
firstLine = ZeroBasedLineNo 0

-- | extract a conventional 1-based line number
getLineNo :: LineNo -> Int
getLineNo = succ . getZeroBasedLineNo

-- | inject a conventional 1-based line number
lineNo :: Int -> LineNo
lineNo = ZeroBasedLineNo . pred
regex-1.1.0.2/Text/RE/ZeInternals/Types/Match.lhs0000644000000000000000000002234114254065176017525 0ustar0000000000000000\begin{code}
{-# LANGUAGE RecordWildCards            #-}
{-# LANGUAGE FlexibleContexts           #-}
{-# LANGUAGE FlexibleInstances          #-}
{-# LANGUAGE UndecidableInstances       #-}
{-# LANGUAGE MultiParamTypeClasses      #-}
{-# LANGUAGE DeriveDataTypeable         #-}
{-# LANGUAGE MonoLocalBinds             #-}
\end{code}

\begin{code}
module Text.RE.ZeInternals.Types.Match
  ( Match(..)
, noMatch
  , emptyMatchArray
  , matched
  , matchedText
  , matchCapture
  , matchCaptures
  , (!$$)
  , captureText
  , (!$$?)
  , captureTextMaybe
  , (!$)
  , capture
  , (!$?)
  , captureMaybe
  , RegexFix(..)
  , convertMatchText
  ) where
\end{code}

\begin{code}
import           Data.Array
import           Data.Bits
import qualified Data.ByteString                as BW
import qualified Data.ByteString.Char8          as B
import qualified Data.ByteString.Lazy.Char8     as LBS
import qualified Data.ByteString.UTF8           as B
import           Data.Maybe
import qualified Data.Sequence                  as S
import qualified Data.Text                      as T
import qualified Data.Text.Encoding             as T
import qualified Data.Text.Lazy                 as LT
import           Data.Typeable
import           Data.Word
import           Text.RE.ZeInternals.Types.Capture
import           Text.RE.ZeInternals.Types.CaptureID
import           Text.Regex.Base
import qualified Text.Regex.PCRE                as PCRE
import qualified Text.Regex.TDFA                as TDFA

infixl 9 !$, !$$
\end{code}


\begin{code}
-- | the result of matching a RE to a text once (with @?=~@), retaining
-- the text that was matched against
data Match a =
  Match
    { matchSource  :: !a                -- ^ the whole source text
    , captureNames :: !CaptureNames     -- ^ the RE's capture names
    , matchArray   :: !(Array CaptureOrdinal (Capture a))
                                        -- ^ 0..n-1 captures,
                                        -- starting with the
                                        -- text matched by the
                                        -- whole RE
    }
  deriving (Show,Eq,Typeable)
\end{code}

\begin{code}
-- | Construct a Match that does not match anything.
noMatch :: a -> Match a
noMatch t = Match t noCaptureNames emptyMatchArray

-- | an empty array of Capture (its bounds run from 0 to -1)
emptyMatchArray :: Array CaptureOrdinal (Capture a)
emptyMatchArray = listArray (CaptureOrdinal 0,CaptureOrdinal $ -1) []
\end{code}

\begin{code}
-- | the function is applied to the source text and to every capture;
-- the capture names are carried over unchanged
instance Functor Match where
  fmap f Match{..} =
    Match
      { matchSource   = f matchSource
      , captureNames  = captureNames
      , matchArray    = fmap (fmap f) matchArray
      }
\end{code}

\begin{code}
-- | tests whether the RE matched the source text at all
matched :: Match a -> Bool
matched = isJust .
matchCapture

-- | yields the text matched by the RE, Nothing if no match
matchedText :: Match a -> Maybe a
matchedText = fmap capturedText . matchCapture

-- | the top-level capture if the source text matched the RE,
-- Nothing otherwise
matchCapture :: Match a -> Maybe (Capture a)
matchCapture = fmap fst . matchCaptures

-- | the main top-level capture (capture \'0\') and the sub captures
-- if the text matched the RE, @Nothing@ otherwise (a failed match is
-- recognised by its empty capture array)
matchCaptures :: Match a -> Maybe (Capture a,[Capture a])
matchCaptures Match{..} = case rangeSize (bounds matchArray) == 0 of
  True  -> Nothing
  False -> Just (matchArray!0,drop 1 $ elems matchArray)

-- | an alternative for captureText
(!$$) :: Match a -> CaptureID -> a
(!$$) = flip captureText

-- | look up the text of the nth capture, 0 being the match of the whole
-- RE against the source text, 1, the first bracketed sub-expression to
-- be matched and so on; calls 'error' (via 'capture') if the capture
-- cannot be found
captureText :: CaptureID -> Match a -> a
captureText cid mtch = capturedText $ capture cid mtch

-- | an alternative for captureTextMaybe
(!$$?) :: Match a -> CaptureID -> Maybe a
(!$$?) = flip captureTextMaybe

-- | look up the text of the nth capture (0 being the match of the
-- whole), returning Nothing if the Match doesn't contain the capture
captureTextMaybe :: CaptureID -> Match a -> Maybe a
captureTextMaybe cid mtch = do
    cap <- mtch !$? cid
    case hasCaptured cap of
      True  -> Just $ capturedText cap
      False -> Nothing

-- | an alternative for capture
(!$) :: Match a -> CaptureID -> Capture a
(!$) = flip capture

-- | look up the nth capture, 0 being the match of the whole RE against
-- the source text, 1, the first bracketed sub-expression to be matched
-- and so on; calls 'error' with an \"out of bounds\" message whenever
-- 'captureMaybe' yields Nothing for the capture
capture :: CaptureID -> Match a -> Capture a
capture cid mtch = fromMaybe oops $ mtch !$? cid
  where
    oops = error $ "capture: out of bounds (" ++ show cid ++ ")"

-- | an alternative for captureMaybe
(!$?) :: Match a -> CaptureID -> Maybe (Capture a)
(!$?)
= flip captureMaybe

-- | look up the nth capture, 0 being the match of the whole RE against
-- the source text, 1, the first bracketed sub-expression to be matched
-- and so on, returning Nothing if there is no such capture, or if the
-- capture failed to capture anything (being in a failed alternate)
captureMaybe :: CaptureID -> Match a -> Maybe (Capture a)
captureMaybe cid mtch@Match{..} = do
  -- resolve a named capture to its ordinal
  i   <- lookupCaptureID cid mtch
  -- the ordinal must lie within the bounds of the capture array
  cap <- case bounds matchArray `inRange` i of
    True  -> Just $ matchArray ! i
    False -> Nothing
  case hasCaptured cap of
    True  -> Just cap
    False -> Nothing

-- | resolve a 'CaptureID' against the capture names of the match,
-- discarding the error message if the name is not known
lookupCaptureID :: CaptureID -> Match a -> Maybe CaptureOrdinal
lookupCaptureID cid Match{..} =
    either (const Nothing) Just $ findCaptureID cid captureNames
\end{code}


\begin{code}
-- | this instance hooks 'Match' into regex-base: regex consumers need
-- not worry about any of this
instance
    ( RegexContext regex source (AllTextSubmatches (Array Int) (source,(Int,Int)))
    , RegexLike    regex source
    , RegexFix     regex source
    ) =>
  RegexContext regex source (Match source) where
    match  r s = convertMatchText r s $ getAllTextSubmatches $ match r s
    matchM r s = do
      y <- matchM r s
      return $ convertMatchText r s $ getAllTextSubmatches y
\end{code}

\begin{code}
-- | convert a regex-base native MatchText into a regex Match type;
-- the result carries no capture names and every offset and length is
-- passed through 'utf8_correct' for the regex and source types in use
convertMatchText :: RegexFix regex source
                 => regex
                 -> source
                 -> MatchText source
                 -> Match source
convertMatchText re hay arr =
    Match
      { matchSource  = hay
      , captureNames = noCaptureNames
      , matchArray   =
          ixmap (CaptureOrdinal lo,CaptureOrdinal hi) getCaptureOrdinal $
            fmap f arr
      }
  where
    (lo,hi) = bounds arr

    f (ndl,(off_,len_)) =
      Capture
        { captureSource = hay
        , capturedText  = ndl
        , captureOffset = off
        , captureLength = len
        }
      where
        CharRange off len = utf8_correct re hay off_ len_
\end{code}

\begin{code}
-- | an offset and a length
data CharRange = CharRange !Int !Int
  deriving (Show)

-- | adjusts the offset and length reported by a back end for a source
-- text; the default leaves them as they are
class RegexFix regex source where
  utf8_correct :: regex -> source -> Int -> Int -> CharRange
  utf8_correct _ _ = CharRange

instance RegexFix
TDFA.Regex [Char] where
instance RegexFix TDFA.Regex B.ByteString where
instance RegexFix TDFA.Regex LBS.ByteString where
instance RegexFix TDFA.Regex T.Text where
instance RegexFix TDFA.Regex LT.Text where
instance RegexFix TDFA.Regex (S.Seq Char) where

-- for PCRE the String and (strict and lazy) Text instances correct the
-- offset and length against the UTF-8 encoding of the source text; the
-- other instances use the default, which changes nothing
instance RegexFix PCRE.Regex [Char] where
  utf8_correct _ = utf8_correct_bs . B.fromString
instance RegexFix PCRE.Regex B.ByteString where
instance RegexFix PCRE.Regex LBS.ByteString where
instance RegexFix PCRE.Regex T.Text where
  utf8_correct _ = utf8_correct_bs . T.encodeUtf8
instance RegexFix PCRE.Regex LT.Text where
  utf8_correct _ = utf8_correct_bs . T.encodeUtf8 . LT.toStrict
instance RegexFix PCRE.Regex (S.Seq Char) where

-- convert a byte offset+length in a UTF-8-encoded ByteString
-- into a character offset+length
utf8_correct_bs :: B.ByteString -> Int -> Int -> CharRange
utf8_correct_bs bs ix0 ln0 = case ix0+ln0 > BW.length bs of
    True  -> error "utf8_correct_bs: index+length out of range"
    False -> skip 0 0   -- BW.index calls below should not fail
  where
    -- walk the characters in front of the match: ix is the current byte
    -- index and di the number of continuation bytes passed so far (the
    -- amount by which the byte offset exceeds the character offset)
    skip ix di = case compare ix ix0 of
      GT -> case ix0 of
        -- -1 is used as a magic number to indicate failure to match
        -1 -> CharRange ix0 ln0
        _  -> error "utf8_correct_bs: UTF-8 decoding error"
      EQ -> count ix di 0 ln0
      LT -> case u8_width $ BW.index bs ix of
        Single    -> skip (ix+1)   di
        Double    -> skip (ix+2) $ di+1
        Triple    -> skip (ix+3) $ di+2
        Quadruple -> skip (ix+4) $ di+3

    -- walk the characters of the match itself: c is the number of
    -- matched bytes still to be covered and dl the number of
    -- continuation bytes seen inside the match
    count ix di dl c = case compare c 0 of
      LT -> error "utf8_correct_bs: length ends inside character"
      EQ -> CharRange (ix0-di) (ln0-dl)
      GT -> case u8_width $ BW.index bs ix of
        Single    -> count (ix+1) di  dl    $ c-1
        Double    -> count (ix+2) di (dl+1) $ c-2
        Triple    -> count (ix+3) di (dl+2) $ c-3
        Quadruple -> count (ix+4) di (dl+3) $ c-4

-- the number of bytes in the encoding of a character
data UTF8Size = Single | Double | Triple | Quadruple
  deriving (Show)

-- classify the leading byte of an encoded character by its high bits
u8_width :: Word8 -> UTF8Size
u8_width w8 = case w8 .&. 0x80 == 0x00 of
  True  -> Single
  False -> case w8 .&. 0xE0 == 0xC0 of
    True  -> Double
    False -> case w8 .&.
0xF0 == 0xE0 of
      True  -> Triple
      False -> case w8 .&. 0xF8 == 0xF0 of
        True  -> Quadruple
        -- not the leading byte of any encoded character
        False -> error "u8_width: UTF-8 decoding error"
\end{code}
regex-1.1.0.2/Text/RE/ZeInternals/Types/Matches.lhs0000644000000000000000000000436014254065176020056 0ustar0000000000000000\begin{code}
{-# LANGUAGE RecordWildCards            #-}
{-# LANGUAGE FlexibleContexts           #-}
{-# LANGUAGE FlexibleInstances          #-}
{-# LANGUAGE UndecidableInstances       #-}
{-# LANGUAGE MultiParamTypeClasses      #-}
{-# LANGUAGE DeriveDataTypeable         #-}
{-# LANGUAGE MonoLocalBinds             #-}
\end{code}

\begin{code}
module Text.RE.ZeInternals.Types.Matches
  ( Matches(..)
  , anyMatches
  , countMatches
  , matches
  , mainCaptures
  ) where
\end{code}

\begin{code}
import           Data.Typeable
import           Text.RE.ZeInternals.Types.Capture
import           Text.RE.ZeInternals.Types.CaptureID
import           Text.RE.ZeInternals.Types.Match
import           Text.Regex.Base
\end{code}



\begin{code}
-- | the result of matching a RE against a text (with @*=~@), retaining
-- the text that was matched against
data Matches a =
  Matches
    { matchesSource :: !a          -- ^ the source text being matched
    , allMatches    :: ![Match a]  -- ^ all 'Match' instances found, left to right
    }
  deriving (Show,Eq,Typeable)
\end{code}

\begin{code}
-- | the function is applied to the source text and to every match
instance Functor Matches where
  fmap f Matches{..} =
    Matches
      { matchesSource = f matchesSource
      , allMatches    = map (fmap f) allMatches
      }
\end{code}

\begin{code}
-- | tests whether the RE matched the source text at all
anyMatches :: Matches a -> Bool
anyMatches = not . null . allMatches

-- | count the matches
countMatches :: Matches a -> Int
countMatches = length . allMatches

-- | list the texts that Matched
matches :: Matches a -> [a]
matches = map capturedText .
mainCaptures

-- | extract the main capture (capture 0) from each match
mainCaptures :: Matches a -> [Capture a]
mainCaptures ac = [ capture c0 cs | cs<-allMatches ac ]
  where
    c0 = IsCaptureOrdinal $ CaptureOrdinal 0
\end{code}


\begin{code}
-- | this instance hooks 'Matches' into regex-base: regex consumers need
-- not worry about any of this
instance
    ( RegexContext regex source [MatchText source]
    , RegexLike    regex source
    , RegexFix     regex source
    ) =>
  RegexContext regex source (Matches source) where
    match  r s = Matches s $ map (convertMatchText r s) $ match r s
    matchM r s = do
      y <- matchM r s
      return $ Matches s $ map (convertMatchText r s) y
\end{code}
regex-1.1.0.2/Text/RE/ZeInternals/Types/SearchReplace.lhs0000644000000000000000000000130514254065176021167 0ustar0000000000000000\begin{code}
module Text.RE.ZeInternals.Types.SearchReplace
  ( SearchReplace(..)
  ) where
\end{code}

\begin{code}
-- | contains a compiled RE and replacement template
data SearchReplace re s =
  SearchReplace
    { getSearch   :: !re    -- ^ the RE to match a string to replace
    , getTemplate :: !s     -- ^ the replacement template with ${cap}
                            -- used to identify a capture (by number or
                            -- name if one was given) and '$$' being
                            -- used to escape a single '$'
    }
  deriving (Show)
\end{code}

\begin{code}
-- | the function is applied to the template only; the compiled RE is
-- left as it is
instance Functor (SearchReplace re) where
  fmap f (SearchReplace re x) = SearchReplace re (f x)
\end{code}
regex-1.1.0.2/README.md0000644000000000000000000001044314254072337012441 0ustar0000000000000000# regex

regex is a regular expression toolkit for regex-base with:

  * a text-replacement toolkit with type-safe text-replacement templates;
  * special datatypes for matches and captures;
  * compile-time checking of RE syntax;
  * a unified means of controlling case-sensitivity and multi-line options;
  * high-level AWK-like tools for building text processing apps;
  * the option of using match operators with reduced polymorphism on the
    text and result types;
  * regular expression macros including:
      + a number of useful RE macros;
      + a test
bench for testing and documenting new macro environments; * built-in support for the TDFA and PCRE back ends; * comprehensive documentation, tutorials and copious examples. See the [About page](http://about.regex.uk) for details. ## regex and regex-examples The library and tutorial, tests and examples have been split across three packages: * the `regex` package contains the regex library with the Posix TDFA back end * the `regex-with-pcre` library package contains the extra modules needed for the PCRE back end * the `regex-examples` package contains the tutorial, tests and example programs. ## Road Map - [X] 2017-04-10 v1.0.0.0 [First stable release](https://github.com/iconnect/regex/milestone/3) - [X] 2017-06-03 v1.0.1.0 [PCRE.Text, strict PVP, Update Stackage vrns, add re-top](https://github.com/iconnect/regex/milestone/19) - [X] 2017-06-04 v1.0.1.1 [Fix 1.0.1.0 release bug and provisionally fix UTF8/PCRE interworking](https://github.com/iconnect/regex/milestone/20) - [X] 2017-06-05 v1.0.1.2 [Permit utf8-string-1](https://github.com/iconnect/regex/milestone/21) - [X] 2017-06-05 v1.0.1.3 [Suspend Windows tests for PCRE on UTF-8 text](https://github.com/iconnect/regex/milestone/22) - [X] 2018-12-14 v1.0.1.4 [Fix for GHC 8.4.4, GHC-8.6.2](https://github.com/iconnect/regex/milestone/23) - [X] 2018-12-18 v1.0.1.5 [TDFA quasi quoters not dealing with newlines](https://github.com/iconnect/regex/milestone/24) - [X] 2018-12-19 v1.0.2.0 [Tidy build issues](https://github.com/iconnect/regex/milestone/25) - [X] 2020-01-27 v1.1.0.0 [Adapt for MonadFail/base-4.13/GHC-8.8](https://github.com/iconnect/regex/milestone/26) - [X] 2021-12-18 v1.1.0.1 [Fix for base-4.16.0.0/GHC 9.2.1](https://github.com/iconnect/regex/milestone/27) See the [Roadmap page](http://roadmap.regex.uk) for details. ## The regex blog Check out the [regex blog](http://blog.regex.uk) for news articles and discussion concerning all things regex. 
## Build Status [![Hackage](http://regex.uk/badges/hackage.svg)](https://hackage.haskell.org/package/regex) [![BSD3 License](http://regex.uk/badges/license.svg)](https://tldrlegal.com/license/bsd-3-clause-license-%28revised%29) [![Un*x build](http://regex.uk/badges/unix-build.svg)](https://travis-ci.org/iconnect/regex) [![Windows build](http://regex.uk/badges/windows-build.svg)](https://ci.appveyor.com/project/engineerirngirisconnectcouk/regex/branch/master) [![Coverage](http://regex.uk/badges/coverage.svg)](https://coveralls.io/github/iconnect/regex?branch=master) See [build status page](http://regex.uk/build-status) for details. ## Installing the Package The package can be easily installed with cabal or stack on GHC-8.0, 7.10 or 7.8 for the above platforms. See the [Installation page](http://installation.regex.uk) for details. ## The Tutorial Tests and Examples See the [Tutorial page](http://tutorial.regex.uk) and [Examples page](http://examples.regex.uk) for details. ## Helping Out If you have any feedback or suggestion then please drop us a line. * `t` [@hregex](https://twitter.com/hregex)
* `e` maintainers@regex.uk
* `w` http://issues.regex.uk The [Contact page](http://contact.regex.uk) has more details. ## The API The Haddocks can be found at http://hs.regex.uk. ## The Macro Tables The macro environments are an important part of the package and are documented [here](http://macros.regex.uk). ## The regex.uk Directory A handy overview of the regex.uk domain can be found [here](http://directory.regex.uk). ## The Changelog The `changelog` is posted [here](http://changelog.regex.uk). ## The Authors This library was written and is currently maintained by [Chris Dornan](mailto:chris.dornan@irisconnect.com) aka [@cdornan](https://twitter.com/cdornan) regex-1.1.0.2/changelog0000644000000000000000000001572414254067266013050 0ustar0000000000000000-*-change-log-*- 1.1.0.2 Igor Ranieri Elland 2022-05-20 * fix for [#171](https://github.com/iconnect/regex/issues/171) 1.1.0.1 Chris Dornan 2021-12-18 * fix for base-4.16.0.0/GHC 9.2.1 * eliminate http-conduit dependency (currently stymied by cryptonite) 1.1.0.0 Chris Dornan 2020-01-27 * drop support for GHC 8.0 and below * adapt for base 4.13 (MonadFail) 1.0.2.0 Chris Dornan 2018-12-19 * cabal-install build with 8.0 and 8.2 failing (#163) * Switch to GHC-8.6.3/nightly-2018-12-19 (#164) * Switch from Default-Extensions to Other-Extensions (#165) 1.0.1.5 Chris Dornan 2018-12-18 * TDFA quasi quoters not dealing with newlines (#157) 1.0.1.4 Chris Dornan 2018-12-14 * GHC-8.4.4, GHC-8.6.2 (#160) 1.0.1.3 Chris Dornan 2017-06-05 * Suspending UTF-8 tests for Windows/PCRE (#144) 1.0.1.2 Chris Dornan 2017-06-05 * Permit utf8-string-1 (#142) 1.0.1.1 Chris Dornan 2017-06-04 * regex-pcre not working properly with UTF-8 text (#141) * ZeInternals/SearchReplace (#140) 1.0.1.0 Chris Dornan 2017-06-03 * Text.RE.PCRE.Text[.Lazy] (#58) * Update LSTHaskell versions (#136) * Add re-top example (#137) * Regex library to conform with strict PVP (#138) 1.0.0.0 Chris Dornan 2017-04-10 * Rewrite tutorials (#113) * Add Text.RE.Tools.IsRegex (#122) * Include Macro Parsers 
in Core API (#123) * Adjust type of lookupCaptureID and friends (#124) * re-prep to handle cast example expressions (#125) * Rename compileSearchReplaceWithOptions (#126) * Hide ZeInternals, Evacuate Text.RE (#127) * Proper Haddocks for the quasi quoters (#128) * ed_ quasi quoters are being restricted to SimpleREOptions (#129) * regex dependency on pcre-builtin (#130) 0.14.0.0 Chris Dornan 2017-04-05 * Move IsOption into the back ends (#115) * Rename Find functions (#116) * re-gen-cabals: 'sdist' to prepare final commit message (#117) * re-prep: 'blog-badge' to update the blog badge (#118) * re-prep: include_code_pp for all .lhs files (#119) * re-gen-cabals: do 'gen' after 'bump-version' (#120) 0.13.0.0 Chris Dornan 2017-04-03 * Add a Find Tool (#106) * TestBench to export Text.RE (#107) * Consolidate cabal templates (#108) * Add special tutorials (#109) * Add sort-imports example (#110) * Generalise Grep (#111) * Tighten up findCaptureID (#112) 0.12.0.0 Chris Dornan 2017-03-31 * Add Text.RE.REOptions to RE.Summa (#103) * Move IsRegex into Text.RE (#104) 0.11.1.0 Chris Dornan 2017-03-30 * Cannot hide Text.RE.ZeInternals.SearchReplace modules (#101) 0.11.0.0 Chris Dornan 2017-03-29 * Simplify API (#97) * Rename Location to RELocation (#98) * Rename the MacrosDescriptor Fields (#99) 0.10.0.3 Chris Dornan 2017-03-28 * Update to LTS-8.6 (#95) * Improve Haddocks for Text.RE.{TDFA,PCRE} (#94) 0.10.0.2 Chris Dornan 2017-03-27 * Constrain the types of the template quasi quoters (#86) * Add escape methods to IsRegex (#87) * Better Haddock Commentary (#90) * Better release-testing scripts (#91) * Make travis stack-release tests advisory (#92) 0.10.0.1 Chris Dornan 2017-03-26 Withdrawn 0.10.0.0 Chris Dornan 2017-03-25 Withdrawn 0.9.0.0 Chris Dornan 2017-03-23 * Flip the order of the arguments to replace (#78) * Add type-safe replacement templates (#60) * Finish tidying up the API (#80) * Make `regex` compatible w/ TH-less GHCs (#70) * Declare extensions the compiler must 
support (#83) * Fix curl for AppVeyor build (#79) * Fix AppVeyor badge (#81) * Remove QQ from code coverage stats (#82) * Rename Options, Context and Replace methods (#84) 0.8.0.0 Chris Dornan 2017-03-16 * Tidy up the API after recent reorganization (#76) 0.7.0.0 Chris Dornan 2017-03-15 * Fix and extend Replace class (#74) * Better package organisation (#73) * Generalise sed' in progress (#72) * compileRegex to take just a string (#68) * Fix comment reference in Text.RE.PCRE in progress (#66) 0.6.0.1 Chris Dornan 2017-03-13 * Fix .travis.yml release-stack script (#67) 0.6.0.0 Chris Dornan 2017-03-13 * Split out regex-with-pcre package (#65) 0.5.0.0 Chris Dornan 2017-03-05 * Fix inter-operation of =~ & =~~ and named captures (#55) * Add escaping functions (#37) * Test Hackage release tarballs on Travis CI (#51) * Fix up template replace ordinals (#52) * Complete the web site (#39) * Complete the Tutorial, Tests and Examples (#38) * Complete narrative in literate modules (#8) 0.3.0.0 Chris Dornan 2017-02-26 * Clean up API to use camelCase conventions * Use -Werror in development and testing, -Wwarn for Hackage * Integrate the regex blog * Better presentation of ghci tryouts in the Tutorial * Various minor README upgrades and fixes 0.2.0.4 Chris Dornan 2017-02-22 * Repair re-gen-modules-test for Windows 0.2.0.3 Chris Dornan 2017-02-21 * README/index layout tweak 0.2.0.2 Chris Dornan 2017-02-21 * Fix re-gen-modules-test for Hackage (#45) * Minor style tweaks to README/index 0.2.0.1 Chris Dornan 2017-02-20 * remove library from regex-examples (#43) 0.2.0.0 Chris Dornan 2017-02-19 * Split off the tutorial tests and examples into regex-examples, leaving just the library in regex 0.1.0.0 Chris Dornan 2017-02-18 * Cabal file generated from a DRY template * Library dependencies minimised, test dependencies moved into examples/re-tests * A proper static website generated by 're-prep all' * README/sidebar Badges are a static record of current release with links to a live 
build-status reporting on HEAD * Added !$, !$?, !$$ and !$$? alternatives for looking up captures in a Match * Text.RE.Parsers now generate Text where they were generating String * %nat, %int, %frac and %hex get their new names 0.0.0.2 Chris Dornan 2017-01-30 * Fix for Windows * Remove hsyslog dependency * Establish Travis CI, AppVeyor and coveralls.io integrations * Fix time parser to use Fixed arithmetic * Miscellaneous minor adjustments 0.0.0.1 Chris Dornan 2017-01-26 * First public release regex-1.1.0.2/LICENSE0000644000000000000000000000276714254065176012204 0ustar0000000000000000Copyright (c) 2016-2017, Chris Dornan All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of Iris Connect nor the names of other contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. regex-1.1.0.2/Setup.hs0000644000000000000000000000005614254065176012620 0ustar0000000000000000import Distribution.Simple main = defaultMain regex-1.1.0.2/regex.cabal0000644000000000000000000001054314254075251013257 0ustar0000000000000000Name: regex Version: 1.1.0.2 Synopsis: Toolkit for regex-base Description: A regular expression toolkit for regex-base with compile-time checking of RE syntax, data types for matches and captures, a text replacement toolkit, portable options, high-level AWK-like tools for building text processing apps, regular expression macros with parsers and test bench, comprehensive documentation, tutorials and copious examples. Homepage: http://regex.uk Author: Chris Dornan License: BSD3 license-file: LICENSE Maintainer: Chris Dornan Copyright: Chris Dornan 2016-2017 Category: Text Build-type: Simple Stability: Stable bug-reports: http://issues.regex.uk Extra-Source-Files: README.md changelog Cabal-Version: >= 1.10 Source-Repository head type: git location: https://github.com/iconnect/regex.git Source-Repository this Type: git Location: https://github.com/iconnect/regex.git Tag: 1.1.0.2 Library Hs-Source-Dirs: . 
Exposed-Modules: Text.RE Text.RE.REOptions Text.RE.Replace Text.RE.Summa Text.RE.TDFA Text.RE.TDFA.ByteString Text.RE.TDFA.ByteString.Lazy Text.RE.TDFA.Sequence Text.RE.TDFA.String Text.RE.TDFA.Text Text.RE.TDFA.Text.Lazy Text.RE.TestBench Text.RE.TestBench.Parsers Text.RE.Tools Text.RE.Tools.Edit Text.RE.Tools.Find Text.RE.Tools.Grep Text.RE.Tools.IsRegex Text.RE.Tools.Lex Text.RE.Tools.Sed Text.RE.ZeInternals Text.RE.ZeInternals.Types.Poss Other-Modules: Text.RE.ZeInternals.AddCaptureNames Text.RE.ZeInternals.EscapeREString Text.RE.ZeInternals.NamedCaptures Text.RE.ZeInternals.PreludeMacros Text.RE.ZeInternals.QQ Text.RE.ZeInternals.Replace Text.RE.ZeInternals.SearchReplace Text.RE.ZeInternals.SearchReplace.TDFA Text.RE.ZeInternals.SearchReplace.TDFA.ByteString Text.RE.ZeInternals.SearchReplace.TDFA.ByteString.Lazy Text.RE.ZeInternals.SearchReplace.TDFA.Sequence Text.RE.ZeInternals.SearchReplace.TDFA.String Text.RE.ZeInternals.SearchReplace.TDFA.Text Text.RE.ZeInternals.SearchReplace.TDFA.Text.Lazy Text.RE.ZeInternals.SearchReplace.TDFAEdPrime Text.RE.ZeInternals.TDFA Text.RE.ZeInternals.TestBench Text.RE.ZeInternals.TestBench.Parsers Text.RE.ZeInternals.Tools.Lex Text.RE.ZeInternals.Types.Capture Text.RE.ZeInternals.Types.CaptureID Text.RE.ZeInternals.Types.IsRegex Text.RE.ZeInternals.Types.LineNo Text.RE.ZeInternals.Types.Match Text.RE.ZeInternals.Types.Matches Text.RE.ZeInternals.Types.SearchReplace Default-Language: Haskell2010 Other-Extensions: AllowAmbiguousTypes CPP DeriveDataTypeable DeriveGeneric ExistentialQuantification FlexibleContexts FlexibleInstances FunctionalDependencies GeneralizedNewtypeDeriving MultiParamTypeClasses NoImplicitPrelude OverloadedStrings QuasiQuotes RecordWildCards ScopedTypeVariables TemplateHaskell TypeSynonymInstances UndecidableInstances if !impl(ghc >= 8.0) Other-Extensions: TemplateHaskell else Other-Extensions: TemplateHaskellQuotes GHC-Options: -Wall -fwarn-tabs -Wwarn Build-depends: array >= 0.4 , base >= 4 && < 5 , 
base-compat >= 0.6 && < 1 , bytestring >= 0.10 , containers >= 0.4 , hashable >= 1.2 , regex-base >= 0.93 , regex-pcre-builtin >= 0.94 , regex-tdfa >= 1.3.1.0 , template-haskell >= 2.7 , text >= 1.2 , time >= 1.4.2 , time-locale-compat >= 0.1 , transformers >= 0.2.2 , unordered-containers >= 0.2 , utf8-string >= 1 -- Generated with re-gen-cabals