regex-tdfa-1.1.8/0000755000000000000000000000000011536532137011743 5ustar0000000000000000regex-tdfa-1.1.8/LICENSE0000644000000000000000000000275111536532137012755 0ustar0000000000000000This modile is under this "3 clause" BSD license: Copyright (c) 2007-2009, Christopher Kuklewicz All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * The names of the contributors may not be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. regex-tdfa-1.1.8/regex-tdfa.cabal0000644000000000000000000001153611536532137014763 0ustar0000000000000000Name: regex-tdfa Version: 1.1.8 -- 0.99.4 tests pnonempty' = \ p -> POr [ PEmpty, p ] instead of PNonEmpty -- 0.99.5 remove PNonEmpty constructor -- 0.99.6 change to nested nonEmpty calls for PBound -- 0.99.7 Use (PGroup Nothing) in Pattern to decompose PBound -- 0.99.8 testing chaning Maximize to Minimize for Tags, decide (a*)* is canonical problem -- 0.99.9 testing changing bestTrans/chooseWith/choose to include enterOrbit/newFlags/(_,True) info -- 0.99.10 fixed ((.?)*)* patterns by changing PStar nullView when mayFirstBeNull -- 0.99.11 improve above fix and make stuff work better -- HAS BUG, along with old TDFA! -- 0.99.12 try to debug 0.99.11 : fixed updateWinner -- 0.99.13 more cleanup -- 0.99.14 start changing to the new real DFA -- 0.99.15 get string with NewDFA testing, unit tests and 1000 random regex pass -- 0.99.16 performance? up to v15 -- 0.99.17 radical removal of flag array and adding of SetVal to handle groups -- 0.99.18 try alternate lazy/strict strategy in NewDFA. Fix offset laziness. -- 0.99.19 try for pre-comparison of orbit-logs! -- 0.99.20 go to many vs single? -- 1.0.0 -- 1.0.1 add NewDFATest.hs -- 1.0.2 arg, the prof is fast and the normal slow! -- 1.0.3 try to alter matchTest to not have the Bool args? No -- np2 comment out all Testing code? No -- np3 !off the multi? No -- np4 comment out all Single0 and Single code? No -- np5 comment out all Multi0 code? No -- np6 comment out ans check? No -- np7 just return True? Fast -- np8 np6 and NOINLINE endOff? No -- np9 INLINE endOf? No -- np10 Peel off CharMap/IntMap and DFA/DT with pattern matching? No -- np11 break multi to not look at o and just return True? Yes !!!! -- np12 expand o in the case where t lookup get Nothing? Yes--this is the fix!? -- np13 try to improve readability with the "mm" combinator? Yes! -- 1.0.4 try repaired NewDFATest_SBS -- 1.0.5 use "uncons" on SBS -- 1.0.6 try NewDFATest_SBS with uncons -- 1.0.7 make NewDFA directory and String_NC -- 1.1.0 NewDFA code working -- 1.1.1 add gnu escapes -- 1.1.2 worked -- 1.1.3 BROKEN after 100 characters the compressOrbit dies! -- 1.1.4 fixed -- 1.1.5 try needUniqTags in POr in CorePattern.hs, try (toAdvice b) for PStar child -- 1.1.6 Fix bug preventing []] [-] [^]] [^-] (thanks to Maxime Henrion) -- 1.1.7 fix url below -- 1.1.8 Make ghc-7.0.2 on platorm 2011.2.0.0.0 happy License: BSD3 License-File: LICENSE Copyright: Copyright (c) 2007, Christopher Kuklewicz Author: Christopher Kuklewicz Maintainer: TextRegexLazy@personal.mightyreason.com Stability: Seems to work, but not POSIX yet Homepage: http://hackage.haskell.org/package/regex-tdfa Package-URL: http://code.haskell.org/regex-tdfa/ Synopsis: Replaces/Enhances Text.Regex Description: A new all Haskell "tagged" DFA regex engine, inspired by libtre Category: Text Tested-With: GHC Build-Type: Simple Cabal-Version: >= 1.2.3 flag base4 library Build-Depends: regex-base >= 0.93.1, parsec, mtl, containers, array, bytestring if flag(base4) Build-Depends: base >= 4.0 && < 5, ghc-prim else Build-Depends: base < 4.0 other-modules: Paths_regex_tdfa Exposed-Modules: Data.IntMap.CharMap2 Data.IntMap.EnumMap2 Data.IntSet.EnumSet2 Text.Regex.TDFA Text.Regex.TDFA.ByteString Text.Regex.TDFA.ByteString.Lazy Text.Regex.TDFA.Common Text.Regex.TDFA.CorePattern Text.Regex.TDFA.IntArrTrieSet Text.Regex.TDFA.NewDFA.Engine Text.Regex.TDFA.NewDFA.Engine_FA Text.Regex.TDFA.NewDFA.Engine_NC Text.Regex.TDFA.NewDFA.Engine_NC_FA Text.Regex.TDFA.NewDFA.Tester Text.Regex.TDFA.NewDFA.Uncons Text.Regex.TDFA.NewDFA.MakeTest Text.Regex.TDFA.Pattern Text.Regex.TDFA.ReadRegex Text.Regex.TDFA.Sequence Text.Regex.TDFA.String Text.Regex.TDFA.TDFA Text.Regex.TDFA.TNFA Buildable: True Extensions: MultiParamTypeClasses, FunctionalDependencies, BangPatterns, MagicHash, RecursiveDo, NoMonoPatBinds, ForeignFunctionInterface, UnboxedTuples, TypeOperators, FlexibleContexts, ExistentialQuantification, UnliftedFFITypes, TypeSynonymInstances, FlexibleInstances GHC-Options: -Wall -funbox-strict-fields -fspec-constr-count=10 -O2 -fno-warn-orphans GHC-Prof-Options: -auto-all regex-tdfa-1.1.8/Setup.hs0000644000000000000000000000022511536532137013376 0ustar0000000000000000#!/usr/bin/env runhaskell -- I usually compile this with "ghc --make -o setup Setup.hs" import Distribution.Simple(defaultMain) main = defaultMain regex-tdfa-1.1.8/Data/0000755000000000000000000000000011536532136012613 5ustar0000000000000000regex-tdfa-1.1.8/Data/IntMap/0000755000000000000000000000000011536532136014003 5ustar0000000000000000regex-tdfa-1.1.8/Data/IntMap/CharMap2.hs0000644000000000000000000002557611536532136015753 0ustar0000000000000000{-# LANGUAGE CPP #-} module Data.IntMap.CharMap2 where #ifdef __GLASGOW_HASKELL__ import GHC.Base(unsafeChr) #else import Data.Char (chr) #endif import Data.Char as C(ord) import Data.List as L (map) import qualified Data.IntMap as M import qualified Data.IntSet as S(IntSet) import Data.Monoid(Monoid(..)) #ifndef __GLASGOW_HASKELL__ unsafeChr = chr #endif newtype CharMap a = CharMap {unCharMap :: M.IntMap a} deriving (Eq,Ord,Read,Show) instance Monoid (CharMap a) where mempty = CharMap mempty CharMap x `mappend` CharMap y = CharMap (x `mappend` y) instance Functor CharMap where fmap f (CharMap m) = CharMap (fmap f m) type Key = Char (!) :: CharMap a -> Key -> a (!) (CharMap m) k = (M.!) m (C.ord k) (\\) :: CharMap a -> CharMap b -> CharMap a (\\) (CharMap m1) (CharMap m2) = CharMap ((M.\\) m1 m2) null :: CharMap a -> Bool null (CharMap m) = M.null m size :: CharMap a -> Int size (CharMap m) = M.size m member :: Key -> CharMap a -> Bool member k (CharMap m) = M.member (C.ord k) m notMember :: Key -> CharMap a -> Bool notMember k (CharMap m) = M.notMember (C.ord k) m lookup :: Key -> CharMap a -> Maybe a lookup k (CharMap m) = M.lookup (C.ord k) m findWithDefault :: a -> Key -> CharMap a -> a findWithDefault a k (CharMap m) = M.findWithDefault a (C.ord k) m empty :: CharMap a empty = CharMap M.empty singleton :: Key -> a -> CharMap a singleton k a = CharMap (M.singleton (C.ord k) a) insert :: Key -> a -> CharMap a -> CharMap a insert k a (CharMap m) = CharMap (M.insert (C.ord k) a m) insertWith :: (a -> a -> a) -> Key -> a -> CharMap a -> CharMap a insertWith f k a (CharMap m) = CharMap (M.insertWith f (C.ord k) a m) insertWithKey :: (Key -> a -> a -> a) -> Key -> a -> CharMap a -> CharMap a insertWithKey f k a (CharMap m) = CharMap (M.insertWithKey f' (C.ord k) a m) where f' b a1 a2 = f (unsafeChr b) a1 a2 insertLookupWithKey :: (Key -> a -> a -> a) -> Key -> a -> CharMap a -> (Maybe a, CharMap a) insertLookupWithKey f k a (CharMap m) = (ma,CharMap m') where (ma,m') = M.insertLookupWithKey f' (C.ord k) a m f' b a1 a2 = f (unsafeChr b) a1 a2 delete :: Key -> CharMap a -> CharMap a delete k (CharMap m) = CharMap (M.delete (C.ord k) m) adjust :: (a -> a) -> Key -> CharMap a -> CharMap a adjust f k (CharMap m) = CharMap (M.adjust f (C.ord k) m) adjustWithKey :: (Key -> a -> a) -> Key -> CharMap a -> CharMap a adjustWithKey f k (CharMap m) = CharMap (M.adjustWithKey f' (C.ord k) m) where f' b a = f (unsafeChr b) a update :: (a -> Maybe a) -> Key -> CharMap a -> CharMap a update f k (CharMap m) = CharMap (M.update f (C.ord k) m) updateWithKey :: (Key -> a -> Maybe a) -> Key -> CharMap a -> CharMap a updateWithKey f k (CharMap m) = CharMap (M.updateWithKey f' (C.ord k) m) where f' b a = f (unsafeChr b) a updateLookupWithKey :: (Key -> a -> Maybe a) -> Key -> CharMap a -> (Maybe a, CharMap a) updateLookupWithKey f k (CharMap m) = (a,CharMap m') where (a,m') = M.updateLookupWithKey f' (C.ord k) m f' b a1 = f (unsafeChr b) a1 union :: CharMap a -> CharMap a -> CharMap a union (CharMap m1) (CharMap m2) = CharMap (M.union m1 m2) unionWith :: (a -> a -> a) -> CharMap a -> CharMap a -> CharMap a unionWith f (CharMap m1) (CharMap m2) = CharMap (M.unionWith f m1 m2) unionWithKey :: (Key -> a -> a -> a) -> CharMap a -> CharMap a -> CharMap a unionWithKey f (CharMap m1) (CharMap m2) = CharMap (M.unionWithKey f' m1 m2) where f' b a1 a2 = f (unsafeChr b) a1 a2 unions :: [CharMap a] -> CharMap a unions cs = CharMap (M.unions (L.map unCharMap cs)) unionsWith :: (a -> a -> a) -> [CharMap a] -> CharMap a unionsWith f cs = CharMap (M.unionsWith f (L.map unCharMap cs)) difference :: CharMap a -> CharMap b -> CharMap a difference (CharMap m1) (CharMap m2) = CharMap (M.difference m1 m2) differenceWith :: (a -> b -> Maybe a) -> CharMap a -> CharMap b -> CharMap a differenceWith f (CharMap m1) (CharMap m2) = CharMap (M.differenceWith f m1 m2) differenceWithKey :: (Key -> a -> b -> Maybe a) -> CharMap a -> CharMap b -> CharMap a differenceWithKey f (CharMap m1) (CharMap m2) = CharMap (M.differenceWithKey f' m1 m2) where f' b a1 a2 = f (unsafeChr b) a1 a2 intersection :: CharMap a -> CharMap b -> CharMap a intersection (CharMap m1) (CharMap m2) = CharMap (M.intersection m1 m2) intersectionWith :: (a -> b -> a) -> CharMap a -> CharMap b -> CharMap a intersectionWith f (CharMap m1) (CharMap m2) = CharMap (M.intersectionWith f m1 m2) intersectionWithKey :: (Key -> a -> b -> a) -> CharMap a -> CharMap b -> CharMap a intersectionWithKey f (CharMap m1) (CharMap m2) = CharMap (M.intersectionWithKey f' m1 m2) where f' b a1 a2 = f (unsafeChr b) a1 a2 map :: (a -> b) -> CharMap a -> CharMap b map f (CharMap m) = CharMap (M.map f m) mapWithKey :: (Key -> a -> b) -> CharMap a -> CharMap b mapWithKey f (CharMap m) = CharMap (M.mapWithKey f' m) where f' b a = f (unsafeChr b) a mapAccum :: (a -> b -> (a, c)) -> a -> CharMap b -> (a, CharMap c) mapAccum f a (CharMap m) = (a',CharMap m') where (a',m') = M.mapAccum f a m mapAccumWithKey :: (a -> Key -> b -> (a, c)) -> a -> CharMap b -> (a, CharMap c) mapAccumWithKey f a (CharMap m) = (a',CharMap m') where (a',m') = M.mapAccumWithKey f' a m f' a1 b a2 = f a1 (unsafeChr b) a2 fold :: (a -> b -> b) -> b -> CharMap a -> b fold f a (CharMap m) = M.fold f a m foldWithKey :: (Key -> a -> b -> b) -> b -> CharMap a -> b foldWithKey f a (CharMap m) = M.foldWithKey f' a m where f' b a1 a2 = f (unsafeChr b) a1 a2 elems :: CharMap a -> [a] elems (CharMap m) = M.elems m keys :: CharMap a -> [Key] keys (CharMap m) = L.map unsafeChr (M.keys m) keysSet :: CharMap a -> S.IntSet keysSet (CharMap m) = M.keysSet m assocs :: CharMap a -> [(Key, a)] assocs (CharMap m) = L.map (\(b,a) -> (unsafeChr b,a)) (M.assocs m) toList :: CharMap a -> [(Key, a)] toList (CharMap m) = L.map (\(b,a) -> (unsafeChr b,a)) (M.toList m) fromList :: [(Key, a)] -> CharMap a fromList ka = CharMap (M.fromList (L.map (\(k,a) -> (C.ord k,a)) ka)) fromListWith :: (a -> a -> a) -> [(Key, a)] -> CharMap a fromListWith f ka = CharMap (M.fromListWith f (L.map (\(k,a) -> (C.ord k,a)) ka)) fromListWithKey :: (Key -> a -> a -> a) -> [(Key, a)] -> CharMap a fromListWithKey f ka = CharMap (M.fromListWithKey f' (L.map (\(k,a) -> (C.ord k,a)) ka)) where f' b a1 a2 = f (unsafeChr b) a1 a2 toAscList :: CharMap a -> [(Key, a)] toAscList (CharMap m) = L.map (\(b,a) -> (unsafeChr b,a)) (M.toAscList m) fromAscList :: [(Key, a)] -> CharMap a fromAscList ka = CharMap (M.fromAscList (L.map (\(k,a) -> (C.ord k,a)) ka)) fromAscListWith :: (a -> a -> a) -> [(Key, a)] -> CharMap a fromAscListWith f ka = CharMap (M.fromAscListWith f (L.map (\(k,a) -> (C.ord k,a)) ka)) fromAscListWithKey :: (Key -> a -> a -> a) -> [(Key, a)] -> CharMap a fromAscListWithKey f ka = CharMap (M.fromAscListWithKey f' (L.map (\(k,a) -> (C.ord k,a)) ka)) where f' b a1 a2 = f (unsafeChr b) a1 a2 fromDistinctAscList :: [(Key, a)] -> CharMap a fromDistinctAscList ka = CharMap (M.fromDistinctAscList (L.map (\(k,a) -> (C.ord k,a)) ka)) filter :: (a -> Bool) -> CharMap a -> CharMap a filter f (CharMap m) = CharMap (M.filter f m) filterWithKey :: (Key -> a -> Bool) -> CharMap a -> CharMap a filterWithKey f (CharMap m) = CharMap (M.filterWithKey f' m) where f' b a = f (unsafeChr b) a partition :: (a -> Bool) -> CharMap a -> (CharMap a, CharMap a) partition f (CharMap m) = (CharMap m1', CharMap m2') where (m1',m2') = M.partition f m partitionWithKey :: (Key -> a -> Bool) -> CharMap a -> (CharMap a, CharMap a) partitionWithKey f (CharMap m) = (CharMap m1', CharMap m2') where (m1',m2') = M.partitionWithKey f' m f' b a = f (unsafeChr b) a mapMaybe :: (a -> Maybe b) -> CharMap a -> CharMap b mapMaybe f (CharMap m) = CharMap (M.mapMaybe f m) mapMaybeWithKey :: (Key -> a -> Maybe b) -> CharMap a -> CharMap b mapMaybeWithKey f (CharMap m) = CharMap (M.mapMaybeWithKey f' m) where f' b a = f (unsafeChr b) a mapEither :: (a -> Either b c) -> CharMap a -> (CharMap b, CharMap c) mapEither f (CharMap m) = (CharMap m1', CharMap m2') where (m1',m2') = M.mapEither f m mapEitherWithKey :: (Key -> a -> Either b c) -> CharMap a -> (CharMap b, CharMap c) mapEitherWithKey f (CharMap m) = (CharMap m1', CharMap m2') where (m1',m2') = M.mapEitherWithKey f' m f' b a = f (unsafeChr b) a split :: Key -> CharMap a -> (CharMap a, CharMap a) split k (CharMap m) = (CharMap m1', CharMap m2') where (m1',m2') = M.split (C.ord k) m splitLookup :: Key -> CharMap a -> (CharMap a, Maybe a, CharMap a) splitLookup k (CharMap m) = (CharMap m1', a, CharMap m2') where (m1',a,m2') = M.splitLookup (C.ord k) m isSubmapOf :: Eq a => CharMap a -> CharMap a -> Bool isSubmapOf (CharMap m1) (CharMap m2) = M.isSubmapOf m1 m2 isSubmapOfBy :: (a -> b -> Bool) -> CharMap a -> CharMap b -> Bool isSubmapOfBy f (CharMap m1) (CharMap m2) = M.isSubmapOfBy f m1 m2 isProperSubmapOf :: Eq a => CharMap a -> CharMap a -> Bool isProperSubmapOf (CharMap m1) (CharMap m2) = M.isProperSubmapOf m1 m2 isProperSubmapOfBy :: (a -> b -> Bool) -> CharMap a -> CharMap b -> Bool isProperSubmapOfBy f (CharMap m1) (CharMap m2) = M.isProperSubmapOfBy f m1 m2 showTree :: Show a => CharMap a -> String showTree (CharMap m) = M.showTree m showTreeWith :: Show a => Bool -> Bool -> CharMap a -> String showTreeWith b1 b2 (CharMap m) = M.showTreeWith b1 b2 m {-# INLINE (!) #-} {-# INLINE (\\) #-} {-# INLINE null #-} {-# INLINE size #-} {-# INLINE member #-} {-# INLINE notMember #-} {-# INLINE lookup #-} {-# INLINE findWithDefault #-} {-# INLINE empty #-} {-# INLINE singleton #-} {-# INLINE insert #-} {-# INLINE insertWith #-} {-# INLINE insertWithKey #-} {-# INLINE insertLookupWithKey #-} {-# INLINE delete #-} {-# INLINE adjust #-} {-# INLINE adjustWithKey #-} {-# INLINE update #-} {-# INLINE updateWithKey #-} {-# INLINE updateLookupWithKey #-} {-# INLINE union #-} {-# INLINE unionWith #-} {-# INLINE unionWithKey #-} {-# INLINE unions #-} {-# INLINE unionsWith #-} {-# INLINE difference #-} {-# INLINE differenceWith #-} {-# INLINE differenceWithKey #-} {-# INLINE intersection #-} {-# INLINE intersectionWith #-} {-# INLINE intersectionWithKey #-} {-# INLINE map #-} {-# INLINE mapWithKey #-} {-# INLINE mapAccum #-} {-# INLINE mapAccumWithKey #-} {-# INLINE fold #-} {-# INLINE foldWithKey #-} {-# INLINE elems #-} {-# INLINE keys #-} {-# INLINE keysSet #-} {-# INLINE assocs #-} {-# INLINE toList #-} {-# INLINE fromList #-} {-# INLINE fromListWith #-} {-# INLINE fromListWithKey #-} {-# INLINE toAscList #-} {-# INLINE fromAscList #-} {-# INLINE fromAscListWith #-} {-# INLINE fromAscListWithKey #-} {-# INLINE fromDistinctAscList #-} {-# INLINE filter #-} {-# INLINE filterWithKey #-} {-# INLINE partition #-} {-# INLINE partitionWithKey #-} {-# INLINE mapMaybe #-} {-# INLINE mapMaybeWithKey #-} {-# INLINE mapEither #-} {-# INLINE mapEitherWithKey #-} {-# INLINE split #-} {-# INLINE splitLookup #-} {-# INLINE isSubmapOf #-} {-# INLINE isSubmapOfBy #-} {-# INLINE isProperSubmapOf #-} {-# INLINE isProperSubmapOfBy #-} {-# INLINE showTree #-} {-# INLINE showTreeWith #-} regex-tdfa-1.1.8/Data/IntMap/EnumMap2.hs0000644000000000000000000002525111536532136015770 0ustar0000000000000000module Data.IntMap.EnumMap2 where import Data.Foldable(Foldable(..)) import qualified Data.IntMap as M import qualified Data.IntSet.EnumSet2 as S (EnumSet(..)) import Data.Monoid(Monoid(..)) import Prelude import qualified Prelude as L (map) newtype EnumMap k a = EnumMap {unEnumMap :: M.IntMap a} deriving (Eq,Ord,Read,Show) instance Ord k => Monoid (EnumMap k a) where mempty = EnumMap mempty EnumMap x `mappend` EnumMap y = EnumMap (x `mappend` y) instance Ord k => Functor (EnumMap k) where fmap f (EnumMap m) = EnumMap (fmap f m) instance Ord k => Foldable (EnumMap k) where foldMap f (EnumMap m) = foldMap f m (!) :: (Enum key) => EnumMap key a -> key -> a (!) (EnumMap m) k = (M.!) m (fromEnum k) (\\) :: (Enum key) => EnumMap key a -> EnumMap key b -> EnumMap key a (\\) (EnumMap m1) (EnumMap m2) = EnumMap ((M.\\) m1 m2) null :: (Enum key) => EnumMap key a -> Bool null (EnumMap m) = M.null m size :: (Enum key) => EnumMap key a -> Int size (EnumMap m) = M.size m member :: (Enum key) => key -> EnumMap key a -> Bool member k (EnumMap m) = M.member (fromEnum k) m notMember :: (Enum key) => key -> EnumMap key a -> Bool notMember k (EnumMap m) = M.notMember (fromEnum k) m {-# INLINE lookup #-} lookup :: (Enum key) => key -> EnumMap key a -> Maybe a lookup k (EnumMap m) = maybe (fail "EnumMap.lookup failed") return $ M.lookup (fromEnum k) m findWithDefault :: (Enum key) => a -> key -> EnumMap key a -> a findWithDefault a k (EnumMap m) = M.findWithDefault a (fromEnum k) m empty :: (Enum key) => EnumMap key a empty = EnumMap M.empty singleton :: (Enum key) => key -> a -> EnumMap key a singleton k a = EnumMap (M.singleton (fromEnum k) a) insert :: (Enum key) => key -> a -> EnumMap key a -> EnumMap key a insert k a (EnumMap m) = EnumMap (M.insert (fromEnum k) a m) insertWith :: (Enum key) => (a -> a -> a) -> key -> a -> EnumMap key a -> EnumMap key a insertWith f k a (EnumMap m) = EnumMap (M.insertWith f (fromEnum k) a m) insertWithKey :: (Enum key) => (key -> a -> a -> a) -> key -> a -> EnumMap key a -> EnumMap key a insertWithKey f k a (EnumMap m) = EnumMap (M.insertWithKey f' (fromEnum k) a m) where f' b a1 a2 = f (toEnum b) a1 a2 insertLookupWithKey :: (Enum key) => (key -> a -> a -> a) -> key -> a -> EnumMap key a -> (Maybe a, EnumMap key a) insertLookupWithKey f k a (EnumMap m) = (ma,EnumMap m') where (ma,m') = M.insertLookupWithKey f' (fromEnum k) a m f' b a1 a2 = f (toEnum b) a1 a2 delete :: (Enum key) => key -> EnumMap key a -> EnumMap key a delete k (EnumMap m) = EnumMap (M.delete (fromEnum k) m) adjust :: (Enum key) => (a -> a) -> key -> EnumMap key a -> EnumMap key a adjust f k (EnumMap m) = EnumMap (M.adjust f (fromEnum k) m) adjustWithKey :: (Enum key) => (key -> a -> a) -> key -> EnumMap key a -> EnumMap key a adjustWithKey f k (EnumMap m) = EnumMap (M.adjustWithKey f' (fromEnum k) m) where f' b a = f (toEnum b) a update :: (Enum key) => (a -> Maybe a) -> key -> EnumMap key a -> EnumMap key a update f k (EnumMap m) = EnumMap (M.update f (fromEnum k) m) updateWithKey :: (Enum key) => (key -> a -> Maybe a) -> key -> EnumMap key a -> EnumMap key a updateWithKey f k (EnumMap m) = EnumMap (M.updateWithKey f' (fromEnum k) m) where f' b a = f (toEnum b) a updateLookupWithKey :: (Enum key) => (key -> a -> Maybe a) -> key -> EnumMap key a -> (Maybe a, EnumMap key a) updateLookupWithKey f k (EnumMap m) = (a,EnumMap m') where (a,m') = M.updateLookupWithKey f' (fromEnum k) m f' b a1 = f (toEnum b) a1 union :: (Enum key) => EnumMap key a -> EnumMap key a -> EnumMap key a union (EnumMap m1) (EnumMap m2) = EnumMap (M.union m1 m2) unionWith :: (Enum key) => (a -> a -> a) -> EnumMap key a -> EnumMap key a -> EnumMap key a unionWith f (EnumMap m1) (EnumMap m2) = EnumMap (M.unionWith f m1 m2) unionWithKey :: (Enum key) => (key -> a -> a -> a) -> EnumMap key a -> EnumMap key a -> EnumMap key a unionWithKey f (EnumMap m1) (EnumMap m2) = EnumMap (M.unionWithKey f' m1 m2) where f' b a1 a2 = f (toEnum b) a1 a2 unions :: (Enum key) => [EnumMap key a] -> EnumMap key a unions cs = EnumMap (M.unions (L.map unEnumMap cs)) unionsWith :: (Enum key) => (a -> a -> a) -> [EnumMap key a] -> EnumMap key a unionsWith f cs = EnumMap (M.unionsWith f (L.map unEnumMap cs)) difference :: (Enum key) => EnumMap key a -> EnumMap key b -> EnumMap key a difference (EnumMap m1) (EnumMap m2) = EnumMap (M.difference m1 m2) differenceWith :: (Enum key) => (a -> b -> Maybe a) -> EnumMap key a -> EnumMap key b -> EnumMap key a differenceWith f (EnumMap m1) (EnumMap m2) = EnumMap (M.differenceWith f m1 m2) differenceWithKey :: (Enum key) => (key -> a -> b -> Maybe a) -> EnumMap key a -> EnumMap key b -> EnumMap key a differenceWithKey f (EnumMap m1) (EnumMap m2) = EnumMap (M.differenceWithKey f' m1 m2) where f' b a1 a2 = f (toEnum b) a1 a2 intersection :: (Enum key) => EnumMap key a -> EnumMap key b -> EnumMap key a intersection (EnumMap m1) (EnumMap m2) = EnumMap (M.intersection m1 m2) intersectionWith :: (Enum key) => (a -> b -> a) -> EnumMap key a -> EnumMap key b -> EnumMap key a intersectionWith f (EnumMap m1) (EnumMap m2) = EnumMap (M.intersectionWith f m1 m2) intersectionWithKey :: (Enum key) => (key -> a -> b -> a) -> EnumMap key a -> EnumMap key b -> EnumMap key a intersectionWithKey f (EnumMap m1) (EnumMap m2) = EnumMap (M.intersectionWithKey f' m1 m2) where f' b a1 a2 = f (toEnum b) a1 a2 map :: (Enum key) => (a -> b) -> EnumMap key a -> EnumMap key b map f (EnumMap m) = EnumMap (M.map f m) mapWithKey :: (Enum key) => (key -> a -> b) -> EnumMap key a -> EnumMap key b mapWithKey f (EnumMap m) = EnumMap (M.mapWithKey f' m) where f' b a = f (toEnum b) a mapAccum :: (Enum key) => (a -> b -> (a, c)) -> a -> EnumMap key b -> (a, EnumMap key c) mapAccum f a (EnumMap m) = (a',EnumMap m') where (a',m') = M.mapAccum f a m mapAccumWithKey :: (Enum key) => (a -> key -> b -> (a, c)) -> a -> EnumMap key b -> (a, EnumMap key c) mapAccumWithKey f a (EnumMap m) = (a',EnumMap m') where (a',m') = M.mapAccumWithKey f' a m f' a1 b a2 = f a1 (toEnum b) a2 fold :: (Enum key) => (a -> b -> b) -> b -> EnumMap key a -> b fold f a (EnumMap m) = M.fold f a m foldWithKey :: (Enum key) => (key -> a -> b -> b) -> b -> EnumMap key a -> b foldWithKey f a (EnumMap m) = M.foldWithKey f' a m where f' b a1 a2 = f (toEnum b) a1 a2 elems :: (Enum key) => EnumMap key a -> [a] elems (EnumMap m) = M.elems m keys :: (Enum key) => EnumMap key a -> [key] keys (EnumMap m) = L.map toEnum (M.keys m) -- Have to break cover until I have CharSet keysSet :: (Enum key) => EnumMap key a -> S.EnumSet key keysSet (EnumMap m) = S.EnumSet (M.keysSet m) assocs :: (Enum key) => EnumMap key a -> [(key, a)] assocs (EnumMap m) = L.map (\(b,a) -> (toEnum b,a)) (M.assocs m) toList :: (Enum key) => EnumMap key a -> [(key, a)] toList (EnumMap m) = L.map (\(b,a) -> (toEnum b,a)) (M.toList m) fromList :: (Enum key) => [(key, a)] -> EnumMap key a fromList ka = EnumMap (M.fromList (L.map (\(k,a) -> (fromEnum k,a)) ka)) fromListWith :: (Enum key) => (a -> a -> a) -> [(key, a)] -> EnumMap key a fromListWith f ka = EnumMap (M.fromListWith f (L.map (\(k,a) -> (fromEnum k,a)) ka)) fromListWithKey :: (Enum key) => (key -> a -> a -> a) -> [(key, a)] -> EnumMap key a fromListWithKey f ka = EnumMap (M.fromListWithKey f' (L.map (\(k,a) -> (fromEnum k,a)) ka)) where f' b a1 a2 = f (toEnum b) a1 a2 toAscList :: (Enum key) => EnumMap key a -> [(key, a)] toAscList (EnumMap m) = L.map (\(b,a) -> (toEnum b,a)) (M.toAscList m) fromAscList :: (Enum key) => [(key, a)] -> EnumMap key a fromAscList ka = EnumMap (M.fromAscList (L.map (\(k,a) -> (fromEnum k,a)) ka)) fromAscListWith :: (Enum key) => (a -> a -> a) -> [(key, a)] -> EnumMap key a fromAscListWith f ka = EnumMap (M.fromAscListWith f (L.map (\(k,a) -> (fromEnum k,a)) ka)) fromAscListWithKey :: (Enum key) => (key -> a -> a -> a) -> [(key, a)] -> EnumMap key a fromAscListWithKey f ka = EnumMap (M.fromAscListWithKey f' (L.map (\(k,a) -> (fromEnum k,a)) ka)) where f' b a1 a2 = f (toEnum b) a1 a2 fromDistinctAscList :: (Enum key) => [(key, a)] -> EnumMap key a fromDistinctAscList ka = EnumMap (M.fromDistinctAscList (L.map (\(k,a) -> (fromEnum k,a)) ka)) filter :: (Enum key) => (a -> Bool) -> EnumMap key a -> EnumMap key a filter f (EnumMap m) = EnumMap (M.filter f m) filterWithKey :: (Enum key) => (key -> a -> Bool) -> EnumMap key a -> EnumMap key a filterWithKey f (EnumMap m) = EnumMap (M.filterWithKey f' m) where f' b a = f (toEnum b) a partition :: (Enum key) => (a -> Bool) -> EnumMap key a -> (EnumMap key a, EnumMap key a) partition f (EnumMap m) = (EnumMap m1', EnumMap m2') where (m1',m2') = M.partition f m partitionWithKey :: (Enum key) => (key -> a -> Bool) -> EnumMap key a -> (EnumMap key a, EnumMap key a) partitionWithKey f (EnumMap m) = (EnumMap m1', EnumMap m2') where (m1',m2') = M.partitionWithKey f' m f' b a = f (toEnum b) a mapMaybe :: (Enum key) => (a -> Maybe b) -> EnumMap key a -> EnumMap key b mapMaybe f (EnumMap m) = EnumMap (M.mapMaybe f m) mapMaybeWithKey :: (Enum key) => (key -> a -> Maybe b) -> EnumMap key a -> EnumMap key b mapMaybeWithKey f (EnumMap m) = EnumMap (M.mapMaybeWithKey f' m) where f' b a = f (toEnum b) a mapEither :: (Enum key) => (a -> Either b c) -> EnumMap key a -> (EnumMap key b, EnumMap key c) mapEither f (EnumMap m) = (EnumMap m1', EnumMap m2') where (m1',m2') = M.mapEither f m mapEitherWithKey :: (Enum key) => (key -> a -> Either b c) -> EnumMap key a -> (EnumMap key b, EnumMap key c) mapEitherWithKey f (EnumMap m) = (EnumMap m1', EnumMap m2') where (m1',m2') = M.mapEitherWithKey f' m f' b a = f (toEnum b) a split :: (Enum key) => key -> EnumMap key a -> (EnumMap key a, EnumMap key a) split k (EnumMap m) = (EnumMap m1', EnumMap m2') where (m1',m2') = M.split (fromEnum k) m splitLookup :: (Enum key) => key -> EnumMap key a -> (EnumMap key a, Maybe a, EnumMap key a) splitLookup k (EnumMap m) = (EnumMap m1', a, EnumMap m2') where (m1',a,m2') = M.splitLookup (fromEnum k) m isSubmapOf :: (Enum key,Eq a) => EnumMap key a -> EnumMap key a -> Bool isSubmapOf (EnumMap m1) (EnumMap m2) = M.isSubmapOf m1 m2 isSubmapOfBy :: (Enum key) => (a -> b -> Bool) -> EnumMap key a -> EnumMap key b -> Bool isSubmapOfBy f (EnumMap m1) (EnumMap m2) = M.isSubmapOfBy f m1 m2 isProperSubmapOf :: (Enum key,Eq a) => EnumMap key a -> EnumMap key a -> Bool isProperSubmapOf (EnumMap m1) (EnumMap m2) = M.isProperSubmapOf m1 m2 isProperSubmapOfBy :: (Enum key) => (a -> b -> Bool) -> EnumMap key a -> EnumMap key b -> Bool isProperSubmapOfBy f (EnumMap m1) (EnumMap m2) = M.isProperSubmapOfBy f m1 m2 showTree :: (Enum key,Show a) => EnumMap key a -> String showTree (EnumMap m) = M.showTree m showTreeWith :: (Enum key,Show a) => Bool -> Bool -> EnumMap key a -> String showTreeWith b1 b2 (EnumMap m) = M.showTreeWith b1 b2 m regex-tdfa-1.1.8/Data/IntSet/0000755000000000000000000000000011536532136014021 5ustar0000000000000000regex-tdfa-1.1.8/Data/IntSet/EnumSet2.hs0000644000000000000000000000703711536532136016026 0ustar0000000000000000module Data.IntSet.EnumSet2 where import qualified Data.IntSet as S import qualified Data.List as L (map) import Data.Monoid(Monoid(..)) newtype EnumSet e = EnumSet {unEnumSet :: S.IntSet} deriving (Eq,Ord,Read,Show) instance Monoid (EnumSet e) where mempty = EnumSet mempty EnumSet x `mappend` EnumSet y = EnumSet (x `mappend` y) (\\) :: (Enum e) => EnumSet e -> EnumSet e -> EnumSet e (\\) (EnumSet s1) (EnumSet s2) = EnumSet ((S.\\) s1 s2) null :: (Enum e) => EnumSet e -> Bool null (EnumSet s) = S.null s size :: (Enum e) => EnumSet e -> Int size (EnumSet s) = S.size s member :: (Enum e) => e -> EnumSet e -> Bool member e (EnumSet s) = S.member (fromEnum e) s notMember :: (Enum e) => Int -> EnumSet e -> Bool notMember e (EnumSet s) = S.notMember (fromEnum e) s isSubsetOf :: (Enum e) => EnumSet e -> EnumSet e -> Bool isSubsetOf (EnumSet e1) (EnumSet e2) = S.isSubsetOf e1 e2 isProperSubsetOf :: (Enum e) => EnumSet e -> EnumSet e -> Bool isProperSubsetOf (EnumSet e1) (EnumSet e2) = S.isProperSubsetOf e1 e2 empty :: (Enum e) => EnumSet e empty = EnumSet (S.empty) singleton :: (Enum e) => e -> EnumSet e singleton e = EnumSet (S.singleton (fromEnum e)) insert :: (Enum e) => e -> EnumSet e -> EnumSet e insert e (EnumSet s) = EnumSet (S.insert (fromEnum e) s) delete :: (Enum e) => e -> EnumSet e -> EnumSet e delete e (EnumSet s) = EnumSet (S.delete (fromEnum e) s) union :: (Enum e) => EnumSet e -> EnumSet e -> EnumSet e union (EnumSet s1) (EnumSet s2) = EnumSet (S.union s1 s2) unions :: (Enum e) => [EnumSet e] -> EnumSet e unions es = EnumSet (S.unions (L.map unEnumSet es)) difference :: (Enum e) => EnumSet e -> EnumSet e -> EnumSet e difference (EnumSet e1) (EnumSet e2) = EnumSet (S.difference e1 e2) intersection :: (Enum e) => EnumSet e -> EnumSet e -> EnumSet e intersection (EnumSet e1) (EnumSet e2) = EnumSet (S.intersection e1 e2) filter :: (Enum e) => (e -> Bool) -> EnumSet e -> EnumSet e filter f (EnumSet s) = EnumSet (S.filter f' s) where f' b = f (toEnum b) partition :: (Enum e) => (e -> Bool) -> EnumSet e -> (EnumSet e, EnumSet e) partition f (EnumSet s) = (EnumSet s1', EnumSet s2') where (s1',s2') = S.partition f' s f' b = f (toEnum b) split :: (Enum e) => e -> EnumSet e -> (EnumSet e, EnumSet e) split e (EnumSet s) = (EnumSet s1', EnumSet s2') where (s1',s2') = S.split (fromEnum e) s splitMember :: (Enum e) => e -> EnumSet e -> (EnumSet e, Bool, EnumSet e) splitMember e (EnumSet s) = (EnumSet s1',a,EnumSet s2') where (s1',a,s2') = S.splitMember (fromEnum e) s map :: (Enum e) => (e -> e) -> EnumSet e -> EnumSet e map f (EnumSet s) = EnumSet (S.map f' s) where f' b = fromEnum (f (toEnum b)) fold :: (Enum e) => (e -> b -> b) -> b -> EnumSet e -> b fold f a (EnumSet s) = S.fold f' a s where f' b a1 = f (toEnum b) a1 elems :: (Enum e) => EnumSet e -> [e] elems (EnumSet s) = L.map toEnum (S.elems s) toList :: (Enum e) => EnumSet e -> [e] toList (EnumSet s) = L.map toEnum (S.toList s) fromList :: (Enum e) => [e] -> EnumSet e fromList es = EnumSet (S.fromList (L.map fromEnum es)) toAscList :: (Enum e) => EnumSet e -> [e] toAscList (EnumSet s) = L.map toEnum (S.toAscList s) fromAscList :: (Enum e) => [e] -> EnumSet e fromAscList es = EnumSet (S.fromAscList (L.map fromEnum es)) fromDistinctAscList :: (Enum e) => [e] -> EnumSet e fromDistinctAscList es = EnumSet (S.fromDistinctAscList (L.map fromEnum es)) showTree :: (Enum e) => EnumSet e -> String showTree (EnumSet s) = S.showTree s showTreeWith :: (Enum e) => Bool -> Bool -> EnumSet e -> String showTreeWith a1 a2 (EnumSet s) = S.showTreeWith a1 a2 s regex-tdfa-1.1.8/Text/0000755000000000000000000000000011536532136012666 5ustar0000000000000000regex-tdfa-1.1.8/Text/Regex/0000755000000000000000000000000011536532136013740 5ustar0000000000000000regex-tdfa-1.1.8/Text/Regex/TDFA.hs0000644000000000000000000000714311536532136015017 0ustar0000000000000000{-| The "Text.Regex.TDFA" module provides a backend for regular expressions. It provides instances for the classes defined and documented in "Text.Regex.Base" and re-exported by this module. If you import this along with other backends then you should do so with qualified imports (with renaming for convenience). This regex-tdfa package implements, correctly, POSIX extended regular expressions. It is highly unlikely that the regex-posix package on your operating system is correct, see http://www.haskell.org/haskellwiki/Regex_Posix for examples of your OS's bugs. This package does provide captured parenthesized subexpressions. Depending on the text being searched this package supports Unicode. The [Char] and (Seq Char) text types support Unicode. The ByteString and ByteString.Lazy text types only support ASCII. It is possible to support utf8 encoded ByteString.Lazy by using regex-tdfa and regex-tdfa-utf8 packages together (required the utf8-string package). As of version 1.1.1 the following GNU extensions are recognized, all anchors: \\\` at beginning of entire text \\\' at end of entire text \\< at beginning of word \\> at end of word \\b at either beginning or end of word \\B at neither beginning nor end of word The above are controlled by the 'newSyntax' Bool in 'CompOption'. Where the "word" boundaries means between characters that are and are not in the [:word:] character class which contains [a-zA-Z0-9_]. Note that \< and \b may match before the entire text and \> and \b may match at the end of the entire text. There is no locale support, so collating elements like [.ch.] are simply ignored and equivalence classes like [=a=] are converted to just [a]. The character classes like [:alnum:] are supported over ASCII only, valid classes are alnum, digit, punct, alpha, graph, space, blank, lower, upper, cntrl, print, xdigit, word. This package does not provide "basic" regular expressions. This package does not provide back references inside regular expressions. The package does not provide Perl style regular expressions. Please look at the regex-pcre and pcre-light packages instead. -} module Text.Regex.TDFA(getVersion_Text_Regex_TDFA ,(=~),(=~~) ,module Text.Regex.TDFA.Common ,module Text.Regex.Base) where import Data.Version(Version) import Text.Regex.Base import Text.Regex.TDFA.String() import Text.Regex.TDFA.ByteString() import Text.Regex.TDFA.ByteString.Lazy() import Text.Regex.TDFA.Sequence() import Text.Regex.TDFA.Common(Regex,CompOption(..),ExecOption(..)) --import Text.Regex.TDFA.Wrap(Regex,CompOption(..),ExecOption(..),(=~),(=~~)) import Paths_regex_tdfa(version) getVersion_Text_Regex_TDFA :: Version getVersion_Text_Regex_TDFA = version -- | This is the pure functional matching operator. If the target -- cannot be produced then some empty result will be returned. If -- there is an error in processing, then 'error' will be called. (=~) :: (RegexMaker Regex CompOption ExecOption source,RegexContext Regex source1 target) => source1 -> source -> target (=~) x r = let make :: RegexMaker Regex CompOption ExecOption a => a -> Regex make = makeRegex in match (make r) x -- | This is the monadic matching operator. If a single match fails, -- then 'fail' will be called. (=~~) :: (RegexMaker Regex CompOption ExecOption source,RegexContext Regex source1 target,Monad m) => source1 -> source -> m target (=~~) x r = do let make :: (RegexMaker Regex CompOption ExecOption a, Monad m) => a -> m Regex make = makeRegexM q <- make r matchM q x regex-tdfa-1.1.8/Text/Regex/TDFA/0000755000000000000000000000000011536532137014457 5ustar0000000000000000regex-tdfa-1.1.8/Text/Regex/TDFA/ByteString.hs0000644000000000000000000000611311536532136017105 0ustar0000000000000000{-| This modules provides 'RegexMaker' and 'RegexLike' instances for using 'ByteString' with the DFA backend ("Text.Regex.Lib.WrapDFAEngine" and "Text.Regex.Lazy.DFAEngineFPS"). This module is usually used via import "Text.Regex.TDFA". This exports instances of the high level API and the medium level API of 'compile','execute', and 'regexec'. -} {- By Chris Kuklewicz, 2009. BSD License, see the LICENSE file. -} module Text.Regex.TDFA.ByteString( Regex ,CompOption ,ExecOption ,compile ,execute ,regexec ) where import Data.Array((!),elems) import qualified Data.ByteString.Char8 as B(ByteString,take,drop,unpack) import Text.Regex.Base(MatchArray,RegexContext(..),RegexMaker(..),RegexLike(..)) import Text.Regex.Base.Impl(polymatch,polymatchM) import Text.Regex.TDFA.ReadRegex(parseRegex) import Text.Regex.TDFA.String() -- piggyback on RegexMaker for String import Text.Regex.TDFA.TDFA(patternToRegex) import Text.Regex.TDFA.Common(Regex(..),CompOption,ExecOption(captureGroups)) import Data.Maybe(listToMaybe) import Text.Regex.TDFA.NewDFA.Engine(execMatch) import Text.Regex.TDFA.NewDFA.Tester as Tester(matchTest) instance RegexContext Regex B.ByteString B.ByteString where match = polymatch matchM = polymatchM instance RegexMaker Regex CompOption ExecOption B.ByteString where makeRegexOptsM c e source = makeRegexOptsM c e (B.unpack source) instance RegexLike Regex B.ByteString where matchOnce r s = listToMaybe (matchAll r s) matchAll r s = execMatch r 0 '\n' s matchCount r s = length (matchAll r' s) where r' = r { regex_execOptions = (regex_execOptions r) {captureGroups = False} } matchTest = Tester.matchTest matchOnceText regex source = fmap (\ma -> let (o,l) = ma!0 in (B.take o source ,fmap (\ol@(off,len) -> (B.take len (B.drop off source),ol)) ma ,B.drop (o+l) source)) (matchOnce regex source) matchAllText regex source = map (fmap (\ol@(off,len) -> (B.take len (B.drop off source),ol))) (matchAll regex source) compile :: CompOption -- ^ Flags (summed together) -> ExecOption -- ^ Flags (summed together) -> B.ByteString -- ^ The regular expression to compile -> Either String Regex -- ^ Returns: the compiled regular expression compile compOpt execOpt bs = case parseRegex (B.unpack bs) of Left err -> Left ("parseRegex for Text.Regex.TDFA.ByteString failed:"++show err) Right pattern -> Right (patternToRegex pattern compOpt execOpt) execute :: Regex -- ^ Compiled regular expression -> B.ByteString -- ^ ByteString to match against -> Either String (Maybe MatchArray) execute r bs = Right (matchOnce r bs) regexec :: Regex -- ^ Compiled regular expression -> B.ByteString -- ^ ByteString to match against -> Either String (Maybe (B.ByteString, B.ByteString, B.ByteString, [B.ByteString])) regexec r bs = case matchOnceText r bs of Nothing -> Right (Nothing) Just (pre,mt,post) -> let main = fst (mt!0) rest = map fst (tail (elems mt)) -- will be [] in Right (Just (pre,main,post,rest)) regex-tdfa-1.1.8/Text/Regex/TDFA/Common.hs0000644000000000000000000003757511536532136016263 0ustar0000000000000000{-# OPTIONS -funbox-strict-fields #-} -- | Common provides simple functions to the backend. It defines most -- of the data types. All modules should call error via the -- common_error function below. module Text.Regex.TDFA.Common where import Text.Regex.Base(RegexOptions(..)) {- By Chris Kuklewicz, 2007-2009. BSD License, see the LICENSE file. -} import Text.Show.Functions() import Data.Array.IArray(Array) import Data.IntSet.EnumSet2(EnumSet) import qualified Data.IntSet.EnumSet2 as Set(toList) import Data.IntMap.CharMap2(CharMap(..)) import Data.IntMap (IntMap) import qualified Data.IntMap as IMap (findWithDefault,assocs,toList,null,size,toAscList) import Data.IntSet(IntSet) import qualified Data.IntMap.CharMap2 as Map (assocs,toAscList,null) import Data.Sequence as S(Seq) --import Debug.Trace import Text.Regex.TDFA.IntArrTrieSet(TrieSet) {-# INLINE look #-} look :: Int -> IntMap a -> a look key imap = IMap.findWithDefault (common_error "Text.Regex.DFA.Common" ("key "++show key++" not found in look")) key imap common_error :: String -> String -> a common_error moduleName message = error ("Explict error in module "++moduleName++" : "++message) on :: (t1 -> t1 -> t2) -> (t -> t1) -> t -> t -> t2 f `on` g = (\x y -> (g x) `f` (g y)) -- | after sort or sortBy the use of nub/nubBy can be replaced by norep/norepBy norep :: (Eq a) => [a]->[a] norep [] = [] norep x@[_] = x norep (a:bs@(c:cs)) | a==c = norep (a:cs) | otherwise = a:norep bs -- | after sort or sortBy the use of nub/nubBy can be replaced by norep/norepBy norepBy :: (a -> a -> Bool) -> [a] -> [a] norepBy _ [] = [] norepBy _ x@[_] = x norepBy eqF (a:bs@(c:cs)) | a `eqF` c = norepBy eqF (a:cs) | otherwise = a:norepBy eqF bs mapFst :: (Functor f) => (t -> t2) -> f (t, t1) -> f (t2, t1) mapFst f = fmap (\ (a,b) -> (f a,b)) mapSnd :: (Functor f) => (t1 -> t2) -> f (t, t1) -> f (t, t2) mapSnd f = fmap (\ (a,b) -> (a,f b)) fst3 :: (a,b,c) -> a fst3 (x,_,_) = x snd3 :: (a,b,c) -> b snd3 (_,x,_) = x thd3 :: (a,b,c) -> c thd3 (_,_,x) = x flipOrder :: Ordering -> Ordering flipOrder GT = LT flipOrder LT = GT flipOrder EQ = EQ noWin :: WinTags -> Bool noWin = null -- | Used to track elements of the pattern that accept characters or -- are anchors newtype DoPa = DoPa {dopaIndex :: Int} deriving (Eq,Ord) instance Enum DoPa where toEnum = DoPa fromEnum = dopaIndex instance Show DoPa where showsPrec p (DoPa {dopaIndex=i}) = ('#':) . showsPrec p i -- | Control whether the pattern is multiline or case-sensitive like Text.Regex and whether to -- capture the subgroups (\1, \2, etc). Controls enabling extra anchor syntax. data CompOption = CompOption { caseSensitive :: Bool -- ^ True in blankCompOpt and defaultCompOpt , multiline :: Bool {- ^ False in blankCompOpt, True in defaultCompOpt. Compile for newline-sensitive matching. "By default, newline is a completely ordinary character with no special meaning in either REs or strings. With this flag, inverted bracket expressions and . never match newline, a ^ anchor matches the null string after any newline in the string in addition to its normal function, and the $ anchor matches the null string before any newline in the string in addition to its normal function." -} , rightAssoc :: Bool -- ^ True (and therefore Right associative) in blankCompOpt and defaultCompOpt , newSyntax :: Bool -- ^ False in blankCompOpt, True in defaultCompOpt. Add the extended non-POSIX syntax described in "Text.Regex.TDFA" haddock documentation. , lastStarGreedy :: Bool -- ^ False by default. This is POSIX correct but it takes space and is slower. -- Setting this to true will improve performance, and should be done -- if you plan to set the captureGroups execoption to False. } deriving (Read,Show) data ExecOption = ExecOption { captureGroups :: Bool -- ^ True by default. Set to False to improve speed (and space). } deriving (Read,Show) -- | Used by implementation to name certain Postions during -- matching. Identity of Position tag to set during a transition type Tag = Int -- | Internal use to indicate type of tag and preference for larger or smaller Positions data OP = Maximize | Minimize | Orbit | Ignore deriving (Eq,Show) -- | Internal NFA node identity number type Index = Int -- | Internal DFA identity is this Set of NFA Index type SetIndex = IntSet {- Index -} -- | Index into the text being searched type Position = Int -- | GroupIndex is for indexing submatches from capturing -- parenthesized groups (PGroup/Group) type GroupIndex = Int -- | GroupInfo collects the parent and tag information for an instance -- of a group data GroupInfo = GroupInfo { thisIndex, parentIndex :: GroupIndex , startTag, stopTag, flagTag :: Tag } deriving Show -- | The TDFA backend specific 'Regex' type, used by this module's RegexOptions and RegexMaker data Regex = Regex { regex_dfa :: DFA -- ^ starting DFA state , regex_init :: Index -- ^ index of starting state , regex_b_index :: (Index,Index) -- ^ indexes of smallest and largest states , regex_b_tags :: (Tag,Tag) -- ^ indexes of smallest and largest tags , regex_trie :: TrieSet DFA -- ^ All DFA states , regex_tags :: Array Tag OP -- ^ information about each tag , regex_groups :: Array GroupIndex [GroupInfo] -- ^ information about each group , regex_isFrontAnchored :: Bool -- ^ used for optimizing execution , regex_compOptions :: CompOption , regex_execOptions :: ExecOption } -- no deriving at all, the DFA may be too big to ever traverse! instance RegexOptions Regex CompOption ExecOption where blankCompOpt = CompOption { caseSensitive = True , multiline = False , rightAssoc = True , newSyntax = False , lastStarGreedy = False } blankExecOpt = ExecOption { captureGroups = True } defaultCompOpt = CompOption { caseSensitive = True , multiline = True , rightAssoc = True , newSyntax = True , lastStarGreedy = False } defaultExecOpt = ExecOption { captureGroups = True } setExecOpts e r = r {regex_execOptions=e} getExecOpts r = regex_execOptions r data WinEmpty = WinEmpty Instructions | WinTest WhichTest (Maybe WinEmpty) (Maybe WinEmpty) deriving Show -- | Internal NFA node type data QNFA = QNFA {q_id :: Index, q_qt :: QT} -- | Internal to QNFA type. data QT = Simple { qt_win :: WinTags -- ^ empty transitions to the virtual winning state , qt_trans :: CharMap QTrans -- ^ all ways to leave this QNFA to other or the same QNFA , qt_other :: QTrans -- ^ default ways to leave this QNFA to other or the same QNFA } | Testing { qt_test :: WhichTest -- ^ The test to perform , qt_dopas :: EnumSet DoPa -- ^ location(s) of the anchor(s) in the original regexp , qt_a, qt_b :: QT -- ^ use qt_a if test is True, else use qt_b } -- | Internal type to represent the tagged transition from one QNFA to -- another (or itself). The key is the Index of the destination QNFA. type QTrans = IntMap {- Destination Index -} [TagCommand] -- | Known predicates, just Beginning of Line (^) and End of Line ($). -- Also support for GNU extensions is being added: \` beginning of -- buffer, \' end of buffer, \< and \> for begin and end of words, \b -- and \B for word boundary and not word boundary. data WhichTest = Test_BOL | Test_EOL -- '^' and '$' (affected by multiline option) | Test_BOB | Test_EOB -- \` and \' begin and end buffer | Test_BOW | Test_EOW -- \< and \> begin and end word | Test_EdgeWord | Test_NotEdgeWord -- \b and \B word boundaries deriving (Show,Eq,Ord,Enum) -- | The things that can be done with a Tag. TagTask and -- ResetGroupStopTask are for tags with Maximize or Minimize OP -- values. ResetOrbitTask and EnterOrbitTask and LeaveOrbitTask are -- for tags with Orbit OP value. data TagTask = TagTask | ResetGroupStopTask | SetGroupStopTask | ResetOrbitTask | EnterOrbitTask | LeaveOrbitTask deriving (Show,Eq) -- | Ordered list of tags and their associated Task type TagTasks = [(Tag,TagTask)] -- | When attached to a QTrans the TagTask can be done before or after -- accepting the character. data TagUpdate = PreUpdate TagTask | PostUpdate TagTask deriving (Show,Eq) -- | Ordered list of tags and their associated update operation. type TagList = [(Tag,TagUpdate)] -- | A TagList and the location of the item in the original pattern -- that is being accepted. type TagCommand = (DoPa,TagList) -- | Ordered list of tags and their associated update operation to -- perform on an empty transition to the virtual winning state. type WinTags = TagList -- | Internal DFA node, identified by the Set of indices of the QNFA -- nodes it represents. data DFA = DFA { d_id :: SetIndex, d_dt :: DT } deriving(Show) data Transition = Transition { trans_many :: DFA -- ^ where to go (maximal), including respawning , trans_single :: DFA -- ^ where to go, not including respawning , trans_how :: DTrans -- ^ how to go, including respawning } -- | Internal to the DFA node data DT = Simple' { dt_win :: IntMap {- Source Index -} Instructions -- ^ Actions to perform to win , dt_trans :: CharMap Transition -- ^ Transition to accept Char , dt_other :: Transition -- ^ default accepting transition } | Testing' { dt_test :: WhichTest -- ^ The test to perform , dt_dopas :: EnumSet DoPa -- ^ location(s) of the anchor(s) in the original regexp , dt_a,dt_b :: DT -- ^ use dt_a if test is True else use dt_b } -- | Internal type to repesent the commands for the tagged transition. -- The outer IntMap is for the destination Index and the inner IntMap -- is for the Source Index. This is convenient since all runtime data -- going to the same destination must be compared to find the best. -- -- A Destination IntMap entry may have an empty Source IntMap if and -- only if the destination is the starting index and the NFA/DFA. -- This instructs the matching engine to spawn a new entry starting at -- the post-update position. type DTrans = IntMap {- Index of Destination -} (IntMap {- Index of Source -} (DoPa,Instructions)) -- type DTrans = IntMap {- Index of Destination -} (IntMap {- Index of Source -} (DoPa,RunState ())) -- | Internal convenience type for the text display code type DTrans' = [(Index, [(Index, (DoPa, ([(Tag, (Position,Bool))],[String])))])] -- | Positions for which a * was re-started while looping. Need to -- append locations at back but compare starting with front, so use -- Seq as a Queue. The initial position is saved in basePos (and a -- Maximize Tag), the middle positions in the Seq, and the final -- position is NOT saved in the Orbits (only in a Maximize Tag). -- -- The orderinal code is being written XXX TODO document it. data Orbits = Orbits { inOrbit :: !Bool -- True if enterOrbit, False if LeaveOrbit , basePos :: Position , ordinal :: (Maybe Int) , getOrbits :: !(Seq Position) } deriving (Show) -- | The 'newPos' and 'newFlags' lists in Instructions are sorted by, and unique in, the Tag values data Instructions = Instructions { newPos :: ![(Tag,Action)] -- False is preUpdate, True is postUpdate (there are no Orbit tags here) -- 2009 : Change to enum from bool? , newOrbits :: !(Maybe (Position -> OrbitTransformer)) } deriving (Show) data Action = SetPre | SetPost | SetVal Int deriving (Show,Eq) type OrbitTransformer = OrbitLog -> OrbitLog type OrbitLog = IntMap Orbits instance Show QNFA where show (QNFA {q_id = i, q_qt = qt}) = "QNFA {q_id = "++show i ++"\n ,q_qt = "++ show qt ++"\n}" instance Show QT where show = showQT showQT :: QT -> String showQT (Simple win trans other) = "{qt_win=" ++ show win ++ "\n, qt_trans=" ++ show (foo trans) ++ "\n, qt_other=" ++ show (foo' other) ++ "}" where foo :: CharMap QTrans -> [(Char,[(Index,[TagCommand])])] foo = mapSnd foo' . Map.toAscList foo' :: QTrans -> [(Index,[TagCommand])] foo' = IMap.toList showQT (Testing test dopas a b) = "{Testing "++show test++" "++show (Set.toList dopas) ++"\n"++indent' a ++"\n"++indent' b++"}" where indent' = init . unlines . map (spaces++) . lines . showQT spaces = replicate 9 ' ' instance Show DT where show = showDT indent :: [String] -> String indent = unlines . map (\x -> ' ':' ':x) showDT :: DT -> String showDT (Simple' w t o) = "Simple' { dt_win = " ++ seeWin1 ++ "\n , dt_trans = " ++ seeTrans1 ++ "\n , dt_other = " ++ seeOther1 o ++ "\n }" where seeWin1 | IMap.null w = "No win" | otherwise = indent . map show . IMap.assocs $ w seeTrans1 :: String seeTrans1 | Map.null t = "No (Char,Transition)" | otherwise = ('\n':) . indent $ map (\(char,Transition {trans_many=dfa,trans_single=dfa2,trans_how=dtrans}) -> concat ["(" ,show char ,", MANY " ,show (d_id dfa) ,", SINGLE " ,show (d_id dfa2) ,", \n" ,seeDTrans dtrans ,")"]) (Map.assocs t) seeOther1 (Transition {trans_many=dfa,trans_single=dfa2,trans_how=dtrans}) = concat ["(MANY " ,show (d_id dfa) ,", SINGLE " ,show (d_id dfa2) ,", \n" ,seeDTrans dtrans ,")"] showDT (Testing' wt d a b) = "Testing' { dt_test = " ++ show wt ++ "\n , dt_dopas = " ++ show d ++ "\n , dt_a = " ++ indent' a ++ "\n , dt_b = " ++ indent' b ++ "\n }" where indent' = init . unlines . (\s -> case s of [] -> [] (h:t) -> h : (map (spaces ++) t)) . lines . showDT spaces = replicate 10 ' ' seeDTrans :: DTrans -> String --seeDTrans x = concatMap (\(dest,y) -> unlines . map (\(source,ins) -> show (dest,source,ins) ) . IMap.assocs $ y) (IMap.assocs x) seeDTrans x | IMap.null x = "No DTrans" seeDTrans x = concatMap seeSource (IMap.assocs x) where seeSource (dest,srcMap) | IMap.null srcMap = indent [show (dest,"SPAWN")] | otherwise = indent . map (\(source,ins) -> show (dest,source,ins) ) . IMap.assocs $ srcMap -- spawnIns = Instructions { newPos = [(0,SetPost)], newOrbits = Nothing } instance Eq QT where t1@(Testing {}) == t2@(Testing {}) = (qt_test t1) == (qt_test t2) && (qt_a t1) == (qt_a t2) && (qt_b t1) == (qt_b t2) (Simple w1 (CharMap t1) o1) == (Simple w2 (CharMap t2) o2) = w1 == w2 && eqTrans && eqQTrans o1 o2 where eqTrans :: Bool eqTrans = (IMap.size t1 == IMap.size t2) && and (zipWith together (IMap.toAscList t1) (IMap.toAscList t2)) where together (c1,qtrans1) (c2,qtrans2) = (c1 == c2) && eqQTrans qtrans1 qtrans2 eqQTrans :: QTrans -> QTrans -> Bool eqQTrans = (==) _ == _ = False regex-tdfa-1.1.8/Text/Regex/TDFA/CorePattern.hs0000644000000000000000000007704711536532136017257 0ustar0000000000000000-- | The CorePattern module deconstructs the Pattern tree created by -- ReadRegex.parseRegex and returns a simpler Q/P tree with -- annotations at each Q node. This will be converted by the TNFA -- module into a QNFA finite automata. -- -- Of particular note, this Pattern to Q/P conversion creates and -- assigns all the internal Tags that will be used during the matching -- process, and associates the captures groups with the tags that -- represent their starting and ending locations and with their -- immediate parent group. -- -- Each Maximize and Minimize tag is held as either a preTag or a -- postTag by one and only one location in the Q/P tree. The Orbit -- tags are each held by one and only one Star node. Tags that stop a -- Group are also held in perhaps numerous preReset lists. -- -- The additional nullQ::nullView field of Q records the potentially -- complex information about what tests and tags must be used if the -- pattern unQ::P matches 0 zero characters. There can be redundancy -- in nullView, which is eliminated by cleanNullView. -- -- Uses recursive do notation. -- -- 2009 XXX TODO: we can avoid needing tags in the part of the pattern -- after the last capturing group (when right-associative). This is -- flipped for left-associative where the front of the pattern before -- the first capturing group needs no tags. The edge of these regions -- is subtle: both case needs a Maximize tag. One ought to be able to -- check the Pattern: if the root is PConcat then a scan from the end -- (start) looking for the first with an embedded PGroup can be found -- and the PGroup free elements can be wrapped in some new PNOTAG -- semantic indicator. module Text.Regex.TDFA.CorePattern(Q(..),P(..),WhichTest(..),Wanted(..) ,TestInfo,OP(..),SetTestInfo(..),NullView ,patternToQ,cleanNullView,cannotAccept,mustAccept) where import Control.Monad.RWS {- all -} import Data.Array.IArray(Array,(!),accumArray,listArray) import Data.List(sort) import Data.IntMap.EnumMap2(EnumMap) import qualified Data.IntMap.EnumMap2 as Map(singleton,null,assocs,keysSet) --import Data.Maybe(isNothing) import Data.IntSet.EnumSet2(EnumSet) import qualified Data.IntSet.EnumSet2 as Set(singleton,toList,isSubsetOf) import Text.Regex.TDFA.Common {- all -} import Text.Regex.TDFA.Pattern(Pattern(..),starTrans) -- import Debug.Trace {- By Chris Kuklewicz, 2007. BSD License, see the LICENSE file. -} --err :: String -> a --err = common_error "Text.Regex.TDFA.CorePattern" --debug :: (Show a) => a -> b -> b --debug _ = id -- Core Pattern Language data P = Empty -- Could be replaced by (Test Nothing)?? | Or [Q] | Seq Q Q | Star { getOrbit :: Maybe Tag -- tag to prioritize the need to keep track of length of each pass though q , resetOrbits :: [Tag] -- child star's orbits to reset (ResetOrbitTask) at all depths , firstNull :: Bool -- Usually True to mean the first pass may match 0 characters , unStar :: Q} | Test TestInfo -- Require the test to be true (merge with empty as (Test (Maybe TestInfo)) ??) | OneChar Pattern -- Bring the Pattern element that accepts a character | NonEmpty Q -- Don't let the Q pattern match nothing deriving (Show,Eq) -- The diagnostics about the pattern. Note that when unQ is 'Seq' the -- the preTag and postTag are Nothing but the preReset might have tags -- from PGroup injecting them. data Q = Q {nullQ :: NullView -- Ordered list of nullable views ,takes :: (Position,Maybe Position) -- Range of number of accepted characters ,preReset :: [Tag] -- Tags to "reset" (ResetGroupStopTask) (Only immediate children for efficiency) ,postSet :: [Tag] -- Tags to "set" (SetGroupStopTask) ,preTag,postTag :: Maybe Tag -- Tags assigned around this pattern (TagTask) ,tagged :: Bool -- Whether this node should be tagged -- patternToQ use only ,childGroups :: Bool -- Whether unQ has any PGroups -- patternToQ use only ,wants :: Wanted -- What kind of continuation is used by this pattern ,unQ :: P} deriving (Eq) type TestInfo = (WhichTest,DoPa) -- This is newtype'd to allow control over class instances -- This is a set of WhichTest where each test has associated pattern location information newtype SetTestInfo = SetTestInfo {getTests :: EnumMap WhichTest (EnumSet DoPa)} deriving (Eq) instance Monoid SetTestInfo where mempty = SetTestInfo mempty SetTestInfo x `mappend` SetTestInfo y = SetTestInfo (x `mappend` y) instance Show SetTestInfo where show (SetTestInfo sti) = "SetTestInfo "++show (mapSnd (Set.toList) $ Map.assocs sti) -- There may be several distinct ways for a subtree to conditionally -- (i.e. with a Test) or unconditionally accept 0 characters. These -- are in the list in order of preference, with most preferred listed -- first. type NullView = [(SetTestInfo,TagList)] -- Ordered list of null views, each is a set of tests and tags -- During the depth first traversal, children are told about tags by the parent. -- They may change Apply to Advice and they may generate new tags. data HandleTag = NoTag -- No tag at this boundary | Advice Tag -- tag at this boundary, applied at higher level in tree | Apply Tag -- tag at this boundary, may be applied at this node or passed to one child deriving (Show) -- Nodes in the tree are labeled by the type kind of continuation they -- prefer to be passed when processing. This makes it possible to -- create a smaller number of QNFA states and avoid creating wasteful -- QNFA states that won't be reachable in the final automata. -- -- In practice WantsBoth is treated identically to WantsQNFA and -- WantsBoth could be removed. data Wanted = WantsQNFA | WantsQT | WantsBoth | WantsEither deriving (Eq,Show) instance Show Q where show = showQ showQ :: Q -> String showQ q = "Q { nullQ = "++show (nullQ q)++ "\n , takes = "++show (takes q)++ "\n , preReset = "++show (preReset q)++ "\n , postSet = "++show (postSet q)++ "\n , preTag = "++show (preTag q)++ "\n , postTag = "++show (postTag q)++ "\n , tagged = "++show (tagged q)++ "\n , wants = "++show (wants q)++ "\n , unQ = "++ indent' (unQ q)++" }" where indent' = unlines . (\s -> case s of [] -> [] (h:t) -> h : (map (spaces ++) t)) . lines . show spaces = replicate 10 ' ' -- Smart constructors for NullView notNull :: NullView notNull = [] -- Shorthand for combining a preTag and a postTag -- preTags :: Maybe Tag -> Maybe Tag -> TagList -- preTags a b = promote a `mappend` promote b -- where promote = maybe [] (\x -> [(x,PreUpdate TagTask)]) promotePreTag :: HandleTag -> TagList promotePreTag = maybe [] (\x -> [(x,PreUpdate TagTask)]) . apply makeEmptyNullView :: HandleTag -> HandleTag -> NullView makeEmptyNullView a b = [(mempty, promotePreTag a ++ promotePreTag b)] makeTestNullView :: TestInfo -> HandleTag -> HandleTag -> NullView makeTestNullView (w,d) a b = [(SetTestInfo (Map.singleton w (Set.singleton d)), promotePreTag a ++ promotePreTag b)] tagWrapNullView :: HandleTag -> HandleTag -> NullView -> NullView tagWrapNullView a b oldNV = case (promotePreTag a, promotePreTag b) of ([],[]) -> oldNV (pre,post) -> do (oldTests,oldTasks) <- oldNV return (oldTests,pre++oldTasks++post) -- For PGroup, need to prepend reset tasks before others in nullView addGroupResetsToNullView :: [Tag] -> Tag -> NullView -> NullView addGroupResetsToNullView groupResets groupSet nv = [ (test, prepend (append tags) ) | (test,tags) <- nv ] where prepend = foldr (\h t -> (h:).t) id . map (\tag->(tag,PreUpdate ResetGroupStopTask)) $ groupResets append = (++[(groupSet,PreUpdate SetGroupStopTask)]) -- For PStar, need to put in the orbit TagTasks orbitWrapNullView :: Maybe Tag -> [Tag] -> NullView -> NullView orbitWrapNullView mOrbit orbitResets oldNV = case (mOrbit,orbitResets) of (Nothing,[]) -> oldNV (Nothing,_) -> do (oldTests,oldTasks) <- oldNV return (oldTests,prepend oldTasks) (Just o,_) -> do (oldTests,oldTasks) <- oldNV return (oldTests,prepend $ [(o,PreUpdate EnterOrbitTask)] ++ oldTasks ++ [(o,PreUpdate LeaveOrbitTask)]) where prepend = foldr (\h t -> (h:).t) id . map (\tag->(tag,PreUpdate ResetOrbitTask)) $ orbitResets -- The NullViews are ordered, and later test sets that contain the -- tests from any earlier entry will never be chosen. This function -- returns a list with these redundant elements removed. Note that -- the first unconditional entry in the list will be the last entry of -- the returned list since the empty set is a subset of any other set. cleanNullView :: NullView -> NullView cleanNullView [] = [] cleanNullView (first@(SetTestInfo sti,_):rest) | Map.null sti = first : [] -- optimization | otherwise = first : cleanNullView (filter (not . (setTI `Set.isSubsetOf`) . Map.keysSet . getTests . fst) rest) where setTI = Map.keysSet sti -- Ordered Sequence of two NullViews: all ordered combinations of tests and tags. -- Order of <- s1 and <- s2 is deliberately chosen to maintain preference priority mergeNullViews :: NullView -> NullView -> NullView mergeNullViews s1 s2 = cleanNullView $ do (test1,tag1) <- s1 (test2,tag2) <- s2 return (mappend test1 test2,mappend tag1 tag2) -- mergeNullViews = cleanNullView $ liftM2 (mappend *** mappend) -- Concatenated two ranges of number of accepted characters seqTake :: (Int, Maybe Int) -> (Int, Maybe Int) -> (Int, Maybe Int) seqTake (x1,y1) (x2,y2) = (x1+x2,liftM2 (+) y1 y2) -- Parallel combination of list of ranges of number of accepted characters orTakes :: [(Int, Maybe Int)] -> (Int,Maybe Int) orTakes [] = (0,Just 0) orTakes ts = let (xs,ys) = unzip ts in (minimum xs, foldl1 (liftM2 max) ys) -- Invariant: apply (toAdvice _ ) == mempty apply :: HandleTag -> Maybe Tag apply (Apply tag) = Just tag apply _ = Nothing toAdvice :: HandleTag -> HandleTag toAdvice (Apply tag) = Advice tag toAdvice s = s noTag :: HandleTag -> Bool noTag NoTag = True noTag _ = False fromHandleTag :: HandleTag -> Tag fromHandleTag (Apply tag) = tag fromHandleTag (Advice tag) = tag fromHandleTag _ = error "fromHandleTag" -- Predicates on the range of number of accepted characters varies :: Q -> Bool varies Q {takes = (_,Nothing)} = True varies Q {takes = (x,Just y)} = x/=y mustAccept :: Q -> Bool mustAccept q = (0/=) . fst . takes $ q canAccept :: Q -> Bool canAccept q = maybe True (0/=) $ snd . takes $ q cannotAccept :: Q -> Bool cannotAccept q = maybe False (0==) $ snd . takes $ q -- This converts then input Pattern to an analyzed Q structure with -- the tags assigned. -- -- The analysis is filled in by a depth first search and the tags are -- created top down and passed to children. Thus information flows up -- from the dfs of the children and simultaneously down in the form of -- pre and post HandleTag data. This bidirectional flow is handled -- declaratively by using the MonadFix (i.e. mdo). -- -- Invariant: A tag should exist in Q in exactly one place (and will -- be in a preTag,postTag, or getOrbit field). This is partly because -- PGroup needs to know the tags are around precisely the expression -- that it wants to record. If the same tag were in other branches -- then this would no longer be true. The tag may or may not also -- show up in one or more preReset list or resetOrbits list. -- -- This invariant is enforced by each node either taking -- responsibility (apply) for a passed in / created tag or sending it -- to exactly one child node. Other child nodes need to receive it -- via toAdvice. Leaf nodes are forced to apply any passed tags. -- -- There is a final "qwin of Q {postTag=ISet.singleton 1}" and an -- implied initial index tag of 0. -- -- favoring pushing Apply into the child postTag makes PGroup happier type PM = RWS (Maybe GroupIndex) [Either Tag GroupInfo] ([OP]->[OP],Tag) type HHQ = HandleTag -- m1 : info about left boundaary / preTag -> HandleTag -- m2 : info about right boundary / postTag -> PM Q -- There is no group 0 here, since it is always the whole match and has no parent of its own makeGroupArray :: GroupIndex -> [GroupInfo] -> Array GroupIndex [GroupInfo] makeGroupArray maxGroupIndex groups = accumArray (\earlier later -> later:earlier) [] (1,maxGroupIndex) filler where filler = map (\gi -> (thisIndex gi,gi)) groups fromRight :: [Either Tag GroupInfo] -> [GroupInfo] fromRight [] = [] fromRight ((Right x):xs) = x:fromRight xs fromRight ((Left _):xs) = fromRight xs partitionEither :: [Either Tag GroupInfo] -> ([Tag],[GroupInfo]) partitionEither = helper id id where helper :: ([Tag]->[Tag]) -> ([GroupInfo]->[GroupInfo]) -> [Either Tag GroupInfo] -> ([Tag],[GroupInfo]) helper ls rs [] = (ls [],rs []) helper ls rs ((Right x):xs) = helper ls (rs.(x:)) xs helper ls rs ((Left x):xs) = helper (ls.(x:)) rs xs -- Partial function: assumes starTrans has been run on the Pattern -- Note that the lazy dependency chain for this very zigzag: -- varies information is sent up the tree -- handle tags depend on that and sends m1 m2 down the tree -- makeGroup sends some tags to the writer (Right _) -- withParent listens to children send group info to writer -- and lazily looks resetGroupTags from aGroups, the result of all writer (Right _) -- preReset stores the resetGroupTags result of the lookup in the tree -- makeOrbit sends some tags to the writer (Left _) -- withOrbit listens to children send orbit info to writer for resetOrbitTags -- nullQ depends m1 m2 and resetOrbitTags and resetGroupTags and is sent up the tree patternToQ :: CompOption -> (Pattern,(GroupIndex,DoPa)) -> (Q,Array Tag OP,Array GroupIndex [GroupInfo]) patternToQ compOpt (pOrig,(maxGroupIndex,_)) = (tnfa,aTags,aGroups) where (tnfa,(tag_dlist,nextTag),groups) = runRWS monad startReader startState aTags = listArray (0,pred nextTag) (tag_dlist []) aGroups = makeGroupArray maxGroupIndex (fromRight groups) -- implicitly inside a PGroup 0 converted into a GroupInfo 0 undefined 0 1 monad = go (starTrans pOrig) (Advice 0) (Advice 1) -- startReader is accessed by getParentIndex and changed by nonCapture and withParent startReader :: Maybe GroupIndex startReader = Just 0 -- start inside group 0, capturing enabled -- The startState is only acted upon in the "uniq" command -- Tag 0 is Minimized and Tag 1 is maximized, next tag has value of 2 -- This is regardless of right or left associativity startState :: ([OP]->[OP],Tag) startState = ( (Minimize:) . (Maximize:) , 2) -- uniq uses MonadState and always returns an "Apply _" tag {-# INLINE uniq #-} uniq :: String -> PM HandleTag uniq _msg = fmap Apply (uniq' Maximize) -- uniq _msg = do x <- fmap Apply (uniq' Maximize) -- trace ('\n':msg ++ " Maximize "++show x) $ return x -- return x ignore :: String -> PM Tag ignore _msg = uniq' Ignore -- ignore _msg = do x <- uniq' Ignore -- trace ('\n':msg ++ " Ignore "++show x) $ return x -- return x {-# NOINLINE uniq' #-} uniq' :: OP -> PM Tag uniq' newOp = do (op,s) <- get -- generate the next tag with bias newOp let op' = op . (newOp:) s' = succ s put $! (op',s') return s {-# INLINE makeOrbit #-} -- Specialize the monad operations and give more meaningful names -- makeOrbit uses MonadState(uniq) and MonadWriter(tell/Left) makeOrbit :: PM (Maybe Tag) makeOrbit = do x <- uniq' Orbit -- trace ('\n':"PStar Orbit "++show x) $ do tell [Left x] return (Just x) {-# INLINE withOrbit #-} -- withOrbit uses MonadWriter(listens to makeOrbit/Left), collects -- children at all depths withOrbit :: PM a -> PM (a,[Tag]) withOrbit = listens childStars where childStars x = let (ts,_) = partitionEither x in ts {-# INLINE makeGroup #-} -- makeGroup usesMonadWriter(tell/Right) makeGroup :: GroupInfo -> PM () makeGroup = tell . (:[]) . Right {-# INLINE getParentIndex #-} -- getParentIndex uses MonadReader(ask) getParentIndex :: PM (Maybe GroupIndex) getParentIndex = ask {-# INLINE nonCapture #-} -- nonCapture uses MonadReader(local) to suppress getParentIndex to return Nothing nonCapture :: PM a -> PM a nonCapture = local (const Nothing) -- withParent uses MonadReader(local) to set getParentIndex to return (Just this) -- withParent uses MonadWriter(listens to makeGroup/Right) to return contained group indices (stopTag) -- withParent is only safe if getParentIndex has been checked to be not equal to Nothing (see PGroup below) -- Note use of laziness: the immediate children's group index is used to look up all copies of the -- group in aGroups, including copies that are not immediate children. withParent :: GroupIndex -> PM a -> PM (a,[Tag]) withParent this = local (const (Just this)) . listens childGroupInfo where childGroupInfo x = let (_,gs) = partitionEither x children :: [GroupIndex] children = norep . sort . map thisIndex -- filter to get only immediate children (efficiency) . filter ((this==).parentIndex) $ gs in concatMap (map flagTag . (aGroups!)) (this:children) -- combineConcat is a partial function: Must not pass in an empty list -- Policy choices: -- * pass tags to apply to children and have no preTag or postTag here (so none addded to nullQ) -- * middle 'mid' tag: give to left/front child as postTag so a Group there might claim it as a stopTag -- * if parent is Group then preReset will become non-empty combineConcat :: [Pattern] -> HHQ combineConcat | rightAssoc compOpt = foldr1 combineSeq . map go | otherwise = foldl1 combineSeq . map go -- libtre default where {-# INLINE front'end #-} front'end | rightAssoc compOpt = liftM2 (,) | otherwise = flip (liftM2 (flip (,))) combineSeq :: HHQ -> HHQ -> HHQ combineSeq pFront pEnd = (\ m1 m2 -> mdo let bothVary = varies qFront && varies qEnd a <- if noTag m1 && bothVary then uniq "combineSeq start" else return m1 b <- if noTag m2 && bothVary then uniq "combineSeq stop" else return m2 mid <- case (noTag a,canAccept qFront,noTag b,canAccept qEnd) of (False,False,_,_) -> return (toAdvice a) (_,_,False,False) -> return (toAdvice b) _ -> if tagged qFront || tagged qEnd then uniq "combineSeq mid" else return NoTag -- qFront <- pFront a mid -- qEnd <- pEnd (toAdvice mid) b (qFront,qEnd) <- front'end (pFront a mid) (pEnd (toAdvice mid) b) -- XXX: Perhaps a "produces" should be created to compliment "wants", -- then "produces qEnd" could be compared to "wants qFront" let wanted = if WantsEither == wants qEnd then wants qFront else wants qEnd return $ Q { nullQ = mergeNullViews (nullQ qFront) (nullQ qEnd) , takes = seqTake (takes qFront) (takes qEnd) , preReset = [], postSet = [], preTag = Nothing, postTag = Nothing , tagged = bothVary , childGroups = childGroups qFront || childGroups qEnd , wants = wanted , unQ = Seq qFront qEnd } ) go :: Pattern -> HHQ go pIn m1 m2 = let die = error $ "patternToQ cannot handle "++show pIn nil = return $ Q {nullQ=makeEmptyNullView m1 m2 ,takes=(0,Just 0) ,preReset=[],postSet=[],preTag=apply m1,postTag=apply m2 ,tagged=False,childGroups=False,wants=WantsEither ,unQ=Empty} one = return $ Q {nullQ=notNull ,takes=(1,Just 1) ,preReset=[],postSet=[],preTag=apply m1,postTag=apply m2 ,tagged=False,childGroups=False,wants=WantsQNFA ,unQ = OneChar pIn} test myTest = return $ Q {nullQ=makeTestNullView myTest m1 m2 ,takes=(0,Just 0) ,preReset=[],postSet=[],preTag=apply m1,postTag=apply m2 ,tagged=False,childGroups=False,wants=WantsQT ,unQ=Test myTest } xtra = newSyntax compOpt in case pIn of PEmpty -> nil POr [] -> nil POr [branch] -> go branch m1 m2 POr branches -> mdo -- 2009 : The PNonEmpty p as POr [PEmpty,p] takes no branch tracking tag. -- I claim this is because only accepting branches need tags, -- and the last accepting branch does not need a tag. -- Non-accepting possibilities can all commute to the front and -- become part of the nullQ. The accepting bits then need prioritizing. -- Does the above require changes in POr handling in TNFA? Yes. -- Have to always use nullQ instead of "recapitulating" it. -- Could also create a constant-writing tag instead of many index tags. -- Exasperation: This POr recursive mdo is very easy to make loop and lockup the program -- if needTags is False then there is no way to disambiguate branches so fewer tags are needed let needUniqTags = childGroups ans let needTags = varies ans || childGroups ans -- childGroups detects that "abc|a(b)c" needs tags a <- if noTag m1 && needTags then uniq "POr start" else return m1 -- whole POr b <- if noTag m2 && needTags then uniq "POr stop" else return m2 -- whole POr let aAdvice = toAdvice a -- all branches share 'aAdvice' bAdvice = toAdvice b -- last branch gets 'bAdvice', others may get own tag -- Due to the recursive-do, it seems that I have to put the if needTags into the op' newUniq = if needUniqTags then uniq "POr branch" else return bAdvice -- trace ("\nPOr sub "++show aAdvice++" "++show bAdvice++"needsTags is "++show needTags) $ return () -- The "bs" values are allocated in left-to-right order before the children in "qs" -- optimiztion: low priority for last branch is implicit, do not create separate tag here. bs <- fmap (++[bAdvice]) $ replicateM (pred $ length branches) newUniq -- 2 <= length ps -- create all the child branches in left-to-right order after the "bs" qs <- forM (zip branches bs) (\(branch,bTag) -> (go branch aAdvice bTag)) let wqs = map wants qs wanted = if any (WantsBoth==) wqs then WantsBoth else case (any (WantsQNFA==) wqs,any (WantsQT==) wqs) of (True,True) -> WantsBoth (True,False) -> WantsQNFA (False,True) -> WantsQT (False,False) -> WantsEither nullView = cleanNullView . tagWrapNullView a b . concatMap nullQ $ qs -- The nullView computed above takes the nullQ of the branches and combines them. This -- assumes that the pre/post tags of the children are also part of the nullQ values. So -- for consistency, POr must then add its own pre/post tags to its nullQ value. Note that -- concatMap sets the left-to-right preference when choosing the null views. let ans = Q { nullQ = nullView , takes = orTakes . map takes $ qs , preReset = [], postSet = [] , preTag = apply a, postTag = apply b , tagged = needTags , childGroups = any childGroups qs , wants = wanted , unQ = Or qs } return ans PConcat [] -> nil -- fatal to pass [] to combineConcat PConcat ps -> combineConcat ps m1 m2 PStar mayFirstBeNull p -> mdo let accepts = canAccept q -- if needsOrbit is False then there is no need to disambiguate captures on each orbit -- Both checks are useful because (varies q) of True does not imply (childGroups q) of True when under PNonCapture needsOrbit = varies q && childGroups q -- if needsOrbit then must check start/stop before the Orbit tag -- if accepts then must check start/stop of whole pattern needsTags = needsOrbit || accepts -- important that needsOrbit implies needsTags a <- if noTag m1 && needsTags then uniq "PStar start" else return m1 b <- if noTag m2 && needsTags then uniq "PStar stop" else return m2 mOrbit <- if needsOrbit then makeOrbit else return Nothing -- any Orbit tag is created after the pre and post tags -- test1 <- if tagged q then uniq "not-TEST1" Minimize else return NoTag -- XXX XXX 1.1.5 testing second NoTag replaced with (toAdvice b) (q,resetOrbitTags) <- withOrbit (go p NoTag (toAdvice b)) -- all contained orbit tags get listened to (not including this one). let nullView | mayFirstBeNull = cleanNullView $ childViews ++ skipView | otherwise = skipView where childViews = tagWrapNullView a b . orbitWrapNullView mOrbit resetOrbitTags $ nullQ q skipView = makeEmptyNullView a b return $ Q { nullQ = nullView , takes = (0,if accepts then Nothing else (Just 0)) , preReset = [], postSet = [] , preTag = apply a, postTag = apply b , tagged = needsTags , childGroups = childGroups q , wants = WantsQT , unQ =Star { getOrbit = mOrbit , resetOrbits = resetOrbitTags , firstNull = mayFirstBeNull , unStar = q } } PCarat dopa -> test (Test_BOL,dopa) PDollar dopa -> test (Test_EOL,dopa) PChar {} -> one PDot {} -> one PAny {} -> one PAnyNot {} -> one -- CompOption's newSyntax enables these escaped anchors PEscape dopa '`' | xtra -> test (Test_BOB,dopa) PEscape dopa '\'' | xtra -> test (Test_EOB,dopa) PEscape dopa '<' | xtra -> test (Test_BOW,dopa) PEscape dopa '>' | xtra -> test (Test_EOW,dopa) PEscape dopa 'b' | xtra -> test (Test_EdgeWord,dopa) PEscape dopa 'B' | xtra -> test (Test_NotEdgeWord,dopa) -- otherwise escape codes are just the escaped character PEscape {} -> one -- A PGroup node in the Pattern tree does not become a node -- in the Q/P tree. A PGroup can share and pass along a -- preTag (with Advice) with other branches, but will pass -- down an Apply postTag. -- -- If the parent index is Nothing then this is part of a -- non-capturing subtree and ignored. This is a lazy and -- efficient alternative to rebuidling the tree with PGroup -- Nothing replacing PGroup (Just _). -- -- Guarded by the getParentIndex /= Nothing check is the -- withParent command. -- PGroup Nothing p -> go p m1 m2 PGroup (Just this) p -> do mParent <- getParentIndex case mParent of Nothing -> go p m1 m2 -- just like PGroup Nothing p Just parent -> do -- 'a' may be Advice or Apply from parent or Apply created here a <- if noTag m1 then uniq "PGroup start" else return m1 b <- if noTag m2 then uniq "PGroup stop" else return m2 flag <- ignore "PGroup ignore" {- -- 'b' may be Apply from parent or Apply created here b <- if isNothing (apply m2) then uniq "PGroup" else return m2 -} (q,resetGroupTags) <- withParent this (go p a b) -- all immediate child groups stop tags get listened to. -- 2009: makeGroup performs a tell, why after withParent? I am no longer sure. makeGroup (GroupInfo this parent (fromHandleTag a) (fromHandleTag b) flag) return $ q { nullQ = addGroupResetsToNullView resetGroupTags flag (nullQ q) , tagged = True , childGroups = True , preReset = resetGroupTags `mappend` (preReset q) , postSet = (postSet q) `mappend` [flag] } -- A PNonCapture node in the Pattern tree does not become a -- node in the Q/P tree. It sets the parent to Nothing while -- processing the sub-tree. PNonCapture p -> nonCapture (go p m1 m2) -- these are here for completeness of the case branches, currently starTrans replaces them all PPlus {} -> die PQuest {} -> die PBound {} -> die -- PNonEmpty is deprecated, and not produced in Pattern by starTrans anymore PNonEmpty {} -> die {- Similar to change in WinTags for QT/QNFA: Change the NullView to use a tasktags instead of wintags since they are all PreUpdate -- PNonEmpty means the child pattern p can be skipped by -- bypassing the pattern. This is only used in the case p -- can accept 0 and can accept more than zero characters -- (thus the assertions, enforcted by CorePattern.starTrans). -- The important thing about this case is intercept the -- "accept 0" possibility and replace with "skip". PNonEmpty p -> mdo let needsTags = canAccept q a <- if noTag m1 && needsTags then uniq Minimize else return m1 b <- if noTag m2 && needsTags then uniq Maximize else return m2 q <- go p (toAdvice a) (toAdvice b) when (not needsTags) (err $ "PNonEmpty could not accept characters: "++show (p,pOrig)) when (mustAccept q) (err $ "patternToQ : PNonEmpty provided with a *mustAccept* pattern: "++show (p,pOrig)) return $ Q { nullQ = emptyNull (preTags (apply a) (apply b)) -- The meaning of NonEmpty , takes = (0,snd (takes q)) -- like Or, drop lower bound to 0 , preReset = [] , preTag = apply a, postTag = apply b -- own the closing tag so it will not end a PGroup , tagged = needsTags , childGroups = childGroups q , wants = wants q -- the test case is "x" =~ "(.|$){1,3}" , unQ = NonEmpty q } -} {- emptyNull :: TagList -> NullView emptyNull tags = (mempty, tags) : [] testNull :: TestInfo -> TagList -> NullView testNull (w,d) tags = (SetTestInfo (Map.singleton w (Set.singleton d)), tags) : [] -- Prepend tags to nullView addTagsToNullView :: TagList -> NullView -> NullView addTagsToNullView [] oldNV = oldNV addTagsToNullView tags oldNV= do (oldTest,oldTags) <- oldNV return (oldTest,tags `mappend` oldTags) -} -- xxx todo -- -- see of PNonEmpty -> NonEmpty -> TNFA is really smarter than POr about tags regex-tdfa-1.1.8/Text/Regex/TDFA/IntArrTrieSet.hs0000644000000000000000000000541311536532136017514 0ustar0000000000000000{- | This creates a lazy Trie based on a finite range of Ints and is used to memorize a function over the subsets of this range. To create a Trie you need two supply 2 things * Range of keys to bound * A function or functions used to construct the value for a subset of keys The Trie uses the Array type internally. -} module Text.Regex.TDFA.IntArrTrieSet where {- By Chris Kuklewicz, 2007. BSD License, see the LICENSE file. -} import Data.Array.IArray(Array,(!),listArray) data TrieSet v = TrieSet { value :: v , next :: Array Int (TrieSet v) } -- | This is the accessor for the Trie. The list of keys should be -- sorted. lookupAsc :: TrieSet v -> [Int] -> v lookupAsc (TrieSet {value=v,next=n}) = (\keys -> case keys of [] -> v (key:keys') -> lookupAsc (n!key) keys') -- | This is a Trie constructor for a complete range of keys. fromBounds :: (Int,Int) -- ^ (lower,upper) range of keys, lower<=upper -> ([Int] -> v) -- ^ Function from list of keys to its value. -- It must work for distinct ascending lists. -> TrieSet v -- ^ The constructed Trie fromBounds (start,stop) keysToValue = build id start where build keys low = TrieSet { value = keysToValue (keys []) , next = listArray (low,stop) [build (keys.(x:)) (succ x) | x <- [low..stop] ] } -- | This is a Trie constructor for a complete range of keys that uses -- a function from single values and a merge operation on values to -- fill the Trie. fromSinglesMerge :: v -- ^ value for (lookupAsc trie []) -> (v->v->v) -- ^ merge operation on values -> (Int,Int) -- ^ (lower,upper) range of keys, lower<=upper -> (Int->v) -- ^ Function from a single key to its value -> TrieSet v -- ^ The constructed Trie fromSinglesMerge emptyValue mergeValues bound keyToValue = trieSet where trieSet = fromBounds bound keysToValue' keysToValue' keys = case keys of [] -> emptyValue [key] -> keyToValue key _ -> mergeValues (keysToValue (init keys)) (keysToValue [last keys]) keysToValue = lookupAsc trieSet -- | This is a Trie constructor for a complete range of keys that uses -- a function from single values and a sum operation of values to fill -- the Trie. fromSinglesSum :: ([v]->v) -- ^ summation operation for values -> (Int,Int) -- ^ (lower,upper) range of keys, lower <= upper -> (Int->v) -- ^ Function from a single key to its value -> TrieSet v -- ^ The constructed Trie fromSinglesSum mergeValues bound keyToValue = trieSet where trieSet = fromBounds bound keysToValue' keysToValue' = mergeValues . map keyToValue regex-tdfa-1.1.8/Text/Regex/TDFA/Pattern.hs0000644000000000000000000004270011536532137016433 0ustar0000000000000000-- | This "Text.Regex.TDFA.Pattern" module provides the 'Pattern' data -- type and its subtypes. This 'Pattern' type is used to represent -- the parsed form of a Regular Expression. module Text.Regex.TDFA.Pattern (Pattern(..) ,PatternSet(..) ,PatternSetCharacterClass(..) ,PatternSetCollatingElement(..) ,PatternSetEquivalenceClass(..) ,GroupIndex ,DoPa(..) ,showPattern -- ** Internal use ,starTrans -- ** Internal use, Operations to support debugging under ghci ,starTrans',simplify',dfsPattern ) where {- By Chris Kuklewicz, 2007. BSD License, see the LICENSE file. -} import Data.List(intersperse,partition) import qualified Data.Set as Set(toAscList,toList) import Data.Set(Set) -- XXX EnumSet import Text.Regex.TDFA.Common(DoPa(..),GroupIndex,common_error) err :: String -> a err = common_error "Text.Regex.TDFA.Pattern" -- | Pattern is the type returned by the regular expression parser. -- This is consumed by the CorePattern module and the tender leaves -- are nibbled by the TNFA module. data Pattern = PEmpty | PGroup (Maybe GroupIndex) Pattern -- Nothing to indicate non-matching PGroup (Nothing never used!) | POr [Pattern] -- flattened by starTrans | PConcat [Pattern] -- flattened by starTrans | PQuest Pattern -- eliminated by starTrans | PPlus Pattern -- eliminated by starTrans | PStar Bool Pattern -- True means mayFirstBeNull is True | PBound Int (Maybe Int) Pattern -- eliminated by starTrans -- The rest of these need an index of where in the regex string it is from | PCarat {getDoPa::DoPa} | PDollar {getDoPa::DoPa} -- The following test and accept a single character | PDot {getDoPa::DoPa} -- Any character (newline?) at all | PAny {getDoPa::DoPa,getPatternSet::PatternSet} -- Square bracketed things | PAnyNot {getDoPa::DoPa,getPatternSet::PatternSet} -- Inverted square bracketed things | PEscape {getDoPa::DoPa,getPatternChar::Char} -- Backslashed Character | PChar {getDoPa::DoPa,getPatternChar::Char} -- Specific Character -- The following are semantic tags created in starTrans, not the parser | PNonCapture Pattern -- introduced by starTrans | PNonEmpty Pattern -- introduced by starTrans deriving (Eq,Show) -- | I have not been checking, but this should have the property that -- parsing the resulting string should result in an identical Pattern. -- This is not true if starTrans has created PNonCapture and PNonEmpty -- values or a (PStar False). The contents of a "[ ]" grouping are -- always shown in a sorted canonical order. showPattern :: Pattern -> String showPattern pIn = case pIn of PEmpty -> "()" PGroup _ p -> paren (showPattern p) POr ps -> concat $ intersperse "|" (map showPattern ps) PConcat ps -> concatMap showPattern ps PQuest p -> (showPattern p)++"?" PPlus p -> (showPattern p)++"+" -- If PStar has mayFirstBeNull False then reparsing will forget this flag PStar _ p -> (showPattern p)++"*" PBound i (Just j) p | i==j -> showPattern p ++ ('{':show i)++"}" PBound i mj p -> showPattern p ++ ('{':show i) ++ maybe ",}" (\j -> ',':show j++"}") mj -- PCarat _ -> "^" PDollar _ -> "$" PDot _ -> "." PAny _ ps -> ('[':show ps)++"]" PAnyNot _ ps -> ('[':'^':show ps)++"]" PEscape _ c -> '\\':c:[] PChar _ c -> [c] -- The following were not directly from the parser, and will not be parsed in properly PNonCapture p -> showPattern p PNonEmpty p -> showPattern p where {- groupRange x n (y:ys) = if (fromEnum y)-(fromEnum x) == n then groupRange x (succ n) ys else (if n <=3 then take n [x..] else x:'-':(toEnum (pred n+fromEnum x)):[]) ++ groupRange y 1 ys groupRange x n [] = if n <=3 then take n [x..] else x:'-':(toEnum (pred n+fromEnum x)):[] -} paren s = ('(':s)++")" data PatternSet = PatternSet (Maybe (Set Char)) (Maybe (Set PatternSetCharacterClass)) (Maybe (Set PatternSetCollatingElement)) (Maybe (Set PatternSetEquivalenceClass)) deriving (Eq) instance Show PatternSet where showsPrec i (PatternSet s scc sce sec) = let (special,normal) = maybe ("","") ((partition (`elem` "]-")) . Set.toAscList) s charSpec = (if ']' `elem` special then (']':) else id) (byRange normal) scc' = maybe "" ((concatMap show) . Set.toList) scc sce' = maybe "" ((concatMap show) . Set.toList) sce sec' = maybe "" ((concatMap show) . Set.toList) sec in shows charSpec . showsPrec i scc' . showsPrec i sce' . showsPrec i sec' . if '-' `elem` special then showChar '-' else id where byRange xAll@(x:xs) | length xAll <=3 = xAll | otherwise = groupRange x 1 xs byRange _ = undefined groupRange x n (y:ys) = if (fromEnum y)-(fromEnum x) == n then groupRange x (succ n) ys else (if n <=3 then take n [x..] else x:'-':(toEnum (pred n+fromEnum x)):[]) ++ groupRange y 1 ys groupRange x n [] = if n <=3 then take n [x..] else x:'-':(toEnum (pred n+fromEnum x)):[] newtype PatternSetCharacterClass = PatternSetCharacterClass {unSCC::String} deriving (Eq,Ord) newtype PatternSetCollatingElement = PatternSetCollatingElement {unSCE::String} deriving (Eq,Ord) newtype PatternSetEquivalenceClass = PatternSetEquivalenceClass {unSEC::String} deriving (Eq,Ord) instance Show PatternSetCharacterClass where showsPrec _ p = showChar '[' . showChar ':' . shows (unSCC p) . showChar ':' . showChar ']' instance Show PatternSetCollatingElement where showsPrec _ p = showChar '[' . showChar '.' . shows (unSCE p) . showChar '.' . showChar ']' instance Show PatternSetEquivalenceClass where showsPrec _ p = showChar '[' . showChar '=' . shows (unSEC p) . showChar '=' . showChar ']' -- == -- == -- == -- == -- == -- == -- == -- == -- == -- == -- == -- == -- == -- == -- == -- == -- == -- == -- | Do the transformation and simplification in a single traversal. -- This removes the PPlus, PQuest, and PBound values, changing to POr -- and PEmpty and PStar True/False. For some PBound values it adds -- PNonEmpty and PNonCapture semantic marker. It also simplifies to -- flatten out nested POr and PConcat instances and eliminate some -- unneeded PEmpty values. starTrans :: Pattern -> Pattern starTrans = dfsPattern (simplify' . starTrans') -- | Apply a Pattern transfomation function depth first dfsPattern :: (Pattern -> Pattern) -- ^ The transformation function -> Pattern -- ^ The Pattern to transform -> Pattern -- ^ The transformed Pattern dfsPattern f = dfs where unary c = f . c . dfs dfs pattern = case pattern of POr ps -> f (POr (map dfs ps)) PConcat ps -> f (PConcat (map dfs ps)) PGroup i p -> unary (PGroup i) p PQuest p -> unary PQuest p PPlus p -> unary PPlus p PStar i p -> unary (PStar i) p PBound i mi p -> unary (PBound i mi) p _ -> f pattern {- Replace by PNonCapture unCapture = dfsPattern unCapture' where unCapture' (PGroup (Just _) p) = PGroup Nothing p unCapture' x = x -} reGroup :: Pattern -> Pattern reGroup p@(PConcat xs) | 2 <= length xs = PGroup Nothing p reGroup p@(POr xs) | 2 <= length xs = PGroup Nothing p reGroup p = p starTrans' :: Pattern -> Pattern starTrans' pIn = case pIn of -- We know that "p" has been simplified in each of these cases: PQuest p -> POr [p,PEmpty] {- The PStar should not capture 0 characters on its first iteration, so set its mayFirstBeNull flag to False -} PPlus p | canOnlyMatchNull p -> p | otherwise -> asGroup $ PConcat [reGroup p,PStar False p] {- "An ERE matching a single character repeated by an '*' , '?' , or an interval expression shall not match a null expression unless this is the only match for the repetition or it is necessary to satisfy the exact or minimum number of occurrences for the interval expression." -} {- p? is p|PEmpty which prefers even a 0-character match for p p{0,1} is p? is POr [p,PEmpty] p{0,2} is (pp?)? NOT p?p? p{0,3} is (p(pp?)?)? p{1,2} is like pp{0,1} is like pp? but see below p{2,5} is ppp{0,3} is pp(p(pp?)?)? But this is not always right. Because if the second use of p in p?p? matches 0 characters then the perhaps non 0 character match of the first p is overwritten. We need a new operation "p!" that means "p?" unless "p" match 0 characters, in which case skip p as if it failed in "p?". Thus when p cannot accept 0 characters p! and p? are equivalent. And when p can only match 0 characters p! is PEmpty. So for simplicity, only use ! when p can match 0 characters but not only 0 characters. Call this (PNonEmpty p) in the Pattern type. p! is PNonEmpty p is POr [PEmpty,p] IS THIS TRUE? Use QuickCheck? Note that if p cannot match 0 characters then p! is p? and vice versa The p{0,1} is still always p? and POr [p,PEmpty] Now p{0,2} means p?p! or (pp!)? and p{0,3} means (p(pp!)!)? or p?p!p! Equivalently p?p! and p?p!p! And p{2,2} is p'p and p{3,3} is p'p'p and p{4} is p'p'p'p The p{1,2} is pp! and p{1,3} is pp!p! or p(pp!)! And p{2,4} means p'pp!p! and p{3,6} is p'p'pp!p!p! or p'p'p(p(pp!)!)! But this second form still has a problem: the (pp!)! can have the first p match 0 and the second p match non-zero. This showed up for (.|$){1,3} since ($.!)! should not be a valid path but altered the qt_win commands. Thus only p'p'pp!p!p! has the right semantics. For completeness: if p can only match only 0 characters then the cases are p{0,0} is (), p{0,_} = p?, p{_,_} is p if p can match 0 or non-zero characters then cases are p{0,0} is (), p{0,1} is (p)?, p{0,2} is (pp!)?, p{0,3} is (pp!p!)? p{1,1} is p, p{1,2} is pp!, p{1,3} is pp!p!, p{1,4} is pp!p!p! p{2,2} is p'p, p{2,3} is p'pp!, p{2,4} is p'pp!p! or p'p(pp!)! p{2,5} is p'pp!p!p! or p'p(p(pp!)!)! p{3,3} is p'p'p, p{3,4} is p'p'pp!, p{3,5} is p'p'pp!p!, p{3,6} is p'p'pp!p!p! if p can only match 1 or more characters then cases are p{0,0} is () p{0,1} is p?, p{0,2} is (pp?)?, p{0,3} is (p(pp?)?)?, p{0,4} is (pp{0,3})? p{1,1} is p, p{1,j} is pp{0,pred j} p{2,2} is p'p, p{2,3} is p'pp?, p{2,4} is p'p(pp?)?, p{2,5} = p'p{1,4} = p'(pp{0,3}) p{3,3} is p'p'p, p{3,4} is p'p'pp?, p{3,5} is p'p'p(pp?)?, p{3,6} is And by this logic, the PStar False is really p*! So p{0,} is p* and p{1,} is pp*! and p{2,} is p'pp*! and p{3,} is p'p'pp*! The (nonEmpty' p) below is the only way PNonEmpty is introduced into the Pattern. It is always preceded by p inside a PConcat list. The p involved never simplifies to PEmpty. Thus it is impossible to have PNonEmpty directly nested, i.e. (PNonEmpty (PNonEmpty _)) never occurs even after simplifications. The (nonCapture' p) below is the only way PNonCapture is introduced into the Pattern. It is always followed by p inside a PConcat list. -} -- Easy cases PBound i _ _ | i<0 -> PEmpty -- impossibly malformed PBound i (Just j) _ | i>j -> PEmpty -- impossibly malformed PBound _ (Just 0) _ -> PEmpty -- Medium cases PBound 0 Nothing p | canOnlyMatchNull p -> quest p | otherwise -> PStar True p PBound 0 (Just 1) p -> quest p -- Hard cases PBound i Nothing p | canOnlyMatchNull p -> p | otherwise -> asGroup . PConcat $ apply (nc'p:) (pred i) [reGroup p,PStar False p] where nc'p = nonCapture' p PBound 0 (Just j) p | canOnlyMatchNull p -> quest p -- The first operation is quest NOT nonEmpty. This can be tested with -- "a\nb" "((^)?|b){0,3}" and "a\nb" "((^)|b){0,3}" | otherwise -> quest . (concat' p) $ apply (nonEmpty' . (concat' p)) (j-2) (nonEmpty' p) {- 0.99.6 remove | cannotMatchNull p -> apply (quest' . (concat' p)) (pred j) (quest' p) | otherwise -> POr [ simplify' (PConcat (p : replicate (pred j) (nonEmpty' p))) , PEmpty ] -} {- 0.99.6 add, 0.99.7 remove PBound i (Just j) p | canOnlyMatchNull p -> p | i == j -> PConcat $ apply (p':) (pred i) [p] | otherwise -> PConcat $ apply (p':) (pred i) [p,apply (nonEmpty' . (concat' p)) (j-i-1) (nonEmpty' p) ] where p' = nonCapture' p -} {- 0.99.7 add -} PBound i (Just j) p | canOnlyMatchNull p -> p | i == j -> asGroup . PConcat $ apply (nc'p:) (pred i) [reGroup p] | otherwise -> asGroup . PConcat $ apply (nc'p:) (pred i) [reGroup p,apply (nonEmpty' . (concat' p)) (j-i-1) (ne'p) ] where nc'p = nonCapture' p ne'p = nonEmpty' p {- 0.99.6 | cannotMatchNull p -> PConcat $ apply (p':) (pred i) $ (p:) $ [apply (quest' . (concat' p)) (pred (j-i)) (quest' p)] | otherwise -> PConcat $ (replicate (pred i) p') ++ p : (replicate (j-i) (nonEmpty' p)) -} PStar mayFirstBeNull p | canOnlyMatchNull p -> if mayFirstBeNull then quest p else PEmpty | otherwise -> pass -- Left intact PEmpty -> pass PGroup {} -> pass POr {} -> pass PConcat {} -> pass PCarat {} -> pass PDollar {} -> pass PDot {} -> pass PAny {} -> pass PAnyNot {} -> pass PEscape {} -> pass PChar {} -> pass PNonCapture {} -> pass PNonEmpty {} -> pass -- TODO : remove PNonEmpty from program where quest = (\ p -> POr [p,PEmpty]) -- require p to have been simplified -- quest' = (\ p -> simplify' $ POr [p,PEmpty]) -- require p to have been simplified concat' a b = simplify' $ PConcat [reGroup a,reGroup b] -- require a and b to have been simplified nonEmpty' = (\ p -> simplify' $ POr [PEmpty,p]) -- 2009-01-19 : this was PNonEmpty nonCapture' = PNonCapture apply f n x = foldr ($) x (replicate n f) -- function f applied n times to x : f^n(x) asGroup p = PGroup Nothing (simplify' p) pass = pIn -- | Function to transform a pattern into an equivalent, but less -- redundant form. Nested 'POr' and 'PConcat' are flattened. PEmpty -- is propagated. simplify' :: Pattern -> Pattern simplify' x@(POr _) = let ps' = case span notPEmpty (flatten x) of (notEmpty,[]) -> notEmpty (notEmpty,_:rest) -> notEmpty ++ (PEmpty:filter notPEmpty rest) -- keep 1st PEmpty only in case ps' of [] -> PEmpty [p] -> p _ -> POr ps' simplify' x@(PConcat _) = let ps' = filter notPEmpty (flatten x) in case ps' of [] -> PEmpty [p] -> p _ -> PConcat ps' -- PConcat ps' simplify' (PStar _ PEmpty) = PEmpty simplify' (PNonCapture PEmpty) = PEmpty -- 2009, perhaps useful --simplify' (PNonEmpty PEmpty) = err "simplify' (PNonEmpty PEmpty) = should be Impossible!" -- 2009 simplify' other = other -- | Function to flatten nested POr or nested PConcat applicataions. flatten :: Pattern -> [Pattern] flatten (POr ps) = (concatMap (\x -> case x of POr ps' -> ps' p -> [p]) ps) flatten (PConcat ps) = (concatMap (\x -> case x of PConcat ps' -> ps' p -> [p]) ps) flatten _ = err "flatten can only be applied to POr or PConcat" notPEmpty :: Pattern -> Bool notPEmpty PEmpty = False notPEmpty _ = True -- | Determines if pIn will fail or accept [] and never accept any -- characters. Treat PCarat and PDollar as True. canOnlyMatchNull :: Pattern -> Bool canOnlyMatchNull pIn = case pIn of PEmpty -> True PGroup _ p -> canOnlyMatchNull p POr ps -> all canOnlyMatchNull ps PConcat ps -> all canOnlyMatchNull ps PQuest p -> canOnlyMatchNull p PPlus p -> canOnlyMatchNull p PStar _ p -> canOnlyMatchNull p PBound _ (Just 0) _ -> True PBound _ _ p -> canOnlyMatchNull p PCarat _ -> True PDollar _ -> True PNonCapture p -> canOnlyMatchNull p -- PNonEmpty p -> canOnlyMatchNull p -- like PQuest _ ->False {- -- | If 'cannotMatchNull' returns 'True' then it is known that the -- 'Pattern' will never accept an empty string. If 'cannotMatchNull' -- returns 'False' then it is possible but not definite that the -- 'Pattern' could accept an empty string. cannotMatchNull :: Pattern -> Bool cannotMatchNull pIn = case pIn of PEmpty -> False PGroup _ p -> cannotMatchNull p POr [] -> False POr ps -> all cannotMatchNull ps PConcat [] -> False PConcat ps -> any cannotMatchNull ps PQuest _ -> False PPlus p -> cannotMatchNull p PStar {} -> False PBound 0 _ _ -> False PBound _ _ p -> cannotMatchNull p PCarat _ -> False PDollar _ -> False PNonCapture p -> cannotMatchNull p -- PNonEmpty _ -> False -- like PQuest _ -> True -} regex-tdfa-1.1.8/Text/Regex/TDFA/ReadRegex.hs0000644000000000000000000001451511536532137016667 0ustar0000000000000000{-# OPTIONS_GHC -fno-warn-missing-signatures #-} -- | This is a POSIX version of parseRegex that allows NUL characters. -- Lazy/Possessive/Backrefs are not recognized. Anchors ^ and $ are -- recognized. -- -- The PGroup returned always have (Maybe GroupIndex) set to (Just _) -- and never to Nothing. module Text.Regex.TDFA.ReadRegex (parseRegex) where {- By Chris Kuklewicz, 2007. BSD License, see the LICENSE file. -} import Text.Regex.TDFA.Pattern {- all -} import Text.ParserCombinators.Parsec((<|>), (), unexpected, try, runParser, many, getState, setState, CharParser, ParseError, sepBy1, option, notFollowedBy, many1, lookAhead, eof, between, string, noneOf, digit, char, anyChar) import Control.Monad(liftM, when, guard) import qualified Data.Set as Set(fromList) -- | BracketElement is internal to this module data BracketElement = BEChar Char | BEChars String | BEColl String | BEEquiv String | BEClass String -- | Return either an error message or a tuple of the Pattern and the -- largest group index and the largest DoPa index (both have smallest -- index of 1). Since the regular expression is supplied as [Char] it -- automatically supports unicode and '\NUL' characters. parseRegex :: String -> Either ParseError (Pattern,(GroupIndex,DoPa)) parseRegex x = runParser (do pat <- p_regex eof (lastGroupIndex,lastDopa) <- getState return (pat,(lastGroupIndex,DoPa lastDopa))) (0,0) x x p_regex :: CharParser (GroupIndex,Int) Pattern p_regex = liftM POr $ sepBy1 p_branch (char '|') -- man re_format helps alot, it says one-or-more pieces so this is -- many1 not many. Use "()" to indicate an empty piece. p_branch = liftM PConcat $ many1 p_piece p_piece = (p_anchor <|> p_atom) >>= p_post_atom -- correct specification p_atom = p_group <|> p_bracket <|> p_char "an atom" group_index :: CharParser (GroupIndex,Int) (Maybe GroupIndex) group_index = do (gi,ci) <- getState let index = succ gi setState (index,ci) return (Just index) p_group = lookAhead (char '(') >> do index <- group_index liftM (PGroup index) $ between (char '(') (char ')') p_regex -- p_post_atom takes the previous atom as a parameter p_post_atom atom = (char '?' >> return (PQuest atom)) <|> (char '+' >> return (PPlus atom)) <|> (char '*' >> return (PStar True atom)) <|> p_bound atom <|> return atom p_bound atom = try $ between (char '{') (char '}') (p_bound_spec atom) p_bound_spec atom = do lowS <- many1 digit let lowI = read lowS highMI <- option (Just lowI) $ try $ do _ <- char ',' -- parsec note: if 'many digits' fails below then the 'try' ensures -- that the ',' will not match the closing '}' in p_bound, same goes -- for any non '}' garbage after the 'many digits'. highS <- many digit if null highS then return Nothing -- no upper bound else do let highI = read highS guard (lowI <= highI) return (Just (read highS)) return (PBound lowI highMI atom) -- An anchor cannot be modified by a repetition specifier p_anchor = (char '^' >> liftM PCarat char_index) <|> (char '$' >> liftM PDollar char_index) <|> try (do _ <- string "()" index <- group_index return $ PGroup index PEmpty) "empty () or anchor ^ or $" char_index = do (gi,ci) <- getState let ci' = succ ci setState (gi,ci') return (DoPa ci') p_char = p_dot <|> p_left_brace <|> p_escaped <|> p_other_char where p_dot = char '.' >> char_index >>= return . PDot p_left_brace = try $ (char '{' >> notFollowedBy digit >> char_index >>= return . (`PChar` '{')) p_escaped = char '\\' >> anyChar >>= \c -> char_index >>= return . (`PEscape` c) p_other_char = noneOf specials >>= \c -> char_index >>= return . (`PChar` c) where specials = "^.[$()|*+?{\\" -- parse [bar] and [^bar] sets of characters p_bracket = (char '[') >> ( (char '^' >> p_set True) <|> (p_set False) ) -- p_set :: Bool -> GenParser Char st Pattern p_set invert = do initial <- (option "" ((char ']' >> return "]") <|> (char '-' >> return "-"))) values <- if null initial then many1 p_set_elem else many p_set_elem _ <- char ']' ci <- char_index let chars = maybe'set $ initial ++ [c | BEChar c <- values ] ++ concat [s | BEChars s <- values ] colls = maybe'set [PatternSetCollatingElement coll | BEColl coll <- values ] equivs = maybe'set [PatternSetEquivalenceClass equiv | BEEquiv equiv <- values] class's = maybe'set [PatternSetCharacterClass a'class | BEClass a'class <- values] maybe'set x = if null x then Nothing else Just (Set.fromList x) sets = PatternSet chars class's colls equivs sets `seq` return $ if invert then PAnyNot ci sets else PAny ci sets -- From here down the code is the parser and functions for pattern [ ] set things p_set_elem = p_set_elem_class <|> p_set_elem_equiv <|> p_set_elem_coll <|> p_set_elem_range <|> p_set_elem_char "Failed to parse bracketed string" p_set_elem_class = liftM BEClass $ try (between (string "[:") (string ":]") (many1 $ noneOf ":]")) p_set_elem_equiv = liftM BEEquiv $ try (between (string "[=") (string "=]") (many1 $ noneOf "=]")) p_set_elem_coll = liftM BEColl $ try (between (string "[.") (string ".]") (many1 $ noneOf ".]")) p_set_elem_range = try $ do start <- noneOf "]-" _ <- char '-' end <- noneOf "]" -- bug fix: check start <= end before "return (BEChars [start..end])" if start <= end then return (BEChars [start..end]) else unexpected "End point of dashed character range is less than starting point" p_set_elem_char = do c <- noneOf "]" when (c == '-') $ do atEnd <- (lookAhead (char ']') >> return True) <|> (return False) when (not atEnd) (unexpected "A dash is in the wrong place in a bracket") return (BEChar c) regex-tdfa-1.1.8/Text/Regex/TDFA/Sequence.hs0000644000000000000000000000623511536532137016571 0ustar0000000000000000{-| This modules provides 'RegexMaker' and 'RegexLike' instances for using 'ByteString' with the DFA backend ("Text.Regex.Lib.WrapDFAEngine" and "Text.Regex.Lazy.DFAEngineFPS"). This module is usually used via import "Text.Regex.TDFA". This exports instances of the high level API and the medium level API of 'compile','execute', and 'regexec'. -} module Text.Regex.TDFA.Sequence( Regex ,CompOption ,ExecOption ,compile ,execute ,regexec ) where import Data.Sequence(Seq) import Data.Foldable as F(toList) import Text.Regex.Base(MatchArray,RegexContext(..),RegexMaker(..),RegexLike(..),Extract(..)) import Text.Regex.Base.Impl(polymatch,polymatchM) import Text.Regex.TDFA.Common(Regex(..),CompOption,ExecOption(captureGroups)) import Text.Regex.TDFA.String() -- piggyback on RegexMaker for String import Text.Regex.TDFA.TDFA(patternToRegex) import Text.Regex.TDFA.ReadRegex(parseRegex) import Data.Array.IArray((!),elems) import Data.Maybe(listToMaybe) import Text.Regex.TDFA.NewDFA.Engine(execMatch) import Text.Regex.TDFA.NewDFA.Tester as Tester(matchTest) {- By Chris Kuklewicz, 2007. BSD License, see the LICENSE file. -} instance RegexContext Regex (Seq Char) (Seq Char) where match = polymatch matchM = polymatchM instance RegexMaker Regex CompOption ExecOption (Seq Char) where makeRegexOptsM c e source = case parseRegex (F.toList source) of Left err -> fail $ "parseRegex for Text.Regex.TDFA.Sequence failed:"++show err Right pattern -> return $ patternToRegex pattern c e instance RegexLike Regex (Seq Char) where matchOnce r s = listToMaybe (matchAll r s) matchAll r s = execMatch r 0 '\n' s matchCount r s = length (matchAll r' s) where r' = r { regex_execOptions = (regex_execOptions r) {captureGroups = False} } matchTest = Tester.matchTest matchOnceText regex source = fmap (\ma -> let (o,l) = ma!0 in (before o source ,fmap (\ol -> (extract ol source,ol)) ma ,after (o+l) source)) (matchOnce regex source) matchAllText regex source = map (fmap (\ol -> (extract ol source,ol))) (matchAll regex source) compile :: CompOption -- ^ Flags (summed together) -> ExecOption -- ^ Flags (summed together) -> (Seq Char) -- ^ The regular expression to compile -> Either String Regex -- ^ Returns: the compiled regular expression compile compOpt execOpt bs = case parseRegex (F.toList bs) of Left err -> Left ("parseRegex for Text.Regex.TDFA.Sequence failed:"++show err) Right pattern -> Right (patternToRegex pattern compOpt execOpt) execute :: Regex -- ^ Compiled regular expression -> (Seq Char) -- ^ ByteString to match against -> Either String (Maybe MatchArray) execute r bs = Right (matchOnce r bs) regexec :: Regex -- ^ Compiled regular expression -> (Seq Char) -- ^ ByteString to match against -> Either String (Maybe ((Seq Char), (Seq Char), (Seq Char), [(Seq Char)])) regexec r bs = case matchOnceText r bs of Nothing -> Right (Nothing) Just (pre,mt,post) -> let main = fst (mt!0) rest = map fst (tail (elems mt)) -- will be [] in Right (Just (pre,main,post,rest)) regex-tdfa-1.1.8/Text/Regex/TDFA/String.hs0000644000000000000000000000627111536532137016267 0ustar0000000000000000{- | This modules provides 'RegexMaker' and 'RegexLike' instances for using 'String' with the TDFA backend. This exports instances of the high level API and the medium level API of 'compile','execute', and 'regexec'. -} {- By Chris Kuklewicz, 2009. BSD License, see the LICENSE file. -} module Text.Regex.TDFA.String( -- ** Types Regex ,MatchOffset ,MatchLength ,CompOption ,ExecOption -- ** Medium level API functions ,compile ,execute ,regexec ) where import Text.Regex.Base.Impl(polymatch,polymatchM) import Text.Regex.Base.RegexLike(RegexMaker(..),RegexLike(..),RegexContext(..),MatchOffset,MatchLength,MatchArray) import Text.Regex.TDFA.Common(common_error,Regex(..),CompOption,ExecOption(captureGroups)) import Text.Regex.TDFA.ReadRegex(parseRegex) import Text.Regex.TDFA.TDFA(patternToRegex) import Data.Array.IArray((!),elems,amap) import Data.Maybe(listToMaybe) import Text.Regex.TDFA.NewDFA.Engine(execMatch) import Text.Regex.TDFA.NewDFA.Tester as Tester(matchTest) err :: String -> a err = common_error "Text.Regex.TDFA.String" unwrap :: Either String v -> v unwrap x = case x of Left msg -> err ("Text.Regex.TDFA.String died: "++msg) Right v -> v compile :: CompOption -- ^ Flags (summed together) -> ExecOption -- ^ Flags (summed together) -> String -- ^ The regular expression to compile (ASCII only, no null bytes) -> Either String Regex -- ^ Returns: the compiled regular expression compile compOpt execOpt source = case parseRegex source of Left msg -> Left ("parseRegex for Text.Regex.TDFA.String failed:"++show msg) Right pattern -> Right (patternToRegex pattern compOpt execOpt) instance RegexMaker Regex CompOption ExecOption String where makeRegexOpts c e source = unwrap (compile c e source) makeRegexOptsM c e source = either fail return $ compile c e source execute :: Regex -- ^ Compiled regular expression -> String -- ^ String to match against -> Either String (Maybe MatchArray) execute r s = Right (matchOnce r s) regexec :: Regex -- ^ Compiled regular expression -> String -- ^ String to match against -> Either String (Maybe (String, String, String, [String])) regexec r s = case matchOnceText r s of Nothing -> Right Nothing Just (pre,mt,post) -> let main = fst (mt!0) rest = map fst (tail (elems mt)) -- will be [] in Right (Just (pre,main,post,rest)) -- Minimal defintion for now instance RegexLike Regex String where matchOnce r s = listToMaybe (matchAll r s) matchAll r s = execMatch r 0 '\n' s matchCount r s = length (matchAll r' s) where r' = r { regex_execOptions = (regex_execOptions r) {captureGroups = False} } matchTest = Tester.matchTest -- matchOnceText matchAllText r s = let go i _ _ | i `seq` False = undefined go _i _t [] = [] go i t (x:xs) = let (off0,len0) = x!0 trans pair@(off,len) = (take len (drop (off-i) t),pair) t' = drop (off0+len0-i) t in amap trans x : seq t' (go (off0+len0) t' xs) in go 0 s (matchAll r s) instance RegexContext Regex String String where match = polymatch matchM = polymatchM regex-tdfa-1.1.8/Text/Regex/TDFA/TDFA.hs0000644000000000000000000004627611536532137015550 0ustar0000000000000000-- | "Text.Regex.TDFA.TDFA" converts the QNFA from TNFA into the DFA. -- A DFA state corresponds to a Set of QNFA states, repesented as list -- of Index which are used to lookup the DFA state in a lazy Trie -- which holds all possible subsets of QNFA states. module Text.Regex.TDFA.TDFA(patternToRegex,DFA(..),DT(..) ,examineDFA,nfaToDFA,dfaMap) where --import Control.Arrow((***)) import Control.Monad.Instances() import Data.Monoid(Monoid(..)) import Control.Monad.State(State,MonadState(..),execState) import Data.Array.IArray(Array,(!),bounds,{-assocs-}) import Data.IntMap(IntMap) import qualified Data.IntMap as IMap(empty,keys,delete,null,lookup,fromDistinctAscList ,member,unionWith,singleton,union ,toAscList,Key,elems,toList,insert ,insertWith,insertWithKey) import Data.IntMap.CharMap2(CharMap(..)) import qualified Data.IntMap.CharMap2 as Map(empty) --import Data.IntSet(IntSet) import qualified Data.IntSet as ISet(empty,singleton,null) import Data.List(foldl') import qualified Data.Map (Map,empty,member,insert,elems) import Data.Sequence as S((|>),{-viewl,ViewL(..)-}) import Text.Regex.TDFA.Common {- all -} import Text.Regex.TDFA.IntArrTrieSet(TrieSet) import qualified Text.Regex.TDFA.IntArrTrieSet as Trie(lookupAsc,fromSinglesMerge) import Text.Regex.TDFA.Pattern(Pattern) --import Text.Regex.TDFA.RunMutState(toInstructions) import Text.Regex.TDFA.TNFA(patternToNFA) --import Debug.Trace {- By Chris Kuklewicz, 2007. BSD License, see the LICENSE file. -} err :: String -> a err s = common_error "Text.Regex.TDFA.TDFA" s dlose :: DFA dlose = DFA { d_id = ISet.empty , d_dt = Simple' { dt_win = IMap.empty , dt_trans = Map.empty , dt_other = Transition dlose dlose mempty } } -- dumb smart constructor for tracing construction (I wanted to monitor laziness) {-# INLINE makeDFA #-} makeDFA :: SetIndex -> DT -> DFA makeDFA i dt = DFA i dt -- Note that no CompOption or ExecOption parameter is needed. nfaToDFA :: ((Index,Array Index QNFA),Array Tag OP,Array GroupIndex [GroupInfo]) -> CompOption -> ExecOption -> Regex nfaToDFA ((startIndex,aQNFA),aTagOp,aGroupInfo) co eo = Regex dfa startIndex indexBounds tagBounds trie aTagOp aGroupInfo ifa co eo where dfa = indexesToDFA [startIndex] indexBounds = bounds aQNFA tagBounds = bounds aTagOp ifa = (not (multiline co)) && isDFAFrontAnchored dfa indexesToDFA = {-# SCC "nfaToDFA.indexesToDFA" #-} Trie.lookupAsc trie -- Lookup in cache trie :: TrieSet DFA trie = Trie.fromSinglesMerge dlose mergeDFA (bounds aQNFA) indexToDFA newTransition :: DTrans -> Transition newTransition dtrans = Transition { trans_many = indexesToDFA (IMap.keys dtransWithSpawn) , trans_single = indexesToDFA (IMap.keys dtrans) , trans_how = dtransWithSpawn } where dtransWithSpawn = addSpawn dtrans makeTransition :: DTrans -> Transition makeTransition dtrans | hasSpawn = Transition { trans_many = indexesToDFA (IMap.keys dtrans) , trans_single = indexesToDFA (IMap.keys (IMap.delete startIndex dtrans)) , trans_how = dtrans } | otherwise = Transition { trans_many = indexesToDFA (IMap.keys dtrans) , trans_single = indexesToDFA (IMap.keys dtrans) , trans_how = dtrans } where hasSpawn = maybe False IMap.null (IMap.lookup startIndex dtrans) -- coming from (-1) means spawn a new starting item addSpawn :: DTrans -> DTrans addSpawn dtrans | IMap.member startIndex dtrans = dtrans | otherwise = IMap.insert startIndex mempty dtrans indexToDFA :: Index -> DFA -- used to seed the Trie from the NFA indexToDFA i = {-# SCC "nfaToDFA.indexToDFA" #-} makeDFA (ISet.singleton source) (qtToDT qtIn) where (QNFA {q_id = source,q_qt = qtIn}) = aQNFA!i qtToDT :: QT -> DT qtToDT (Testing {qt_test=wt, qt_dopas=dopas, qt_a=a, qt_b=b}) = Testing' { dt_test = wt , dt_dopas = dopas , dt_a = qtToDT a , dt_b = qtToDT b } qtToDT (Simple {qt_win=w, qt_trans=t, qt_other=o}) = Simple' { dt_win = makeWinner , dt_trans = fmap qtransToDFA t -- , dt_other = if IMap.null o then Just (newTransition $ IMap.singleton startIndex mempty) else Just (qtransToDFA o)} , dt_other = qtransToDFA o} where makeWinner :: IntMap {- Index -} Instructions -- (RunState ()) makeWinner | noWin w = IMap.empty | otherwise = IMap.singleton source (cleanWin w) qtransToDFA :: QTrans -> Transition qtransToDFA qtrans = {-# SCC "nfaToDFA.indexToDFA.qtransToDFA" #-} newTransition dtrans where dtrans :: DTrans dtrans =IMap.fromDistinctAscList . mapSnd (IMap.singleton source) $ best best :: [(Index {- Destination -} ,(DoPa,Instructions))] best = pickQTrans aTagOp $ qtrans -- The DFA states are built up by merging the singleton ones converted from the NFA. -- Thus the "source" indices in the DTrans should not collide. mergeDFA :: DFA -> DFA -> DFA mergeDFA d1 d2 = {-# SCC "nfaToDFA.mergeDFA" #-} makeDFA i dt where i = d_id d1 `mappend` d_id d2 dt = d_dt d1 `mergeDT` d_dt d2 mergeDT,nestDT :: DT -> DT -> DT mergeDT (Simple' w1 t1 o1) (Simple' w2 t2 o2) = Simple' w t o where w = w1 `mappend` w2 t = fuseDTrans -- t1 o1 t2 o2 o = mergeDTrans o1 o2 -- This is very much like mergeQTrans mergeDTrans :: Transition -> Transition -> Transition mergeDTrans (Transition {trans_how=dt1}) (Transition {trans_how=dt2}) = makeTransition dtrans where dtrans = IMap.unionWith IMap.union dt1 dt2 -- This is very much like fuseQTrans fuseDTrans :: CharMap Transition fuseDTrans = CharMap (IMap.fromDistinctAscList (fuse l1 l2)) where l1 = IMap.toAscList (unCharMap t1) l2 = IMap.toAscList (unCharMap t2) fuse :: [(IMap.Key, Transition)] -> [(IMap.Key, Transition)] -> [(IMap.Key, Transition)] fuse [] y = fmap (fmap (mergeDTrans o1)) y fuse x [] = fmap (fmap (mergeDTrans o2)) x fuse x@((xc,xa):xs) y@((yc,ya):ys) = case compare xc yc of LT -> (xc,mergeDTrans o2 xa) : fuse xs y EQ -> (xc,mergeDTrans xa ya) : fuse xs ys GT -> (yc,mergeDTrans o1 ya) : fuse x ys mergeDT dt1@(Testing' wt1 dopas1 a1 b1) dt2@(Testing' wt2 dopas2 a2 b2) = case compare wt1 wt2 of LT -> nestDT dt1 dt2 EQ -> Testing' { dt_test = wt1 , dt_dopas = dopas1 `mappend` dopas2 , dt_a = mergeDT a1 a2 , dt_b = mergeDT b1 b2 } GT -> nestDT dt2 dt1 mergeDT dt1@(Testing' {}) dt2 = nestDT dt1 dt2 mergeDT dt1 dt2@(Testing' {}) = nestDT dt2 dt1 nestDT dt1@(Testing' {dt_a=a,dt_b=b}) dt2 = dt1 { dt_a = mergeDT a dt2, dt_b = mergeDT b dt2 } nestDT _ _ = err "nestDT called on Simple -- cannot happen" patternToRegex :: (Pattern,(GroupIndex, DoPa)) -> CompOption -> ExecOption -> Regex patternToRegex pattern compOpt execOpt = nfaToDFA (patternToNFA compOpt pattern) compOpt execOpt dfaMap :: DFA -> Data.Map.Map SetIndex DFA dfaMap = seen (Data.Map.empty) where seen old d@(DFA {d_id=i,d_dt=dt}) = if i `Data.Map.member` old then old else let new = Data.Map.insert i d old in foldl' seen new (flattenDT dt) -- Get all trans_many states flattenDT :: DT -> [DFA] flattenDT (Simple' {dt_trans=(CharMap mt),dt_other=o}) = concatMap (\d -> [trans_many d {-,trans_single d-}]) . (:) o . IMap.elems $ mt flattenDT (Testing' {dt_a=a,dt_b=b}) = flattenDT a ++ flattenDT b examineDFA :: Regex -> String examineDFA (Regex {regex_dfa=dfa}) = unlines . (:) ("Number of reachable DFA states: "++show (length dfas)) . map show $ dfas where dfas = Data.Map.elems $ dfaMap dfa {- fillMap :: Tag -> IntMap (Position,Bool) fillMap tag = IMap.fromDistinctAscList [(t,(-1,True)) | t <- [0..tag] ] diffMap :: IntMap (Position,Bool) -> IntMap (Position,Bool) -> [(Index,(Position,Bool))] diffMap old new = IMap.toList (IMap.differenceWith (\a b -> if a==b then Nothing else Just b) old new) examineDFA :: (DFA,Index,Array Tag OP,Array GroupIndex [GroupInfo]) -> String examineDFA (dfa,_,aTags,_) = unlines $ map (examineDFA' (snd . bounds $ aTags)) (Map.elems $ dfaMap dfa) examineDFA' :: Tag -> DFA -> String examineDFA' maxTag = showDFA (fillMap maxTag) {- instance Show DFA where show (DFA {d_id=i,d_dt=dt}) = "DFA {d_id = "++show (ISet.toList i) ++"\n ,d_dt = "++ show dt ++"\n}" -} -- instance Show DT where show = showDT showDFA :: IntMap (Position,Bool) -> DFA -> String showDFA m (DFA {d_id=i,d_dt=dt}) = "DFA {d_id = "++show (ISet.toList i) ++"\n ,d_dt = "++ showDT m dt ++"\n}" -} -- pick QTrans can be told the unique source and knows all the -- destinations (hmm...along with qt_win)! So if in ascending destination order the last source -- is free to mutatate the old state. If the QTrans has only one -- entry then all we need to do is mutate that entry when making a -- transition. -- pickQTrans :: Array Tag OP -> QTrans -> [({-Destination-}Index,(DoPa,Instructions))] pickQTrans op tr = mapSnd (bestTrans op) . IMap.toList $ tr cleanWin :: WinTags -> Instructions cleanWin = toInstructions bestTrans :: Array Tag OP -> [TagCommand] -> (DoPa,Instructions) bestTrans _ [] = err "bestTrans : There were no transition choose from!" bestTrans aTagOP (f:fs) | null fs = canonical f | otherwise = answer -- if null toDisplay then answer else trace toDisplay answer where answer = foldl' pick (canonical f) fs {- toDisplay | null fs = "" | otherwise = unlines $ "bestTrans" : show (answer) : "from among" : concatMap (\x -> [show x, show (toInstructions (snd x))]) (f:fs) -} canonical :: TagCommand -> (DoPa,Instructions) canonical (dopa,spec) = (dopa, toInstructions spec) pick :: (DoPa,Instructions) -> TagCommand -> (DoPa,Instructions) pick win@(dopa1,winI) (dopa2,spec) = let nextI = toInstructions spec -- in case compareWith choose winPos nextPos of -- XXX 2009: add in enterOrbit information in case compareWith choose (toListing winI) (toListing nextI) of GT -> win LT -> (dopa2,nextI) EQ -> if dopa1 >= dopa2 then win else (dopa2,nextI) -- no deep reason not to just pick win toListing :: Instructions -> [(Tag,Action)] toListing (Instructions {newPos = nextPos}) = filter notReset nextPos where notReset (_,SetVal (-1)) = False notReset _ = True {- toListing (Instructions {newPos = nextPos}) = mergeTagOrbit nextPos (filter snd nextFlags) mergeTagOrbit xx [] = xx mergeTagOrbit [] yy = yy mergeTagOrbit xx@(x:xs) yy@(y:ys) = case compare (fst x) (fst y) of GT -> y : mergeTagOrbit xx ys LT -> x : mergeTagOrbit xs yy EQ -> x : mergeTagOrbit xs ys -- keep tag setting over orbit setting. -} {-# INLINE choose #-} choose :: Maybe (Tag,Action) -> Maybe (Tag,Action) -> Ordering choose Nothing Nothing = EQ choose Nothing x = flipOrder (choose x Nothing) choose (Just (tag,_post)) Nothing = case aTagOP!tag of Maximize -> GT Minimize -> LT -- needed to choose best path inside nested * operators, -- this needs a leading Minimize tag inside at least the parent * operator Ignore -> GT -- XXX this is a guess in analogy with Maximize for the end bit of a group Orbit -> LT -- trace ("choose LT! Just "++show tag++" < Nothing") LT -- 2009 XXX : comment out next line and use the Orbit instead -- Orbit -> err $ "bestTrans.choose : Very Unexpeted Orbit in Just Nothing: "++show (tag,post,aTagOP,f:fs) choose (Just (tag,post1)) (Just (_,post2)) = case aTagOP!tag of Maximize -> order Minimize -> flipOrder order Ignore -> EQ Orbit -> EQ -- Orbit -> err $ "bestTrans.choose : Very Unexpeted Orbit in Just Just: "++show (tag,(post1,post2),aTagOP,f:fs) where order = case (post1,post2) of (SetPre,SetPre) -> EQ (SetPost,SetPost) -> EQ (SetPre,SetPost) -> LT (SetPost,SetPre) -> GT (SetVal v1,SetVal v2) -> compare v1 v2 _ -> err $ "bestTrans.compareWith.choose sees incomparable "++show (tag,post1,post2) {-# INLINE compareWith #-} compareWith :: (Ord x,Monoid a) => (Maybe (x,b) -> Maybe (x,c) -> a) -> [(x,b)] -> [(x,c)] -> a compareWith comp = cw where cw [] [] = comp Nothing Nothing cw xx@(x:xs) yy@(y:ys) = case compare (fst x) (fst y) of GT -> comp Nothing (Just y) `mappend` cw xx ys EQ -> comp (Just x) (Just y) `mappend` cw xs ys LT -> comp (Just x) Nothing `mappend` cw xs yy cw xx [] = foldr (\x rest -> comp (Just x) Nothing `mappend` rest) mempty xx cw [] yy = foldr (\y rest -> comp Nothing (Just y) `mappend` rest) mempty yy isDFAFrontAnchored :: DFA -> Bool isDFAFrontAnchored = isDTFrontAnchored . d_dt where isDTFrontAnchored :: DT -> Bool isDTFrontAnchored (Simple' {}) = False isDTFrontAnchored (Testing' {dt_test=wt,dt_a=a,dt_b=b}) | wt == Test_BOL = isDTLosing b | otherwise = isDTFrontAnchored a && isDTFrontAnchored b where -- can DT never win or accept a character (when following trans_single)? isDTLosing :: DT -> Bool isDTLosing (Testing' {dt_a=a',dt_b=b'}) = isDTLosing a' && isDTLosing b' isDTLosing (Simple' {dt_win=w}) | not (IMap.null w) = False -- can win with 0 characters isDTLosing (Simple' {dt_trans=CharMap mt,dt_other=o}) = let ts = o : IMap.elems mt in all transLoses ts where transLoses :: Transition -> Bool transLoses (Transition {trans_single=dfa,trans_how=dtrans}) = isDTLose dfa || onlySpawns dtrans where isDTLose :: DFA -> Bool isDTLose dfa' = ISet.null (d_id dfa') onlySpawns :: DTrans -> Bool onlySpawns t = case IMap.elems t of [m] -> IMap.null m _ -> False {- toInstructions -} toInstructions :: TagList -> Instructions toInstructions spec = let (p,o) = execState (assemble spec) (mempty,mempty) in Instructions { newPos = IMap.toList p , newOrbits = if IMap.null o then Nothing else Just $ alterOrbits (IMap.toList o) } type CompileInstructions a = State ( IntMap Action -- 2009: change to SetPre | SetPost enum , IntMap AlterOrbit ) a data AlterOrbit = AlterReset -- removing the Orbits record from the OrbitLog | AlterLeave -- set inOrbit to False | AlterModify { newInOrbit :: Bool -- set inOrbit to the newInOrbit value , freshOrbit :: Bool} -- freshOrbit of True means to set getOrbits to mempty deriving (Show) -- freshOrbit of False means try appending position or else Seq.empty assemble :: TagList -> CompileInstructions () assemble = mapM_ oneInstruction where oneInstruction (tag,command) = case command of PreUpdate TagTask -> setPreTag tag PreUpdate ResetGroupStopTask -> resetGroupTag tag PreUpdate SetGroupStopTask -> setGroupTag tag PreUpdate ResetOrbitTask -> resetOrbit tag PreUpdate EnterOrbitTask -> enterOrbit tag PreUpdate LeaveOrbitTask -> leaveOrbit tag PostUpdate TagTask -> setPostTag tag PostUpdate ResetGroupStopTask -> resetGroupTag tag PostUpdate SetGroupStopTask -> setGroupTag tag _ -> err ("assemble : Weird orbit command: "++show (tag,command)) setPreTag :: Tag -> CompileInstructions () setPreTag = modifyPos SetPre setPostTag :: Tag -> CompileInstructions () setPostTag = modifyPos SetPost resetGroupTag :: Tag -> CompileInstructions () resetGroupTag = modifyPos (SetVal (-1)) setGroupTag :: Tag -> CompileInstructions () setGroupTag = modifyPos (SetVal 0) resetOrbit :: Tag -> CompileInstructions () resetOrbit tag = modifyPos (SetVal (-1)) tag >> modifyOrbit (IMap.insert tag AlterReset) enterOrbit :: Tag -> CompileInstructions () enterOrbit tag = modifyPos (SetVal 0) tag >> modifyOrbit changeOrbit where changeOrbit = IMap.insertWith overwriteOrbit tag appendNewOrbit appendNewOrbit = AlterModify {newInOrbit = True, freshOrbit = False} -- try to append startNewOrbit = AlterModify {newInOrbit = True, freshOrbit = True} -- will start a new series overwriteOrbit _ AlterReset = startNewOrbit overwriteOrbit _ AlterLeave = startNewOrbit overwriteOrbit _ (AlterModify {newInOrbit = False}) = startNewOrbit overwriteOrbit _ (AlterModify {newInOrbit = True}) = err $ "enterOrbit: Cannot enterOrbit twice in a row: " ++ show tag leaveOrbit :: Tag -> CompileInstructions () leaveOrbit tag = modifyOrbit escapeOrbit where escapeOrbit = IMap.insertWith setInOrbitFalse tag AlterLeave where setInOrbitFalse _ x@(AlterModify {}) = x {newInOrbit = False} setInOrbitFalse _ x = x modifyPos :: Action -> Tag -> CompileInstructions () modifyPos todo tag = do (a,c) <- get let a' = IMap.insert tag todo a seq a' $ put (a',c) modifyOrbit :: (IntMap AlterOrbit -> IntMap AlterOrbit) -> CompileInstructions () modifyOrbit f = do (a,c) <- get let c' = f c seq c' $ put (a,c') ---- alterOrbits :: [(Tag,AlterOrbit)] -> (Position -> OrbitTransformer) alterOrbits x = let items = map alterOrbit x in (\ pos m -> foldl (flip ($)) m (map ($ pos) items)) alterOrbit :: (Tag,AlterOrbit) -> (Position -> OrbitTransformer) alterOrbit (tag,AlterModify {newInOrbit = inOrbit',freshOrbit = True}) = (\ pos m -> IMap.insert tag (Orbits { inOrbit = inOrbit' , basePos = pos , ordinal = Nothing , getOrbits = mempty}) m) alterOrbit (tag,AlterModify {newInOrbit = inOrbit',freshOrbit = False}) = (\ pos m -> IMap.insertWithKey (updateOrbit pos) tag (newOrbit pos) m) where newOrbit pos = Orbits { inOrbit = inOrbit' , basePos = pos , ordinal = Nothing , getOrbits = mempty} updateOrbit pos _tag new old | inOrbit old = old { inOrbit = inOrbit' , getOrbits = getOrbits old |> pos } | otherwise = new alterOrbit (tag,AlterReset) = (\ _ m -> IMap.delete tag m) alterOrbit (tag,AlterLeave) = (\ _ m -> case IMap.lookup tag m of Nothing -> m Just x -> IMap.insert tag (x {inOrbit=False}) m) regex-tdfa-1.1.8/Text/Regex/TDFA/TNFA.hs0000644000000000000000000012312711536532137015551 0ustar0000000000000000-- XXX design uncertainty: should preResets be inserted into nullView? -- if not, why not? ADDED -- XXX design uncertainty: what does act -> actNullable -> -- actNullableTagless not use nullQ and same for inStar, etc? -- TODO : try rewriting whole qToNFA in terms of "act" -- (That will require re-organizing the continuation data a bit) -- | "Text.Regex.TDFA.TNFA" converts the CorePattern Q/P data (and its -- Pattern leafs) to a QNFA tagged non-deterministic finite automata. -- -- This holds every possible way to follow one state by another, while -- in the DFA these will be reduced by picking a single best -- transition for each (soure,destination) pair. The transitions are -- heavily and often redundantly annotated with tasks to perform, and -- this redundancy is reduced when picking the best transition. So -- far, keeping all this information has helped fix bugs in both the -- design and implementation. -- -- The QNFA for a Pattern with a starTraned Q/P form with N one -- character accepting leaves has at most N+1 nodes. These nodes -- repesent the future choices after accepting a leaf. The processing -- of Or nodes often reduces this number by sharing at the end of the -- different paths. Turning off capturing while compiling the pattern -- may (future extension) reduce this further for some patterns by -- processing Star with optimizations. This compact design also means -- that tags are assigned not just to be updated before taking a -- transition (PreUpdate) but also after the transition (PostUpdate). -- -- Uses recursive do notation. module Text.Regex.TDFA.TNFA(patternToNFA ,QNFA(..),QT(..),QTrans,TagUpdate(..)) where {- By Chris Kuklewicz, 2007. BSD License, see the LICENSE file. -} import Control.Monad(when) import Control.Monad.State(State,runState,execState,get,put,modify) import Data.Array.IArray(Array,array) import Data.Char(toLower,toUpper,isAlpha,ord) import Data.List(foldl') import Data.IntMap (IntMap) import qualified Data.IntMap as IMap(toAscList,null,unionWith,singleton,fromList,fromDistinctAscList) import Data.IntMap.CharMap2(CharMap(..)) import qualified Data.IntMap.CharMap2 as Map(null,singleton,map) import qualified Data.IntMap.EnumMap2 as EMap(null,keysSet,assocs) import Data.IntSet.EnumSet2(EnumSet) import qualified Data.IntSet.EnumSet2 as Set(singleton,toList,insert) import Data.Maybe(catMaybes,isNothing) import Data.Monoid(mempty,mappend) import qualified Data.Set as S(Set,insert,toAscList,empty) import Text.Regex.TDFA.Common(QT(..),QNFA(..),QTrans,TagTask(..),TagUpdate(..),DoPa(..) ,CompOption(..) ,Tag,TagTasks,TagList,Index,WinTags,GroupIndex,GroupInfo(..) ,common_error,noWin,snd3,mapSnd) import Text.Regex.TDFA.CorePattern(Q(..),P(..),OP(..),WhichTest,cleanNullView,NullView ,SetTestInfo(..),Wanted(..),TestInfo ,mustAccept,cannotAccept,patternToQ) import Text.Regex.TDFA.Pattern(Pattern(..),PatternSet(..),unSEC,PatternSetCharacterClass(..)) --import Debug.Trace ecart :: String -> a -> a ecart _ = id err :: String -> a err t = common_error "Text.Regex.TDFA.TNFA" t debug :: (Show a) => a -> s -> s debug _ s = s qtwin,qtlose :: QT -- qtwin is the continuation after matching the whole pattern. It has -- no futher transitions and sets tag #1 to the current position. qtwin = Simple {qt_win=[(1,PreUpdate TagTask)],qt_trans=mempty,qt_other=mempty} -- qtlose is the continuation to nothing, used when ^ or $ tests fail. qtlose = Simple {qt_win=mempty,qt_trans=mempty,qt_other=mempty} patternToNFA :: CompOption -> (Pattern,(GroupIndex, DoPa)) -> ((Index,Array Index QNFA) ,Array Tag OP ,Array GroupIndex [GroupInfo]) patternToNFA compOpt pattern = let (q,tags,groups) = patternToQ compOpt pattern msg = unlines [ show q ] in debug msg (qToNFA compOpt q,tags,groups) -- == -- == -- == -- == -- == -- == -- == -- == -- == -- == -- == -- == -- == -- == -- == -- == -- == -- == -- Query function on Q nullable :: Q -> Bool nullable = not . null . nullQ notNullable :: Q -> Bool notNullable = null . nullQ -- This asks if the preferred (i.e. first) NullView has no tests. maybeOnlyEmpty :: Q -> Maybe WinTags maybeOnlyEmpty (Q {nullQ = ((SetTestInfo sti,tags):_)}) = if EMap.null sti then Just tags else Nothing maybeOnlyEmpty _ = Nothing usesQNFA :: Q -> Bool usesQNFA (Q {wants=WantsBoth}) = True usesQNFA (Q {wants=WantsQNFA}) = True usesQNFA _ = False -- == -- == -- == -- == -- == -- == -- == -- == -- == -- == -- == -- == -- == -- == -- == -- == -- == -- == -- Functions related to QT -- dumb smart constructor used by qToQNFA -- Possible: Go through the qt and keep only the best tagged transition(s) to each state to make simple NFA? mkQNFA :: Index -> QT -> QNFA mkQNFA i qt = debug ("\n>QNFA id="++show i) $ QNFA i (debug ("\ngetting QT for "++show i) qt) -- This uses the Eq QT instance above -- ZZZ mkTesting :: QT -> QT mkTesting t@(Testing {qt_a=a,qt_b=b}) = if a==b then a else t -- Move to nfsToDFA XXX mkTesting t = t nullQT :: QT -> Bool nullQT (Simple {qt_win=w,qt_trans=t,qt_other=o}) = noWin w && Map.null t && IMap.null o nullQT _ = False -- This reconstructs the set of tests checked in processing QT, adding -- them to the passed set. listTestInfo :: QT -> EnumSet WhichTest -> EnumSet WhichTest listTestInfo qt s = execState (helper qt) s where helper (Simple {}) = return () helper (Testing {qt_test = wt, qt_a = a, qt_b = b}) = do modify (Set.insert wt) helper a helper b -- This is used to view "win" only through NullView, and is used in -- processing Or. applyNullViews :: NullView -> QT -> QT applyNullViews [] win = win applyNullViews nvs win = foldl' (dominate win) qtlose (reverse $ cleanNullView nvs) where -- This is used to prefer to view "win" through NullView. Losing is -- replaced by the plain win. This is employed by Star patterns to -- express that the first iteration is allowed to match null, but -- skipping the NullView occurs if the match fails. preferNullViews :: NullView -> QT -> QT preferNullViews [] win = win preferNullViews nvs win = foldl' (dominate win) win (reverse $ cleanNullView nvs) where {- dominate is common to applyNullViews and preferNullViews above. Even I no longer understand it without study. Oversimplified: The last argument has a new set of tests "sti" that must be satisfied to then apply the new "tags" and reach the "win" QT. Failing any of this set of tests leads to the "lose" QT. Closer: The "win" may already have some other set of tests leading to various branches, this set is cached in winTests. And the "lose" may already have some other set of tests leading to various branches. The combination of "win" and "lose" and "sti" must check the union of these tests, which is "allTests". Detail: The merging is done by useTest, where the tests in sti divert losing to a branch of "lose" and winning to a branch of "win". Tests not in sti are unchanged (but the losing DoPa index might be added). -} dominate :: QT -> QT -> (SetTestInfo,WinTags) -> QT dominate win lose x@(SetTestInfo sti,tags) = debug ("dominate "++show x) $ let -- The winning states are reached through the SetTag win' = prependTags' tags win -- get the SetTestInfo winTests = listTestInfo win $ mempty allTests = (listTestInfo lose $ winTests) `mappend` (EMap.keysSet sti) -- The first and second arguments of useTest are sorted -- At all times the second argument of useTest is a subset of the first useTest _ [] w _ = w -- no more dominating tests to fail to choose lose, so just choose win useTest (aTest:tests) allD@((dTest,dopas):ds) w l = let (wA,wB,wD) = branches w (lA,lB,lD) = branches l branches qt@(Testing {}) | aTest==qt_test qt = (qt_a qt,qt_b qt,qt_dopas qt) branches qt = (qt,qt,mempty) in if aTest == dTest then Testing {qt_test = aTest ,qt_dopas = (dopas `mappend` wD) `mappend` lD ,qt_a = useTest tests ds wA lA ,qt_b = lB} else Testing {qt_test = aTest ,qt_dopas = wD `mappend` lD ,qt_a = useTest tests allD wA lA ,qt_b = useTest tests allD wB lB} useTest [] _ _ _ = err "This case in dominate.useText cannot happen: second argument would have to have been null and that is checked before this case" in useTest (Set.toList allTests) (EMap.assocs sti) win' lose -- 'applyTest' is only used by addTest -- 2009: maybe need to keep track of whether a change is actually made -- (beyond DoPa tracking) to the QT. applyTest :: TestInfo -> QT -> QT applyTest (wt,dopa) qt | nullQT qt = qt | otherwise = applyTest' qt where applyTest' :: QT -> QT applyTest' q@(Simple {}) = mkTesting $ Testing {qt_test = wt ,qt_dopas = Set.singleton dopa ,qt_a = q ,qt_b = qtlose} applyTest' q@(Testing {qt_test=wt'}) = case compare wt wt' of LT -> Testing {qt_test = wt ,qt_dopas = Set.singleton dopa ,qt_a = q ,qt_b = qtlose} EQ -> q {qt_dopas = Set.insert dopa (qt_dopas q) ,qt_b = qtlose} GT -> q {qt_a = applyTest' (qt_a q) ,qt_b = applyTest' (qt_b q)} -- Three ways to merge a pair of QT's varying how winning transitions -- are handled. -- -- mergeQT_2nd is used by the NonEmpty case and always discards the -- first argument's win and uses the second argment's win. -- -- mergeAltQT is used by the Or cases and is biased to the first -- argument's winning transition, if present. -- -- mergeQT is used by Star and mergeE and combines the winning -- transitions (concatenating the instructions). mergeQT_2nd,mergeAltQT,mergeQT :: QT -> QT -> QT mergeQT_2nd q1 q2 | nullQT q1 = q2 | otherwise = mergeQTWith (\_ w2 -> w2) q1 q2 mergeAltQT q1 q2 | nullQT q1 = q2 -- prefer winning with w1 then with w2 | otherwise = mergeQTWith (\w1 w2 -> if noWin w1 then w2 else w1) q1 q2 mergeQT q1 q2 | nullQT q1 = q2 -- union wins | nullQT q2 = q1 -- union wins | otherwise = mergeQTWith mappend q1 q2 -- no preference, win with combined SetTag XXX is the wrong thing! "(.?)*" -- This takes a function which implements a policy on mergining -- winning transitions and then merges all the transitions. It opens -- the CharMap newtype for more efficient operation, then rewraps it. mergeQTWith :: (WinTags -> WinTags -> WinTags) -> QT -> QT -> QT mergeQTWith mergeWins = merge where merge :: QT -> QT -> QT merge (Simple w1 t1 o1) (Simple w2 t2 o2) = let w' = mergeWins w1 w2 t' = fuseQTrans t1 o1 t2 o2 o' = mergeQTrans o1 o2 in Simple w' t' o' merge t1@(Testing _ _ a1 b1) s2@(Simple {}) = mkTesting $ t1 {qt_a=(merge a1 s2), qt_b=(merge b1 s2)} merge s1@(Simple {}) t2@(Testing _ _ a2 b2) = mkTesting $ t2 {qt_a=(merge s1 a2), qt_b=(merge s1 b2)} merge t1@(Testing wt1 ds1 a1 b1) t2@(Testing wt2 ds2 a2 b2) = mkTesting $ case compare wt1 wt2 of LT -> t1 {qt_a=(merge a1 t2), qt_b=(merge b1 t2)} EQ -> Testing {qt_test = wt1 -- same as wt2 ,qt_dopas = mappend ds1 ds2 ,qt_a = merge a1 a2 ,qt_b = merge b1 b2} GT -> t2 {qt_a=(merge t1 a2), qt_b=(merge t1 b2)} fuseQTrans :: (CharMap QTrans) -> QTrans -> (CharMap QTrans) -> QTrans -> CharMap QTrans fuseQTrans (CharMap t1) o1 (CharMap t2) o2 = CharMap (IMap.fromDistinctAscList (fuse l1 l2)) where l1 = IMap.toAscList t1 l2 = IMap.toAscList t2 fuse [] y = mapSnd (mergeQTrans o1) y fuse x [] = mapSnd (mergeQTrans o2) x fuse x@((xc,xa):xs) y@((yc,ya):ys) = case compare xc yc of LT -> (xc,mergeQTrans xa o2) : fuse xs y EQ -> (xc,mergeQTrans xa ya) : fuse xs ys GT -> (yc,mergeQTrans o1 ya) : fuse x ys mergeQTrans :: QTrans -> QTrans -> QTrans mergeQTrans = IMap.unionWith mappend -- Note: There are no append* operations. There are only these -- prepend* operations because things are only prepended to the future -- continuation. And the ordering is significant. -- This is only used in inStar/nullable prependPreTag :: Maybe Tag -> QT -> QT prependPreTag Nothing qt = qt prependPreTag (Just tag) qt = prependTags' [(tag,PreUpdate TagTask)] qt prependGroupResets :: [Tag] -> QT -> QT prependGroupResets [] qt = qt prependGroupResets tags qt = prependTags' [(tag,PreUpdate ResetGroupStopTask)|tag<-tags] qt prependTags' :: TagList -> QT -> QT prependTags' [] qt = qt prependTags' tcs' qt@(Testing {}) = qt { qt_a = prependTags' tcs' (qt_a qt) , qt_b = prependTags' tcs' (qt_b qt) } prependTags' tcs' (Simple {qt_win=w,qt_trans=t,qt_other=o}) = Simple { qt_win = if noWin w then w else tcs' `mappend` w , qt_trans = Map.map prependQTrans t , qt_other = prependQTrans o } where prependQTrans = fmap (map (\(d,tcs) -> (d,tcs' `mappend` tcs))) -- == -- == -- == -- == -- == -- == -- == -- == -- == -- == -- == -- == -- == -- == -- == -- == -- == -- == -- define type S which is a State monad, this allows the creation of the uniq QNFA ids and storing the QNFA -- in an ascending order difference list for later placement in an array. -- Type of State monad used inside qToNFA type S = State (Index -- Next available QNFA index ,[(Index,QNFA)]->[(Index,QNFA)]) -- DList of previous QNFAs -- Type of continuation of the NFA, not much more complicated type E = (TagTasks -- Things to do before the Either QNFA QT -- with OneChar these become PostUpdate otherwise they become PreUpdate ,Either QNFA QT) -- The future, packaged in the best way -- See documentation below before the 'act' function. This is for use inside a Star pattern. type ActCont = ( E -- The eLoop is the dangerous recursive reference to continuation -- future that loops while accepting zero more characters , Maybe E -- This holds the safe non-zero-character accepting continuation , Maybe (TagTasks,QNFA)) -- optimized merger of the above, used only inside act, to avoid orphan QNFA id values -- newQNFA is the only operation that actually uses the monad get and put operations newQNFA :: String -> QT -> S QNFA newQNFA s qt = do (thisI,oldQs) <- get let futureI = succ thisI in seq futureI $ debug (">newQNFA< "++s++" : "++show thisI) $ do let qnfa = mkQNFA thisI qt -- (strictQT qt) -- making strictQNFA kills test (1,11) ZZZ put $! (futureI, oldQs . ((thisI,qnfa):)) return qnfa -- == -- == -- == -- == -- == -- == -- == -- == -- == -- == -- == -- == -- == -- == -- == -- == -- == -- == -- E related functions fromQNFA :: QNFA -> E fromQNFA qnfa = (mempty,Left qnfa) fromQT :: QT -> E fromQT qt = (mempty,Right qt) -- Promises the output will match (_,Left _), used by Or cases when any branch wants a QNFA continuation asQNFA :: String -> E -> S E asQNFA _ x@(_,Left _) = return x asQNFA s (tags,Right qt) = do qnfa <- newQNFA s qt -- YYY Policy choice: leave the tags return (tags, Left qnfa) -- Convert continuation E into a QNFA, only done at "top level" by qToNFA to get unique start state getQNFA :: String -> E -> S QNFA getQNFA _ ([],Left qnfa) = return qnfa getQNFA s (tags,Left qnfa) = newQNFA s (prependTags' (promoteTasks PreUpdate tags) (q_qt qnfa)) getQNFA s (tags,Right qt) = newQNFA s (prependTags' (promoteTasks PreUpdate tags) qt) -- Extract the QT from the E getQT :: E -> QT getQT (tags,cont) = prependTags' (promoteTasks PreUpdate tags) (either q_qt id cont) -- 2009: This looks realllly dodgy, since it can convert a QNFA/Testing to a QT/Testing -- without actually achieving anything except adding a DoPa to the Testing. A diagnostic -- series of runs might be needed to decide if this ever creates orphan id numbers. -- Then applyTest might need to keep track of whether it actually changes anything. addTest :: TestInfo -> E -> E addTest ti (tags,cont) = (tags, Right . applyTest ti . either q_qt id $ cont) -- This is used only with PreUpdate and PostUpdate as the first argument. promoteTasks :: (TagTask->TagUpdate) -> TagTasks -> TagList promoteTasks promote tags = map (\(tag,task) -> (tag,promote task)) tags -- only used in addWinTags demoteTags :: TagList -> TagTasks demoteTags = map helper where helper (tag,PreUpdate tt) = (tag,tt) helper (tag,PostUpdate tt) = (tag,tt) -- This is polymorphic so addWinTags can be cute below {-# INLINE addWinTags #-} addWinTags :: WinTags -> (TagTasks,a) -> (TagTasks,a) addWinTags wtags (tags,cont) = (demoteTags wtags `mappend` tags ,cont) {-# INLINE addTag' #-} -- This is polymorphic so addTagAC can be cute below addTag' :: Tag -> (TagTasks,a) -> (TagTasks,a) addTag' tag (tags,cont) = ((tag,TagTask):tags ,cont) -- a Maybe version of addTag' above, specializing 'a' to Either QNFA QT addTag :: Maybe Tag -> E -> E addTag Nothing e = e addTag (Just tag) e = addTag' tag e {-# INLINE addGroupResets #-} -- This is polymorphic so addGroupResetsAC can be cute below addGroupResets :: (Show a) => [Tag] -> (TagTasks,a) -> (TagTasks,a) addGroupResets [] x = x addGroupResets tags (tags',cont) = (foldr (:) tags' . map (\tag -> (tag,ResetGroupStopTask)) $ tags ,cont) addGroupSets :: (Show a) => [Tag] -> (TagTasks,a) -> (TagTasks,a) addGroupSets [] x = x addGroupSets tags (tags',cont) = (foldr (:) tags' . map (\tag -> (tag,SetGroupStopTask)) $ tags ,cont) -- Consume an ActCont. Uses the mergeQT form to combine non-accepting -- and accepting view of the continuation. getE :: ActCont -> E getE (_,_,Just (tags,qnfa)) = (tags, Left qnfa) -- consume optimized mQNFA value returned by Star getE (eLoop,Just accepting,_) = fromQT (mergeQT (getQT eLoop) (getQT accepting)) getE (eLoop,Nothing,_) = eLoop -- 2009: See coment for addTest. Here is a case where the third component might be a (Just qnfa) and it -- is being lost even though the added test might be redundant. addTestAC :: TestInfo -> ActCont -> ActCont addTestAC ti (e,mE,_) = (addTest ti e ,fmap (addTest ti) mE ,Nothing) -- These are AC versions of the add functions on E addTagAC :: Maybe Tag -> ActCont -> ActCont addTagAC Nothing ac = ac addTagAC (Just tag) (e,mE,mQNFA) = (addTag' tag e ,fmap (addTag' tag) mE ,fmap (addTag' tag) mQNFA) addGroupResetsAC :: [Tag] -> ActCont -> ActCont addGroupResetsAC [] ac = ac addGroupResetsAC tags (e,mE,mQNFA) = (addGroupResets tags e ,fmap (addGroupResets tags) mE ,fmap (addGroupResets tags) mQNFA) addGroupSetsAC :: [Tag] -> ActCont -> ActCont addGroupSetsAC [] ac = ac addGroupSetsAC tags (e,mE,mQNFA) = (addGroupSets tags e ,fmap (addGroupSets tags) mE ,fmap (addGroupSets tags) mQNFA) addWinTagsAC :: WinTags -> ActCont -> ActCont addWinTagsAC wtags (e,mE,mQNFA) = (addWinTags wtags e ,fmap (addWinTags wtags) mE ,fmap (addWinTags wtags) mQNFA) -- == -- == -- == -- == -- == -- == -- == -- == -- == -- == -- == -- == -- == -- == -- == -- == -- == -- == -- Initial preTag of 0th tag is implied. No other general pre-tags would be expected. -- The qtwin contains the preTag of the 1st tag and is only set when a match is completed. -- The fst Index is the index of the unique starting QNFA state. -- The snd (Array Index QNFA) is all the QNFA states. -- -- In the cases below, Empty is handled much like a Test with no TestInfo. qToNFA :: CompOption -> Q -> (Index,Array Index QNFA) qToNFA compOpt qTop = (q_id startingQNFA ,array (0,pred lastIndex) (table [])) where -- Result startingQNFA is the top level's index -- State pair: fst 0 is the next state number (not yet used) going in, and lastIndex coming out (succ of last used) -- snd id is the difference list of states going in, and the finished list coming out (startingQNFA,(lastIndex,table)) = runState (getTrans qTop (fromQT $ qtwin) >>= getQNFA "top level") startState startState = (0,id) getTrans,getTransTagless :: Q -> E -> S E getTrans qIn@(Q {preReset=resets,postSet=sets,preTag=pre,postTag=post,unQ=pIn}) e = debug (">< getTrans "++show qIn++" <>") $ case pIn of -- The case below is the ultimate consumer of every single OneChar in the input and the only caller of -- newTrans/acceptTrans which is the sole source of QT/Simple nodes. OneChar pat -> newTrans "getTrans/OneChar" resets pre pat . addTag post . addGroupSets sets $ e Empty -> return . addGroupResets resets . addTag pre . addTag post . addGroupSets sets $ e Test ti -> return . addGroupResets resets . addTag pre . addTest ti . addTag post . addGroupSets sets $ e _ -> return . addGroupResets resets . addTag pre =<< getTransTagless qIn (addTag post . addGroupSets sets $ e) getTransTagless qIn e = debug (">< getTransTagless "++show qIn++" <>") $ case unQ qIn of Seq q1 q2 -> getTrans q1 =<< getTrans q2 e Or [] -> return e Or [q] -> getTrans q e Or qs -> do eqts <- if usesQNFA qIn then do eQNFA <- asQNFA "getTransTagless/Or/usesQNFA" e sequence [ getTrans q eQNFA | q <- qs ] else sequence [ getTrans q e | q <- qs ] let qts = map getQT eqts return (fromQT (foldr1 mergeAltQT qts)) Star mOrbit resetTheseOrbits mayFirstBeNull q -> -- mOrbit of Just implies varies q and childGroups q let (e',clear) = -- debug ("\n>"++show e++"\n"++show q++"\n<") $ if notNullable q then (e,True) -- subpattern cannot be null else if null resetTheseOrbits && isNothing mOrbit then case maybeOnlyEmpty q of Just [] -> (e,True) -- True because null of subpattern is same as skipping subpattern Just tagList -> (addWinTags tagList e,False) -- null of subpattern NOT same as skipping _ -> (fromQT . preferNullViews (nullQ q) . getQT $ e,False) -- is NOT same as skipping else (fromQT . resetOrbitsQT resetTheseOrbits . enterOrbitQT mOrbit -- resetOrbitsQT and enterOrbitQT commute . preferNullViews (nullQ q) . getQT . leaveOrbit mOrbit $ e,False) -- perform resets when accepting 0 characters in if cannotAccept q then return e' else mdo mqt <- inStar q this (this,ans) <- case mqt of Nothing -> err ("Weird pattern in getTransTagless/Star: " ++ show (qTop,qIn)) Just qt -> do let qt' = resetOrbitsQT resetTheseOrbits . enterOrbitQT mOrbit $ qt -- resetOrbitsQT and enterOrbitQT commute thisQT = mergeQT qt' . getQT . leaveOrbit mOrbit $ e -- capture of subpattern or leave via next pattern (avoid null of subpattern on way out) ansE = fromQT . mergeQT qt' . getQT $ e' -- capture of subpattern or leave via null of subpattern thisE <- if usesQNFA q then return . fromQNFA =<< newQNFA "getTransTagless/Star" thisQT else return . fromQT $ thisQT return (thisE,ansE) return (if mayFirstBeNull then (if clear then this -- optimization to possibly preserve QNFA else ans) else this) {- NonEmpty is like actNullable (Or [Empty,q]) without the extra tag to prefer the first Empty branch -} NonEmpty q -> ecart ("\n> getTransTagless/NonEmpty"++show qIn) $ do -- Assertion to check than Pattern.starTrans did its job right: when (cannotAccept q) (err $ "getTransTagless/NonEmpty : provided with a *cannotAccept* pattern: "++show (qTop,qIn)) when (mustAccept q) (err $ "getTransTagless/NonEmpty : provided with a *mustAccept* pattern: "++show (qTop,qIn)) let e' = case maybeOnlyEmpty qIn of Just [] -> e Just _wtags -> e -- addWinTags wtags e XXX was duplicating tags Nothing -> err $ "getTransTagless/NonEmpty is supposed to have an emptyNull nullView : "++show qIn mqt <- inStar q e return $ case mqt of Nothing -> err ("Weird pattern in getTransTagless/NonEmpty: " ++ show (qTop,qIn)) Just qt -> fromQT . mergeQT_2nd qt . getQT $ e' -- ...and then this sets qt_win to exactly that of e' _ -> err ("This case in Text.Regex.TNFA.TNFA.getTransTagless cannot happen" ++ show (qTop,qIn)) inStar,inStarNullableTagless :: Q -> E -> S (Maybe QT) inStar qIn@(Q {preReset=resets,postSet=sets,preTag=pre,postTag=post}) eLoop | notNullable qIn = debug (">< inStar/1 "++show qIn++" <>") $ return . Just . getQT =<< getTrans qIn eLoop | otherwise = debug (">< inStar/2 "++show qIn++" <>") $ return . fmap (prependGroupResets resets . prependPreTag pre) =<< inStarNullableTagless qIn (addTag post . addGroupSets sets $ eLoop) inStarNullableTagless qIn eLoop = debug (">< inStarNullableTagless "++show qIn++" <>") $ do case unQ qIn of Empty -> return Nothing -- with Or this discards () branch in "(^|foo|())*" Or [] -> return Nothing Or [q] -> inStar q eLoop Or qs -> do mqts <- if usesQNFA qIn then do eQNFA <- asQNFA "inStarNullableTagless/Or/usesQNFA" eLoop sequence [ inStar q eQNFA | q <- qs ] else sequence [inStar q eLoop | q <- qs ] let qts = catMaybes mqts mqt = if null qts then Nothing else Just (foldr1 mergeAltQT qts) return mqt -- Calls to act are inlined by hand to actNullable. This returns only cases where q1 or q2 or both -- accepted characters. The zero-character case is handled by the tag wrapping by inStar. -- 2009: Does this look dodgy and repetitios of tags? Seq by policy has no preTag or postTag. -- though it can have prependGroupResets, but those are not repeated in children so it is okay. Seq q1 q2 -> do (_,meAcceptingOut,_) <- actNullable q1 =<< actNullable q2 (eLoop,Nothing,Nothing) return (fmap getQT meAcceptingOut) -- Calls to act are inlined by hand and are we losing the tags? Star {} -> do (_,meAcceptingOut,_) <- actNullableTagless qIn (eLoop,Nothing,Nothing) return (fmap getQT meAcceptingOut) NonEmpty {} -> ecart ("\n> inStarNullableTagless/NonEmpty"++show qIn) $ do (_,meAcceptingOut,_) <- actNullableTagless qIn (eLoop,Nothing,Nothing) return (fmap getQT meAcceptingOut) Test {} -> return Nothing -- with Or this discards ^ branch in "(^|foo|())*" OneChar {} -> err ("OneChar cannot have nullable True") {- act* functions These have a very complicated state that they receive and return as "the continuation". (E, Maybe E,Maybe (SetTag,QNFA)) The first E is the source of the danger that must be avoided. It starts out a reference to the QNFA/QT state that will be created by the most recent parent Star node. Thus it is a recursive reference from the MonadFix machinery. In particular, this value cannot be returned to the parent Star to be included in itself or we get a "let x = y; y=x" style infinite loop. As act* progresses the first E is actually modified to be the parent QNFA/QT as "seen" when all the elements to the right have accepted 0 characters. Thus it acquires tags and tests+tags (the NullView data is used for this purpose). The second item in the 3-tuple is a Maybe E. This will be used as the source of the QT for this contents of the Star QNFA/QT. It will be merged with the Star's own continuation data. It starts out Nothing and stays that way as long as there are no accepting transitions in the Star's pattern. This is value (via getQT) returned by inStar. The third item is a special optimization I added to remove a source of orphaned QNFAs. A Star within Act will often have to create a QNFA node. This cannot go into the second Maybe E item as Just (SetTag,Left QNFA) because this QNFA can have pulled values from the recursive parent Star's QNFA/QT in the first E value. Thus pulling with getQT from the QNFA and using that as the Maybe E would likely cause an infinite loop. This extra QNFA is stored in the thd3 location for use by getE. To improve it further it can accumulate Tag information after being formed. When a non nullable Q is handled by act it checks to see if the third value is there, in which case it uses that QNFA as the total continuation (subsumed in getE). Otherwise it merges the first E with any (Just E) in the second value to form the continuation. -} act :: Q -> ActCont -> S (Maybe E) act qIn c | nullable qIn = fmap snd3 $ actNullable qIn c | otherwise = debug (">< act "++show qIn++" <>") $ do mqt <- return . Just =<< getTrans qIn ( getE $ c ) return mqt -- or "return (fromQT qtlose,mqt,Nothing)" actNullable,actNullableTagless :: Q -> ActCont -> S ActCont actNullable qIn@(Q {preReset=resets,postSet=sets,preTag=pre,postTag=post,unQ=pIn}) ac = debug (">< actNullable "++show qIn++" <>") $ do case pIn of Empty -> return . addGroupResetsAC resets . addTagAC pre . addTagAC post . addGroupSetsAC sets $ ac Test ti -> return . addGroupResetsAC resets . addTagAC pre . addTestAC ti . addTagAC post . addGroupSetsAC sets $ ac OneChar {} -> err ("OneChar cannot have nullable True ") _ -> return . addGroupResetsAC resets . addTagAC pre =<< actNullableTagless qIn ( addTagAC post . addGroupSetsAC sets $ ac ) actNullableTagless qIn ac@(eLoop,mAccepting,mQNFA) = debug (">< actNullableTagless "++show (qIn)++" <>") $ do case unQ qIn of Seq q1 q2 -> actNullable q1 =<< actNullable q2 ac -- We know q1 and q2 are nullable Or [] -> return ac Or [q] -> actNullableTagless q ac Or qs -> do cqts <- do if all nullable qs then sequence [fmap snd3 $ actNullable q ac | q <- qs] else do e' <- asQNFA "qToNFA/actNullableTagless/Or" . getE $ ac let act' :: Q -> S (Maybe E) act' q = return . Just =<< getTrans q e' sequence [ if nullable q then fmap snd3 $ actNullable q ac else act' q | q <- qs ] let qts = map getQT (catMaybes cqts) eLoop' = case maybeOnlyEmpty qIn of Just wtags -> addWinTags wtags eLoop -- nullable without tests; avoid getQT Nothing -> fromQT $ applyNullViews (nullQ qIn) (getQT eLoop) -- suspect this of duplicating some tags with nullQ qIn mAccepting' = if null qts then fmap (fromQT . applyNullViews (nullQ qIn) . getQT) mAccepting -- suspect this of duplicating some tags with nullQ qIn else Just (fromQT $ foldr1 mergeAltQT qts) mQNFA' = if null qts then case maybeOnlyEmpty qIn of Just wtags -> fmap (addWinTags wtags) mQNFA Nothing -> Nothing else Nothing return (eLoop',mAccepting',mQNFA') Star mOrbit resetTheseOrbits mayFirstBeNull q -> do let (ac0@(_,mAccepting0,_),clear) = if notNullable q then (ac,True) else if null resetTheseOrbits && isNothing mOrbit then case maybeOnlyEmpty q of Just [] -> (ac,True) Just wtags -> (addWinTagsAC wtags ac,False) _ -> let nQ = fromQT . preferNullViews (nullQ q) . getQT in ((nQ eLoop,fmap nQ mAccepting,Nothing),False) else let nQ = fromQT . resetOrbitsQT resetTheseOrbits . enterOrbitQT mOrbit . preferNullViews (nullQ q) . getQT . leaveOrbit mOrbit in ((nQ eLoop,fmap nQ mAccepting,Nothing),False) if cannotAccept q then return ac0 else mdo mChildAccepting <- act q (this,Nothing,Nothing) (thisAC@(this,_,_),ansAC) <- case mChildAccepting of Nothing -> err $ "Weird pattern in getTransTagless/Star: " ++ show (qTop,qIn) Just childAccepting -> do let childQT = resetOrbitsQT resetTheseOrbits . enterOrbitQT mOrbit . getQT $ childAccepting thisQT = mergeQT childQT . getQT . leaveOrbit mOrbit . getE $ ac thisAccepting = case mAccepting of Just futureAccepting -> Just . fromQT . mergeQT childQT . getQT $ futureAccepting Nothing -> Just . fromQT $ childQT thisAll <- if usesQNFA q then do thisQNFA <- newQNFA "actNullableTagless/Star" thisQT return (fromQNFA thisQNFA, thisAccepting, Just (mempty,thisQNFA)) else return (fromQT thisQT, thisAccepting, Nothing) let skipQT = mergeQT childQT . getQT . getE $ ac0 -- for first iteration the continuation uses NullView skipAccepting = case mAccepting0 of Just futureAccepting0 -> Just . fromQT . mergeQT childQT . getQT $ futureAccepting0 Nothing -> Just . fromQT $ childQT ansAll = (fromQT skipQT, skipAccepting, Nothing) return (thisAll,ansAll) return (if mayFirstBeNull then (if clear then thisAC else ansAC) else thisAC) NonEmpty q -> ecart ("\n> actNullableTagless/NonEmpty"++show qIn) $ do -- We *know* that q is nullable from Pattern and CorePattern checks, but assert here anyway when (mustAccept q) (err $ "actNullableTagless/NonEmpty : provided with a *mustAccept* pattern: "++show (qTop,qIn)) when (cannotAccept q) (err $ "actNullableTagless/NonEmpty : provided with a *cannotAccept* pattern: "++show (qTop,qIn)) {- This is like actNullable (Or [Empty,q]) without the extra tag to prefer the first Empty branch -} let (clearE,_,_) = case maybeOnlyEmpty qIn of Just [] -> ac Just _wtags -> ac -- addWinTagsAC wtags ac XXX was duplicating tags Nothing -> err $ "actNullableTagless/NonEmpty is supposed to have an emptyNull nullView : "++show (qTop,qIn) (_,mChildAccepting,_) <- actNullable q ac case mChildAccepting of Nothing -> err $ "Weird pattern in actNullableTagless/NonEmpty: " ++ show (qTop,qIn) -- cannotAccept q checked for and excluded the above condition (and starTrans!) Just childAccepting -> do let childQT = getQT childAccepting thisAccepting = case mAccepting of Nothing -> Just . fromQT $ childQT Just futureAcceptingE -> Just . fromQT . mergeQT childQT . getQT $ futureAcceptingE -- I _think_ there is no need for mergeQT_2nd in the above. return (clearE,thisAccepting,Nothing) _ -> err $ "This case in Text.Regex.TNFA.TNFA.actNullableTagless cannot happen: "++show (qTop,qIn) -- This is applied directly to any qt immediately before passing to mergeQT resetOrbitsQT :: [Tag] -> QT -> QT resetOrbitsQT | lastStarGreedy compOpt = const id | otherwise = (\tags -> prependTags' [(tag,PreUpdate ResetOrbitTask)|tag<-tags]) enterOrbitQT :: Maybe Tag -> QT -> QT enterOrbitQT | lastStarGreedy compOpt = const id | otherwise = maybe id (\tag->prependTags' [(tag,PreUpdate EnterOrbitTask)]) leaveOrbit :: Maybe Tag -> E -> E leaveOrbit | lastStarGreedy compOpt = const id | otherwise = maybe id (\tag->(\(tags,cont)->((tag,LeaveOrbitTask):tags,cont))) -- 'newTrans' is the only place where PostUpdate is used and is only called from getTrans/OneChar -- and is the only caller of 'acceptTrans' to make QT/Simple nodes. newTrans :: String -- debugging string for when a newQNFA is allocated -> [Tag] -- which tags get ResetGroupStopTask in this transition (PreUpdate) -> Maybe Tag -- maybe one TagTask to update before incrementing the offset (PreUpdate) -> Pattern -- the one character accepting Pattern of this transition -> E -- the continuation state, reified to a QNFA, of after this Pattern -- The fst part of the E is consumed here as a TagTask (PostUpdate) -> S E -- the continuation state, as a QT, of before this Pattern newTrans s resets mPre pat (tags,cont) = do i <- case cont of Left qnfa -> return (q_id qnfa) -- strictQNFA ZZZ no help Right qt -> do qnfa <- newQNFA s qt -- strictQT ZZZ no help return (q_id qnfa) let post = promoteTasks PostUpdate tags pre = promoteTasks PreUpdate ([(tag,ResetGroupStopTask) | tag<-resets] ++ maybe [] (\tag -> [(tag,TagTask)]) mPre) return . fromQT $ acceptTrans pre pat post i -- fromQT $ strictQT no help -- 'acceptTrans' is the sole creator of QT/Simple and is only called by getTrans/OneChar/newTrans acceptTrans :: TagList -> Pattern -> TagList -> Index -> QT acceptTrans pre pIn post i = let target = IMap.singleton i [(getDoPa pIn,pre++post)] in case pIn of PChar _ char -> let trans = toMap target [char] in Simple { qt_win = mempty, qt_trans = trans, qt_other = mempty } PEscape _ char -> let trans = toMap target [char] in Simple { qt_win = mempty, qt_trans = trans, qt_other = mempty } PDot _ -> Simple { qt_win = mempty, qt_trans = dotTrans, qt_other = target } PAny _ ps -> let trans = toMap target . S.toAscList . decodePatternSet $ ps in Simple { qt_win = mempty, qt_trans = trans, qt_other = mempty } PAnyNot _ ps -> let trans = toMap mempty . S.toAscList . addNewline . decodePatternSet $ ps in Simple { qt_win = mempty, qt_trans = trans, qt_other = target } _ -> err ("Cannot acceptTrans pattern "++show (qTop,pIn)) where -- Take a common destination and a sorted list of unique chraceters -- and create a map from those characters to the common destination toMap :: IntMap [(DoPa,[(Tag, TagUpdate)])] -> [Char] -> CharMap (IntMap [(DoPa,[(Tag, TagUpdate)])]) toMap dest | caseSensitive compOpt = CharMap . IMap.fromDistinctAscList . map (\c -> (ord c,dest)) | otherwise = CharMap . IMap.fromList . ($ []) . foldr (\c dl -> if isAlpha c then (dl.((ord (toUpper c),dest):) .((ord (toLower c),dest):) ) else (dl.((ord c,dest):)) ) id addNewline | multiline compOpt = S.insert '\n' | otherwise = id dotTrans | multiline compOpt = Map.singleton '\n' mempty | otherwise = mempty {- prepend architecture becomes prependTags :: TagTask -> [Tag] -> QT -> QT which always uses PreUpdate and the same task for all the tags qt_win seems to only allow PreUpdate so why keep the same type? ADD ORPHAN ID check and make this a fatal error while testing -} -- | decodePatternSet cannot handle collating element and treats -- equivalence classes as just their definition and nothing more. decodePatternSet :: PatternSet -> S.Set Char decodePatternSet (PatternSet msc mscc _ msec) = let baseMSC = maybe S.empty id msc withMSCC = foldl (flip S.insert) baseMSC (maybe [] (concatMap decodeCharacterClass . S.toAscList) mscc) withMSEC = foldl (flip S.insert) withMSCC (maybe [] (concatMap unSEC . S.toAscList) msec) in withMSEC -- | This returns the disctince ascending list of characters -- represented by [: :] values in legalCharacterClasses; unrecognized -- class names return an empty string decodeCharacterClass :: PatternSetCharacterClass -> String decodeCharacterClass (PatternSetCharacterClass s) = case s of "alnum" -> ['0'..'9']++['a'..'z']++['A'..'Z'] "digit" -> ['0'..'9'] "punct" -> ['\33'..'\47']++['\58'..'\64']++['\91'..'\95']++"\96"++['\123'..'\126'] "alpha" -> ['a'..'z']++['A'..'Z'] "graph" -> ['\41'..'\126'] "space" -> "\t\n\v\f\r " "blank" -> "\t " "lower" -> ['a'..'z'] "upper" -> ['A'..'Z'] "cntrl" -> ['\0'..'\31']++"\127" -- with NUL "print" -> ['\32'..'\126'] "xdigit" -> ['0'..'9']++['a'..'f']++['A'..'F'] "word" -> ['0'..'9']++['a'..'z']++['A'..'Z']++"_" _ -> [] {- -- | This is the list of recognized [: :] character classes, others -- are decoded as empty. legalCharacterClasses :: [String] legalCharacterClasses = ["alnum","digit","punct","alpha","graph" ,"space","blank","lower","upper","cntrl","print","xdigit","word"] -}regex-tdfa-1.1.8/Text/Regex/TDFA/ByteString/0000755000000000000000000000000011536532136016550 5ustar0000000000000000regex-tdfa-1.1.8/Text/Regex/TDFA/ByteString/Lazy.hs0000644000000000000000000000711611536532136020030 0ustar0000000000000000{-| This modules provides 'RegexMaker' and 'RegexLike' instances for using 'ByteString' with the DFA backend ("Text.Regex.Lib.WrapDFAEngine" and "Text.Regex.Lazy.DFAEngineFPS"). This module is usually used via import "Text.Regex.TDFA". This exports instances of the high level API and the medium level API of 'compile','execute', and 'regexec'. -} module Text.Regex.TDFA.ByteString.Lazy( Regex ,CompOption ,ExecOption ,compile ,execute ,regexec ) where import Data.Array.IArray((!),elems,amap) import qualified Data.ByteString.Lazy.Char8 as L(ByteString,take,drop,unpack) import Text.Regex.Base(MatchArray,RegexContext(..),RegexMaker(..),RegexLike(..)) import Text.Regex.Base.Impl(polymatch,polymatchM) import Text.Regex.TDFA.ReadRegex(parseRegex) import Text.Regex.TDFA.String() -- piggyback on RegexMaker for String import Text.Regex.TDFA.TDFA(patternToRegex) import Text.Regex.TDFA.Common(Regex(..),CompOption,ExecOption(captureGroups)) import Data.Maybe(listToMaybe) import Text.Regex.TDFA.NewDFA.Engine(execMatch) import Text.Regex.TDFA.NewDFA.Tester as Tester(matchTest) {- By Chris Kuklewicz, 2007. BSD License, see the LICENSE file. -} instance RegexContext Regex L.ByteString L.ByteString where match = polymatch matchM = polymatchM instance RegexMaker Regex CompOption ExecOption L.ByteString where makeRegexOptsM c e source = makeRegexOptsM c e (L.unpack source) instance RegexLike Regex L.ByteString where matchOnce r s = listToMaybe (matchAll r s) matchAll r s = execMatch r 0 '\n' s matchCount r s = length (matchAll r' s) where r' = r { regex_execOptions = (regex_execOptions r) {captureGroups = False} } matchTest = Tester.matchTest matchOnceText regex source = fmap (\ma -> let (o32,l32) = ma!0 o = fi o32 l = fi l32 in (L.take o source ,fmap (\ol@(off32,len32) -> let off = fi off32 len = fi len32 in (L.take len (L.drop off source),ol)) ma ,L.drop (o+l) source)) (matchOnce regex source) matchAllText regex source = let go i _ _ | i `seq` False = undefined go _i _t [] = [] go i t (x:xs) = let (off0,len0) = x!0 trans pair@(off32,len32) = (L.take (fi len32) (L.drop (fi (off32-i)) t),pair) t' = L.drop (fi (off0+len0-i)) t in amap trans x : seq t' (go (off0+len0) t' xs) in go 0 source (matchAll regex source) fi :: (Integral a, Num b) => a -> b fi = fromIntegral compile :: CompOption -- ^ Flags (summed together) -> ExecOption -- ^ Flags (summed together) -> L.ByteString -- ^ The regular expression to compile -> Either String Regex -- ^ Returns: the compiled regular expression compile compOpt execOpt bs = case parseRegex (L.unpack bs) of Left err -> Left ("parseRegex for Text.Regex.TDFA.ByteString failed:"++show err) Right pattern -> Right (patternToRegex pattern compOpt execOpt) execute :: Regex -- ^ Compiled regular expression -> L.ByteString -- ^ ByteString to match against -> Either String (Maybe MatchArray) execute r bs = Right (matchOnce r bs) regexec :: Regex -- ^ Compiled regular expression -> L.ByteString -- ^ ByteString to match against -> Either String (Maybe (L.ByteString, L.ByteString, L.ByteString, [L.ByteString])) regexec r bs = case matchOnceText r bs of Nothing -> Right (Nothing) Just (pre,mt,post) -> let main = fst (mt!0) rest = map fst (tail (elems mt)) -- will be [] in Right (Just (pre,main,post,rest)) regex-tdfa-1.1.8/Text/Regex/TDFA/NewDFA/0000755000000000000000000000000011536532137015523 5ustar0000000000000000regex-tdfa-1.1.8/Text/Regex/TDFA/NewDFA/Engine.hs0000644000000000000000000007674111536532137017303 0ustar0000000000000000-- | This is the code for the main engine. This captures the posix subexpressions. This 'execMatch' -- also dispatches to "Engine_NC", "Engine_FA", and "Engine_FC_NA" -- -- It is polymorphic over the internal Uncons type class, and specialized to produce the needed -- variants. module Text.Regex.TDFA.NewDFA.Engine(execMatch) where import Control.Monad(when,forM,forM_,liftM2,foldM,join,filterM) import Data.Array.Base(unsafeRead,unsafeWrite,STUArray(..)) -- #ifdef __GLASGOW_HASKELL__ import GHC.Arr(STArray(..)) import GHC.ST(ST(..)) import GHC.Prim(MutableByteArray#,RealWorld,Int#,sizeofMutableByteArray#,unsafeCoerce#) {- -- #else import Control.Monad.ST(ST) import Data.Array.ST(STArray) -- #endif -} import Prelude hiding ((!!)) import Data.Array.MArray(MArray(..),unsafeFreeze) import Data.Array.IArray(Array,bounds,assocs,Ix(rangeSize,range)) import qualified Data.IntMap.CharMap2 as CMap(findWithDefault) import Data.IntMap(IntMap) import qualified Data.IntMap as IMap(null,toList,lookup,insert) import Data.Maybe(catMaybes) import Data.Monoid(Monoid(..)) import qualified Data.IntSet as ISet(toAscList) import Data.Array.IArray((!)) import Data.List(partition,sort,foldl',sortBy,groupBy) import Data.STRef(STRef,newSTRef,readSTRef,writeSTRef) import qualified Control.Monad.ST.Lazy as L(ST,runST,strictToLazyST) import qualified Control.Monad.ST.Strict as S(ST) import Data.Sequence(Seq,ViewL(..),viewl) import qualified Data.Sequence as Seq(null) import qualified Data.ByteString.Char8 as SBS(ByteString) import qualified Data.ByteString.Lazy.Char8 as LBS(ByteString) import Text.Regex.Base(MatchArray,MatchOffset,MatchLength) import qualified Text.Regex.TDFA.IntArrTrieSet as Trie(lookupAsc) import Text.Regex.TDFA.Common hiding (indent) import Text.Regex.TDFA.NewDFA.Uncons(Uncons(uncons)) import Text.Regex.TDFA.NewDFA.MakeTest(test_singleline,test_multiline) import qualified Text.Regex.TDFA.NewDFA.Engine_FA as FA(execMatch) import qualified Text.Regex.TDFA.NewDFA.Engine_NC as NC(execMatch) import qualified Text.Regex.TDFA.NewDFA.Engine_NC_FA as NC_FA(execMatch) --import Debug.Trace -- trace :: String -> a -> a -- trace _ a = a {- see :: (Show x, Monad m) => String -> x -> m a -> m a see _ _ m = m --see msg s m = trace ("\nsee: "++msg++" : "++show s) m sees :: (Monad m) => String -> String -> m a -> m a sees _ _ m = m --sees msg s m = trace ("\nsee: "++msg++" :\n"++s) m -} err :: String -> a err s = common_error "Text.Regex.TDFA.NewDFA.Engine" s {-# INLINE (!!) #-} (!!) :: (MArray a e (S.ST s),Ix i) => a i e -> Int -> S.ST s e (!!) = unsafeRead {-# INLINE set #-} set :: (MArray a e (S.ST s),Ix i) => a i e -> Int -> e -> S.ST s () set = unsafeWrite {-# SPECIALIZE execMatch :: Regex -> Position -> Char -> ([] Char) -> [MatchArray] #-} {-# SPECIALIZE execMatch :: Regex -> Position -> Char -> (Seq Char) -> [MatchArray] #-} {-# SPECIALIZE execMatch :: Regex -> Position -> Char -> SBS.ByteString -> [MatchArray] #-} {-# SPECIALIZE execMatch :: Regex -> Position -> Char -> LBS.ByteString -> [MatchArray] #-} execMatch :: Uncons text => Regex -> Position -> Char -> text -> [MatchArray] execMatch r@(Regex { regex_dfa = DFA {d_id=didIn,d_dt=dtIn} , regex_init = startState , regex_b_index = b_index , regex_b_tags = b_tags_all , regex_trie = trie , regex_tags = aTags , regex_groups = aGroups , regex_isFrontAnchored = frontAnchored , regex_compOptions = CompOption { multiline = newline } , regex_execOptions = ExecOption { captureGroups = capture }}) offsetIn prevIn inputIn = case (subCapture,frontAnchored) of (True ,False) -> L.runST runCaptureGroup (True ,True) -> FA.execMatch r offsetIn prevIn inputIn (False ,False) -> NC.execMatch r offsetIn prevIn inputIn (False ,True) -> NC_FA.execMatch r offsetIn prevIn inputIn where subCapture :: Bool subCapture = capture && (1<=rangeSize (bounds aGroups)) b_tags :: (Tag,Tag) !b_tags = b_tags_all orbitTags :: [Tag] !orbitTags = map fst . filter ((Orbit==).snd) . assocs $ aTags !test = mkTest newline comp :: C s comp = {-# SCC "matchHere.comp" #-} ditzyComp'3 aTags runCaptureGroup :: L.ST s [MatchArray] runCaptureGroup = {-# SCC "runCaptureGroup" #-} do obtainNext <- L.strictToLazyST constructNewEngine let loop = do vals <- L.strictToLazyST obtainNext if null vals -- force vals before defining valsRest then return [] -- end of capturing else do valsRest <- loop return (vals ++ valsRest) loop constructNewEngine :: S.ST s (S.ST s [MatchArray]) constructNewEngine = {-# SCC "constructNewEngine" #-} do storeNext <- newSTRef undefined writeSTRef storeNext (goNext storeNext) let obtainNext = join (readSTRef storeNext) return obtainNext goNext storeNext = {-# SCC "goNext" #-} do (SScratch s1In s2In (winQ,blank,which)) <- newScratch b_index b_tags _ <- spawnStart b_tags blank startState s1In offsetIn eliminatedStateFlag <- newSTRef False eliminatedRespawnFlag <- newSTRef False let next s1 s2 did dt offset prev input = {-# SCC "goNext.next" #-} case dt of Testing' {dt_test=wt,dt_a=a,dt_b=b} -> if test wt offset prev input then next s1 s2 did a offset prev input else next s1 s2 did b offset prev input Simple' {dt_win=w,dt_trans=t, dt_other=o} | IMap.null w -> case uncons input of Nothing -> finalizeWinners Just (c,input') -> case CMap.findWithDefault o c t of Transition {trans_many=DFA {d_id=did',d_dt=dt'},trans_how=dtrans} -> findTrans s1 s2 did did' dt' dtrans offset c input' | otherwise -> do (did',dt') <- processWinner s1 did dt w offset next' s1 s2 did' dt' offset prev input next' s1 s2 did dt offset prev input = {-# SCC "goNext.next'" #-} case dt of Testing' {dt_test=wt,dt_a=a,dt_b=b} -> if test wt offset prev input then next' s1 s2 did a offset prev input else next' s1 s2 did b offset prev input Simple' {dt_trans=t, dt_other=o} -> case uncons input of Nothing -> finalizeWinners Just (c,input') -> case CMap.findWithDefault o c t of Transition {trans_many=DFA {d_id=did',d_dt=dt'},trans_how=dtrans} -> findTrans s1 s2 did did' dt' dtrans offset c input' -- compressOrbits gets all the current Tag-0 start information from -- the NFA states; then it loops through all the Orbit tags with -- compressOrbit. -- -- compressOrbit on such a Tag loops through all the NFS states' -- m_orbit record, discardind ones that are Nothing and discarding -- ones that are too new to care about (after the cutoff value). -- -- compressOrbit then groups the Orbits records by the Tag-0 start -- position and the basePos position. Entried in different groups -- will never be comparable in the future so they can be processed -- separately. Groups could probably be even more finely -- distinguished, as a futher optimization, but the justification will -- be tricky. -- -- Current Tag-0 values are at most offset and all newly spawned -- groups will have Tag-0 of at least (succ offset) so the current -- groups are closed to those spawned in the future. The basePos may -- be as large as offset and may be overwritten later with values of -- offset or larger (and this will also involve deleting the Orbits -- record). Thus there could be a future collision between a current -- group with basePos==offset and an updated record that acquires -- basePos==offset. By excluding groups with basePos before the -- current offset the collision between existing and future records -- is avoided. -- -- An entry in a group can only collide with that group's -- descendents. compressOrbit sends each group to the compressGroup -- command. -- -- compressGroup on a single record checks whether it's Seq can be -- cleared and if so it will clear it (and set ordinal to Nothing but -- this this not particularly important). -- -- compressGroup on many records sorts and groups the members and zips -- the groups with their new ordinal value. The comparision is based -- on the old ordinal value, then the inOrbit value, and then the (Seq -- Position) data. -- -- The old ordinals of the group will all be Nothing or all be Just, -- but this condition is neither checked nor violations detected. -- This comparision is justified because once records get different -- ordinals assigned they will never change places. -- -- The inOrbit Bool is only different if one of them has set the stop -- position to at most (succ offset). They will obly be compared if -- the other one leaves, an its stop position will be at least offset. -- The previous sentence is justified by inspectin of the "assemble" -- function in the TDFA module: there is no (PostUpdate -- LeaveOrbitTask) so the largest possible value for the stop Tag is -- (pred offset). Thus the record with inOrbit==False would beat (be -- GT than) the record with inOrbit==True. -- -- The Seq comparison is safe because the largest existing Position -- value is (pred offset) and the smallest future Position value is -- offset. The previous sentence is justified by inspectin of the -- "assemble" function in the TDFA module: there is no (PostUpdate -- EnterOrbitTags) so the largest possible value in the Seq is (pred -- offset). -- -- The updated Orbits get the new ordinal value and an empty (Seq -- Position). compressOrbits s1 did offset = do let getStart state = do start <- maybe (err "compressOrbit,1") (!! 0) =<< m_pos s1 !! state return (state,start) cutoff = offset - 50 -- Require: cutoff <= offset, MAGIC TUNABLE CONSTANT 50 ss <- mapM getStart (ISet.toAscList did) let compressOrbit tag = do mos <- forM ss ( \ p@(state,_start) -> do mo <- fmap (IMap.lookup tag) (m_orbit s1 !! state) case mo of Just orbits | basePos orbits < cutoff -> return (Just (p,orbits)) | otherwise -> return Nothing _ -> return Nothing ) let compressGroup [((state,_),orbit)] | Seq.null (getOrbits orbit) = return () | otherwise = set (m_orbit s1) state . (IMap.insert tag $! (orbit { ordinal = Nothing, getOrbits = mempty})) =<< m_orbit s1 !! state compressGroup gs = do let sortPos (_,b1) (_,b2) = compare (ordinal b1) (ordinal b2) `mappend` compare (inOrbit b2) (inOrbit b1) `mappend` comparePos (viewl (getOrbits b1)) (viewl (getOrbits b2)) groupPos (_,b1) (_,b2) = ordinal b1 == ordinal b2 && getOrbits b1 == getOrbits b2 gs' = zip [(1::Int)..] (groupBy groupPos . sortBy sortPos $ gs) forM_ gs' $ \ (!n,eqs) -> do forM_ eqs $ \ ((state,_),orbit) -> set (m_orbit s1) state . (IMap.insert tag $! (orbit { ordinal = Just n, getOrbits = mempty })) =<< m_orbit s1 !! state let sorter ((_,a1),b1) ((_,a2),b2) = compare a1 a2 `mappend` compare (basePos b1) (basePos b2) grouper ((_,a1),b1) ((_,a2),b2) = a1==a2 && basePos b1 == basePos b2 orbitGroups = groupBy grouper . sortBy sorter . catMaybes $ mos mapM_ compressGroup orbitGroups mapM_ compressOrbit orbitTags -- findTrans has to (part 1) decide, for each destination, "which" of -- zero or more source NFA states will be the chosen source. Then it -- has to (part 2) perform the transition or spawn. It keeps track of -- the starting index while doing so, and compares the earliest start -- with the stored winners. (part 3) If some winners are ready to be -- released then the future continuation of the search is placed in -- "storeNext". If no winners are ready to be released then the -- computation continues immediately. findTrans s1 s2 did did' dt' dtrans offset prev' input' = {-# SCC "goNext.findTrans" #-} do -- findTrans part 0 -- MAGIC TUNABLE CONSTANT 100 (and 100-1). TODO: (offset .&. 127 == 127) instead? when (not (null orbitTags) && (offset `rem` 100 == 99)) (compressOrbits s1 did offset) -- findTrans part 1 let findTransTo (destIndex,sources) | IMap.null sources = set which destIndex ((-1,Instructions { newPos = [(0,SetPost)], newOrbits = Nothing }) ,blank_pos blank,mempty) | otherwise = do let prep (sourceIndex,(_dopa,instructions)) = {-# SCC "goNext.findTrans.prep" #-} do pos <- maybe (err $ "findTrans,1 : "++show (sourceIndex,destIndex,did')) return =<< m_pos s1 !! sourceIndex orbit <- m_orbit s1 !! sourceIndex let orbit' = maybe orbit (\ f -> f offset orbit) (newOrbits instructions) return ((sourceIndex,instructions),pos,orbit') challenge x1@((_si1,ins1),_p1,_o1) x2@((_si2,ins2),_p2,_o2) = {-# SCC "goNext.findTrans.challenge" #-} do check <- comp offset x1 (newPos ins1) x2 (newPos ins2) if check==LT then return x2 else return x1 (first:rest) <- mapM prep (IMap.toList sources) set which destIndex =<< foldM challenge first rest let dl = IMap.toList dtrans mapM_ findTransTo dl -- findTrans part 2 let performTransTo (destIndex,_) = {-# SCC "goNext.findTrans.performTransTo" #-} do x@((sourceIndex,_instructions),_pos,_orbit') <- which !! destIndex if sourceIndex == (-1) then spawnStart b_tags blank destIndex s2 (succ offset) else updateCopy x offset s2 destIndex earlyStart <- fmap minimum $ mapM performTransTo dl -- findTrans part 3 earlyWin <- readSTRef (mq_earliest winQ) if earlyWin < earlyStart then do winners <- fmap (foldl' (\ rest ws -> ws : rest) []) $ getMQ earlyStart winQ writeSTRef storeNext (next s2 s1 did' dt' (succ offset) prev' input') mapM (tagsToGroupsST aGroups) winners else do let offset' = succ offset in seq offset' $ next s2 s1 did' dt' offset' prev' input' -- The "newWinnerThenProceed" can find both a new non-empty winner and -- a new empty winner. A new non-empty winner can cause some of the -- NFA states that comprise the DFA state to be eliminated, and if the -- startState is eliminated then it must then be respawned. And -- imperative flag setting and resetting style is used. -- -- A non-empty winner from the startState might obscure a potential -- empty winner (form the startState at the current offset). This -- winEmpty possibility is also checked for. (unit test pattern ".*") -- (futher test "(.+|.+.)*" on "aa\n") {-# INLINE processWinner #-} processWinner s1 did dt w offset = {-# SCC "goNext.newWinnerThenProceed" #-} do let prep x@(sourceIndex,instructions) = {-# SCC "goNext.newWinnerThenProceed.prep" #-} do pos <- maybe (err "newWinnerThenProceed,1") return =<< m_pos s1 !! sourceIndex startPos <- pos !! 0 orbit <- m_orbit s1 !! sourceIndex let orbit' = maybe orbit (\ f -> f offset orbit) (newOrbits instructions) return (startPos,(x,pos,orbit')) challenge x1@((_si1,ins1),_p1,_o1) x2@((_si2,ins2),_p2,_o2) = {-# SCC "goNext.newWinnerThenProceed.challenge" #-} do check <- comp offset x1 (newPos ins1) x2 (newPos ins2) if check==LT then return x2 else return x1 prep'd <- mapM prep (IMap.toList w) let (emptyFalse,emptyTrue) = partition ((offset >) . fst) prep'd mayID <- {-# SCC "goNext.newWinnerThenProceed.mayID" #-} case map snd emptyFalse of [] -> return Nothing (first:rest) -> do best@((_sourceIndex,_instructions),bp,_orbit') <- foldM challenge first rest newWinner offset best startWin <- bp !! 0 let states = ISet.toAscList did keepState i1 = do pos <- maybe (err "newWinnerThenProceed,2") return =<< m_pos s1 !! i1 startsAt <- pos !! 0 let keep = (startsAt <= startWin) || (offset <= startsAt) when (not keep) $ do writeSTRef eliminatedStateFlag True when (i1 == startState) (writeSTRef eliminatedRespawnFlag True) return keep states' <- filterM keepState states changed <- readSTRef eliminatedStateFlag if changed then return (Just states') else return Nothing case emptyTrue of [] -> case IMap.lookup startState w of Nothing -> return () Just ins -> winEmpty offset ins [first] -> newWinner offset (snd first) _ -> err "newWinnerThenProceed,3 : too many emptyTrue values" case mayID of Nothing -> return (did,dt) -- proceedNow s1 s2 did dt offset prev input Just states' -> do writeSTRef eliminatedStateFlag False respawn <- readSTRef eliminatedRespawnFlag DFA {d_id=did',d_dt=dt'} <- if respawn then do writeSTRef eliminatedRespawnFlag False _ <- spawnStart b_tags blank startState s1 (succ offset) return (Trie.lookupAsc trie (sort (states'++[startState]))) else return (Trie.lookupAsc trie states') return (did',dt') winEmpty preTag winInstructions = {-# SCC "goNext.winEmpty" #-} do newerPos <- newA_ b_tags copySTU (blank_pos blank) newerPos set newerPos 0 preTag doActions preTag newerPos (newPos winInstructions) putMQ (WScratch newerPos) winQ newWinner preTag ((_sourceIndex,winInstructions),oldPos,_newOrbit) = {-# SCC "goNext.newWinner" #-} do newerPos <- newA_ b_tags copySTU oldPos newerPos doActions preTag newerPos (newPos winInstructions) putMQ (WScratch newerPos) winQ finalizeWinners = do winners <- fmap (foldl' (\ rest mqa -> mqa_ws mqa : rest) []) $ readSTRef (mq_list winQ) -- reverses the winner list resetMQ winQ writeSTRef storeNext (return []) mapM (tagsToGroupsST aGroups) winners -- goNext then ends with the next statement next s1In s2In didIn dtIn offsetIn prevIn inputIn {-# INLINE doActions #-} doActions :: Position -> STUArray s Tag Position -> [(Tag, Action)] -> ST s () doActions preTag pos ins = mapM_ doAction ins where postTag = succ preTag doAction (tag,SetPre) = set pos tag preTag doAction (tag,SetPost) = set pos tag postTag doAction (tag,SetVal v) = set pos tag v ---- {-# INLINE mkTest #-} mkTest :: Uncons text => Bool -> WhichTest -> Index -> Char -> text -> Bool mkTest isMultiline = if isMultiline then test_multiline else test_singleline ---- {- MUTABLE WINNER QUEUE -} data MQA s = MQA {mqa_start :: !Position, mqa_ws :: !(WScratch s)} data MQ s = MQ { mq_earliest :: !(STRef s Position) , mq_list :: !(STRef s [MQA s]) } newMQ :: S.ST s (MQ s) newMQ = do earliest <- newSTRef maxBound list <- newSTRef [] return (MQ earliest list) resetMQ :: MQ s -> S.ST s () resetMQ (MQ {mq_earliest=earliest,mq_list=list}) = do writeSTRef earliest maxBound writeSTRef list [] putMQ :: WScratch s -> MQ s -> S.ST s () putMQ ws (MQ {mq_earliest=earliest,mq_list=list}) = do start <- w_pos ws !! 0 let mqa = MQA start ws startE <- readSTRef earliest if start <= startE then writeSTRef earliest start >> writeSTRef list [mqa] else do old <- readSTRef list let !rest = dropWhile (\ m -> start <= mqa_start m) old !new = mqa : rest writeSTRef list new getMQ :: Position -> MQ s -> ST s [WScratch s] getMQ pos (MQ {mq_earliest=earliest,mq_list=list}) = do old <- readSTRef list case span (\m -> pos <= mqa_start m) old of ([],ans) -> do writeSTRef earliest maxBound writeSTRef list [] return (map mqa_ws ans) (new,ans) -> do writeSTRef earliest (mqa_start (last new)) writeSTRef list new return (map mqa_ws ans) {- MUTABLE SCRATCH DATA STRUCTURES -} data SScratch s = SScratch { _s_1 :: !(MScratch s) , _s_2 :: !(MScratch s) , _s_rest :: !( MQ s , BlankScratch s , STArray s Index ((Index,Instructions),STUArray s Tag Position,OrbitLog) ) } data MScratch s = MScratch { m_pos :: !(STArray s Index (Maybe (STUArray s Tag Position))) , m_orbit :: !(STArray s Index OrbitLog) } newtype BlankScratch s = BlankScratch { blank_pos :: (STUArray s Tag Position) } newtype WScratch s = WScratch { w_pos :: (STUArray s Tag Position) } {- DEBUGGING HELPERS -} {- indent :: String -> String indent xs = ' ':' ':xs showMS :: MScratch s -> Index -> ST s String showMS s i = do ma <- m_pos s !! i mc <- m_orbit s !! i a <- case ma of Nothing -> return "No pos" Just pos -> fmap show (getAssocs pos) let c = show mc return $ unlines [ "MScratch, index = "++show i , indent a , indent c] showMS2 :: MScratch s -> ST s String showMS2 s = do (lo,hi) <- getBounds (m_pos s) strings <- forM [lo..hi] (showMS s) return (unlines strings) showWS :: WScratch s -> ST s String showWS (WScratch pos) = do a <- getAssocs pos return $ unlines [ "WScratch" , indent (show a)] -} {- CREATING INITIAL MUTABLE SCRATCH DATA STRUCTURES -} {-# INLINE newA #-} newA :: (MArray (STUArray s) e (ST s)) => (Tag,Tag) -> e -> S.ST s (STUArray s Tag e) newA b_tags initial = newArray b_tags initial {-# INLINE newA_ #-} newA_ :: (MArray (STUArray s) e (ST s)) => (Tag,Tag) -> S.ST s (STUArray s Tag e) newA_ b_tags = newArray_ b_tags newScratch :: (Index,Index) -> (Tag,Tag) -> S.ST s (SScratch s) newScratch b_index b_tags = do s1 <- newMScratch b_index s2 <- newMScratch b_index winQ <- newMQ blank <- fmap BlankScratch (newA b_tags (-1)) which <- (newArray b_index ((-1,err "newScratch which 1"),err "newScratch which 2",err "newScratch which 3")) return (SScratch s1 s2 (winQ,blank,which)) newMScratch :: (Index,Index) -> S.ST s (MScratch s) newMScratch b_index = do pos's <- newArray b_index Nothing orbit's <- newArray b_index mempty return (MScratch pos's orbit's) {- COMPOSE A FUNCTION CLOSURE TO COMPARE TAG VALUES -} newtype F s = F ([F s] -> C s) type C s = Position -> ((Int, Instructions), STUArray s Tag Position, IntMap Orbits) -> [(Int, Action)] -> ((Int, Instructions), STUArray s Tag Position, IntMap Orbits) -> [(Int, Action)] -> ST s Ordering {-# INLINE orderOf #-} orderOf :: Action -> Action -> Ordering orderOf post1 post2 = case (post1,post2) of (SetPre,SetPre) -> EQ (SetPost,SetPost) -> EQ (SetPre,SetPost) -> LT (SetPost,SetPre) -> GT (SetVal v1,SetVal v2) -> compare v1 v2 _ -> err $ "bestTrans.compareWith.choose sees incomparable "++show (post1,post2) ditzyComp'3 :: forall s. Array Tag OP -> C s ditzyComp'3 aTagOP = comp0 where (F comp1:compsRest) = allcomps 1 comp0 :: C s comp0 preTag x1@(_state1,pos1,_orbit1') np1 x2@(_state2,pos2,_orbit2') np2 = do c <- liftM2 compare (pos2!!0) (pos1!!0) -- reversed since Minimize case c of EQ -> comp1 compsRest preTag x1 np1 x2 np2 answer -> return answer allcomps :: Tag -> [F s] allcomps tag | tag > top = [F (\ _ _ _ _ _ _ -> return EQ)] | otherwise = case aTagOP ! tag of Orbit -> F (challenge_Orb tag) : allcomps (succ tag) Maximize -> F (challenge_Max tag) : allcomps (succ tag) Ignore -> F (challenge_Ignore tag) : allcomps (succ tag) Minimize -> err "allcomps Minimize" where top = snd (bounds aTagOP) challenge_Ignore !tag (F next:comps) preTag x1 np1 x2 np2 = case np1 of ((t1,_):rest1) | t1==tag -> case np2 of ((t2,_):rest2) | t2==tag -> next comps preTag x1 rest1 x2 rest2 _ -> next comps preTag x1 rest1 x2 np2 _ -> do case np2 of ((t2,_):rest2) | t2==tag -> next comps preTag x1 np1 x2 rest2 _ -> next comps preTag x1 np1 x2 np2 challenge_Ignore _ [] _ _ _ _ _ = err "impossible 2347867" challenge_Max !tag (F next:comps) preTag x1@(_state1,pos1,_orbit1') np1 x2@(_state2,pos2,_orbit2') np2 = case np1 of ((t1,b1):rest1) | t1==tag -> case np2 of ((t2,b2):rest2) | t2==tag -> if b1==b2 then next comps preTag x1 rest1 x2 rest2 else return (orderOf b1 b2) _ -> do p2 <- pos2 !! tag let p1 = case b1 of SetPre -> preTag SetPost -> succ preTag SetVal v -> v if p1==p2 then next comps preTag x1 rest1 x2 np2 else return (compare p1 p2) _ -> do p1 <- pos1 !! tag case np2 of ((t2,b2):rest2) | t2==tag -> do let p2 = case b2 of SetPre -> preTag SetPost -> succ preTag SetVal v -> v if p1==p2 then next comps preTag x1 np1 x2 rest2 else return (compare p1 p2) _ -> do p2 <- pos2 !! tag if p1==p2 then next comps preTag x1 np1 x2 np2 else return (compare p1 p2) challenge_Max _ [] _ _ _ _ _ = err "impossible 9384324" challenge_Orb !tag (F next:comps) preTag x1@(_state1,_pos1,orbit1') np1 x2@(_state2,_pos2,orbit2') np2 = let s1 = IMap.lookup tag orbit1' s2 = IMap.lookup tag orbit2' in case (s1,s2) of (Nothing,Nothing) -> next comps preTag x1 np1 x2 np2 (Just o1,Just o2) | inOrbit o1 == inOrbit o2 -> case compare (ordinal o1) (ordinal o2) `mappend` comparePos (viewl (getOrbits o1)) (viewl (getOrbits o2)) of EQ -> next comps preTag x1 np1 x2 np2 answer -> return answer _ -> err $ unlines [ "challenge_Orb is too stupid to handle mismatched orbit data :" , show(tag,preTag,np1,np2) , show s1 , show s2 ] challenge_Orb _ [] _ _ _ _ _ = err "impossible 0298347" comparePos :: (ViewL Position) -> (ViewL Position) -> Ordering comparePos EmptyL EmptyL = EQ comparePos EmptyL _ = GT comparePos _ EmptyL = LT comparePos (p1 :< ps1) (p2 :< ps2) = compare p1 p2 `mappend` comparePos (viewl ps1) (viewl ps2) {- CONVERT WINNERS TO MATCHARRAY -} tagsToGroupsST :: forall s. Array GroupIndex [GroupInfo] -> WScratch s -> S.ST s MatchArray tagsToGroupsST aGroups (WScratch {w_pos=pos})= do let b_max = snd (bounds (aGroups)) ma <- newArray (0,b_max) (-1,0) :: ST s (STArray s Int (MatchOffset,MatchLength)) startPos0 <- pos !! 0 stopPos0 <- pos !! 1 set ma 0 (startPos0,stopPos0-startPos0) let act _this_index [] = return () act this_index ((GroupInfo _ parent start stop flagtag):gs) = do flagVal <- pos !! flagtag if (-1) == flagVal then act this_index gs else do startPos <- pos !! start stopPos <- pos !! stop (startParent,lengthParent) <- ma !! parent let ok = (0 <= startParent && 0 <= lengthParent && startParent <= startPos && stopPos <= startPos + lengthParent) if not ok then act this_index gs else set ma this_index (startPos,stopPos-startPos) forM_ (range (1,b_max)) $ (\i -> act i (aGroups!i)) unsafeFreeze ma {- MUTABLE TAGGED TRANSITION (returning Tag-0 value) -} {-# INLINE spawnStart #-} -- Reset the entry at "Index", or allocate such an entry. -- set tag 0 to the "Position" spawnStart :: (Tag,Tag) -> BlankScratch s -> Index -> MScratch s -> Position -> S.ST s Position spawnStart b_tags (BlankScratch blankPos) i s1 thisPos = do oldPos <- m_pos s1 !! i pos <- case oldPos of Nothing -> do pos' <- newA_ b_tags set (m_pos s1) i (Just pos') return pos' Just pos -> return pos copySTU blankPos pos set (m_orbit s1) i $! mempty set pos 0 thisPos return thisPos {-# INLINE updateCopy #-} updateCopy :: ((Index, Instructions), STUArray s Tag Position, OrbitLog) -> Index -> MScratch s -> Int -> ST s Position updateCopy ((_i1,instructions),oldPos,newOrbit) preTag s2 i2 = do b_tags <- getBounds oldPos newerPos <- maybe (do a <- newA_ b_tags set (m_pos s2) i2 (Just a) return a) return =<< m_pos s2 !! i2 copySTU oldPos newerPos doActions preTag newerPos (newPos instructions) set (m_orbit s2) i2 $! newOrbit newerPos !! 0 {- USING memcpy TO COPY STUARRAY DATA -} -- #ifdef __GLASGOW_HASKELL__ foreign import ccall unsafe "memcpy" memcpy :: MutableByteArray# RealWorld -> MutableByteArray# RealWorld -> Int# -> IO () {- Prelude Data.Array.Base> :i STUArray data STUArray s i e = STUArray !i !i !Int (GHC.Prim.MutableByteArray# s) -- Defined in Data.Array.Base -} -- This has been updated for ghc 6.8.3 and still works with ghc 6.10.1 {-# INLINE copySTU #-} copySTU :: (Show i,Ix i,MArray (STUArray s) e (S.ST s)) => STUArray s i e -> STUArray s i e -> S.ST s () -- (STUArray s i e) copySTU _souce@(STUArray _ _ _ msource) _destination@(STUArray _ _ _ mdest) = -- do b1 <- getBounds s1 -- b2 <- getBounds s2 -- when (b1/=b2) (error ("\n\nWTF copySTU: "++show (b1,b2))) ST $ \s1# -> case sizeofMutableByteArray# msource of { n# -> case unsafeCoerce# memcpy mdest msource n# s1# of { (# s2#, () #) -> (# s2#, () #) }} {- #else /* !__GLASGOW_HASKELL__ */ copySTU :: (MArray (STUArray s) e (S.ST s))=> STUArray s Tag e -> STUArray s Tag e -> S.ST s (STUArray s i e) copySTU source destination = do b@(start,stop) <- getBounds source b' <- getBounds destination -- traceCopy ("> copySTArray "++show b) $ do when (b/=b') (fail $ "Text.Regex.TDFA.RunMutState copySTUArray bounds mismatch"++show (b,b')) forM_ (range b) $ \index -> set destination index =<< source !! index return destination #endif /* !__GLASGOW_HASKELL__ */ -} regex-tdfa-1.1.8/Text/Regex/TDFA/NewDFA/Engine_FA.hs0000644000000000000000000006145511536532137017645 0ustar0000000000000000-- | This is the code for the main engine. This captures the posix -- subexpressions. There is also a non-capturing engine, and a -- testing engine. -- -- It is polymorphic over the internal Uncons type class, and -- specialized to produce the needed variants. module Text.Regex.TDFA.NewDFA.Engine_FA(execMatch) where import Data.Array.Base(unsafeRead,unsafeWrite,STUArray(..)) -- #ifdef __GLASGOW_HASKELL__ import GHC.Arr(STArray(..)) import GHC.ST(ST(..)) import GHC.Prim(MutableByteArray#,RealWorld,Int#,sizeofMutableByteArray#,unsafeCoerce#) {- -- #else import Control.Monad.ST(ST) import Data.Array.ST(STArray) -- #endif -} import Prelude hiding ((!!)) import Control.Monad(when,unless,forM,forM_,liftM2,foldM) import Data.Array.MArray(MArray(..),unsafeFreeze) import Data.Array.IArray(Array,bounds,assocs,Ix(range)) import qualified Data.IntMap.CharMap2 as CMap(findWithDefault) import Data.IntMap(IntMap) import qualified Data.IntMap as IMap(null,toList,lookup,insert) import Data.Maybe(catMaybes) import Data.Monoid(Monoid(..)) import qualified Data.IntSet as ISet(toAscList,null) import Data.Array.IArray((!)) import Data.List(sortBy,groupBy) import Data.STRef(STRef,newSTRef,readSTRef,writeSTRef) import qualified Control.Monad.ST.Strict as S(ST,runST) import Data.Sequence(Seq,ViewL(..),viewl) import qualified Data.Sequence as Seq(null) import qualified Data.ByteString.Char8 as SBS(ByteString) import qualified Data.ByteString.Lazy.Char8 as LBS(ByteString) import Text.Regex.Base(MatchArray,MatchOffset,MatchLength) import Text.Regex.TDFA.Common hiding (indent) import Text.Regex.TDFA.NewDFA.Uncons(Uncons(uncons)) import Text.Regex.TDFA.NewDFA.MakeTest(test_singleline,test_multiline) --import Debug.Trace -- trace :: String -> a -> a -- trace _ a = a err :: String -> a err s = common_error "Text.Regex.TDFA.NewDFA.Engine_FA" s {-# INLINE (!!) #-} (!!) :: (MArray a e (S.ST s),Ix i) => a i e -> Int -> S.ST s e (!!) = unsafeRead {-# INLINE set #-} set :: (MArray a e (S.ST s),Ix i) => a i e -> Int -> e -> S.ST s () set = unsafeWrite noSource :: ((Index, Instructions),STUArray s Tag Position,OrbitLog) noSource = ((-1,err "noSource"),err "noSource",err "noSource") {-# SPECIALIZE execMatch :: Regex -> Position -> Char -> ([] Char) -> [MatchArray] #-} {-# SPECIALIZE execMatch :: Regex -> Position -> Char -> (Seq Char) -> [MatchArray] #-} {-# SPECIALIZE execMatch :: Regex -> Position -> Char -> SBS.ByteString -> [MatchArray] #-} {-# SPECIALIZE execMatch :: Regex -> Position -> Char -> LBS.ByteString -> [MatchArray] #-} execMatch :: Uncons text => Regex -> Position -> Char -> text -> [MatchArray] execMatch (Regex { regex_dfa = DFA {d_id=didIn,d_dt=dtIn} , regex_init = startState , regex_b_index = b_index , regex_b_tags = b_tags_all , regex_tags = aTags , regex_groups = aGroups , regex_compOptions = CompOption { multiline = newline } } ) offsetIn prevIn inputIn = S.runST goNext where b_tags :: (Tag,Tag) !b_tags = b_tags_all orbitTags :: [Tag] !orbitTags = map fst . filter ((Orbit==).snd) . assocs $ aTags !test = mkTest newline comp :: C s comp = {-# SCC "matchHere.comp" #-} ditzyComp'3 aTags goNext = {-# SCC "goNext" #-} do (SScratch s1In s2In (winQ,blank,which)) <- newScratch b_index b_tags spawnAt b_tags blank startState s1In offsetIn let next s1 s2 did dt offset prev input = {-# SCC "goNext.next" #-} case dt of Testing' {dt_test=wt,dt_a=a,dt_b=b} -> if test wt offset prev input then next s1 s2 did a offset prev input else next s1 s2 did b offset prev input Simple' {dt_win=w,dt_trans=t,dt_other=o} -> do unless (IMap.null w) $ processWinner s1 w offset case uncons input of Nothing -> finalizeWinner Just (c,input') -> case CMap.findWithDefault o c t of Transition {trans_single=DFA {d_id=did',d_dt=dt'},trans_how=dtrans} | ISet.null did' -> finalizeWinner | otherwise -> findTrans s1 s2 did did' dt' dtrans offset c input' -- compressOrbits gets all the current Tag-0 start information from -- the NFA states; then it loops through all the Orbit tags with -- compressOrbit. -- -- compressOrbit on such a Tag loops through all the NFS states' -- m_orbit record, discardind ones that are Nothing and discarding -- ones that are too new to care about (after the cutoff value). -- -- compressOrbit then groups the Orbits records by the Tag-0 start -- position and the basePos position. Entried in different groups -- will never be comparable in the future so they can be processed -- separately. Groups could probably be even more finely -- distinguished, as a futher optimization, but the justification will -- be tricky. -- -- Current Tag-0 values are at most offset and all newly spawned -- groups will have Tag-0 of at least (succ offset) so the current -- groups are closed to those spawned in the future. The basePos may -- be as large as offset and may be overwritten later with values of -- offset or larger (and this will also involve deleting the Orbits -- record). Thus there could be a future collision between a current -- group with basePos==offset and an updated record that acquires -- basePos==offset. By excluding groups with basePos before the -- current offset the collision between existing and future records -- is avoided. -- -- An entry in a group can only collide with that group's -- descendents. compressOrbit sends each group to the compressGroup -- command. -- -- compressGroup on a single record checks whether it's Seq can be -- cleared and if so it will clear it (and set ordinal to Nothing but -- this this not particularly important). -- -- compressGroup on many records sorts and groups the members and zips -- the groups with their new ordinal value. The comparision is based -- on the old ordinal value, then the inOrbit value, and then the (Seq -- Position) data. -- -- The old ordinals of the group will all be Nothing or all be Just, -- but this condition is neither checked nor violations detected. -- This comparision is justified because once records get different -- ordinals assigned they will never change places. -- -- The inOrbit Bool is only different if one of them has set the stop -- position to at most (succ offset). They will obly be compared if -- the other one leaves, an its stop position will be at least offset. -- The previous sentence is justified by inspectin of the "assemble" -- function in the TDFA module: there is no (PostUpdate -- LeaveOrbitTask) so the largest possible value for the stop Tag is -- (pred offset). Thus the record with inOrbit==False would beat (be -- GT than) the record with inOrbit==True. -- -- The Seq comparison is safe because the largest existing Position -- value is (pred offset) and the smallest future Position value is -- offset. The previous sentence is justified by inspectin of the -- "assemble" function in the TDFA module: there is no (PostUpdate -- EnterOrbitTags) so the largest possible value in the Seq is (pred -- offset). -- -- The updated Orbits get the new ordinal value and an empty (Seq -- Position). compressOrbits s1 did offset = do let getStart state = do start <- maybe (err "compressOrbit,1") (!! 0) =<< m_pos s1 !! state return (state,start) cutoff = offset - 50 -- Require: cutoff <= offset, MAGIC TUNABLE CONSTANT 50 ss <- mapM getStart (ISet.toAscList did) let compressOrbit tag = do mos <- forM ss ( \ p@(state,_start) -> do mo <- fmap (IMap.lookup tag) (m_orbit s1 !! state) case mo of Just orbits | basePos orbits < cutoff -> return (Just (p,orbits)) | otherwise -> return Nothing _ -> return Nothing ) let compressGroup [((state,_),orbit)] | Seq.null (getOrbits orbit) = return () | otherwise = set (m_orbit s1) state . (IMap.insert tag $! (orbit { ordinal = Nothing, getOrbits = mempty})) =<< m_orbit s1 !! state compressGroup gs = do let sortPos (_,b1) (_,b2) = compare (ordinal b1) (ordinal b2) `mappend` compare (inOrbit b2) (inOrbit b1) `mappend` comparePos (viewl (getOrbits b1)) (viewl (getOrbits b2)) groupPos (_,b1) (_,b2) = ordinal b1 == ordinal b2 && getOrbits b1 == getOrbits b2 gs' = zip [(1::Int)..] (groupBy groupPos . sortBy sortPos $ gs) forM_ gs' $ \ (!n,eqs) -> do forM_ eqs $ \ ((state,_),orbit) -> set (m_orbit s1) state . (IMap.insert tag $! (orbit { ordinal = Just n, getOrbits = mempty })) =<< m_orbit s1 !! state let sorter ((_,a1),b1) ((_,a2),b2) = compare a1 a2 `mappend` compare (basePos b1) (basePos b2) grouper ((_,a1),b1) ((_,a2),b2) = a1==a2 && basePos b1 == basePos b2 orbitGroups = groupBy grouper . sortBy sorter . catMaybes $ mos mapM_ compressGroup orbitGroups mapM_ compressOrbit orbitTags -- findTrans has to (part 1) decide, for each destination, "which" of -- zero or more source NFA states will be the chosen source. Then it -- has to (part 2) perform the transition or spawn. It keeps track of -- the starting index while doing so, and compares the earliest start -- with the stored winners. (part 3) If some winners are ready to be -- released then the future continuation of the search is placed in -- "storeNext". If no winners are ready to be released then the -- computation continues immediately. findTrans s1 s2 did did' dt' dtrans offset prev' input' = {-# SCC "goNext.findTrans" #-} do -- findTrans part 0 -- MAGIC TUNABLE CONSTANT 100 (and 100-1). TODO: (offset .&. 127 == 127) instead? when (not (null orbitTags) && (offset `rem` 100 == 99)) (compressOrbits s1 did offset) -- findTrans part 1 let findTransTo (destIndex,sources) | IMap.null sources = set which destIndex noSource | otherwise = do let prep (sourceIndex,(_dopa,instructions)) = {-# SCC "goNext.findTrans.prep" #-} do pos <- maybe (err $ "findTrans,1 : "++show (sourceIndex,destIndex,did')) return =<< m_pos s1 !! sourceIndex orbit <- m_orbit s1 !! sourceIndex let orbit' = maybe orbit (\ f -> f offset orbit) (newOrbits instructions) return ((sourceIndex,instructions),pos,orbit') challenge x1@((_si1,ins1),_p1,_o1) x2@((_si2,ins2),_p2,_o2) = {-# SCC "goNext.findTrans.challenge" #-} do check <- comp offset x1 (newPos ins1) x2 (newPos ins2) if check==LT then return x2 else return x1 (first:rest) <- mapM prep (IMap.toList sources) set which destIndex =<< foldM challenge first rest let dl = IMap.toList dtrans mapM_ findTransTo dl -- findTrans part 2 let performTransTo (destIndex,_sources) = {-# SCC "goNext.findTrans.performTransTo" #-} do x@((sourceIndex,_instructions),_pos,_orbit') <- which !! destIndex unless (sourceIndex == (-1)) $ (updateCopy x offset s2 destIndex) mapM_ performTransTo dl -- findTrans part 3 let offset' = succ offset in seq offset' $ next s2 s1 did' dt' offset' prev' input' -- The "newWinnerThenProceed" can find both a new non-empty winner and -- a new empty winner. A new non-empty winner can cause some of the -- NFA states that comprise the DFA state to be eliminated, and if the -- startState is eliminated then it must then be respawned. And -- imperative flag setting and resetting style is used. -- -- A non-empty winner from the startState might obscure a potential -- empty winner (form the startState at the current offset). This -- winEmpty possibility is also checked for. (unit test pattern ".*") -- (futher test "(.+|.+.)*" on "aa\n") {-# INLINE processWinner #-} processWinner s1 w offset = {-# SCC "goNext.newWinnerThenProceed" #-} do let prep x@(sourceIndex,instructions) = {-# SCC "goNext.newWinnerThenProceed.prep" #-} do pos <- maybe (err "newWinnerThenProceed,1") return =<< m_pos s1 !! sourceIndex startPos <- pos !! 0 orbit <- m_orbit s1 !! sourceIndex let orbit' = maybe orbit (\ f -> f offset orbit) (newOrbits instructions) return (startPos,(x,pos,orbit')) challenge x1@((_si1,ins1),_p1,_o1) x2@((_si2,ins2),_p2,_o2) = {-# SCC "goNext.newWinnerThenProceed.challenge" #-} do check <- comp offset x1 (newPos ins1) x2 (newPos ins2) if check==LT then return x2 else return x1 prep'd <- mapM prep (IMap.toList w) case map snd prep'd of [] -> return () (first:rest) -> newWinner offset =<< foldM challenge first rest newWinner preTag ((_sourceIndex,winInstructions),oldPos,_newOrbit) = {-# SCC "goNext.newWinner" #-} do newerPos <- newA_ b_tags copySTU oldPos newerPos doActions preTag newerPos (newPos winInstructions) putMQ (WScratch newerPos) winQ finalizeWinner = do mWinner <- readSTRef (mq_mWin winQ) case mWinner of Nothing -> return [] Just winner -> resetMQ winQ >> mapM (tagsToGroupsST aGroups) [winner] -- goNext then ends with the next statement next s1In s2In didIn dtIn offsetIn prevIn inputIn {-# INLINE doActions #-} doActions :: Position -> STUArray s Tag Position -> [(Tag, Action)] -> ST s () doActions preTag pos ins = mapM_ doAction ins where postTag = succ preTag doAction (tag,SetPre) = set pos tag preTag doAction (tag,SetPost) = set pos tag postTag doAction (tag,SetVal v) = set pos tag v ---- {-# INLINE mkTest #-} mkTest :: Uncons text => Bool -> WhichTest -> Index -> Char -> text -> Bool mkTest isMultiline = if isMultiline then test_multiline else test_singleline ---- {- MUTABLE WINNER QUEUE -} newtype MQ s = MQ { mq_mWin :: STRef s (Maybe (WScratch s)) } newMQ :: S.ST s (MQ s) newMQ = do mWin <- newSTRef Nothing return (MQ mWin) resetMQ :: MQ s -> S.ST s () resetMQ (MQ {mq_mWin=mWin}) = do writeSTRef mWin Nothing putMQ :: WScratch s -> MQ s -> S.ST s () putMQ ws (MQ {mq_mWin=mWin}) = do writeSTRef mWin (Just ws) {- MUTABLE SCRATCH DATA STRUCTURES -} data SScratch s = SScratch { _s_1 :: !(MScratch s) , _s_2 :: !(MScratch s) , _s_rest :: !( MQ s , BlankScratch s , STArray s Index ((Index,Instructions),STUArray s Tag Position,OrbitLog) ) } data MScratch s = MScratch { m_pos :: !(STArray s Index (Maybe (STUArray s Tag Position))) , m_orbit :: !(STArray s Index OrbitLog) } newtype BlankScratch s = BlankScratch { _blank_pos :: (STUArray s Tag Position) } newtype WScratch s = WScratch { w_pos :: (STUArray s Tag Position) } {- DEBUGGING HELPERS -} {- indent :: String -> String indent xs = ' ':' ':xs showMS :: MScratch s -> Index -> ST s String showMS s i = do ma <- m_pos s !! i mc <- m_orbit s !! i a <- case ma of Nothing -> return "No pos" Just pos -> fmap show (getAssocs pos) let c = show mc return $ unlines [ "MScratch, index = "++show i , indent a , indent c] showWS :: WScratch s -> ST s String showWS (WScratch pos) = do a <- getAssocs pos return $ unlines [ "WScratch" , indent (show a)] -} {- CREATING INITIAL MUTABLE SCRATCH DATA STRUCTURES -} {-# INLINE newA #-} newA :: (MArray (STUArray s) e (ST s)) => (Tag,Tag) -> e -> S.ST s (STUArray s Tag e) newA b_tags initial = newArray b_tags initial {-# INLINE newA_ #-} newA_ :: (MArray (STUArray s) e (ST s)) => (Tag,Tag) -> S.ST s (STUArray s Tag e) newA_ b_tags = newArray_ b_tags newScratch :: (Index,Index) -> (Tag,Tag) -> S.ST s (SScratch s) newScratch b_index b_tags = do s1 <- newMScratch b_index s2 <- newMScratch b_index winQ <- newMQ blank <- fmap BlankScratch (newA b_tags (-1)) which <- (newArray b_index ((-1,err "newScratch which 1"),err "newScratch which 2",err "newScratch which 3")) return (SScratch s1 s2 (winQ,blank,which)) newMScratch :: (Index,Index) -> S.ST s (MScratch s) newMScratch b_index = do pos's <- newArray b_index Nothing orbit's <- newArray b_index mempty return (MScratch pos's orbit's) {- COMPOSE A FUNCTION CLOSURE TO COMPARE TAG VALUES -} newtype F s = F ([F s] -> C s) type C s = Position -> ((Int, Instructions), STUArray s Tag Position, IntMap Orbits) -> [(Int, Action)] -> ((Int, Instructions), STUArray s Tag Position, IntMap Orbits) -> [(Int, Action)] -> ST s Ordering {-# INLINE orderOf #-} orderOf :: Action -> Action -> Ordering orderOf post1 post2 = case (post1,post2) of (SetPre,SetPre) -> EQ (SetPost,SetPost) -> EQ (SetPre,SetPost) -> LT (SetPost,SetPre) -> GT (SetVal v1,SetVal v2) -> compare v1 v2 _ -> err $ "bestTrans.compareWith.choose sees incomparable "++show (post1,post2) ditzyComp'3 :: forall s. Array Tag OP -> C s ditzyComp'3 aTagOP = comp0 where (F comp1:compsRest) = allcomps 1 comp0 :: C s comp0 preTag x1@(_state1,pos1,_orbit1') np1 x2@(_state2,pos2,_orbit2') np2 = do c <- liftM2 compare (pos2!!0) (pos1!!0) -- reversed since Minimize case c of EQ -> comp1 compsRest preTag x1 np1 x2 np2 answer -> return answer allcomps :: Tag -> [F s] allcomps tag | tag > top = [F (\ _ _ _ _ _ _ -> return EQ)] | otherwise = case aTagOP ! tag of Orbit -> F (challenge_Orb tag) : allcomps (succ tag) Maximize -> F (challenge_Max tag) : allcomps (succ tag) Ignore -> F (challenge_Ignore tag) : allcomps (succ tag) Minimize -> err "allcomps Minimize" where top = snd (bounds aTagOP) challenge_Ignore !tag (F next:comps) preTag x1 np1 x2 np2 = case np1 of ((t1,_):rest1) | t1==tag -> case np2 of ((t2,_):rest2) | t2==tag -> next comps preTag x1 rest1 x2 rest2 _ -> next comps preTag x1 rest1 x2 np2 _ -> do case np2 of ((t2,_):rest2) | t2==tag -> next comps preTag x1 np1 x2 rest2 _ -> next comps preTag x1 np1 x2 np2 challenge_Ignore _ [] _ _ _ _ _ = err "impossible 2347867" challenge_Max !tag (F next:comps) preTag x1@(_state1,pos1,_orbit1') np1 x2@(_state2,pos2,_orbit2') np2 = case np1 of ((t1,b1):rest1) | t1==tag -> case np2 of ((t2,b2):rest2) | t2==tag -> if b1==b2 then next comps preTag x1 rest1 x2 rest2 else return (orderOf b1 b2) _ -> do p2 <- pos2 !! tag let p1 = case b1 of SetPre -> preTag SetPost -> succ preTag SetVal v -> v if p1==p2 then next comps preTag x1 rest1 x2 np2 else return (compare p1 p2) _ -> do p1 <- pos1 !! tag case np2 of ((t2,b2):rest2) | t2==tag -> do let p2 = case b2 of SetPre -> preTag SetPost -> succ preTag SetVal v -> v if p1==p2 then next comps preTag x1 np1 x2 rest2 else return (compare p1 p2) _ -> do p2 <- pos2 !! tag if p1==p2 then next comps preTag x1 np1 x2 np2 else return (compare p1 p2) challenge_Max _ [] _ _ _ _ _ = err "impossible 9384324" challenge_Orb !tag (F next:comps) preTag x1@(_state1,_pos1,orbit1') np1 x2@(_state2,_pos2,orbit2') np2 = let s1 = IMap.lookup tag orbit1' s2 = IMap.lookup tag orbit2' in case (s1,s2) of (Nothing,Nothing) -> next comps preTag x1 np1 x2 np2 (Just o1,Just o2) | inOrbit o1 == inOrbit o2 -> case compare (ordinal o1) (ordinal o2) `mappend` comparePos (viewl (getOrbits o1)) (viewl (getOrbits o2)) of EQ -> next comps preTag x1 np1 x2 np2 answer -> return answer _ -> err $ unlines [ "challenge_Orb is too stupid to handle mismatched orbit data :" , show(tag,preTag,np1,np2) , show s1 , show s2 ] challenge_Orb _ [] _ _ _ _ _ = err "impossible 0298347" comparePos :: (ViewL Position) -> (ViewL Position) -> Ordering comparePos EmptyL EmptyL = EQ comparePos EmptyL _ = GT comparePos _ EmptyL = LT comparePos (p1 :< ps1) (p2 :< ps2) = compare p1 p2 `mappend` comparePos (viewl ps1) (viewl ps2) {- CONVERT WINNERS TO MATCHARRAY -} tagsToGroupsST :: forall s. Array GroupIndex [GroupInfo] -> WScratch s -> S.ST s MatchArray tagsToGroupsST aGroups (WScratch {w_pos=pos})= do let b_max = snd (bounds (aGroups)) ma <- newArray (0,b_max) (-1,0) :: ST s (STArray s Int (MatchOffset,MatchLength)) startPos0 <- pos !! 0 stopPos0 <- pos !! 1 set ma 0 (startPos0,stopPos0-startPos0) let act _this_index [] = return () act this_index ((GroupInfo _ parent start stop flagtag):gs) = do flagVal <- pos !! flagtag if (-1) == flagVal then act this_index gs else do startPos <- pos !! start stopPos <- pos !! stop (startParent,lengthParent) <- ma !! parent let ok = (0 <= startParent && 0 <= lengthParent && startParent <= startPos && stopPos <= startPos + lengthParent) if not ok then act this_index gs else set ma this_index (startPos,stopPos-startPos) forM_ (range (1,b_max)) $ (\i -> act i (aGroups!i)) unsafeFreeze ma {- MUTABLE TAGGED TRANSITION (returning Tag-0 value) -} {-# INLINE spawnAt #-} -- Reset the entry at "Index", or allocate such an entry. -- set tag 0 to the "Position" spawnAt :: (Tag,Tag) -> BlankScratch s -> Index -> MScratch s -> Position -> S.ST s () spawnAt b_tags (BlankScratch blankPos) i s1 thisPos = do oldPos <- m_pos s1 !! i pos <- case oldPos of Nothing -> do pos' <- newA_ b_tags set (m_pos s1) i (Just pos') return pos' Just pos -> return pos copySTU blankPos pos set (m_orbit s1) i $! mempty set pos 0 thisPos {-# INLINE updateCopy #-} updateCopy :: ((Index, Instructions), STUArray s Tag Position, OrbitLog) -> Index -> MScratch s -> Int -> ST s () updateCopy ((_i1,instructions),oldPos,newOrbit) preTag s2 i2 = do b_tags <- getBounds oldPos newerPos <- maybe (do a <- newA_ b_tags set (m_pos s2) i2 (Just a) return a) return =<< m_pos s2 !! i2 copySTU oldPos newerPos doActions preTag newerPos (newPos instructions) set (m_orbit s2) i2 $! newOrbit {- USING memcpy TO COPY STUARRAY DATA -} -- #ifdef __GLASGOW_HASKELL__ foreign import ccall unsafe "memcpy" memcpy :: MutableByteArray# RealWorld -> MutableByteArray# RealWorld -> Int# -> IO () {- Prelude Data.Array.Base> :i STUArray data STUArray s i e = STUArray !i !i !Int (GHC.Prim.MutableByteArray# s) -- Defined in Data.Array.Base -} -- This has been updated for ghc 6.8.3 and still works with ghc 6.10.1 {-# INLINE copySTU #-} copySTU :: (Show i,Ix i,MArray (STUArray s) e (S.ST s)) => STUArray s i e -> STUArray s i e -> S.ST s () -- (STUArray s i e) copySTU _souce@(STUArray _ _ _ msource) _destination@(STUArray _ _ _ mdest) = -- do b1 <- getBounds s1 -- b2 <- getBounds s2 -- when (b1/=b2) (error ("\n\nWTF copySTU: "++show (b1,b2))) ST $ \s1# -> case sizeofMutableByteArray# msource of { n# -> case unsafeCoerce# memcpy mdest msource n# s1# of { (# s2#, () #) -> (# s2#, () #) }} {- #else /* !__GLASGOW_HASKELL__ */ copySTU :: (MArray (STUArray s) e (S.ST s))=> STUArray s Tag e -> STUArray s Tag e -> S.ST s (STUArray s i e) copySTU source destination = do b@(start,stop) <- getBounds source b' <- getBounds destination -- traceCopy ("> copySTArray "++show b) $ do when (b/=b') (fail $ "Text.Regex.TDFA.RunMutState copySTUArray bounds mismatch"++show (b,b')) forM_ (range b) $ \index -> set destination index =<< source !! index return destination #endif /* !__GLASGOW_HASKELL__ */ -} regex-tdfa-1.1.8/Text/Regex/TDFA/NewDFA/Engine_NC.hs0000644000000000000000000002336111536532137017651 0ustar0000000000000000-- | This is the non-capturing form of Text.Regex.TDFA.NewDFA.String module Text.Regex.TDFA.NewDFA.Engine_NC(execMatch) where import Control.Monad(when,join,filterM) import Data.Array.Base(unsafeRead,unsafeWrite) import Prelude hiding ((!!)) import Data.Array.MArray(MArray(..),unsafeFreeze) import Data.Array.IArray(Ix) import Data.Array.ST(STArray,STUArray) import qualified Data.IntMap.CharMap2 as CMap(findWithDefault) import qualified Data.IntMap as IMap(null,toList,keys,member) import qualified Data.IntSet as ISet(toAscList) import Data.STRef(STRef,newSTRef,readSTRef,writeSTRef) import qualified Control.Monad.ST.Lazy as L(runST,strictToLazyST) import qualified Control.Monad.ST.Strict as S(ST) import Data.Sequence(Seq) import qualified Data.ByteString.Char8 as SBS(ByteString) import qualified Data.ByteString.Lazy.Char8 as LBS(ByteString) import Text.Regex.Base(MatchArray,MatchOffset,MatchLength) import qualified Text.Regex.TDFA.IntArrTrieSet as Trie(lookupAsc) import Text.Regex.TDFA.Common hiding (indent) import Text.Regex.TDFA.NewDFA.Uncons(Uncons(uncons)) import Text.Regex.TDFA.NewDFA.MakeTest(test_singleline,test_multiline) -- import Debug.Trace -- trace :: String -> a -> a -- trace _ a = a err :: String -> a err s = common_error "Text.Regex.TDFA.NewDFA.Engine_NC" s {-# INLINE (!!) #-} (!!) :: (MArray a e (S.ST s),Ix i) => a i e -> Int -> S.ST s e (!!) = unsafeRead {-# INLINE set #-} set :: (MArray a e (S.ST s),Ix i) => a i e -> Int -> e -> S.ST s () set = unsafeWrite {-# SPECIALIZE execMatch :: Regex -> Position -> Char -> ([] Char) -> [MatchArray] #-} {-# SPECIALIZE execMatch :: Regex -> Position -> Char -> (Seq Char) -> [MatchArray] #-} {-# SPECIALIZE execMatch :: Regex -> Position -> Char -> SBS.ByteString -> [MatchArray] #-} {-# SPECIALIZE execMatch :: Regex -> Position -> Char -> LBS.ByteString -> [MatchArray] #-} execMatch :: Uncons text => Regex -> Position -> Char -> text -> [MatchArray] execMatch (Regex { regex_dfa = (DFA {d_id=didIn,d_dt=dtIn}) , regex_init = startState , regex_b_index = b_index , regex_trie = trie , regex_compOptions = CompOption { multiline = newline } } ) offsetIn prevIn inputIn = L.runST runCaptureGroup where !test = mkTest newline runCaptureGroup = {-# SCC "runCaptureGroup" #-} do obtainNext <- L.strictToLazyST constructNewEngine let loop = do vals <- L.strictToLazyST obtainNext if null vals -- force vals before defining valsRest then return [] else do valsRest <- loop return (vals ++ valsRest) loop constructNewEngine :: S.ST s (S.ST s [MatchArray]) constructNewEngine = {-# SCC "constructNewEngine" #-} do storeNext <- newSTRef undefined writeSTRef storeNext (goNext storeNext) let obtainNext = join (readSTRef storeNext) return obtainNext goNext storeNext = {-# SCC "goNext" #-} do (SScratch s1In s2In winQ) <- newScratch b_index set s1In startState offsetIn writeSTRef storeNext (err "obtainNext called while goNext is running!") eliminatedStateFlag <- newSTRef False let next s1 s2 did dt offset prev input = {-# SCC "goNext.next" #-} case dt of Testing' {dt_test=wt,dt_a=a,dt_b=b} -> if test wt offset prev input then next s1 s2 did a offset prev input else next s1 s2 did b offset prev input Simple' {dt_win=w,dt_trans=t, dt_other=o} | IMap.null w -> case uncons input of Nothing -> finalizeWinners Just (c,input') -> do case CMap.findWithDefault o c t of Transition {trans_many=DFA {d_id=did',d_dt=dt'},trans_how=dtrans} -> findTrans s1 s2 did' dt' dtrans offset c input' | otherwise -> do (did',dt') <- processWinner s1 did dt w offset next' s1 s2 did' dt' offset prev input next' s1 s2 did dt offset prev input = {-# SCC "goNext'.next" #-} case dt of Testing' {dt_test=wt,dt_a=a,dt_b=b} -> if test wt offset prev input then next' s1 s2 did a offset prev input else next' s1 s2 did b offset prev input Simple' {dt_trans=t, dt_other=o} -> case uncons input of Nothing -> finalizeWinners Just (c,input') -> do case CMap.findWithDefault o c t of Transition {trans_many=DFA {d_id=did',d_dt=dt'},trans_how=dtrans} -> findTrans s1 s2 did' dt' dtrans offset c input' findTrans s1 s2 did' dt' dtrans offset prev' input' = {-# SCC "goNext.findTrans" #-} do -- let findTransTo (destIndex,sources) = do val <- if IMap.null sources then return (succ offset) else return . minimum =<< mapM (s1 !!) (IMap.keys sources) set s2 destIndex val return val earlyStart <- fmap minimum $ mapM findTransTo (IMap.toList dtrans) -- earlyWin <- readSTRef (mq_earliest winQ) if earlyWin < earlyStart then do winnersR <- getMQ earlyStart winQ writeSTRef storeNext (next s2 s1 did' dt' (succ offset) prev' input') mapM wsToGroup (reverse winnersR) else do let offset' = succ offset in seq offset' $ next s2 s1 did' dt' offset' prev' input' processWinner s1 did dt w offset = {-# SCC "goNext.newWinnerThenProceed" #-} do let getStart (sourceIndex,_) = s1 !! sourceIndex vals <- mapM getStart (IMap.toList w) let low = minimum vals -- perhaps a non-empty winner high = maximum vals -- perhaps an empty winner if low < offset then do putMQ (WScratch low offset) winQ when (high==offset || IMap.member startState w) $ putMQ (WScratch offset offset) winQ let keepState i1 = do startsAt <- s1 !! i1 let keep = (startsAt <= low) || (offset <= startsAt) if keep then return True else if i1 == startState then {- check for additional empty winner -} set s1 i1 (succ offset) >> return True else writeSTRef eliminatedStateFlag True >> return False states' <- filterM keepState (ISet.toAscList did) flag <- readSTRef eliminatedStateFlag if flag then do writeSTRef eliminatedStateFlag False let DFA {d_id=did',d_dt=dt'} = Trie.lookupAsc trie states' return (did',dt') else do return (did,dt) else do -- offset == low == minimum vals == maximum vals == high; vals == [offset] putMQ (WScratch offset offset) winQ return (did,dt) finalizeWinners = do winnersR <- readSTRef (mq_list winQ) resetMQ winQ writeSTRef storeNext (return []) mapM wsToGroup (reverse winnersR) -- goNext then ends with the next statement next s1In s2In didIn dtIn offsetIn prevIn inputIn ---- {-# INLINE mkTest #-} mkTest :: Uncons text => Bool -> WhichTest -> Index -> Char -> text -> Bool mkTest isMultiline = if isMultiline then test_multiline else test_singleline ---- {- MUTABLE WINNER QUEUE -} data MQ s = MQ { mq_earliest :: !(STRef s Position) , mq_list :: !(STRef s [WScratch]) } newMQ :: S.ST s (MQ s) newMQ = do earliest <- newSTRef maxBound list <- newSTRef [] return (MQ earliest list) resetMQ :: MQ s -> S.ST s () resetMQ (MQ {mq_earliest=earliest,mq_list=list}) = do writeSTRef earliest maxBound writeSTRef list [] putMQ :: WScratch -> MQ s -> S.ST s () putMQ ws@(WScratch {ws_start=start}) (MQ {mq_earliest=earliest,mq_list=list}) = do startE <- readSTRef earliest if start <= startE then writeSTRef earliest start >> writeSTRef list [ws] else do old <- readSTRef list let !rest = dropWhile (\ w -> start <= ws_start w) old !new = ws : rest writeSTRef list new getMQ :: Position -> MQ s -> S.ST s [WScratch] getMQ pos (MQ {mq_earliest=earliest,mq_list=list}) = do old <- readSTRef list case span (\ w -> pos <= ws_start w) old of ([],ans) -> do writeSTRef earliest maxBound writeSTRef list [] return ans (new,ans) -> do writeSTRef earliest (ws_start (last new)) writeSTRef list new return ans {- MUTABLE SCRATCH DATA STRUCTURES -} data SScratch s = SScratch { _s_1 :: !(MScratch s) , _s_2 :: !(MScratch s) , _s_mq :: !(MQ s) } type MScratch s = STUArray s Index Position data WScratch = WScratch {ws_start,_ws_stop :: !Position} deriving Show {- DEBUGGING HELPERS -} {- CREATING INITIAL MUTABLE SCRATCH DATA STRUCTURES -} {-# INLINE newA #-} newA :: (MArray (STUArray s) e (S.ST s)) => (Tag,Tag) -> e -> S.ST s (STUArray s Tag e) newA b_tags initial = newArray b_tags initial newScratch :: (Index,Index) -> S.ST s (SScratch s) newScratch b_index = do s1 <- newMScratch b_index s2 <- newMScratch b_index winQ <- newMQ return (SScratch s1 s2 winQ) newMScratch :: (Index,Index) -> S.ST s (MScratch s) newMScratch b_index = newA b_index (-1) {- CONVERT WINNERS TO MATCHARRAY -} wsToGroup :: WScratch -> S.ST s MatchArray wsToGroup (WScratch start stop) = do ma <- newArray (0,0) (start,stop-start) :: S.ST s (STArray s Int (MatchOffset,MatchLength)) unsafeFreeze ma regex-tdfa-1.1.8/Text/Regex/TDFA/NewDFA/Engine_NC_FA.hs0000644000000000000000000000562511536532137020222 0ustar0000000000000000-- | This is the non-capturing form of Text.Regex.TDFA.NewDFA.String module Text.Regex.TDFA.NewDFA.Engine_NC_FA(execMatch) where import Control.Monad(unless) import Prelude hiding ((!!)) import Data.Array.MArray(MArray(newArray),unsafeFreeze) import Data.Array.ST(STArray) import qualified Data.IntMap.CharMap2 as CMap(findWithDefault) import qualified Data.IntMap as IMap(null) import qualified Data.IntSet as ISet(null) import qualified Data.Array.MArray() import Data.STRef(newSTRef,readSTRef,writeSTRef) import qualified Control.Monad.ST.Strict as S(ST,runST) import Data.Sequence(Seq) import qualified Data.ByteString.Char8 as SBS(ByteString) import qualified Data.ByteString.Lazy.Char8 as LBS(ByteString) import Text.Regex.Base(MatchArray,MatchOffset,MatchLength) import Text.Regex.TDFA.Common hiding (indent) import Text.Regex.TDFA.NewDFA.Uncons(Uncons(uncons)) import Text.Regex.TDFA.NewDFA.MakeTest(test_singleline) --import Debug.Trace -- trace :: String -> a -> a -- trace _ a = a {-# SPECIALIZE execMatch :: Regex -> Position -> Char -> ([] Char) -> [MatchArray] #-} {-# SPECIALIZE execMatch :: Regex -> Position -> Char -> (Seq Char) -> [MatchArray] #-} {-# SPECIALIZE execMatch :: Regex -> Position -> Char -> SBS.ByteString -> [MatchArray] #-} {-# SPECIALIZE execMatch :: Regex -> Position -> Char -> LBS.ByteString -> [MatchArray] #-} execMatch :: Uncons text => Regex -> Position -> Char -> text -> [MatchArray] execMatch (Regex { regex_dfa = DFA {d_dt=dtIn} }) offsetIn _prevIn inputIn = S.runST goNext where test wt off input = test_singleline wt off '\n' input goNext = {-# SCC "goNext" #-} do winQ <- newSTRef Nothing let next dt offset input = {-# SCC "goNext.next" #-} case dt of Testing' {dt_test=wt,dt_a=a,dt_b=b} -> if test wt offset input then next a offset input else next b offset input Simple' {dt_win=w,dt_trans=t, dt_other=o} -> do unless (IMap.null w) $ writeSTRef winQ (Just offset) case uncons input of Nothing -> finalizeWinner Just (c,input') -> do case CMap.findWithDefault o c t of Transition {trans_single=DFA {d_id=did',d_dt=dt'}} | ISet.null did' -> finalizeWinner | otherwise -> let offset' = succ offset in seq offset' $ next dt' offset' input' finalizeWinner = do mWinner <- readSTRef winQ case mWinner of Nothing -> return [] Just winner -> mapM (makeGroup offsetIn) [winner] next dtIn offsetIn inputIn ---- {- CONVERT WINNERS TO MATCHARRAY -} makeGroup :: Position -> Position -> S.ST s MatchArray makeGroup start stop = do ma <- newArray (0,0) (start,stop-start) :: S.ST s (STArray s Int (MatchOffset,MatchLength)) unsafeFreeze ma regex-tdfa-1.1.8/Text/Regex/TDFA/NewDFA/MakeTest.hs0000644000000000000000000000460611536532137017602 0ustar0000000000000000module Text.Regex.TDFA.NewDFA.MakeTest(test_singleline,test_multiline) where import qualified Data.IntSet as ISet(IntSet,member,fromAscList) import Text.Regex.TDFA.Common(WhichTest(..),Index) import Text.Regex.TDFA.NewDFA.Uncons(Uncons(uncons)) {-# INLINE test_singleline #-} {-# INLINE test_multiline #-} {-# INLINE test_common #-} test_singleline,test_multiline,test_common :: Uncons text => WhichTest -> Index -> Char -> text -> Bool test_multiline Test_BOL _off prev _input = prev == '\n' test_multiline Test_EOL _off _prev input = case uncons input of Nothing -> True Just (next,_) -> next == '\n' test_multiline test off prev input = test_common test off prev input test_singleline Test_BOL off _prev _input = off == 0 test_singleline Test_EOL _off _prev input = case uncons input of Nothing -> True _ -> False test_singleline test off prev input = test_common test off prev input test_common Test_BOB off _prev _input = off==0 test_common Test_EOB _off _prev input = case uncons input of Nothing -> True _ -> False test_common Test_BOW _off prev input = not (isWord prev) && case uncons input of Nothing -> False Just (c,_) -> isWord c test_common Test_EOW _off prev input = isWord prev && case uncons input of Nothing -> True Just (c,_) -> not (isWord c) test_common Test_EdgeWord _off prev input = if isWord prev then case uncons input of Nothing -> True Just (c,_) -> not (isWord c) else case uncons input of Nothing -> False Just (c,_) -> isWord c test_common Test_NotEdgeWord _off prev input = not (test_common Test_EdgeWord _off prev input) test_common Test_BOL _ _ _ = undefined test_common Test_EOL _ _ _ = undefined isWord :: Char -> Bool isWord c = ISet.member (fromEnum c) wordSet where wordSet :: ISet.IntSet wordSet = ISet.fromAscList . map fromEnum $ "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz" regex-tdfa-1.1.8/Text/Regex/TDFA/NewDFA/Tester.hs0000644000000000000000000000714411536532137017333 0ustar0000000000000000-- | Like Engine, but merely checks to see whether any match at all is found. -- module Text.Regex.TDFA.NewDFA.Tester(matchTest) where import qualified Data.IntMap.CharMap2 as CMap(findWithDefault) import qualified Data.IntMap as IMap(null) import qualified Data.IntSet as ISet(null) import Data.Sequence(Seq) import qualified Data.ByteString.Char8 as SBS(ByteString) import qualified Data.ByteString.Lazy.Char8 as LBS(ByteString) import Text.Regex.Base() import Text.Regex.TDFA.Common hiding (indent) import Text.Regex.TDFA.NewDFA.Uncons (Uncons(uncons)) import Text.Regex.TDFA.NewDFA.MakeTest(test_singleline,test_multiline) {-# SPECIALIZE matchTest :: Regex -> ([] Char) -> Bool #-} {-# SPECIALIZE matchTest :: Regex -> (Seq Char) -> Bool #-} {-# SPECIALIZE matchTest :: Regex -> SBS.ByteString -> Bool #-} {-# SPECIALIZE matchTest :: Regex -> LBS.ByteString -> Bool #-} matchTest :: Uncons text => Regex -> text -> Bool matchTest (Regex { regex_dfa = dfaIn , regex_isFrontAnchored = ifa } ) inputIn = ans where ans = case ifa of True -> single0 (d_dt dfaIn) inputIn False -> multi0 (d_dt dfaIn) inputIn multi0 (Testing' {dt_test=wt,dt_a=a,dt_b=b}) input = if test0 wt input then multi0 a input else multi0 b input multi0 (Simple' {dt_win=w,dt_trans=t, dt_other=o}) input | IMap.null w = case uncons input of Nothing -> False Just (c,input') -> case CMap.findWithDefault o c t of Transition {trans_many=DFA {d_dt=dt'}} -> multi dt' c input' | otherwise = True multi (Testing' {dt_test=wt,dt_a=a,dt_b=b}) prev input = if test wt prev input then multi a prev input else multi b prev input multi (Simple' {dt_win=w,dt_trans=t, dt_other=o}) _prev input | IMap.null w = case uncons input of Nothing -> False Just (c,input') -> case CMap.findWithDefault o c t of Transition {trans_many=DFA {d_dt=dt'}} -> multi dt' c input' | otherwise = True single0 (Testing' {dt_test=wt,dt_a=a,dt_b=b}) input = if testFA0 wt input then single0 a input else single0 b input single0 (Simple' {dt_win=w,dt_trans=t, dt_other=o}) input | IMap.null w = case uncons input of Nothing -> False Just (c,input') -> case CMap.findWithDefault o c t of Transition {trans_single=DFA {d_id=did',d_dt=dt'}} | ISet.null did' -> False | otherwise -> single dt' c input' | otherwise = True single (Testing' {dt_test=wt,dt_a=a,dt_b=b}) prev input = if testFA wt prev input then single a prev input else single b prev input single (Simple' {dt_win=w,dt_trans=t, dt_other=o}) _prev input | IMap.null w = case uncons input of Nothing -> False Just (c,input') -> case CMap.findWithDefault o c t of Transition {trans_single=DFA {d_id=did',d_dt=dt'}} | ISet.null did' -> False | otherwise -> single dt' c input' | otherwise = True {-# INLINE testFA0 #-} testFA0 :: Uncons text => WhichTest -> text -> Bool testFA0 wt text = test_singleline wt 0 '\n' text {-# INLINE testFA #-} testFA :: Uncons text => WhichTest -> Char -> text -> Bool testFA wt prev text = test_singleline wt 1 prev text {-# INLINE test0 #-} test0 :: Uncons text => WhichTest -> text -> Bool test0 wt input = test_multiline wt 0 '\n' input {-# INLINE test #-} test :: Uncons text => WhichTest -> Char -> text -> Bool test wt prev input = test_multiline wt 1 prev input regex-tdfa-1.1.8/Text/Regex/TDFA/NewDFA/Uncons.hs0000644000000000000000000000136211536532137017326 0ustar0000000000000000module Text.Regex.TDFA.NewDFA.Uncons(Uncons(uncons)) where import qualified Data.ByteString.Char8 as SBS(ByteString,uncons) import qualified Data.ByteString.Lazy.Char8 as LBS(ByteString,uncons) import Data.Sequence(Seq,viewl,ViewL(EmptyL,(:<))) class Uncons a where {- INLINE uncons #-} uncons :: a -> Maybe (Char,a) instance Uncons ([] Char) where {- INLINE uncons #-} uncons [] = Nothing uncons (x:xs) = Just (x,xs) instance Uncons (Seq Char) where {- INLINE uncons #-} uncons s = case viewl s of EmptyL -> Nothing x :< xs -> Just (x,xs) instance Uncons SBS.ByteString where {- INLINE uncons #-} uncons = SBS.uncons instance Uncons LBS.ByteString where {- INLINE uncons #-} uncons = LBS.uncons