hxt-9.3.1.18/examples/0000755000000000000000000000000012474566610012575 5ustar0000000000000000hxt-9.3.1.18/examples/arrows/0000755000000000000000000000000012474566610014112 5ustar0000000000000000hxt-9.3.1.18/examples/arrows/AGentleIntroductionToHXT/0000755000000000000000000000000012474566610020722 5ustar0000000000000000hxt-9.3.1.18/examples/arrows/AGentleIntroductionToHXT/PicklerExample/0000755000000000000000000000000012474566610023627 5ustar0000000000000000hxt-9.3.1.18/examples/arrows/HelloWorld/0000755000000000000000000000000012474566610016165 5ustar0000000000000000hxt-9.3.1.18/examples/arrows/absurls/0000755000000000000000000000000012474566610015565 5ustar0000000000000000hxt-9.3.1.18/examples/arrows/dtd2hxt/0000755000000000000000000000000012474566610015473 5ustar0000000000000000hxt-9.3.1.18/examples/arrows/hparser/0000755000000000000000000000000012474566610015556 5ustar0000000000000000hxt-9.3.1.18/examples/arrows/performance/0000755000000000000000000000000012474566610016413 5ustar0000000000000000hxt-9.3.1.18/examples/arrows/pickle/0000755000000000000000000000000012474566610015361 5ustar0000000000000000hxt-9.3.1.18/examples/xhtml/0000755000000000000000000000000012474566610013731 5ustar0000000000000000hxt-9.3.1.18/src/0000755000000000000000000000000012474566610011546 5ustar0000000000000000hxt-9.3.1.18/src/Control/0000755000000000000000000000000012474566610013166 5ustar0000000000000000hxt-9.3.1.18/src/Control/Arrow/0000755000000000000000000000000013506134332014245 5ustar0000000000000000hxt-9.3.1.18/src/Data/0000755000000000000000000000000012474566610012417 5ustar0000000000000000hxt-9.3.1.18/src/Data/Function/0000755000000000000000000000000012474566610014204 5ustar0000000000000000hxt-9.3.1.18/src/Data/Tree/0000755000000000000000000000000012474566610013316 5ustar0000000000000000hxt-9.3.1.18/src/Data/Tree/NTree/0000755000000000000000000000000013205353551014322 5ustar0000000000000000hxt-9.3.1.18/src/Data/Tree/NTree/Zipper/0000755000000000000000000000000012474566610015604 5ustar0000000000000000hxt-9.3.1.18/src/Data/Tree/NavigatableTree/0000755000000000000000000000000012474566610016353 5ustar0000000000000000hxt-9.3.1.18/src/Text/0000755000000000000000000000000012474566610012472 5ustar0000000000000000hxt-9.3.1.18/src/Text/XML/0000755000000000000000000000000012474566610013132 5ustar0000000000000000hxt-9.3.1.18/src/Text/XML/HXT/0000755000000000000000000000000012506166053013566 5ustar0000000000000000hxt-9.3.1.18/src/Text/XML/HXT/Arrow/0000755000000000000000000000000013506133461014656 5ustar0000000000000000hxt-9.3.1.18/src/Text/XML/HXT/Arrow/Pickle/0000755000000000000000000000000013506133732016066 5ustar0000000000000000hxt-9.3.1.18/src/Text/XML/HXT/Arrow/XmlState/0000755000000000000000000000000013205353551016417 5ustar0000000000000000hxt-9.3.1.18/src/Text/XML/HXT/DOM/0000755000000000000000000000000013205353551014203 5ustar0000000000000000hxt-9.3.1.18/src/Text/XML/HXT/DTDValidation/0000755000000000000000000000000012474566610016223 5ustar0000000000000000hxt-9.3.1.18/src/Text/XML/HXT/IO/0000755000000000000000000000000012474566610014104 5ustar0000000000000000hxt-9.3.1.18/src/Text/XML/HXT/Parser/0000755000000000000000000000000013205353551015020 5ustar0000000000000000hxt-9.3.1.18/src/Text/XML/HXT/XMLSchema/0000755000000000000000000000000012474566610015356 5ustar0000000000000000hxt-9.3.1.18/src/Control/Arrow/ArrowExc.hs0000644000000000000000000000166712474566610016360 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Control.Arrow.ArrowExc Copyright : Copyright (C) 2010 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe\@fh-wedel.de) Stability : experimental Portability: not portable The exception arrow class -} -- ------------------------------------------------------------ module Control.Arrow.ArrowExc ( ArrowExc(..) ) where import Control.Arrow import Control.Arrow.ArrowIO import Control.Exception ( SomeException ) class (Arrow a, ArrowChoice a, ArrowZero a, ArrowIO a) => ArrowExc a where tryA :: a b c -> a b (Either SomeException c) catchA :: a b c -> a SomeException c -> a b c catchA f h = tryA f >>> ( h ||| returnA ) -- ------------------------------------------------------------ hxt-9.3.1.18/src/Control/Arrow/ArrowIO.hs0000644000000000000000000000420212474566610016134 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Control.Arrow.ArrowIO Copyright : Copyright (C) 2005 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe\@fh-wedel.de) Stability : experimental Portability: portable Lifting of IO actions to arrows -} -- ------------------------------------------------------------ module Control.Arrow.ArrowIO ( ArrowIO(..) , ArrowIOIf(..) ) where import Control.Arrow -- | the interface for converting an IO action into an arrow class Arrow a => ArrowIO a where -- | construct an arrow from an IO action arrIO :: (b -> IO c) -> a b c -- | construct an arrow from an IO action without any parameter arrIO0 :: IO c -> a b c arrIO0 f = arrIO (const f) {-# INLINE arrIO0 #-} -- | construction of a 2 argument arrow from a binary IO action -- | -- | example: @ a1 &&& a2 >>> arr2 f @ arrIO2 :: (b1 -> b2 -> IO c) -> a (b1, b2) c arrIO2 f = arrIO (\ ~(x1, x2) -> f x1 x2) {-# INLINE arrIO2 #-} -- | construction of a 3 argument arrow from a 3-ary IO action -- | -- | example: @ a1 &&& a2 &&& a3 >>> arr3 f @ arrIO3 :: (b1 -> b2 -> b3 -> IO c) -> a (b1, (b2, b3)) c arrIO3 f = arrIO (\ ~(x1, ~(x2, x3)) -> f x1 x2 x3) {-# INLINE arrIO3 #-} -- | construction of a 4 argument arrow from a 4-ary IO action -- | -- | example: @ a1 &&& a2 &&& a3 &&& a4 >>> arr4 f @ arrIO4 :: (b1 -> b2 -> b3 -> b4 -> IO c) -> a (b1, (b2, (b3, b4))) c arrIO4 f = arrIO (\ ~(x1, ~(x2, ~(x3, x4))) -> f x1 x2 x3 x4) {-# INLINE arrIO4 #-} -- | the interface for converting an IO predicate into a list arrow class (Arrow a, ArrowIO a) => ArrowIOIf a where -- | builds an arrow from an IO predicate -- -- if the predicate holds, the single list containing the input is returned, else the empty list, -- similar to 'Control.Arrow.ArrowList.isA' isIOA :: (b -> IO Bool) -> a b b -- ------------------------------------------------------------ hxt-9.3.1.18/src/Control/Arrow/ArrowIf.hs0000644000000000000000000001253612474566610016174 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Control.Arrow.ArrowIf Copyright : Copyright (C) 2005 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe\@fh-wedel.de) Stability : experimental Portability: portable Conditionals for List Arrows This module defines conditional combinators for list arrows. The empty list as result represents False, none empty lists True. -} -- ------------------------------------------------------------ module Control.Arrow.ArrowIf ( module Control.Arrow.ArrowIf ) where import Control.Arrow import Control.Arrow.ArrowList import Data.List ( partition ) -- ------------------------------------------------------------ -- | The interface for arrows as conditionals. -- -- Requires list arrows because False is represented as empty list, True as none empty lists. -- -- Only 'ifA' and 'orElse' don't have default implementations class ArrowList a => ArrowIf a where -- | if lifted to arrows ifA :: a b c -> a b d -> a b d -> a b d -- | shortcut: @ ifP p = ifA (isA p) @ ifP :: (b -> Bool) -> a b d -> a b d -> a b d ifP p = ifA (isA p) {-# INLINE ifP #-} -- | negation: @ neg f = ifA f none this @ neg :: a b c -> a b b neg f = ifA f none this {-# INLINE neg #-} -- | @ f \`when\` g @ : when the predicate g holds, f is applied, else the identity filter this when :: a b b -> a b c -> a b b f `when` g = ifA g f this {-# INLINE when #-} -- | shortcut: @ f \`whenP\` p = f \`when\` (isA p) @ whenP :: a b b -> (b -> Bool) -> a b b f `whenP` g = ifP g f this {-# INLINE whenP #-} -- | @ f \`whenNot\` g @ : when the predicate g does not hold, f is applied, else the identity filter this whenNot :: a b b -> a b c -> a b b f `whenNot` g = ifA g this f {-# INLINE whenNot #-} -- | like 'whenP' whenNotP :: a b b -> (b -> Bool) -> a b b f `whenNotP` g = ifP g this f {-# INLINE whenNotP #-} -- | @ g \`guards\` f @ : when the predicate g holds, f is applied, else none guards :: a b c -> a b d -> a b d f `guards` g = ifA f g none {-# INLINE guards #-} -- | like 'whenP' guardsP :: (b -> Bool) -> a b d -> a b d f `guardsP` g = ifP f g none {-# INLINE guardsP #-} -- | shortcut for @ f `guards` this @ filterA :: a b c -> a b b filterA f = ifA f this none {-# INLINE filterA #-} -- | @ f \`containing\` g @ : keep only those results from f for which g holds -- -- definition: @ f \`containing\` g = f >>> g \`guards\` this @ containing :: a b c -> a c d -> a b c f `containing` g = f >>> g `guards` this {-# INLINE containing #-} -- | @ f \`notContaining\` g @ : keep only those results from f for which g does not hold -- -- definition: @ f \`notContaining\` g = f >>> ifA g none this @ notContaining :: a b c -> a c d -> a b c f `notContaining` g = f >>> ifA g none this {-# INLINE notContaining #-} -- | @ f \`orElse\` g @ : directional choice: if f succeeds, the result of f is the result, else g is applied orElse :: a b c -> a b c -> a b c -- | generalisation of 'orElse' for multi way branches like in case expressions. -- -- An auxiliary data type 'IfThen' with an infix constructor ':->' is used for writing multi way branches -- -- example: @ choiceA [ p1 :-> e1, p2 :-> e2, this :-> default ] @ choiceA :: [IfThen (a b c) (a b d)] -> a b d choiceA = foldr ifA' none where ifA' (g :-> f) = ifA g f -- | tag a value with Left or Right, if arrow has success, input is tagged with Left, else with Right tagA :: a b c -> a b (Either b b) tagA p = ifA p (arr Left) (arr Right) -- | split a list value with an arrow and returns a pair of lists. -- This is the arrow version of 'span'. The arrow is deterministic. -- -- example: @ runLA (spanA (isA (\/= \'-\'))) \"abc-def\" @ gives @ [(\"abc\",\"-def\")] @ as result spanA :: a b b -> a [b] ([b],[b]) spanA p = ifA ( arrL (take 1) >>> p ) ( arr head &&& (arr tail >>> spanA p) >>> arr (\ ~(x, ~(xs,ys)) -> (x : xs, ys)) ) ( arr (\ l -> ([],l)) ) -- | partition a list of values into a pair of lists -- -- This is the arrow Version of 'Data.List.partition' partitionA :: a b b -> a [b] ([b],[b]) partitionA p = listA ( arrL id >>> tagA p ) >>^ ( (\ ~(l1, l2) -> (unTag l1, unTag l2) ) . partition (isLeft) ) where isLeft (Left _) = True isLeft _ = False unTag = map (either id id) -- ------------------------------------------------------------ -- | an auxiliary data type for 'choiceA' data IfThen a b = a :-> b -- ------------------------------------------------------------ hxt-9.3.1.18/src/Control/Arrow/ArrowList.hs0000644000000000000000000002777413506134332016550 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Control.Arrow.ArrowList Copyright : Copyright (C) 2005 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe\@fh-wedel.de) Stability : experimental Portability: portable The list arrow class This module defines the interface for list arrows. A list arrow is a function that gives a list of results for a given argument. A single element result represents a normal function. An empty list often indicates that the function is undefined for the given argument. The empty list may also represent False, non-empty lists True. A list with more than one element gives all results for a so called nondeterministic function. -} -- ------------------------------------------------------------ module Control.Arrow.ArrowList ( ArrowList(..) ) where import Control.Arrow infixl 8 >>., >. infixl 2 $<, $<<, $<<<, $<<<< infixl 2 $<$ -- ------------------------------------------------------------ -- | The interface for list arrows -- -- Only 'mkA', 'isA' '(>>.)' don't have default implementations class (Arrow a, ArrowPlus a, ArrowZero a, ArrowApply a) => ArrowList a where -- | construction of a 2 argument arrow from a binary function -- | -- | example: @ a1 &&& a2 >>> arr2 f @ arr2 :: (b1 -> b2 -> c) -> a (b1, b2) c arr2 = arr . uncurry {-# INLINE arr2 #-} -- | construction of a 3 argument arrow from a 3-ary function -- | -- | example: @ a1 &&& a2 &&& a3 >>> arr3 f @ arr3 :: (b1 -> b2 -> b3 -> c) -> a (b1, (b2, b3)) c arr3 f = arr (\ ~(x1, ~(x2, x3)) -> f x1 x2 x3) {-# INLINE arr3 #-} -- | construction of a 4 argument arrow from a 4-ary function -- | -- | example: @ a1 &&& a2 &&& a3 &&& a4 >>> arr4 f @ arr4 :: (b1 -> b2 -> b3 -> b4 -> c) -> a (b1, (b2, (b3, b4))) c arr4 f = arr (\ ~(x1, ~(x2, ~(x3, x4))) -> f x1 x2 x3 x4) {-# INLINE arr4 #-} -- | construction of a 2 argument arrow from a singe argument arrow arr2A :: (b -> a c d) -> a (b, c) d arr2A f = first (arr f) >>> app {-# INLINE arr2A #-} -- | constructor for a list arrow from a function with a list as result arrL :: (b -> [c]) -> a b c -- | constructor for a list arrow with 2 arguments arr2L :: (b -> c -> [d]) -> a (b, c) d arr2L = arrL . uncurry {-# INLINE arr2L #-} -- | constructor for a const arrow: @ constA = arr . const @ constA :: c -> a b c constA = arr . const {-# INLINE constA #-} -- | constructor for a const arrow: @ constL = arrL . const @ constL :: [c] -> a b c constL = arrL . const {-# INLINE constL #-} -- | builds an arrow from a predicate. -- If the predicate holds, the single list containing the input is returned, else the empty list isA :: (b -> Bool) -> a b b -- | combinator for converting the result of a list arrow into another list -- -- example: @ foo >>. reverse @ reverses the the result of foo -- -- example: @ foo >>. take 1 @ constructs a deterministic version of foo by deleting all further results (>>.) :: a b c -> ([c] -> [d]) -> a b d -- | combinator for converting the result of an arrow into a single element result (>.) :: a b c -> ([c] -> d ) -> a b d af >. f = af >>. ((:[]) . f) {-# INLINE (>.) #-} -- | combinator for converting an arrow into a determinstic version with all results collected in a single element list -- -- @ listA af = af >>. (:[]) @ -- -- this is useful when the list of results computed by an arrow must be manipulated (e.g. sorted) -- -- example for sorting the results of a filter -- -- > collectAndSort :: a b c -> a b c -- > -- > collectAndSort collect = listA collect >>> arrL sort listA :: a b c -> a b [c] listA af = af >>. (:[]) {-# INLINE listA #-} -- | the inverse of 'listA' -- -- @ listA af >>> unlistA = af @ -- -- unlistA is defined as @ arrL id @ unlistA :: a [b] b unlistA = arrL id {-# INLINE unlistA #-} -- | the identity arrow, alias for returnA this :: a b b this = returnA {-# INLINE this #-} -- | the zero arrow, alias for zeroArrow none :: a b c none = zeroArrow {-# INLINE none #-} -- | converts an arrow, that may fail, into an arrow that always succeeds -- -- example: @ withDefault none \"abc\" @ is equivalent to @ constA \"abc\" @ withDefault :: a b c -> c -> a b c withDefault a d = a >>. \ x -> if null x then [d] else x {-# INLINE withDefault #-} -- | makes a list arrow deterministic, the number of results is at most 1 -- -- definition -- -- > single f = f >>. take 1 -- -- examples with strings: -- -- > runLA ( single none ) "x" == [] -- > runLA ( single this ) "x" == ["x"] -- > runLA ( single -- > (constA "y" -- > <+> this ) ) "x" == ["y"] single :: a b c -> a b c single f = f >>. take 1 -- | compute an arrow from the input and apply the arrow to this input -- -- definition: @ (f &&& this) >>> app @ -- -- in a point free style, there is no way to use an argument in 2 places, -- this is a combinator for simulating this. first the argument is used to compute an arrow, -- then this new arrow is applied to the input -- -- applyA coresponds to: @ apply f x = let g = f x in g x @ -- -- see also: '$<', '$<<', '$<<<', '$<<<<', '$<$' applyA :: a b (a b c) -> a b c applyA f = (f &&& this) >>> app -- | compute the parameter for an arrow with extra parameters from the input -- and apply the arrow for all parameter values to the input -- -- a kind of \"function call\" for arrows, useful for joining arrows -- -- > infixl 2 ($<) -- -- definition: -- -- > g $< f = applyA (f >>> arr g) -- -- if @f@ fails, the whole arrow fails, e.g. @ g \$\< none == none @ -- -- if @f@ computes n values and @g@ is deterministic, the whole arrow computes n values -- -- examples with simple list arrows with strings -- -- > prefixString :: String -> a String String -- > prefixString s = arr (s++) -- > -- > runLA ( prefixString $< none ) "x" == [] -- > runLA ( prefixString $< constA "y" ) "x" == ["yx"] -- > runLA ( prefixString $< this ) "x" == ["xx"] -- > runLA ( prefixString $< constA "y" -- > <+> constA "z" ) "x" == ["yx","zx"] -- > runLA ( prefixString $< constA "y" -- > <+> this -- > <+> constA "z" ) "x" == ["yx","xx","zx"] -- -- see also: 'applyA', '$<<', '$<<<', '$<<<<', '$<$' ($<) :: (c -> a b d) -> a b c -> a b d g $< f = applyA (f >>> arr g) -- | binary version of '$<' -- -- example with simple list arrows with strings -- -- > infixString :: String -> String -> a String String -- > infixString s1 s2 -- > = arr (\ s -> s1 ++ s ++ s2) -- > -- > runLA ( infixString $<< constA "y" &&& constA "z" ) "x" = ["yxz"] -- > runLA ( infixString $<< this &&& this ) "x" = ["xxx"] -- > runLA ( infixString $<< constA "y" -- > &&& (constA "z" <+> this) ) "x" = ["yxz", "yxx"] ($<<) :: (c1 -> c2 -> a b d) -> a b (c1, c2) -> a b d f $<< g = applyA (g >>> arr2 f) -- | version of '$<' for arrows with 3 extra parameters -- -- typical usage -- -- > f $<<< g1 &&& g2 &&& g3 ($<<<) :: (c1 -> c2 -> c3 -> a b d) -> a b (c1, (c2, c3)) -> a b d f $<<< g = applyA (g >>> arr3 f) -- | version of '$<' for arrows with 4 extra parameters -- -- typical usage -- -- > f $<<<< g1 &&& g2 &&& g3 &&& g4 ($<<<<) :: (c1 -> c2 -> c3 -> c4 -> a b d) -> a b (c1, (c2, (c3, c4))) -> a b d f $<<<< g = applyA (g >>> arr4 f) -- | compute the parameter for an arrow @f@ with an extra parameter by an arrow @g@ -- and apply all the results from @g@ sequentially to the input -- -- > infixl 2 ($<$) -- -- typical usage: -- -- > g :: a b c -- > g = ... -- > -- > f :: c -> a b b -- > f x = ... x ... -- > -- > f $<$ g -- -- @f@ computes the extra parameters for @g@ from the input of type @b@ and @g@ is applied with this -- parameter to the input. This allows programming in a point wise style in @g@, which becomes -- neccessary, when a value is needed more than once. -- -- this combinator is useful, when transforming a single value (document) step by step, -- with @g@ for collecting the data for all steps, and @f@ for transforming the input step by step -- -- if @g@ is deterministic (computes exactly one result), -- @ g $\<$ f == g $\< f @ holds -- -- if @g@ fails, @ f $<$ g == this @ -- -- if @g@ computes more than one result, @f@ is applied sequentially to the input for every result from @g@ -- -- examples with simple list arrows with strings -- -- > prefixString :: String -> a String String -- > prefixString s = arr (s++) -- > -- > runLA ( prefixString $<$ none ) "x" == ["x"] -- > runLA ( prefixString $<$ constA "y" ) "x" == ["yx"] -- > runLA ( prefixString $<$ constA "y" <+> constA "z" ) "x" == ["zyx"] -- > runLA ( prefixString $<$ constA "y" <+> this -- > <+> constA "z" ) "x" == ["zxyx"] -- -- example with two extra parameter -- -- > g1 :: a b c1 -- > g2 :: a b c2 -- > -- > f :: (c1, c2) -> a b b -- > f (x1, x2) = ... x1 ... x2 ... -- > -- > f $<$ g1 &&& g2 -- -- see also: 'applyA', '$<' ($<$) :: (c -> (a b b)) -> a b c -> a b b g $<$ f = applyA (listA (f >>> arr g) >>> arr seqA) -- | merge the result pairs of an arrow with type @a a1 (b1, b2)@ -- by combining the tuple components with the @op@ arrow -- -- examples with simple list arrows working on strings and XmlTrees -- -- > a1 :: a String (XmlTree, XmlTree) -- > a1 = selem "foo" [this >>> mkText] -- > &&& -- > selem "bar" [arr (++"0") >>> mkText] -- > -- > runLA (a1 >>> mergeA (<+>) >>> xshow this) "42" == ["42","420"] -- > runLA (a1 >>> mergeA (+=) >>> xshow this) "42" == ["42420"] -- -- see also: 'applyA', '$<' and '+=' in class 'Text.XML.HXT.Arrow.ArrowXml' mergeA :: (a (a1, b1) a1 -> a (a1, b1) b1 -> a (a1, b1) c) -> a (a1, b1) c mergeA op = (\ x -> arr fst `op` constA (snd x)) $< this -- | useful only for arrows with side effects: perform applies an arrow to the input -- ignores the result and returns the input -- -- example: @ ... >>> perform someTraceArrow >>> ... @ perform :: a b c -> a b b perform f = listA f &&& this >>> arr snd {-# INLINE perform #-} -- | generalization of arrow combinator '<+>' -- -- definition: @ catA = foldl (\<+\>) none @ catA :: [a b c] -> a b c catA = foldl (<+>) none {-# INLINE catA #-} -- | generalization of arrow combinator '>>>' -- -- definition: @ seqA = foldl (>>>) this @ seqA :: [a b b] -> a b b seqA = foldl (>>>) this {-# INLINE seqA #-} -- ------------------------------------------------------------ hxt-9.3.1.18/src/Control/Arrow/ArrowNF.hs0000644000000000000000000000362313506133742016127 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Control.Arrow.ArrowNF Copyright : Copyright (C) 2011 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe\@fh-wedel.de) Stability : experimental Portability: non-portable Arrows for evaluation of normal form results -} -- ------------------------------------------------------------ module Control.Arrow.ArrowNF where import Control.Arrow import Control.Arrow.ArrowList import Control.DeepSeq import Control.FlatSeq -- | -- complete evaluation of an arrow result using 'Control.DeepSeq' -- -- this is sometimes useful for preventing space leaks, especially after reading -- and validation of a document, all DTD stuff is not longer in use and can be -- recycled by the GC. strictA :: (Arrow a, NFData b) => a b b strictA = arr $ \ x -> deepseq x x class (Arrow a) => ArrowNF a where rnfA :: (NFData c) => a b c -> a b c rnfA f = f >>^ (\ x -> deepseq x x) {-# INLINE rnfA #-} -- | -- partial evaluation of an arrow result using 'Control.FlatSeq' -- -- There are two arrows with force the partial evaluation. By convention -- the 2. should be less lazy than the 1. -- -- These arrows are sometimes useful for preventing space leaks, especially when parsing -- complex data structures. In many cases the evaluated AST is more space efficient -- than the unevaluaded with a lot of closures. class (Arrow a, ArrowList a) => ArrowWNF a where rwnfA :: (WNFData c) => a b c -> a b c rwnfA f = f >>. \ x -> rlnf rwnf x `seq` x {-# INLINE rwnfA #-} rwnf2A :: (WNFData c) => a b c -> a b c rwnf2A f = f >>. \ x -> rlnf rwnf2 x `seq` x {-# INLINE rwnf2A #-} -- ------------------------------------------------------------ hxt-9.3.1.18/src/Control/Arrow/ArrowNavigatableTree.hs0000644000000000000000000002640212474566610020670 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Control.Arrow.ArrowNavigatableTree Copyright : Copyright (C) 2010 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe\@fh-wedel.de) Stability : experimental Portability: portable List arrows for navigatable trees Trees that implement the "Data.Tree.NavigatableTree.Class" interface, can be processed with these arrows. -} -- ------------------------------------------------------------ module Control.Arrow.ArrowNavigatableTree where import Control.Arrow import Control.Arrow.ArrowList import Control.Arrow.ArrowIf import Data.Maybe import Data.Tree.NavigatableTree.Class ( NavigatableTree , NavigatableTreeToTree , NavigatableTreeModify ) import qualified Data.Tree.NavigatableTree.Class as T import qualified Data.Tree.NavigatableTree.XPathAxis as T -- ------------------------------------------------------------ -- | The interface for navigatable tree arrows -- -- all functions have default implementations class (ArrowList a) => ArrowNavigatableTree a where -- move one step towards the root moveUp :: NavigatableTree t => a (t b) (t b) moveUp = arrL $ maybeToList . T.mvUp -- descend one step to the leftmost child moveDown :: NavigatableTree t => a (t b) (t b) moveDown = arrL $ maybeToList . T.mvDown -- move to the left neighbour moveLeft :: NavigatableTree t => a (t b) (t b) moveLeft = arrL $ maybeToList . T.mvLeft -- move to the right neighbour moveRight :: NavigatableTree t => a (t b) (t b) moveRight = arrL $ maybeToList . T.mvRight -- derived functions parentAxis :: (ArrowList a, NavigatableTree t) => a (t b) (t b) parentAxis = arrL T.parentAxis -- | XPath axis: ancestor ancestorAxis :: (ArrowList a, NavigatableTree t) => a (t b) (t b) ancestorAxis = arrL T.ancestorAxis -- | XPath axis: ancestor or self ancestorOrSelfAxis :: (ArrowList a, NavigatableTree t) => a (t b) (t b) ancestorOrSelfAxis = arrL T.ancestorOrSelfAxis -- | XPath axis: child childAxis :: (ArrowList a, NavigatableTree t) => a (t b) (t b) childAxis = arrL T.childAxis -- | XPath axis: descendant descendantAxis :: (ArrowList a, NavigatableTree t) => a (t b) (t b) descendantAxis = arrL T.descendantAxis -- | XPath axis: descendant or self descendantOrSelfAxis :: (ArrowList a, NavigatableTree t) => a (t b) (t b) descendantOrSelfAxis = arrL T.descendantOrSelfAxis -- | not an XPath axis but useful: descendant or following descendantOrFollowingAxis :: (ArrowList a, NavigatableTree t) => a (t b) (t b) descendantOrFollowingAxis = descendantAxis <+> followingAxis -- | not an official XPath axis but useful: reverse descendant or self, used in preceding axis revDescendantOrSelfAxis :: (ArrowList a, NavigatableTree t) => a (t b) (t b) revDescendantOrSelfAxis = arrL T.revDescendantOrSelfAxis -- | XPath axis: following sibling followingSiblingAxis :: (ArrowList a, NavigatableTree t) => a (t b) (t b) followingSiblingAxis = arrL T.followingSiblingAxis -- | XPath axis: preceeding sibling precedingSiblingAxis :: (ArrowList a, NavigatableTree t) => a (t b) (t b) precedingSiblingAxis = arrL T.precedingSiblingAxis -- | XPath axis: self selfAxis :: (ArrowList a, NavigatableTree t) => a (t b) (t b) selfAxis = arrL T.selfAxis -- | XPath axis: following followingAxis :: (ArrowList a, NavigatableTree t) => a (t b) (t b) followingAxis = arrL T.followingAxis -- | XPath axis: preceding precedingAxis :: (ArrowList a, NavigatableTree t) => a (t b) (t b) precedingAxis = arrL T.precedingAxis -- ------------------------------------------------------------ -- | move to the root moveToRoot :: (Arrow a, NavigatableTree t) => a (t b) (t b) moveToRoot = arr T.mvToRoot isAtRoot :: (ArrowList a, NavigatableTree t) => a (t b) (t b) isAtRoot = isA (null . T.ancestorAxis) -- ------------------------------------------------------------ -- | Conversion from a tree into a navigatable tree addNav :: ( ArrowList a , NavigatableTreeToTree nt t ) => a (t b) (nt b) addNav = arr T.fromTree -- | Conversion from a navigatable tree into an ordinary tree remNav :: ( ArrowList a , NavigatableTreeToTree nt t ) => a (nt b) (t b) remNav = arr T.toTree -- | apply an operation using navigation to an ordinary tree -- -- This root and all children may be visited in arbitrary order withNav :: ( ArrowList a , NavigatableTreeToTree nt t ) => a (nt b) (nt c) -> a (t b) (t c) withNav f = addNav >>> f >>> remNav -- | apply a simple operation without use of navigation to a navigatable tree -- -- This enables to apply arbitrary tree operations to navigatable trees withoutNav :: ( ArrowList a , NavigatableTreeToTree nt t , NavigatableTreeModify nt t ) => a (t b) (t b) -> a (nt b) (nt b) withoutNav f = ( (remNav >>> f) -- apply the simple arrow to the tree &&& this -- remember the navigation context ) >>> arr (uncurry T.substThisTree) -- resore the context -- ------------------------------------------------------------ -- | Filter an axis with an ordinary tree predicate -- -- Example: In a tree of Ints find all nodes in the subtrees (in preorder) that have label 42 -- -- > descendantAxis >>> filterAxis (hasNode (== 42)) -- -- Example: In an XML Tree find the following nodes of a node with attribute id and value 42 -- -- > descendantAxis >>> filterAxis (hasAttrValue "id" (=="42")) >>> followingAxis filterAxis :: ( ArrowIf a , NavigatableTreeToTree nt t ) => a (t b) c -> a (nt b) (nt b) filterAxis p = (remNav >>> p) `guards` this {-# INLINE filterAxis #-} -- | Move to the next tree on a given axis. Deterministic arrow -- -- Example: Move to the next node in a preorder visit: next child or else next following -- -- > moveOn descendantOrFollowingAxis moveOn :: ( ArrowList a , NavigatableTree t ) => a (t b) (t b) -> a (t b) (t b) moveOn axis = single $ axis {-# INLINE moveOn #-} -- ------------------------------------------------------------ -- | Change the current subtree of a navigatable tree. -- -- The arrow for computing the changes should be deterministic. If it fails -- nothing is changed. changeThisTree :: ( ArrowList a , ArrowIf a , NavigatableTreeToTree nt t , NavigatableTreeModify nt t ) => a (t b) (t b) -> a (nt b) (nt b) changeThisTree cf = withoutNav $ single cf `orElse` this -- | Substitute the current subtree of a navigatable tree by a given tree substThisTree :: ( ArrowList a , ArrowIf a , NavigatableTreeToTree nt t , NavigatableTreeModify nt t ) => t b -> a (nt b) (nt b) substThisTree t = changeThisTree (constA t) -- ------------------------------------------------------------ -- | apply an ordinary arrow to the current subtree of a navigatabe tree and add the result trees in front of the current tree. -- -- If this arrow is applied to the root, it will fail, because we want a tree as result, not a forest. addToTheLeft :: ( ArrowList a , NavigatableTreeToTree nt t , NavigatableTreeModify nt t ) => a (t b) (t b) -> a (nt b) (nt b) addToTheLeft = addToOneSide $ foldl (\ acc t -> acc >>= T.addTreeLeft t) {-# INLINE addToTheLeft #-} -- | apply an ordinary arrow to the current subtree of a navigatabe tree and add the result trees behind the current tree. -- -- If this arrow is applied to the root, it will fail, because we want a tree as result, not a forest. addToTheRight :: ( ArrowList a , NavigatableTreeToTree nt t , NavigatableTreeModify nt t ) => a (t b) (t b) -> a (nt b) (nt b) addToTheRight = addToOneSide $ foldr (\ t acc -> acc >>= T.addTreeRight t) {-# INLINE addToTheRight #-} -- | addToOneSide does the real work for 'addToTheLeft' and 'addToTheRight' addToOneSide :: ( ArrowList a , NavigatableTreeToTree nt t , NavigatableTreeModify nt t ) => ( Maybe (nt b) -> [t b] -> Maybe (nt b) ) -> a (t b) (t b) -> a (nt b) (nt b) addToOneSide side f = ( ( remNav >>> listA f ) &&& this ) >>> arrL ( uncurry (\ ts nt -> side (Just nt) ts) >>> maybeToList ) -- ------------------------------------------------------------ -- | drop the direct left sibling tree of the given navigatable tree -- -- If this arrow is applied to the root or a leftmost tree, it will fail, because there is nothing to remove dropFromTheLeft :: ( ArrowList a -- , NavigatableTreeToTree nt t , NavigatableTreeModify nt t ) => a (nt b) (nt b) dropFromTheLeft = arrL $ T.dropTreeLeft >>> maybeToList {-# INLINE dropFromTheLeft #-} -- | drop the direct left sibling tree of the given navigatable tree -- -- If this arrow is applied to the root or a rightmost tree, it will fail, because there is nothing to remove dropFromTheRight :: ( ArrowList a , NavigatableTreeModify nt t ) => a (nt b) (nt b) dropFromTheRight = arrL $ T.dropTreeRight >>> maybeToList {-# INLINE dropFromTheRight #-} -- ------------------------------------------------------------ hxt-9.3.1.18/src/Control/Arrow/ArrowState.hs0000644000000000000000000000456612474566610016722 0ustar0000000000000000{-# LANGUAGE MultiParamTypeClasses, FunctionalDependencies #-} -- ------------------------------------------------------------ {- | Module : Control.Arrow.ArrowState Copyright : Copyright (C) 2005 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe\@fh-wedel.de) Stability : experimental Portability: multi parameter classes and functional depenedencies required Arrows for managing an explicit state State arrows work similar to state monads. A state value is threaded through the application of arrows. -} -- ------------------------------------------------------------ module Control.Arrow.ArrowState ( ArrowState(..) ) where import Control.Arrow -- | The interface for accessing and changing the state component. -- -- Multi parameter classes and functional dependencies are required. class Arrow a => ArrowState s a | a -> s where -- | change the state of a state arrow by applying a function -- for computing a new state from the old and the arrow input. -- Result is the arrow input changeState :: (s -> b -> s) -> a b b -- | access the state with a function using the arrow input -- as data for selecting state components. accessState :: (s -> b -> c) -> a b c -- | read the complete state, ignore arrow input -- -- definition: @ getState = accessState (\\ s x -> s) @ getState :: a b s getState = accessState (\ s _x -> s) {-# INLINE getState #-} -- | overwrite the old state -- -- definition: @ setState = changeState (\\ s x -> x) @ setState :: a s s setState = changeState (\ _s x -> x) -- changeState (const id) {-# INLINE setState #-} -- | change state (and ignore input) and return new state -- -- convenience function, -- usefull for generating e.g. unique identifiers: -- -- example with SLA state list arrows -- -- > newId :: SLA Int b String -- > newId = nextState (+1) -- > >>> -- > arr (('#':) . show) -- > -- > runSLA 0 (newId <+> newId <+> newId) undefined -- > = ["#1", "#2", "#3"] nextState :: (s -> s) -> a b s nextState sf = changeState (\s -> const (sf s)) >>> getState -- ------------------------------------------------------------ hxt-9.3.1.18/src/Control/Arrow/ArrowTree.hs0000644000000000000000000003336013205353551016522 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Control.Arrow.ArrowTree Copyright : Copyright (C) 2010 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe\@fh-wedel.de) Stability : stable Portability: portable List arrows for tree processing. Trees that implement the "Data.Tree.Class" interface, can be processed with these arrows. -} -- ------------------------------------------------------------ module Control.Arrow.ArrowTree ( ArrowTree(..) , Tree ) where import Data.Tree.Class (Tree) import qualified Data.Tree.Class as T hiding (Tree) import Control.Arrow import Control.Arrow.ArrowList import Control.Arrow.ArrowIf infixl 5 />, //>, ArrowTree a where -- | construct a leaf mkLeaf :: Tree t => b -> a c (t b) mkLeaf = constA . T.mkLeaf {-# INLINE mkLeaf #-} -- | construct an inner node mkTree :: Tree t => b -> [t b] -> a c (t b) mkTree n = constA . T.mkTree n {-# INLINE mkTree #-} -- | select the children of the root of a tree getChildren :: Tree t => a (t b) (t b) getChildren = arrL T.getChildren {-# INLINE getChildren #-} -- | select the node info of the root of a tree getNode :: Tree t => a (t b) b getNode = arr T.getNode {-# INLINE getNode #-} -- | select the attribute of the root of a tree hasNode :: Tree t => (b -> Bool) -> a (t b) (t b) hasNode p = (getNode >>> isA p) `guards` this {-# INLINE hasNode #-} -- | substitute the children of the root of a tree setChildren :: Tree t => [t b] -> a (t b) (t b) setChildren cs = arr (T.setChildren cs) {-# INLINE setChildren #-} -- | substitute the attribute of the root of a tree setNode :: Tree t => b -> a (t b) (t b) setNode n = arr (T.setNode n) {-# INLINE setNode #-} -- | edit the children of the root of a tree changeChildren :: Tree t => ([t b] -> [t b]) -> a (t b) (t b) changeChildren csf = arr (T.changeChildren csf) {-# INLINE changeChildren #-} -- | edit the attribute of the root of a tree changeNode :: Tree t => (b -> b) -> a (t b) (t b) changeNode nf = arr (T.changeNode nf) {-# INLINE changeNode #-} -- compound arrows -- | apply an arrow element wise to all children of the root of a tree -- collect these results and substitute the children with this result -- -- example: @ processChildren isText @ deletes all subtrees, for which isText does not hold -- -- example: @ processChildren (none \`when\` isCmt) @ removes all children, for which isCmt holds processChildren :: Tree t => a (t b) (t b) -> a (t b) (t b) processChildren f = arr T.getNode &&& listA (arrL T.getChildren >>> f) -- new children, deterministic filter: single element result >>> arr2 T.mkTree -- | similar to processChildren, but the new children are computed by processing -- the whole input tree -- -- example: @ replaceChildren (deep isText) @ selects all subtrees for which isText holds -- and substitutes the children component of the root node with this list replaceChildren :: Tree t => a (t b) (t b) -> a (t b) (t b) replaceChildren f = arr T.getNode &&& listA f -- compute new children >>> arr2 T.mkTree -- | -- pronounced \"slash\", meaning g inside f -- -- defined as @ f \/> g = f >>> getChildren >>> g @ -- -- example: @ hasName \"html\" \/> hasName \"body\" \/> hasName \"h1\" @ -- -- This expression selects -- all \"h1\" elements in the \"body\" element of an \"html\" element, an expression, that -- corresponds 1-1 to the XPath selection path \"html\/body\/h1\" (/>) :: Tree t => a b (t c) -> a (t c) d -> a b d f /> g = f >>> getChildren >>> g {-# INLINE (/>) #-} -- | -- pronounced \"double slash\", meaning g arbitrarily deep inside f -- -- defined as @ f \/\/> g = f >>> getChildren >>> deep g @ -- -- example: @ hasName \"html\" \/\/> hasName \"table\" @ -- -- This expression selects -- all top level \"table\" elements within an \"html\" element, an expression. -- Attention: This does not correspond -- to the XPath selection path \"html\/\/table\". The latter on matches all table elements -- even nested ones, but @\/\/>@ gives in many cases the appropriate functionality. (//>) :: Tree t => a b (t c) -> a (t c) d -> a b d f //> g = f >>> getChildren >>> deep g {-# INLINE (//>) #-} -- | -- pronounced \"outside\" meaning f containing g -- -- defined as @ f \<\/ g = f \`containing\` (getChildren >>> g) @ ( a (t b) (t b) -> a (t b) (t b) -> a (t b) (t b) f >> g) {-# INLINE ( a (t b) c -> a (t b) c deep f = f -- success when applying f `orElse` (getChildren >>> deep f) -- seach children -- | recursively searches a whole tree for subrees, for which a predicate holds. -- The search is performed bottom up. -- -- example: @ deepest isHtmlTable @ selects all innermost table elements in a document -- but no table elements containing tables. See 'deep' and 'multi' for other search strategies. deepest :: Tree t => a (t b) c -> a (t b) c deepest f = (getChildren >>> deepest f) -- seach children `orElse` f -- no success: apply f to root -- | recursively searches a whole tree for subtrees, for which a predicate holds. -- The search is performed top down. All nodes of the tree are searched, even within the -- subtrees of trees for which the predicate holds. -- -- example: @ multi isHtmlTable @ selects all table elements, even nested ones. multi :: Tree t => a (t b) c -> a (t b) c multi f = f -- combine result for root <+> (getChildren >>> multi f) -- with result for all descendants -- | recursively transforms a whole tree by applying an arrow to all subtrees, -- this is done bottom up depth first, leaves first, root as last tree -- -- example: @ processBottomUp (getChildren \`when\` isHtmlFont) @ removes all font tags in a HTML document, even nested ones -- (with an appropriate definition of isHtmlFont) processBottomUp :: Tree t => a (t b) (t b) -> a (t b) (t b) processBottomUp f = processChildren (processBottomUp f) -- process all descendants first >>> f -- then process root -- | similar to 'processBottomUp', but recursively transforms a whole tree by applying an arrow to all subtrees -- with a top down depth first traversal strategie. In many cases 'processBottomUp' and 'processTopDown' -- give same results. processTopDown :: Tree t => a (t b) (t b) -> a (t b) (t b) processTopDown f = f -- first process root >>> processChildren (processTopDown f) -- then process all descendants of new root -- | recursively transforms a whole tree by applying an arrow to all subtrees, -- but transformation stops when a predicte does not hold for a subtree, -- leaves are transformed first processBottomUpWhenNot :: Tree t => a (t b) (t b) -> a (t b) (t b) -> a (t b) (t b) processBottomUpWhenNot f p = ( processChildren (processBottomUpWhenNot f p) >>> f ) `whenNot` p -- | recursively transforms a whole tree by applying an arrow to all subtrees, -- but transformation stops when a tree is successfully transformed. -- the transformation is done top down -- -- example: @ processTopDownUntil (isHtmlTable \`guards\` tranformTable) @ -- transforms all top level table elements into something else, but inner tables remain unchanged processTopDownUntil :: Tree t => a (t b) (t b) -> a (t b) (t b) processTopDownUntil f = f `orElse` processChildren (processTopDownUntil f) -- | computes a list of trees by applying an arrow to the input -- and inserts this list in front of index i in the list of children -- -- example: @ insertChildrenAt 0 (deep isCmt) @ selects all subtrees for which isCmt holds -- and copies theses in front of the existing children insertChildrenAt :: Tree t => Int -> a (t b) (t b) -> a (t b) (t b) insertChildrenAt i f = listA f &&& this >>> arr2 insertAt where insertAt newcs = T.changeChildren (\ cs -> let (cs1, cs2) = splitAt i cs in cs1 ++ newcs ++ cs2 ) -- | similar to 'insertChildrenAt', but the insertion position is searched with a predicate insertChildrenAfter :: Tree t => a (t b) (t b) -> a (t b) (t b) -> a (t b) (t b) insertChildrenAfter p f = replaceChildren ( ( ( listA getChildren >>> spanA p ) &&& listA f ) >>> arr2L (\ (xs1, xs2) xs -> xs1 ++ xs ++ xs2) ) -- | an arrow for inserting a whole subtree with some holes in it (a template) -- into a document. The holes can be filled with contents from the input. -- -- Example -- -- > insertTreeTemplateTest :: ArrowXml a => a b XmlTree -- > insertTreeTemplateTest -- > = doc -- > >>> -- > insertTreeTemplate template pattern -- > where -- > doc -- the input data -- > = constA "The TitleThe content" -- > >>> xread -- > template -- the output template with 2 holes: xxx and yyy -- > = constA "xxx

yyy

" -- > >>> xread -- > pattern -- > = [ hasText (== "xxx") -- fill the xxx hole with the input contents from element "x/y" -- > :-> ( getChildren >>> hasName "y" >>> deep isText ) -- > -- > , hasText (== "yyy") -- fill the yyy hole with the input contents from element "x/z" -- > :-> ( getChildren >>> hasName "z" >>> getChildren ) -- > ] -- -- computes the XML tree for the following document -- -- > "The Title

The content

" insertTreeTemplate :: Tree t => a (t b) (t b) -> -- the the template [IfThen (a (t b) c) (a (t b) (t b))] -> -- the list of nodes in the template to be substituted a (t b) (t b) insertTreeTemplate template choices = insertTree $< this where insertTree t = template -- swap input and template >>> processTemplate where processTemplate = choiceA choices' -- check whether node is a "hole" within the template `orElse` processChildren processTemplate -- else descent into template tree choices' = map feedTree choices -- modify choices, such that the input is feed into the action arrows feedTree (cond :-> action) = cond :-> (constA t >>> action) -- the real input becomes the input at the holes hxt-9.3.1.18/src/Control/Arrow/IOListArrow.hs0000644000000000000000000001216212474566610016774 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Control.Arrow.IOListArrow Copyright : Copyright (C) 2005 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe\@fh-wedel.de) Stability : experimental Portability: portable Implementation of pure list arrows with IO -} -- ------------------------------------------------------------ module Control.Arrow.IOListArrow ( IOLA(..) ) where import Prelude hiding (id, (.)) import Control.Category import Control.Arrow import Control.Arrow.ArrowExc import Control.Arrow.ArrowIf import Control.Arrow.ArrowIO import Control.Arrow.ArrowList import Control.Arrow.ArrowNF import Control.Arrow.ArrowTree import Control.Arrow.ArrowNavigatableTree import Control.DeepSeq import Control.Exception ( SomeException , try ) -- ------------------------------------------------------------ -- | list arrow combined with IO monad newtype IOLA a b = IOLA { runIOLA :: a -> IO [b] } instance Category IOLA where id = IOLA $ return . (:[]) IOLA g . IOLA f = IOLA $ \ x -> do ys <- f x zs <- sequence . map g $ ys return (concat zs) instance Arrow IOLA where arr f = IOLA $ \ x -> return [f x] first (IOLA f) = IOLA $ \ ~(x1, x2) -> do ys1 <- f x1 return [ (y1, x2) | y1 <- ys1 ] -- just for efficiency second (IOLA g) = IOLA $ \ ~(x1, x2) -> do ys2 <- g x2 return [ (x1, y2) | y2 <- ys2 ] -- just for efficiency IOLA f *** IOLA g = IOLA $ \ ~(x1, x2) -> do ys1 <- f x1 ys2 <- g x2 return [ (y1, y2) | y1 <- ys1, y2 <- ys2 ] -- just for efficiency IOLA f &&& IOLA g = IOLA $ \ x -> do ys1 <- f x ys2 <- g x return [ (y1, y2) | y1 <- ys1, y2 <- ys2 ] instance ArrowZero IOLA where zeroArrow = IOLA $ const (return []) instance ArrowPlus IOLA where IOLA f <+> IOLA g = IOLA $ \ x -> do rs1 <- f x rs2 <- g x return (rs1 ++ rs2) instance ArrowChoice IOLA where left (IOLA f) = IOLA $ either (\ x -> f x >>= (\ y -> return (map Left y))) (return . (:[]) . Right) right (IOLA f) = IOLA $ either (return . (:[]) . Left) (\ x -> f x >>= (\ y -> return (map Right y))) instance ArrowApply IOLA where app = IOLA $ \ (IOLA f, x) -> f x instance ArrowList IOLA where arrL f = IOLA $ \ x -> return (f x) arr2A f = IOLA $ \ ~(x, y) -> runIOLA (f x) y constA c = IOLA $ const (return [c]) isA p = IOLA $ \x -> return (if p x then [x] else []) IOLA f >>. g = IOLA $ \x -> do ys <- f x return (g ys) instance ArrowIf IOLA where ifA (IOLA p) ta ea = IOLA $ \x -> do res <- p x runIOLA (if null res then ea else ta) x (IOLA f) `orElse` g = IOLA $ \x -> do res <- f x if null res then runIOLA g x else return res instance ArrowIO IOLA where arrIO cmd = IOLA $ \x -> do res <- cmd x return [res] instance ArrowExc IOLA where tryA f = IOLA $ \ x -> do res <- try' $ runIOLA f x return $ case res of Left er -> [Left er] Right ys -> [Right x' | x' <- ys] where try' :: IO a -> IO (Either SomeException a) try' = try instance ArrowIOIf IOLA where isIOA p = IOLA $ \x -> do res <- p x return (if res then [x] else []) instance ArrowTree IOLA instance ArrowNavigatableTree IOLA instance ArrowNF IOLA where rnfA (IOLA f) = IOLA $ \ x -> do res <- f x res `deepseq` return res instance ArrowWNF IOLA -- ------------------------------------------------------------ hxt-9.3.1.18/src/Control/Arrow/IOStateListArrow.hs0000644000000000000000000002160112474566610017773 0ustar0000000000000000{-# LANGUAGE MultiParamTypeClasses, FlexibleInstances #-} -- ------------------------------------------------------------ {- | Module : Control.Arrow.IOStateListArrow Copyright : Copyright (C) 2005-8 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe\@fh-wedel.de) Stability : experimental Portability: portable Implementation of arrows with IO and a state -} -- ------------------------------------------------------------ module Control.Arrow.IOStateListArrow ( IOSLA(..) , liftSt , runSt ) where import Prelude hiding (id, (.)) import Control.Category import Control.Arrow import Control.Arrow.ArrowExc import Control.Arrow.ArrowIf import Control.Arrow.ArrowIO import Control.Arrow.ArrowList import Control.Arrow.ArrowNF import Control.Arrow.ArrowTree import Control.Arrow.ArrowNavigatableTree import Control.Arrow.ArrowState import Control.DeepSeq import Control.Exception ( SomeException , try ) {- import qualified Debug.Trace as T -} -- ------------------------------------------------------------ -- | list arrow combined with a state and the IO monad newtype IOSLA s a b = IOSLA { runIOSLA :: s -> a -> IO (s, [b]) } instance Category (IOSLA s) where id = IOSLA $ \ s x -> return (s, [x]) -- don't defined id = arr id, this gives loops during optimization {-# INLINE id #-} IOSLA g . IOSLA f = IOSLA $ \ s x -> do (s1, ys) <- f s x sequence' s1 ys where sequence' s' [] = return (s', []) sequence' s' (x':xs') = do (s1', ys') <- g s' x' (s2', zs') <- sequence' s1' xs' return (s2', ys' ++ zs') instance Arrow (IOSLA s) where arr f = IOSLA $ \ s x -> return (s, [f x]) {-# INLINE arr #-} first (IOSLA f) = IOSLA $ \ s (x1, x2) -> do (s', ys1) <- f s x1 return (s', [ (y1, x2) | y1 <- ys1 ]) -- just for efficiency second (IOSLA g) = IOSLA $ \ s (x1, x2) -> do (s', ys2) <- g s x2 return (s', [ (x1, y2) | y2 <- ys2 ]) -- just for efficiency IOSLA f *** IOSLA g = IOSLA $ \ s (x1, x2) -> do (s1, ys1) <- f s x1 (s2, ys2) <- g s1 x2 return (s2, [ (y1, y2) | y1 <- ys1, y2 <- ys2 ]) -- just for efficiency IOSLA f &&& IOSLA g = IOSLA $ \ s x -> do (s1, ys1) <- f s x (s2, ys2) <- g s1 x return (s2, [ (y1, y2) | y1 <- ys1, y2 <- ys2 ]) instance ArrowZero (IOSLA s) where zeroArrow = IOSLA $ \ s -> const (return (s, [])) {-# INLINE zeroArrow #-} instance ArrowPlus (IOSLA s) where IOSLA f <+> IOSLA g = IOSLA $ \ s x -> do (s1, rs1) <- f s x (s2, rs2) <- g s1 x return (s2, rs1 ++ rs2) instance ArrowChoice (IOSLA s) where left (IOSLA f) = IOSLA $ \ s -> either (\ x -> do (s1, y) <- f s x return (s1, map Left y) ) (\ x -> return (s, [Right x])) right (IOSLA f) = IOSLA $ \ s -> either (\ x -> return (s, [Left x])) (\ x -> do (s1, y) <- f s x return (s1, map Right y) ) instance ArrowApply (IOSLA s) where app = IOSLA $ \ s (IOSLA f, x) -> f s x {-# INLINE app #-} instance ArrowList (IOSLA s) where arrL f = IOSLA $ \ s x -> return (s, (f x)) {-# INLINE arrL #-} arr2A f = IOSLA $ \ s (x, y) -> runIOSLA (f x) s y {-# INLINE arr2A #-} constA c = IOSLA $ \ s -> const (return (s, [c])) {-# INLINE constA #-} isA p = IOSLA $ \ s x -> return (s, if p x then [x] else []) {-# INLINE isA #-} IOSLA f >>. g = IOSLA $ \ s x -> do (s1, ys) <- f s x return (s1, g ys) {-# INLINE (>>.) #-} -- just for efficency perform (IOSLA f) = IOSLA $ \ s x -> do (s1, _ys) <- f s x return (s1, [x]) {-# INLINE perform #-} instance ArrowIf (IOSLA s) where ifA (IOSLA p) ta ea = IOSLA $ \ s x -> do (s1, res) <- p s x runIOSLA ( if null res then ea else ta ) s1 x (IOSLA f) `orElse` g = IOSLA $ \ s x -> do r@(s1, res) <- f s x if null res then runIOSLA g s1 x else return r instance ArrowIO (IOSLA s) where arrIO cmd = IOSLA $ \ s x -> do res <- cmd x return (s, [res]) {-# INLINE arrIO #-} instance ArrowExc (IOSLA s) where tryA f = IOSLA $ \ s x -> do res <- try' $ runIOSLA f s x return $ case res of Left er -> (s, [Left er]) Right (s1, ys) -> (s1, [Right x' | x' <- ys]) where try' :: IO a -> IO (Either SomeException a) try' = try instance ArrowIOIf (IOSLA s) where isIOA p = IOSLA $ \ s x -> do res <- p x return (s, if res then [x] else []) {-# INLINE isIOA #-} instance ArrowState s (IOSLA s) where changeState cf = IOSLA $ \ s x -> let s' = cf s x in return (seq s' s', [x]) {-# INLINE changeState #-} accessState af = IOSLA $ \ s x -> return (s, [af s x]) {-# INLINE accessState #-} -- ------------------------------------------------------------ -- | -- lift the state of an IOSLA arrow to a state with an additional component. -- -- This is uesful, when running predefined IO arrows, e.g. for document input, -- in a context with a more complex state component. liftSt :: IOSLA s1 b c -> IOSLA (s1, s2) b c liftSt (IOSLA f) = IOSLA $ \ (s1, s2) x -> do (s1', ys) <- f s1 x return ((s1', s2), ys) -- | -- run an arrow with augmented state in the context of a simple state arrow. -- An initial value for the new state component is needed. -- -- This is useful, when running an arrow with an extra environment component, e.g. -- for namespace handling in XML. runSt :: s2 -> IOSLA (s1, s2) b c -> IOSLA s1 b c runSt s2 (IOSLA f) = IOSLA $ \ s1 x -> do ((s1', _s2'), ys) <- f (s1, s2) x return (s1', ys) -- ------------------------------------------------------------ instance ArrowTree (IOSLA s) instance ArrowNavigatableTree (IOSLA s) instance ArrowNF (IOSLA s) where rnfA (IOSLA f) = IOSLA $ \ s x -> do res <- f s x ( -- T.trace "start rnfA for IOSLA" $ snd res ) `deepseq` return ( -- T.trace "end rnfA for IOSLA" $ res ) instance ArrowWNF (IOSLA s) -- ------------------------------------------------------------ hxt-9.3.1.18/src/Control/Arrow/ListArrow.hs0000644000000000000000000000750612474566610016552 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Control.Arrow.ListArrow Copyright : Copyright (C) 2005 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe\@fh-wedel.de) Stability : experimental Portability: portable Implementation of pure list arrows -} -- ------------------------------------------------------------ module Control.Arrow.ListArrow ( LA(..) , fromLA ) where import Prelude hiding (id, (.)) import Control.Category import Control.Arrow import Control.Arrow.ArrowIf import Control.Arrow.ArrowList import Control.Arrow.ArrowNF import Control.Arrow.ArrowTree import Control.Arrow.ArrowNavigatableTree import Control.DeepSeq import Data.List ( partition ) -- ------------------------------------------------------------ -- | pure list arrow data type newtype LA a b = LA { runLA :: a -> [b] } instance Category LA where id = LA $ (:[]) {-# INLINE id #-} LA g . LA f = LA $ concatMap g . f {-# INLINE (.) #-} instance Arrow LA where arr f = LA $ \ x -> [f x] {-# INLINE arr #-} first (LA f) = LA $ \ ~(x1, x2) -> [ (y1, x2) | y1 <- f x1 ] -- just for efficiency second (LA g) = LA $ \ ~(x1, x2) -> [ (x1, y2) | y2 <- g x2 ] LA f *** LA g = LA $ \ ~(x1, x2) -> [ (y1, y2) | y1 <- f x1, y2 <- g x2] LA f &&& LA g = LA $ \ x -> [ (y1, y2) | y1 <- f x , y2 <- g x ] instance ArrowZero LA where zeroArrow = LA $ const [] {-# INLINE zeroArrow #-} instance ArrowPlus LA where LA f <+> LA g = LA $ \ x -> f x ++ g x {-# INLINE (<+>) #-} instance ArrowChoice LA where left (LA f) = LA $ either (map Left . f) ((:[]) . Right) right (LA f) = LA $ either ((:[]) . Left) (map Right . f) LA f +++ LA g = LA $ either (map Left . f) (map Right . g) LA f ||| LA g = LA $ either f g instance ArrowApply LA where app = LA $ \ (LA f, x) -> f x {-# INLINE app #-} instance ArrowList LA where arrL = LA {-# INLINE arrL #-} arr2A f = LA $ \ ~(x, y) -> runLA (f x) y {-# INLINE arr2A #-} isA p = LA $ \ x -> if p x then [x] else [] {-# INLINE isA #-} LA f >>. g = LA $ g . f {-# INLINE (>>.) #-} withDefault a d = a >>. \ x -> if null x then [d] else x instance ArrowIf LA where ifA (LA p) t e = LA $ \ x -> runLA ( if null (p x) then e else t ) x {-# INLINE ifA #-} (LA f) `orElse` (LA g) = LA $ \ x -> ( let res = f x in if null res then g x else res ) {-# INLINE orElse #-} spanA p = LA $ (:[]) . span (not . null . runLA p) partitionA p = LA $ (:[]) . partition (not . null . runLA p) instance ArrowTree LA instance ArrowNavigatableTree LA instance ArrowNF LA where rnfA (LA f) = LA $ \ x -> let res = f x in res `deepseq` res instance ArrowWNF LA -- ------------------------------------------------------------ -- | conversion of pure list arrows into other possibly more complex -- list arrows fromLA :: ArrowList a => LA b c -> a b c fromLA f = arrL (runLA f) {-# INLINE fromLA #-} -- ------------------------------------------------------------ hxt-9.3.1.18/src/Control/Arrow/ListArrows.hs0000644000000000000000000000321012474566610016721 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Control.Arrow.ListArrows Copyright : Copyright (C) 2005 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe\@fh-wedel.de) Stability : experimental Portability: portable Module for importing all list arrows -} -- ------------------------------------------------------------ module Control.Arrow.ListArrows ( module Control.Arrow -- arrow classes , module Control.Arrow.ArrowExc , module Control.Arrow.ArrowIf , module Control.Arrow.ArrowIO , module Control.Arrow.ArrowList , module Control.Arrow.ArrowNavigatableTree , module Control.Arrow.ArrowNF , module Control.Arrow.ArrowState , module Control.Arrow.ArrowTree , module Control.Arrow.ListArrow -- arrow types , module Control.Arrow.StateListArrow , module Control.Arrow.IOListArrow , module Control.Arrow.IOStateListArrow , module Control.Arrow.NTreeEdit -- extra arrows ) where import Control.Arrow -- arrow classes import Control.Arrow.ArrowExc import Control.Arrow.ArrowList import Control.Arrow.ArrowIf import Control.Arrow.ArrowNavigatableTree import Control.Arrow.ArrowNF import Control.Arrow.ArrowState import Control.Arrow.ArrowTree import Control.Arrow.ArrowIO import Control.Arrow.ListArrow -- arrow types import Control.Arrow.StateListArrow import Control.Arrow.IOListArrow import Control.Arrow.IOStateListArrow import Control.Arrow.NTreeEdit -- extra arrows -- ------------------------------------------------------------ hxt-9.3.1.18/src/Control/Arrow/NTreeEdit.hs0000644000000000000000000000245312474566610016443 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Control.Arrow.NTreeEdit Copyright : Copyright (C) 2011 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe\@fh-wedel.de) Stability : experimental Portability: portable arrows for efficient editing of rose trees -} -- ------------------------------------------------------------ module Control.Arrow.NTreeEdit where import Control.Arrow import Control.Arrow.ArrowIf import Control.Arrow.ArrowList import Control.Arrow.ListArrow import Data.Maybe import Data.Tree.NTree.TypeDefs import Data.Tree.NTree.Edit -- ------------------------------------------------------------ -- | Edit parts of a rose tree -- -- The subtrees to be modified are selected by the first part of the IfThen pairs -- The modification by the second part editNTreeA :: [IfThen (LA (NTree b) c) (LA (NTree b) (NTree b))] -> LA (NTree b) (NTree b) editNTreeA cs = arrL $ editNTreeBottomUp ef where ef = listToMaybe . (runLA . foldr (\ (g :-> h) -> ifA g (listA h)) none $ cs) fmapNTreeA :: (b -> Maybe b) -> LA (NTree b) (NTree b) fmapNTreeA f = arr $ mapNTree' f -- eof ------------------------------------------------------------ hxt-9.3.1.18/src/Control/Arrow/StateListArrow.hs0000644000000000000000000001607512474566610017554 0ustar0000000000000000{-# LANGUAGE MultiParamTypeClasses, FlexibleInstances #-} -- ------------------------------------------------------------ {- | Module : Control.Arrow.StateListArrow Copyright : Copyright (C) 2010 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe\@fh-wedel.de) Stability : experimental Portability: portable Implementation of list arrows with a state -} -- ------------------------------------------------------------ module Control.Arrow.StateListArrow ( SLA(..) , fromSLA ) where import Prelude hiding (id, (.)) import Control.Category import Control.Arrow import Control.Arrow.ArrowIf import Control.Arrow.ArrowList import Control.Arrow.ArrowNF import Control.Arrow.ArrowState import Control.Arrow.ArrowTree import Control.Arrow.ArrowNavigatableTree import Control.DeepSeq -- ------------------------------------------------------------ -- | list arrow combined with a state newtype SLA s a b = SLA { runSLA :: s -> a -> (s, [b]) } instance Category (SLA s) where id = SLA $ \ s x -> (s, [x]) {-# INLINE id #-} SLA g . SLA f = SLA $ \ s x -> let ~(s1, ys) = f s x sequence' s' [] = (s', []) sequence' s' (x':xs') = let ~(s1', ys') = g s' x' ~(s2', zs') = sequence' s1' xs' in (s2', ys' ++ zs') in sequence' s1 ys instance Arrow (SLA s) where arr f = SLA $ \ s x -> (s, [f x]) {-# INLINE arr #-} first (SLA f) = SLA $ \ s ~(x1, x2) -> let ~(s', ys1) = f s x1 in (s', [ (y1, x2) | y1 <- ys1 ]) -- just for efficiency second (SLA g) = SLA $ \ s ~(x1, x2) -> let ~(s', ys2) = g s x2 in (s', [ (x1, y2) | y2 <- ys2 ]) -- just for efficiency SLA f *** SLA g = SLA $ \ s ~(x1, x2) -> let ~(s1, ys1) = f s x1 ~(s2, ys2) = g s1 x2 in (s2, [ (y1, y2) | y1 <- ys1, y2 <- ys2 ]) -- just for efficiency SLA f &&& SLA g = SLA $ \ s x -> let ~(s1, ys1) = f s x ~(s2, ys2) = g s1 x in (s2, [ (y1, y2) | y1 <- ys1, y2 <- ys2 ]) instance ArrowZero (SLA s) where zeroArrow = SLA $ \ s -> const (s, []) {-# INLINE zeroArrow #-} instance ArrowPlus (SLA s) where SLA f <+> SLA g = SLA $ \ s x -> let ~(s1, rs1) = f s x ~(s2, rs2) = g s1 x in (s2, rs1 ++ rs2) instance ArrowChoice (SLA s) where left (SLA f) = SLA $ \ s -> let lf x = (s1, map Left y) where ~(s1, y) = f s x rf x = (s, [Right x]) in either lf rf right (SLA f) = SLA $ \ s -> let lf x = (s, [Left x]) rf x = (s1, map Right y) where ~(s1, y) = f s x in either lf rf instance ArrowApply (SLA s) where app = SLA $ \ s (SLA f, x) -> f s x {-# INLINE app #-} instance ArrowList (SLA s) where arrL f = SLA $ \ s x -> (s, (f x)) {-# INLINE arrL #-} arr2A f = SLA $ \ s ~(x, y) -> runSLA (f x) s y {-# INLINE arr2A #-} constA c = SLA $ \ s -> const (s, [c]) {-# INLINE constA #-} isA p = SLA $ \ s x -> (s, if p x then [x] else []) {-# INLINE isA #-} SLA f >>. g = SLA $ \ s x -> let ~(s1, ys) = f s x in (s1, g ys) {-# INLINE (>>.) #-} -- just for efficency perform (SLA f) = SLA $ \ s x -> let ~(s1, _ys) = f s x in (s1, [x]) {-# INLINE perform #-} instance ArrowIf (SLA s) where ifA (SLA p) ta ea = SLA $ \ s x -> let ~(s1, res) = p s x in runSLA ( if null res then ea else ta ) s1 x (SLA f) `orElse` g = SLA $ \ s x -> let r@(s1, res) = f s x in if null res then runSLA g s1 x else r instance ArrowState s (SLA s) where changeState cf = SLA $ \ s x -> (cf s x, [x]) {-# INLINE changeState #-} accessState af = SLA $ \ s x -> (s, [af s x]) {-# INLINE accessState #-} instance ArrowTree (SLA s) instance ArrowNavigatableTree (SLA s) instance ArrowNF (SLA s) where rnfA (SLA f) = SLA $ \ s x -> let res = f s x in snd res `deepseq` res instance ArrowWNF (SLA s) -- ------------------------------------------------------------ -- | conversion of state list arrows into arbitray other -- list arrows. -- -- allows running a state list arrow within another arrow: -- -- example: -- -- > ... >>> fromSLA 0 (... setState ... getState ... ) >>> ... -- -- runs a state arrow with initial state 0 (e..g. an Int) within -- another arrow sequence fromSLA :: ArrowList a => s -> SLA s b c -> a b c fromSLA s f = arrL (snd . (runSLA f s)) {-# INLINE fromSLA #-} -- ------------------------------------------------------------ hxt-9.3.1.18/src/Control/FlatSeq.hs0000644000000000000000000000502512474566610015063 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Control.FlatSeq Copyright : Copyright (C) 2011 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe\@fh-wedel.de) Stability : experimental Portability: portable Force evaluation like deepseq in Control.DeepSeq, but control the depth of evaluation. flatseq may evaluate more than seq but less than deepseq -} -- ------------------------------------------------------------ module Control.FlatSeq where import Data.Word -- ------------------------------------------------------------ infixr 0 $!! ($!!) :: WNFData a => (a -> b) -> a -> b f $!! x = rwnf x `seq` f x {-# INLINE ($!!) #-} flatseq :: WNFData a => a -> b -> b flatseq a b = rwnf a `seq` b {-# INLINE flatseq #-} rlnf :: (a -> ()) -> [a] -> () rlnf _ [] = () rlnf r (x:xs) = r x `seq` rlnf r xs {-# INLINE rlnf #-} -- | A class of types that can be partially evaluated, but evaluation can be propagated deeper than WHNF class WNFData a where -- | Default for rwnf is reduction to WHNF rwnf :: a -> () rwnf a = a `seq` () {-# INLINE rwnf #-} -- | Default for rwnf2 is rwnf rwnf2 :: a -> () rwnf2 = rwnf {-# INLINE rwnf2 #-} instance WNFData Int instance WNFData Integer instance WNFData Float instance WNFData Double instance WNFData Char instance WNFData Bool instance WNFData () instance WNFData Word instance WNFData Word8 instance WNFData Word16 instance WNFData Word32 instance WNFData Word64 instance WNFData a => WNFData [a] where rwnf [] = () rwnf (x:xs) = x `seq` rwnf xs {-# INLINE rwnf #-} instance (WNFData a, WNFData b) => WNFData (a,b) where rwnf (x,y) = rwnf x `seq` rwnf y {-# INLINE rwnf #-} instance (WNFData a, WNFData b, WNFData c) => WNFData (a,b,c) where rwnf (x,y,z) = rwnf x `seq` rwnf y `seq` rwnf z {-# INLINE rwnf #-} instance (WNFData a, WNFData b, WNFData c, WNFData d) => WNFData (a,b,c,d) where rwnf (x1,x2,x3,x4) = rwnf x1 `seq` rwnf x2 `seq` rwnf x3 `seq` rwnf x4 {-# INLINE rwnf #-} -- ------------------------------------------------------------ hxt-9.3.1.18/src/Data/AssocList.hs0000644000000000000000000000364612474566610014670 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Data.AssocList Copyright : Copyright (C) 2010 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : stable Portability: portable Simple key value assocciation list implemented as unordered list of pairs -} -- ------------------------------------------------------------ module Data.AssocList ( module Data.AssocList ) where import Data.Maybe type AssocList k v = [(k, v)] -- lookup = lookup from Prelude -- | lookup with default value lookupDef :: Eq k => v -> k -> AssocList k v -> v lookupDef d k = fromMaybe d . lookup k -- | lookup with empty list (empty string) as default value lookup1 :: Eq k => k -> AssocList k [e] -> [e] lookup1 k = fromMaybe [] . lookup k -- | test for existence of a key hasEntry :: Eq k => k -> AssocList k v -> Bool hasEntry k = isJust . lookup k -- | add an entry, remove an existing entry before adding the new one at the top of the list, addEntry is strict addEntry :: Eq k => k -> v -> AssocList k v -> AssocList k v addEntry k v l = ( (k,v) : ) $! delEntry k l -- let l' = delEntry k l in seq l' ((k, v) : l') -- | add a whole list of entries with 'addEntry' addEntries :: Eq k => AssocList k v -> AssocList k v -> AssocList k v addEntries = foldr (.) id . map (uncurry addEntry) . reverse -- | delete an entry, delEntry is strict delEntry :: Eq k => k -> AssocList k v -> AssocList k v delEntry _ [] = [] delEntry k (x@(k1,_) : rest) | k == k1 = rest | otherwise = ( x : ) $! delEntry k rest -- delEntry k = filter ((/= k) . fst) -- | delete a list of entries with 'delEntry' delEntries :: Eq k => [k] -> AssocList k v -> AssocList k v delEntries = foldl (.) id . map delEntry -- ----------------------------------------------------------------------------- hxt-9.3.1.18/src/Data/Atom.hs0000644000000000000000000001177112474566610013662 0ustar0000000000000000{-# LANGUAGE DeriveDataTypeable #-} -- ------------------------------------------------------------ {- | Module : Data.Atom Copyright : Copyright (C) 2008 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe\@fh-wedel.de) Stability : experimental Portability: non-portable Unique Atoms generated from Strings and managed as flyweights Data.Atom can be used for caching and storage optimisation of frequently used strings. An @Atom@ is constructed from a @String@. For two equal strings the identical atom is returned. This module can be used for optimizing memory usage when working with strings or names. Many applications use data types like @Map String SomeAttribute@ where a rather fixed set of keys is used. Especially XML applications often work with a limited set of element and attribute names. For these applications it becomes more memory efficient when working with types like @Map Atom SomeAttribute@ and convert the keys into atoms before operating on such a map. Internally this module manages a map of atoms. The atoms are internally represented by @ByteString@s. When creating a new atom from a string, the string is first converted into an UTF8 @Word8@ sequence, which is packed into a @ByteString@. This @ByteString@ is looked up in the table of atoms. If it is already there, the value in the map is used as atom, else the new @ByteString@ is inserted into the map. Of course the implementation of this name cache uses @unsavePerformIO@. The global cache is managed by ue of an @IORef@ and atomicModifyIORef. The following laws hold for atoms > > s == t => newAtom s == newAtom t > s `compare` t => newAtom s `compare` newAtom t > show . newAtom == id Equality test for @Atom@s runs in /O(1)/, it is just a pointer comarison. The @Ord@ comparisons have the same runtime like the @ByteString@ comparisons. Internally there is an UTF8 comparison, but UTF8 encoding preserves the total order. Warning: The internal cache never shrinks during execution. So using it in a undisciplined way can lead to memory leaks. -} ----------------------------------------------------------------------------- module Data.Atom ( -- * Atom objects Atom, -- instance (Eq, Ord, Read, Show) newAtom, -- :: String -> Atom share -- :: String -> String ) where import Control.DeepSeq import Data.ByteString (ByteString, pack, unpack) import Data.ByteString.Internal (c2w, toForeignPtr, w2c) import Data.IORef import qualified Data.Map as M import Data.String.Unicode (unicodeToUtf8) import Data.String.UTF8Decoding (decodeUtf8) import Data.Typeable import System.IO.Unsafe (unsafePerformIO) -- ------------------------------------------------------------ type Atoms = M.Map ByteString ByteString newtype Atom = A { bs :: ByteString } deriving (Typeable) -- ------------------------------------------------------------ -- | the internal cache for the strings theAtoms :: IORef Atoms theAtoms = unsafePerformIO (newIORef M.empty) {-# NOINLINE theAtoms #-} -- | insert a bytestring into the atom cache insertAtom :: ByteString -> Atoms -> (Atoms, Atom) insertAtom s m = maybe (M.insert s s m, A s) (\ s' -> (m, A s')) . M.lookup s $ m -- | creation of an @Atom@ from a @String@ newAtom :: String -> Atom newAtom = unsafePerformIO . newAtom' {-# NOINLINE newAtom #-} -- | The internal operation running in the IO monad newAtom' :: String -> IO Atom newAtom' s = do -- putStrLn "insert atom into cache" res <- atomicModifyIORef theAtoms insert -- putStrLn "atom cache updated" return res where insert m = let r = insertAtom (pack. map c2w . unicodeToUtf8 $ s) m in fst r `seq` r -- | Insert a @String@ into the atom cache and convert the atom back into a @String@. -- -- locically @share == id@ holds, but internally equal strings share the same memory. share :: String -> String share = show . newAtom instance Eq Atom where a1 == a2 = fp1 == fp2 where (fp1, _, _) = toForeignPtr . bs $ a1 (fp2, _, _) = toForeignPtr . bs $ a2 instance Ord Atom where compare a1 a2 | a1 == a2 = EQ | otherwise = compare (bs a1) (bs a2) instance Read Atom where readsPrec p str = [ (newAtom x, y) | (x, y) <- readsPrec p str ] instance Show Atom where show = fst . decodeUtf8 . map w2c . unpack . bs -- show = show . toForeignPtr . bs -- for debug only instance NFData Atom where rnf x = seq x () ----------------------------------------------------------------------------- hxt-9.3.1.18/src/Data/Function/Selector.hs0000644000000000000000000000662512474566610016331 0ustar0000000000000000{-# OPTIONS -XMultiParamTypeClasses -XFunctionalDependencies -XFlexibleInstances #-} module Data.Function.Selector where import Prelude hiding (id,(.)) import Control.Arrow import Control.Category infixr 3 .&&&. -- ------------------------------------------------------------ -- | A Selector is a pair of an access function and a modifying function -- for reading and updating parts of a composite type data Selector s a = S { getS :: s -> a , setS :: a -> s -> s } chgS :: Selector s a -> (a -> a) -> (s -> s) chgS sel f s = setS sel x s where x = f . getS sel $ s chgM :: (Monad m) => Selector s a -> (a -> m a) -> (s -> m s) chgM sel f s = do y <- f x return $ setS sel y s where x = getS sel $ s -- | Alias for constructor S mkSelector :: (s -> a) -> (a -> s -> s) -> Selector s a mkSelector = S -- (.), (>>>), (<<<) instance Category Selector where id = S { getS = id , setS = const } (S g2 s2) . (S g1 s1) = S { getS = g2 . g1 , setS = \ x s -> let x1 = g1 s in let x1' = s2 x x1 in s1 x1' s } idS :: Selector s s idS = id (.&&&.) :: Selector s a -> Selector s b -> Selector s (a, b) (.&&&.) (S g1 s1) (S g2 s2) = S { getS = g1 &&& g2 , setS = \ (x, y) -> s2 y . s1 x } -- ------------------------------------------------------------ -- | Selectors for pairs and 3-tuples: comp1, comp2, comp3, -- this can be extended to n-tuples class Comp1 s a | s -> a where comp1 :: Selector s a class Comp2 s a | s -> a where comp2 :: Selector s a class Comp3 s a | s -> a where comp3 :: Selector s a instance Comp1 (a, b) a where comp1 = S { getS = fst , setS = \ x1 (_, x2) -> (x1, x2) } instance Comp2 (a, b) b where comp2 = S { getS = snd , setS = \ x2 (x1, _) -> (x1, x2) } instance Comp1 (a, b, c) a where comp1 = S { getS = \ (x1, _, _) -> x1 , setS = \ x1 (_, x2, x3) -> (x1, x2, x3) } instance Comp2 (a, b, c) b where comp2 = S { getS = \ (_, x2, _) -> x2 , setS = \ x2 (x1, _, x3) -> (x1, x2, x3) } instance Comp3 (a, b, c) c where comp3 = S { getS = \ (_, _, x3) -> x3 , setS = \ x3 (x1, x2, _) -> (x1, x2, x3) } -- ------------------------------------------------------------ hxt-9.3.1.18/src/Data/Tree/Class.hs0000644000000000000000000000771712474566610014733 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Data.Tree.Class Copyright : Copyright (C) 2005 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe\@fh-wedel.de) Stability : experimental Portability: portable Interface definition for trees -} -- ------------------------------------------------------------ module Data.Tree.Class ( module Data.Tree.Class ) where -- | The interface for trees class Tree t where -- | tree construction: a new tree is constructed by a node attribute and a list of children mkTree :: a -> [t a] -> t a -- | leaf construction: leafs don't have any children -- -- definition: @ mkLeaf n = mkTree n [] @ mkLeaf :: a -> t a mkLeaf n = mkTree n [] {-# INLINE mkLeaf #-} -- | leaf test: list of children empty? isLeaf :: t a -> Bool isLeaf = null . getChildren {-# INLINE isLeaf #-} -- | innner node test: @ not . isLeaf @ isInner :: t a -> Bool isInner = not . isLeaf {-# INLINE isInner #-} -- | select node attribute getNode :: t a -> a -- | select children getChildren :: t a -> [t a] -- | edit node attribute changeNode :: (a -> a) -> t a -> t a -- | edit children changeChildren :: ([t a] -> [t a]) -> t a -> t a -- | substitute node: @ setNode n = changeNode (const n) @ setNode :: a -> t a -> t a setNode n = changeNode (const n) {-# INLINE setNode #-} -- | substitute children: @ setChildren cl = changeChildren (const cl) @ setChildren :: [t a] -> t a -> t a setChildren cl = changeChildren (const cl) {-# INLINE setChildren #-} -- | fold for trees foldTree :: (a -> [b] -> b) -> t a -> b -- | all nodes of a tree nodesTree :: t a -> [a] nodesTree = foldTree (\ n rs -> n : concat rs) {-# INLINE nodesTree #-} -- | depth of a tree depthTree :: t a -> Int depthTree = foldTree (\ _ rs -> 1 + maximum (0 : rs)) -- | number of nodes in a tree cardTree :: t a -> Int cardTree = foldTree (\ _ rs -> 1 + sum rs) -- | format tree for readable trace output -- -- a /graphical/ representation of the tree in text format formatTree :: (a -> String) -> t a -> String formatTree nf n = formatNTree' nf (showString "---") (showString " ") n "" -- ------------------------------------------------------------ -- | -- convert a tree into a pseudo graphical string representation formatNTree' :: Tree t => (a -> String) -> (String -> String) -> (String -> String) -> t a -> String -> String formatNTree' node2String pf1 pf2 tree = formatNode . formatChildren pf2 l where n = getNode tree l = getChildren tree formatNode = pf1 . foldr (.) id (map trNL (node2String n)) . showNL trNL '\n' = showNL . pf2 trNL c = showChar c showNL = showChar '\n' formatChildren _ [] = id formatChildren pf (t:ts) | null ts = pfl' . formatTr pf2' t | otherwise = pfl' . formatTr pf1' t . formatChildren pf ts where pf0' = pf . showString indent1 pf1' = pf . showString indent2 pf2' = pf . showString indent3 pfl' = pf . showString indent4 formatTr = formatNTree' node2String pf0' indent1 = "+---" indent2 = "| " indent3 = " " indent4 = "|\n" -- eof ------------------------------------------------------------ hxt-9.3.1.18/src/Data/Tree/NTree/TypeDefs.hs0000644000000000000000000000751413205353551016410 0ustar0000000000000000{-# LANGUAGE CPP #-} {-# LANGUAGE DeriveDataTypeable #-} -- ------------------------------------------------------------ {- | Module : Data.Tree.NTree.TypeDefs Copyright : Copyright (C) 2005-2010 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe\@fh-wedel.de) Stability : stable Portability: portable Interface definition for trees n-ary tree structure (rose trees) -} -- ------------------------------------------------------------ module Data.Tree.NTree.TypeDefs where import Control.DeepSeq (NFData (..)) import Control.FlatSeq (WNFData (..), rlnf) import Data.Binary import Data.Monoid ((<>)) import Data.Tree.Class (Tree (..)) import Data.Typeable (Typeable) #if MIN_VERSION_base(4,8,2) #else import Control.Applicative ((<$>)) #endif #if MIN_VERSION_base(4,8,0) #else import Control.Applicative (Applicative (..)) import Data.Foldable (Foldable (..)) import Data.Monoid (Monoid (..)) import Data.Traversable (Traversable (..), sequenceA) #endif -- ------------------------------------------------------------ -- | n-ary ordered tree (rose trees) -- -- a tree consists of a node and a possible empty list of children. -- If the list of children is empty, the node is a leaf, else it's -- an inner node. -- -- NTree implements Eq, Ord, Show and Read data NTree a = NTree a (NTrees a) deriving (Eq, Ord, Show, Read, Typeable) -- | shortcut for a sequence of n-ary trees type NTrees a = [NTree a] -- ------------------------------------------------------------ instance (NFData a) => NFData (NTree a) where rnf (NTree n cl) = rnf n `seq` rnf cl {-# INLINE rnf #-} instance (WNFData a) => WNFData (NTree a) where rwnf (NTree n cl) = rwnf n `seq` rwnf cl {-# INLINE rwnf #-} -- | Evaluate a tree 2 steps deep, the top node and all children are evaluated with rwnf rwnf2 (NTree n cl) = rwnf n `seq` rlnf rwnf cl {-# INLINE rwnf2 #-} -- ------------------------------------------------------------ instance (Binary a) => Binary (NTree a) where put (NTree n cs) = put n >> put cs get = do n <- get cs <- get return (NTree n cs) -- ------------------------------------------------------------ -- | NTree implements class Functor instance Functor NTree where fmap f (NTree n cl) = NTree (f n) (map (fmap f) cl) {-# INLINE fmap #-} -- ------------------------------------------------------------ -- | NTree implements class Foldable instance Foldable NTree where foldMap f (NTree n cl) = f n <> mconcat (map (foldMap f) cl) {-# INLINE foldMap #-} -- ------------------------------------------------------------ -- | NTree implements class Taversable instance Traversable NTree where traverse f (NTree n cl) = NTree <$> f n <*> sequenceA (map (traverse f) cl) {-# INLINE traverse #-} -- ------------------------------------------------------------ -- | Implementation of "Data.Tree.Class" interface for rose trees instance Tree NTree where mkTree n cl = NTree n cl {-# INLINE mkTree #-} getNode ~(NTree n _ ) = n {-# INLINE getNode #-} getChildren ~(NTree _ cl) = cl {-# INLINE getChildren #-} changeNode cf ~(NTree n cl) = NTree (cf n) cl {-# INLINE changeNode #-} changeChildren cf ~(NTree n cl) = NTree n (cf cl) {-# INLINE changeChildren #-} foldTree f ~(NTree n cs) = f n (map (foldTree f) cs) {-# INLINE foldTree #-} -- eof ------------------------------------------------------------ hxt-9.3.1.18/src/Data/Tree/NTree/Edit.hs0000644000000000000000000000725012474566610015560 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Copyright : Copyright (C) 2011 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe\@fh-wedel.de) Stability : experimental Portability: portable Space and time efficient editing of rose trees -} -- ------------------------------------------------------------ module Data.Tree.NTree.Edit where import Data.Maybe import Data.Tree.NTree.TypeDefs -- import Debug.Trace -- | editNTreeBottomUp is a space optimized tree edit function -- -- The nodes in a tree are visited bottom up. An edit function is applied to -- all nodes. A Nothing result of the editing function indicates no changes. -- This is used to share the input tree within the resulting tree. -- -- The following law holds: -- -- > editNTreeBottomUp (const Nothing) t == [t] -- -- In this case the resulting tree does not only represent the same value -- but it is the same machine value (relative to some evaluations of closures -- during the tree walk -- -- With a simple fold like editing function the whole tree would be reconstructed -- in memory editNTreeBottomUp :: (NTree a -> Maybe [NTree a]) -> NTree a -> [NTree a] editNTreeBottomUp f t0 = maybe [t0] id . editNTreeBU $ t0 where -- editNTreeBU :: NTree a -> Maybe [NTree a] editNTreeBU t@(NTree n cs) | isNothing r' && isJust cl' = Just [t'] -- children have been change but not the node itself | otherwise = r' -- nothing has been changes or node has been changed where cl' = editNTreesBU cs -- the edited children t' = case cl' of -- the node to be processed with f Nothing -> t -- possibly with the new children (bottom up) Just cs' -> NTree n cs' r' = f t' -- the edited result -- editNTreesBU :: [NTree a] -> Maybe [NTree a] editNTreesBU [] = Nothing editNTreesBU (t : ts) = mergeRes (editNTreeBU t ) (editNTreesBU ts) where mergeRes r' = case r' of Nothing -> maybe Nothing (Just . (t :)) Just ts' -> Just . (ts' ++) . fromMaybe ts -- | A space optimized map for NTrees -- -- Subtrees, that are not changed are reused in the resulting tree -- See also: editNTreeBottomUp mapNTree' :: (a -> Maybe a) -> NTree a -> NTree a mapNTree' f t0 = maybe t0 id . map' $ t0 where -- map' :: NTree a -> Maybe (NTree a) map' (NTree n cs) = mergeRes (f n) (maps' cs) where mergeRes Nothing Nothing = Nothing mergeRes Nothing (Just cs') = Just (NTree n cs') mergeRes (Just n') cl = Just (NTree n' (fromMaybe cs cl)) -- maps' :: [NTree a] -> Maybe [NTree a] maps' [] = Nothing maps' (t : ts) = mergeRes (map' t ) (maps' ts) where mergeRes r' = case r' of Nothing -> maybe Nothing (Just . (t :)) Just t' -> Just . (t' :) . fromMaybe ts -- eof ------------------------------------------------------------hxt-9.3.1.18/src/Data/Tree/NTree/Zipper/TypeDefs.hs0000644000000000000000000001542512474566610017672 0ustar0000000000000000{-# LANGUAGE MultiParamTypeClasses, FunctionalDependencies #-} {-# OPTIONS -fno-warn-orphans #-} -- ------------------------------------------------------------ {- | Module : Data.Tree.NTree.Zipper.TypeDefs Copyright : Copyright (C) 2010 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe\@fh-wedel.de) Stability : stable Portability: portable Implementation of navigateble trees for rose trees. The implementation is done with zippers. A description and introductory tutorial about zippers can be found in -} -- ------------------------------------------------------------ module Data.Tree.NTree.Zipper.TypeDefs {- ( NTZipper , NTree , toNTZipper , fromNTZipper ) -} where import Data.Tree.Class import Data.Tree.NavigatableTree.Class import Data.Tree.NavigatableTree.XPathAxis ( childAxis ) import Data.Tree.NTree.TypeDefs -- ------------------------------------------------------------ -- | Zipper for rose trees -- -- A zipper consist of the current tree and the branches on the way back to the root data NTZipper a = NTZ { ntree :: (NTree a) , context :: (NTBreadCrumbs a) } deriving (Show) -- | The list of unzipped nodes from a current tree back to the root type NTBreadCrumbs a = [NTCrumb a] -- | One unzipped step consists of the left siblings, the node info and the right siblings data NTCrumb a = NTC (NTrees a) -- left side a -- node (NTrees a) -- right side deriving (Show) -- ------------------------------------------------------------ -- | Conversion of a rose tree into a navigatable rose tree toNTZipper :: NTree a -> NTZipper a toNTZipper t = NTZ t [] {-# INLINE toNTZipper #-} -- | Conversion of a navigatable rose tree into an ordinary rose tree. -- -- The context, the parts for moving up to the root are just removed from the tree. -- So when transforming a navigatable tree by moving around and by changing some nodes, -- one has to navigate back -- to the root, else that parts are removed from the result fromNTZipper :: NTZipper a -> NTree a fromNTZipper = ntree {-# INLINE fromNTZipper #-} -- ------------------------------------------------------------ up :: NTZipper a -> Maybe (NTZipper a) up z | isTop z = Nothing | otherwise = Just $ NTZ (up1 t bc) bcs where NTZ t (bc : bcs) = z {-# INLINE up #-} down :: NTZipper a -> Maybe (NTZipper a) down (NTZ (NTree n cs) bcs) | null cs = Nothing | otherwise = Just $ NTZ (head cs) (NTC [] n (tail cs) : bcs) {-# INLINE down #-} toTheRight :: NTZipper a -> Maybe (NTZipper a) toTheRight z | isTop z || null rs = Nothing | otherwise = Just $ NTZ t' (bc' : bcs) where (NTZ t (bc : bcs)) = z (NTC ls n rs) = bc t' = head rs bc' = NTC (t : ls) n (tail rs) {-# INLINE toTheRight #-} toTheLeft :: NTZipper a -> Maybe (NTZipper a) toTheLeft z | isTop z || null ls = Nothing | otherwise = Just $ NTZ t' (bc' : bcs) where (NTZ t (bc : bcs)) = z (NTC ls n rs) = bc t' = head ls bc' = NTC (tail ls) n (t : rs) {-# INLINE toTheLeft #-} addToTheLeft :: NTree a -> NTZipper a -> Maybe (NTZipper a) addToTheLeft t z | isTop z = Nothing | otherwise = Just $ NTZ t' (NTC (t:ls) n rs : bcs) where (NTZ t' (bc : bcs)) = z (NTC ls n rs) = bc {-# INLINE addToTheLeft #-} addToTheRight :: NTree a -> NTZipper a -> Maybe (NTZipper a) addToTheRight t z | isTop z = Nothing | otherwise = Just $ NTZ t' (NTC ls n (t:rs) : bcs) where (NTZ t' (bc : bcs)) = z (NTC ls n rs) = bc {-# INLINE addToTheRight #-} dropFromTheLeft :: NTZipper a -> Maybe (NTZipper a) dropFromTheLeft z | isTop z = Nothing | null ls = Nothing | otherwise = Just $ NTZ t' (NTC (tail ls) n rs : bcs) where (NTZ t' (bc : bcs)) = z (NTC ls n rs) = bc {-# INLINE dropFromTheLeft #-} dropFromTheRight :: NTZipper a -> Maybe (NTZipper a) dropFromTheRight z | isTop z = Nothing | null rs = Nothing | otherwise = Just $ NTZ t' (NTC ls n (tail rs) : bcs) where (NTZ t' (bc : bcs)) = z (NTC ls n rs) = bc {-# INLINE dropFromTheRight #-} -- ------------------------------------------------------------ isTop :: NTZipper a -> Bool isTop = null . context {-# INLINE isTop #-} up1 :: NTree a -> NTCrumb a -> NTree a up1 t (NTC ls n rs) = NTree n (foldl (flip (:)) (t : rs) ls) {-# INLINE up1 #-} -- ------------------------------------------------------------ instance Functor NTZipper where fmap f (NTZ t xs) = NTZ (fmap f t) (map (fmap f) xs) {-# INLINE fmap #-} instance Functor NTCrumb where fmap f (NTC xs x ys)= NTC (map (fmap f) xs) (f x) (map (fmap f) ys) {-# INLINE fmap #-} instance Tree NTZipper where mkTree n cl = toNTZipper . mkTree n $ map ntree cl getNode = getNode . ntree {-# INLINE getNode #-} getChildren = childAxis {-# INLINE getChildren #-} changeNode cf t = t { ntree = changeNode cf (ntree t) } changeChildren cf t = t { ntree = setChildren (map ntree . cf . childAxis $ t) (ntree t) } foldTree f = foldTree f . ntree {-# INLINE foldTree #-} instance NavigatableTree NTZipper where mvDown = down {-# INLINE mvDown #-} mvUp = up {-# INLINE mvUp #-} mvLeft = toTheLeft {-# INLINE mvLeft #-} mvRight = toTheRight {-# INLINE mvRight #-} instance NavigatableTreeToTree NTZipper NTree where fromTree = toNTZipper {-# INLINE fromTree #-} toTree = fromNTZipper {-# INLINE toTree #-} instance NavigatableTreeModify NTZipper NTree where addTreeLeft = addToTheLeft {-# INLINE addTreeLeft #-} addTreeRight = addToTheRight {-# INLINE addTreeRight #-} dropTreeLeft = dropFromTheLeft {-# INLINE dropTreeLeft #-} dropTreeRight = dropFromTheRight {-# INLINE dropTreeRight #-} substThisTree t nt = nt { ntree = t } {-# INLINE substThisTree #-} -- ------------------------------------------------------------ hxt-9.3.1.18/src/Data/Tree/NavigatableTree/Class.hs0000644000000000000000000000476312474566610017766 0ustar0000000000000000{-# LANGUAGE MultiParamTypeClasses, FunctionalDependencies #-} -- ------------------------------------------------------------ {- | Module : Data.Tree.NavigatableTree.Class Copyright : Copyright (C) 2010 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe\@fh-wedel.de) Stability : experimental Portability: portable Interface definition for navigatable trees. Navigatable trees need to have operations to move up, down, left and right. With these elementary operations, most of the XPath axises can be defined. -} -- ------------------------------------------------------------ module Data.Tree.NavigatableTree.Class where -- ------------------------------------------------------------ -- | The interface for navigatable trees class NavigatableTree t where -- | move one step towards the root mvUp :: t a -> Maybe (t a) -- | descend one step to the leftmost child mvDown :: t a -> Maybe (t a) -- | move to the left neighbour mvLeft :: t a -> Maybe (t a) -- | move to the right neighbour mvRight :: t a -> Maybe (t a) -- ------------------------------------------------------------ -- | Conversion between trees and navigatable trees, -- -- There is only a single navigatable tree implementation for a given tree allowed -- (see the functional dependencies) class NavigatableTreeToTree nt t | t -> nt, nt -> t where -- | construct a navigatable tree fromTree :: t a -> nt a -- | remove navigation toTree :: nt a -> t a -- ------------------------------------------------------------ -- | Edit operation on navigatable trees -- -- There is only a single navigatable tree implementation for a given tree allowed -- (see the functional dependencies) class NavigatableTreeModify nt t | t -> nt, nt -> t where -- | add an ordinary tree in front of the given navigatable tree addTreeLeft :: t a -> nt a -> Maybe (nt a) -- | add an ordinary tree behind of the given navigatable tree addTreeRight :: t a -> nt a -> Maybe (nt a) -- | drop the direct left sibling tree of the given navigatable tree dropTreeLeft :: nt a -> Maybe (nt a) -- | drop the direct right sibling tree of the given navigatable tree dropTreeRight :: nt a -> Maybe (nt a) -- | change the tree but remain the navigation substThisTree :: t a -> nt a -> nt a -- ------------------------------------------------------------ hxt-9.3.1.18/src/Data/Tree/NavigatableTree/XPathAxis.hs0000644000000000000000000000755412474566610020573 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Data.Tree.NavigatableTree.XPathAxis Copyright : Copyright (C) 2010 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe\@fh-wedel.de) Stability : experimental Portability: portable Navigatable trees need to have operations to move up, down, left and right. With these elementary operations, the XPath axises can be defined. -} -- ------------------------------------------------------------ module Data.Tree.NavigatableTree.XPathAxis where import Data.Maybe ( maybeToList ) import Data.Tree.NavigatableTree.Class import Control.Arrow ( (>>>) ) import Control.Monad ( (>=>) ) -- ------------------------------------------------------------ -- -- mothers little helpers -- | collect all trees by moving into one direction, starting tree is included maybeStar :: (a -> Maybe a) -> (a -> [a]) maybeStar f x = x : maybe [] (maybeStar f) (f x) -- | collect all trees by moving into one direction, starting tree is not included maybePlus :: (a -> Maybe a) -> (a -> [a]) maybePlus f x = maybe [] (maybeStar f) (f x) {-# INLINE maybePlus #-} -- ------------------------------------------------------------ -- XPath axis -- | XPath axis: parent parentAxis :: NavigatableTree t => t a -> [t a] parentAxis = maybeToList . mvUp {-# INLINE parentAxis #-} -- | XPath axis: ancestor ancestorAxis :: NavigatableTree t => t a -> [t a] ancestorAxis = maybePlus mvUp {-# INLINE ancestorAxis #-} -- | XPath axis: ancestor or self ancestorOrSelfAxis :: NavigatableTree t => t a -> [t a] ancestorOrSelfAxis = maybeStar mvUp {-# INLINE ancestorOrSelfAxis #-} -- | XPath axis: child childAxis :: NavigatableTree t => t a -> [t a] childAxis = (mvDown >>> maybeToList) >=> maybeStar mvRight {-# INLINE childAxis #-} -- | XPath axis: descendant descendantAxis :: NavigatableTree t => t a -> [t a] descendantAxis = descendantOrSelfAxis >>> tail {-# INLINE descendantAxis #-} -- | XPath axis: descendant or self descendantOrSelfAxis :: NavigatableTree t => t a -> [t a] descendantOrSelfAxis = visit [] where visit k t = t : maybe k (visit' k) (mvDown t) visit' k t = visit (maybe k (visit' k) (mvRight t)) t -- | not an official XPath axis but useful: reverse descendant or self, used in preceding axis revDescendantOrSelfAxis :: NavigatableTree t => t a -> [t a] revDescendantOrSelfAxis t = t : concatMap revDescendantOrSelfAxis (reverse $ childAxis t) -- | XPath axis: following sibling followingSiblingAxis :: NavigatableTree t => t a -> [t a] followingSiblingAxis = maybePlus mvRight {-# INLINE followingSiblingAxis #-} -- | XPath axis: preceeding sibling precedingSiblingAxis :: NavigatableTree t => t a -> [t a] precedingSiblingAxis = maybePlus mvLeft {-# INLINE precedingSiblingAxis #-} -- | XPath axis: self selfAxis :: NavigatableTree t => t a -> [t a] selfAxis = (:[]) {-# INLINE selfAxis #-} -- | XPath axis: following followingAxis :: NavigatableTree t => t a -> [t a] followingAxis = ancestorOrSelfAxis >=> followingSiblingAxis >=> descendantOrSelfAxis -- | XPath axis: preceding precedingAxis :: NavigatableTree t => t a -> [t a] precedingAxis = ancestorOrSelfAxis >=> precedingSiblingAxis >=> revDescendantOrSelfAxis -- | move to the root mvToRoot :: NavigatableTree t => t a -> t a mvToRoot = ancestorOrSelfAxis >>> last {-# INLINE mvToRoot #-} isAtRoot :: NavigatableTree t => t a -> Bool isAtRoot = null . ancestorAxis {-# INLINE isAtRoot #-} -- ------------------------------------------------------------ hxt-9.3.1.18/src/Text/XML/HXT/Arrow/Binary.hs0000644000000000000000000000616412474566610016456 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Text.XML.HXT.Arrow.Binary Copyright : Copyright (C) 2008 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : experimental Portability: portable De-/Serialisation arrows for XmlTrees and other arbitrary values with a Binary instance -} -- ------------------------------------------------------------ module Text.XML.HXT.Arrow.Binary ( readBinaryValue , writeBinaryValue ) where import Control.Arrow () import Control.Arrow.ArrowExc import Control.Arrow.ArrowIO import Control.Arrow.ArrowList import Data.Binary import qualified Data.ByteString.Lazy as B import System.IO (IOMode (..), hClose, openBinaryFile) import Text.XML.HXT.Arrow.XmlState.ErrorHandling import Text.XML.HXT.Arrow.XmlState.TypeDefs -- ------------------------------------------------------------ readBinaryValue :: (Binary a) => String -> IOStateArrow s b a readBinaryValue file = (uncurry $ decodeBinaryValue file) $< getSysVar ( theStrictDeserialize .&&&. theBinaryDeCompression ) -- | Read a serialied value from a file, optionally decompress it and decode the value -- In case of an error, the error message is issued and the arrow fails decodeBinaryValue :: (Binary a) => String -> Bool -> DeCompressionFct -> IOStateArrow s b a decodeBinaryValue file strict decompress = arrIO0 dec `catchA` issueExc "readBinaryValue" where dec = ( if strict then readItAll else B.readFile file ) >>= return . decode . decompress readItAll = do h <- openBinaryFile file ReadMode c <- B.hGetContents h B.length c `seq` do hClose h return c -- hack: force reading whole file and close it immediately -- | Serialize a value, optionally compress it, and write it to a file. -- In case of an error, the error message is issued and the arrow fails writeBinaryValue :: (Binary a) => String -> IOStateArrow s a () writeBinaryValue file = flip encodeBinaryValue file $< getSysVar theBinaryCompression encodeBinaryValue :: (Binary a) => CompressionFct -> String -> IOStateArrow s a () encodeBinaryValue compress file = arrIO enc `catchA` issueExc "writeBinaryXmlTree" where enc = B.writeFile file . compress . encode -- ------------------------------------------------------------ hxt-9.3.1.18/src/Text/XML/HXT/Arrow/DTDProcessing.hs0000644000000000000000000004451412474566610017703 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Text.XML.HXT.Arrow.DTDProcessing Copyright : Copyright (C) 2005 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : experimental Portability: portable DTD processing function for including external parts of a DTD parameter entity substitution and general entity substitution Implemtation completely done with arrows -} -- ------------------------------------------------------------ module Text.XML.HXT.Arrow.DTDProcessing ( processDTD ) where import Control.Arrow -- arrow classes import Control.Arrow.ArrowList import Control.Arrow.ArrowIf import Control.Arrow.ArrowTree import Text.XML.HXT.DOM.Interface import qualified Text.XML.HXT.DOM.XmlNode as XN import Text.XML.HXT.Arrow.XmlArrow import Text.XML.HXT.Arrow.XmlState import Text.XML.HXT.Arrow.ParserInterface ( parseXmlDTDdecl , parseXmlDTDdeclPart , parseXmlDTDEntityValue , parseXmlDTDPart ) import Text.XML.HXT.Arrow.Edit ( transfCharRef ) import Text.XML.HXT.Arrow.DocumentInput ( getXmlEntityContents ) import Data.Maybe import qualified Data.Map as M ( Map , empty , lookup , insert ) -- ------------------------------------------------------------ -- data DTDPart = Internal | External deriving (Eq) type RecList = [String] type DTDStateArrow b c = IOStateArrow PEEnv b c -- ------------------------------------------------------------ newtype PEEnv = PEEnv (M.Map String XmlTree) emptyPeEnv :: PEEnv emptyPeEnv = PEEnv M.empty lookupPeEnv :: String -> PEEnv -> Maybe XmlTree lookupPeEnv k (PEEnv env) = M.lookup k env addPeEntry :: String -> XmlTree -> PEEnv -> PEEnv addPeEntry k a (PEEnv env) = PEEnv $ M.insert k a env getPeValue :: DTDStateArrow String XmlTree getPeValue = (this &&& getUserState) >>> arrL (\ (n, env) -> maybeToList . lookupPeEnv n $ env) addPe :: String -> DTDStateArrow XmlTree XmlTree addPe n = traceMsg 2 ("substParamEntity: add entity " ++ show n ++ " to env") >>> changeUserState ins where ins t peEnv = addPeEntry n t peEnv -- ------------------------------------------------------------ -- | -- a filter for DTD processing -- -- inclusion of external parts of DTD, -- parameter entity substitution -- conditional section evaluation -- -- input tree must represent a complete document including root node processDTD :: IOStateArrow s XmlTree XmlTree processDTD = runInLocalURIContext ( processRoot >>> traceTree >>> traceSource ) `when` ( isRoot >>> getChildren ) where processRoot :: IOStateArrow s XmlTree XmlTree processRoot = ( traceMsg 1 ("processDTD: process parameter entities") >>> setSysAttrString a_standalone "" >>> processChildren substParamEntities >>> setDocumentStatusFromSystemState "in XML DTD processing" >>> traceMsg 1 ("processDTD: parameter entities processed") ) `when` documentStatusOk substParamEntities :: IOStateArrow s XmlTree XmlTree substParamEntities = withOtherUserState emptyPeEnv processParamEntities `when` isDTDDoctype where processParamEntities :: DTDStateArrow XmlTree XmlTree processParamEntities = mergeEntities $<<< ( listA processPredef &&& listA processInt &&& listA (runInLocalURIContext processExt) ) where mergeEntities dtdPre dtdInt dtdExt = replaceChildren (arrL $ const $ foldl1 mergeDTDs [dtdPre, dtdInt, dtdExt]) processPredef = predefDTDPart >>> substParamEntity Internal [] processInt = getChildren >>> substParamEntity Internal [] processExt = externalDTDPart >>> substParamEntity External [] mergeDTDs :: XmlTrees -> XmlTrees -> XmlTrees mergeDTDs dtdInt dtdExt = dtdInt ++ (filter (filterDTDNodes dtdInt) dtdExt) filterDTDNodes :: XmlTrees -> XmlTree -> Bool filterDTDNodes dtdPart t = not (any (filterDTDNode t) dtdPart) filterDTDNode :: XmlTree -> XmlTree -> Bool filterDTDNode t1 t2 = fromMaybe False $ do dp1 <- XN.getDTDPart t1 dp2 <- XN.getDTDPart t2 al1 <- XN.getDTDAttrl t1 al2 <- XN.getDTDAttrl t2 return ( dp1 == dp2 && ( dp1 `elem` [ELEMENT, NOTATION, ENTITY, ATTLIST] ) && ( lookup a_name al1 == lookup a_name al2 ) && ( dp1 /= ATTLIST || lookup a_value al1 == lookup a_value al2 ) ) substParamEntity :: DTDPart -> RecList -> DTDStateArrow XmlTree XmlTree substParamEntity loc recList = choiceA [ isDTDEntity :-> ( traceDTD "ENTITY declaration before DTD declaration parsing" >>> processChildren (substPeRefsInDTDdecl recList) >>> parseXmlDTDdecl >>> substPeRefsInEntityValue >>> traceDTD "ENTITY declaration after PE substitution" >>> processEntityDecl >>> traceDTD "ENTITY declaration after DTD declaration parsing" ) , ( isDTDElement <+> isDTDAttlist <+> isDTDNotation ) :-> ( traceDTD "DTD declaration before PE substitution" >>> processChildren (substPeRefsInDTDdecl recList) >>> parseXmlDTDdecl >>> traceDTD "DTD declaration after DTD declaration parsing" ) , isDTDPERef :-> substPeRefsInDTDpart recList , isDTDCondSect :-> ( if loc == Internal then issueErr "conditional sections in internal part of the DTD is not allowed" else evalCondSect $< getDTDAttrValue a_value ) , isCmt :-> none , this :-> this ] where processEntityDecl :: DTDStateArrow XmlTree XmlTree processEntityDecl = choiceA [ isDTDEntity :-> ( ifA (hasDTDAttr k_system) processExternalEntity processInternalEntity ) , isDTDPEntity :-> ( processParamEntity $< getDTDAttrValue a_name ) , this :-> none ] where processExternalEntity :: DTDStateArrow XmlTree XmlTree -- processing external entities is delayed until first usage processExternalEntity -- only the current base uri must be remembered = setDTDAttrValue a_url $< ( getDTDAttrValue k_system >>> mkAbsURI ) processInternalEntity :: DTDStateArrow XmlTree XmlTree processInternalEntity = this -- everything is already done in substPeRefsInEntityValue processParamEntity :: String -> DTDStateArrow XmlTree XmlTree processParamEntity peName = ifA (constA peName >>> getPeValue) ( issueWarn ("parameter entity " ++ show peName ++ " already defined") >>> none -- second def must be ignored ) ( ( ifA ( hasDTDAttr k_system ) -- is external param entity ? ( setDTDAttrValue a_url $< -- store absolut url ( getDTDAttrValue k_system >>> mkAbsURI ) ) -- this is too early, pe may be not referenced and file may be not there -- ( runInLocalURIContext getExternalParamEntityValue ) ( this ) -- everything is already done in substPeRefsInEntityValue ) >>> addPe peName ) substPERef :: String -> DTDStateArrow XmlTree XmlTree substPERef pn = choiceA [ isUndefinedRef :-> issueErr ("parameter entity " ++ show pn ++ " not found (forward reference?)") , isInternalRef :-> issueErr ("a parameter entity reference of " ++ show pn ++ " occurs in the internal subset of the DTD") , isUnreadExternalRef :-> ( perform ( peVal -- load the external pe value >>> -- update the pe env getExternalParamEntityValue pn -- and try again >>> addPe pn ) >>> substPERef pn ) , this :-> substPE ] `when` isDTDPERef where peVal = constA pn >>> getPeValue isUnreadExternalRef = ( peVal >>> getDTDAttrValue a_url >>> isA (not . null) ) `guards` this isInternalRef = none -- isA (const (loc == Internal)) -- TODO: check this restriction, it seams rather meaningless isUndefinedRef = neg peVal substPE = replaceChildren (peVal >>> getChildren) -- store PE value in children component substPeRefsInEntityValue :: DTDStateArrow XmlTree XmlTree substPeRefsInEntityValue = ( ( replaceChildren ( xshow ( getChildren -- substitute char entites >>> -- and parameter references transfCharRef -- combine all pieces to a single string >>> -- as the new entity value substPeRefsInValue [] ) >>> mkText ) ) `whenNot` hasDTDAttr k_system -- only apply for internal entities ) `when` ( isDTDEntity <+> isDTDPEntity ) -- only apply for entity declarations substPeRefsInDTDpart :: RecList -> DTDStateArrow XmlTree XmlTree substPeRefsInDTDpart rl = recursionCheck "DTD part" rl subst where subst :: RecList -> String -> DTDStateArrow XmlTree XmlTree subst recl pn = substPERef pn >>> traceDTD "substPeRefsInDTDdecl: before parseXmlDTDPart" >>> ( runInPeContext ( getChildren >>> ( (constA ("parameter entity: " ++ pn)) &&& this ) >>> parseXmlDTDPart >>> traceDTD "substPeRefsInDTDpart: after parseXmlDTDPart" >>> substParamEntity loc (pn : recl) ) `when` isDTDPERef ) substPeRefsInDTDdecl :: RecList -> DTDStateArrow XmlTree XmlTree substPeRefsInDTDdecl rl = recursionCheck "DTD declaration" rl subst where subst :: RecList -> String -> DTDStateArrow XmlTree XmlTree subst recl pn = substPERef pn >>> traceDTD "substPeRefsInDTDdecl: before parseXmlDTDdeclPart" >>> ( runInPeContext ( parseXmlDTDdeclPart >>> traceDTD "substPeRefsInDTDdecl: after parseXmlDTDdeclPart" >>> processChildren ( substPeRefsInDTDdecl (pn : recl) ) ) `when` isDTDPERef ) substPeRefsInValue :: RecList -> DTDStateArrow XmlTree XmlTree substPeRefsInValue rl = recursionCheck "entity value" rl subst where subst :: RecList -> String -> DTDStateArrow XmlTree XmlTree subst recl pn = substPERef pn >>> parseXmlDTDEntityValue >>> -- transfCharRef this must be done somewhere else -- >>> substPeRefsInValue (pn : recl) substPeRefsInCondSect :: RecList -> DTDStateArrow XmlTree XmlTree substPeRefsInCondSect rl = recursionCheck "conditional section" rl subst where subst :: RecList -> String -> DTDStateArrow XmlTree XmlTree subst recl pn = substPERef pn >>> traceDTD "substPeRefsInCondSect: parseXmlDTDdeclPart" >>> runInPeContext ( parseXmlDTDdeclPart >>> traceDTD "substPeRefsInCondSect: after parseXmlDTDdeclPart" >>> processChildren ( substPeRefsInCondSect (pn : recl) ) ) recursionCheck :: String -> RecList -> (RecList -> String -> DTDStateArrow XmlTree XmlTree) -> DTDStateArrow XmlTree XmlTree recursionCheck wher rl subst = ( recusiveSubst $< getDTDAttrValue a_peref ) `when` isDTDPERef where recusiveSubst name | name `elem` rl = issueErr ("recursive call of parameter entity " ++ show name ++ " in " ++ wher) | otherwise = subst rl name runInPeContext :: DTDStateArrow XmlTree XmlTree -> DTDStateArrow XmlTree XmlTree runInPeContext f = ( runWithNewBase $< getDTDAttrValue a_url ) `orElse` f where runWithNewBase base = runInLocalURIContext ( perform (constA base >>> setBaseURI) >>> f ) evalCondSect :: String -> DTDStateArrow XmlTree XmlTree evalCondSect content = traceDTD "evalCondSect: process conditional section" >>> processChildren (substPeRefsInCondSect []) >>> parseXmlDTDdecl >>> ( hasText (== k_include) `guards` ( ( constA "conditional section" &&& txt content ) >>> parseXmlDTDPart >>> traceMsg 2 "evalCond: include DTD part" >>> substParamEntity External recList ) ) predefDTDPart :: DTDStateArrow XmlTree XmlTree predefDTDPart = ( constA "predefined entities" &&& ( constA predefinedEntities >>> mkText) ) >>> parseXmlDTDPart where predefinedEntities :: String predefinedEntities = concat [ "" , "" , "" , "" , "" ] externalDTDPart :: DTDStateArrow XmlTree XmlTree externalDTDPart = isDTDDoctype `guards` ( hasDTDAttr k_system `guards` ( getExternalDTDPart $< getDTDAttrValue k_system ) ) getExternalDTDPart :: String -> DTDStateArrow XmlTree XmlTree getExternalDTDPart src = root [sattr a_source src] [] >>> getXmlEntityContents >>> replaceChildren ( ( constA src &&& getChildren ) >>> parseXmlDTDPart ) >>> traceDoc "processExternalDTD: parsing DTD part done" >>> getChildren getExternalParamEntityValue :: String -> DTDStateArrow XmlTree XmlTree getExternalParamEntityValue pn = isDTDPEntity `guards` ( setEntityValue $< ( listA ( getEntityValue $< getDTDAttrValue a_url ) ) ) where getEntityValue :: String -> DTDStateArrow XmlTree XmlTree getEntityValue url = root [sattr a_source url] [] >>> runInLocalURIContext getXmlEntityContents >>> traceMsg 2 ("getExternalParamEntityValue: contents read for " ++ show pn ++ " from " ++ show url) >>> getChildren setEntityValue :: XmlTrees -> DTDStateArrow XmlTree XmlTree setEntityValue res | null res = issueErr ("illegal external parameter entity value for entity %" ++ pn ++";") | otherwise = replaceChildren (constL res) >>> setDTDAttrValue a_url "" -- mark entity as read traceDTD :: String -> DTDStateArrow XmlTree XmlTree traceDTD msg = traceMsg 3 msg >>> traceTree -- ------------------------------------------------------------ hxt-9.3.1.18/src/Text/XML/HXT/Arrow/DocumentInput.hs0000644000000000000000000003424513205353551020020 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Text.XML.HXT.Arrow.DocumentInput Copyright : Copyright (C) 2005 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : experimental Portability: portable State arrows for document input -} -- ------------------------------------------------------------ module Text.XML.HXT.Arrow.DocumentInput ( getXmlContents , getXmlEntityContents , getEncoding , getTextEncoding , decodeDocument , addInputError ) where import Control.Arrow import Control.Arrow.ArrowIf import Control.Arrow.ArrowIO import Control.Arrow.ArrowList import Control.Arrow.ArrowTree import Control.Arrow.ListArrow import Data.List (isPrefixOf) import Data.String.Unicode (getDecodingFct, guessEncoding, normalizeNL) import System.FilePath (takeExtension) import qualified Text.XML.HXT.IO.GetFILE as FILE import Text.XML.HXT.DOM.Interface import Text.XML.HXT.Arrow.ParserInterface (parseXmlDocEncodingSpec, parseXmlEntityEncodingSpec, removeEncodingSpec) import Text.XML.HXT.Arrow.XmlArrow import Text.XML.HXT.Arrow.XmlState import Text.XML.HXT.Arrow.XmlState.TypeDefs -- ---------------------------------------------------------- protocolHandlers :: AssocList String (IOStateArrow s XmlTree XmlTree) protocolHandlers = [ ("file", getFileContents) , ("http", getHttpContents) , ("https", getHttpContents) , ("stdin", getStdinContents) ] getProtocolHandler :: IOStateArrow s String (IOStateArrow s XmlTree XmlTree) getProtocolHandler = arr (\ s -> lookupDef getUnsupported s protocolHandlers) getUnsupported :: IOStateArrow s XmlTree XmlTree getUnsupported = perform ( getAttrValue a_source >>> arr (("unsupported protocol in URI " ++) . show) >>> applyA (arr issueFatal) ) >>> setDocumentStatusFromSystemState "accessing documents" getStringContents :: IOStateArrow s XmlTree XmlTree getStringContents = setCont $< getAttrValue a_source >>> addAttr transferMessage "OK" >>> addAttr transferStatus "200" where setCont contents = replaceChildren (txt contents') >>> addAttr transferURI (take 7 contents) -- the "string:" prefix is stored, this is required by setBaseURIFromDoc >>> addAttr a_source (show . prefix 48 $ contents') -- a quoted prefix of the content, max 48 chars is taken as source name where contents' = drop (length stringProtocol) contents prefix l s | length s' > l = take (l - 3) s' ++ "..." | otherwise = s' where s' = take (l + 1) s getFileContents :: IOStateArrow s XmlTree XmlTree getFileContents = applyA ( ( getSysVar theStrictInput &&& ( getAttrValue transferURI >>> getPathFromURI ) ) >>> traceValue 2 (\ (b, f) -> "read file " ++ show f ++ " (strict input = " ++ show b ++ ")") >>> arrIO (uncurry FILE.getCont) >>> ( arr (uncurry addInputError) -- io error occured ||| arr addTxtContent -- content read ) ) >>> addMimeType getStdinContents :: IOStateArrow s XmlTree XmlTree getStdinContents = applyA ( getSysVar theStrictInput >>> arrIO FILE.getStdinCont >>> ( arr (uncurry addInputError) -- io error occured ||| arr addTxtContent -- content read ) ) addInputError :: Attributes -> String -> IOStateArrow s XmlTree XmlTree addInputError al e = issueFatal e >>> seqA (map (uncurry addAttr) al) >>> setDocumentStatusFromSystemState "accessing documents" addMimeType :: IOStateArrow s XmlTree XmlTree addMimeType = addMime $< ( ( getSysVar theFileMimeType >>> isA (not . null) ) `orElse` ( getAttrValue transferURI >>> ( uriToMime $< getMimeTypeTable ) ) ) where addMime mt = addAttr transferMimeType mt uriToMime mtt = arr $ ( \ uri -> extensionToMimeType (drop 1 . takeExtension $ uri) mtt ) addTxtContent :: Blob -> IOStateArrow s XmlTree XmlTree addTxtContent bc = replaceChildren (blb bc) >>> addAttr transferMessage "OK" >>> addAttr transferStatus "200" getHttpContents :: IOStateArrow s XmlTree XmlTree getHttpContents = withoutUserState $ applyA $ getSysVar theHttpHandler getContentsFromString :: IOStateArrow s XmlTree XmlTree getContentsFromString = ( getAttrValue a_source >>> isA (isPrefixOf stringProtocol) ) `guards` getStringContents getContentsFromDoc :: IOStateArrow s XmlTree XmlTree getContentsFromDoc = ( ( addTransferURI $< getBaseURI >>> getCont ) `when` ( setAbsURI $< ( getAttrValue a_source >>^ ( \ src-> (if null src then "stdin:" else src) ) -- empty document name -> read from stdin ) ) ) >>> setDocumentStatusFromSystemState "getContentsFromDoc" where setAbsURI src = ifA ( constA src >>> changeBaseURI ) this ( issueFatal ("illegal URI : " ++ show src) ) addTransferURI uri = addAttr transferURI uri getCont = applyA ( getBaseURI -- compute the handler and call it >>> traceValue 2 (("getContentsFromDoc: reading " ++) . show) >>> getSchemeFromURI >>> getProtocolHandler ) `orElse` this -- don't change tree, when no handler can be found setBaseURIFromDoc :: IOStateArrow s XmlTree XmlTree setBaseURIFromDoc = perform ( getAttrValue transferURI >>> isA (isPrefixOf stringProtocol) -- do not change base URI when reading from a string >>> setBaseURI ) {- | Read the content of a document. This routine is usually called from 'Text.XML.HXT.Arrow.ProcessDocument.getDocumentContents'. The input must be a root node (constructed with 'Text.XML.HXT.Arrow.XmlArrow.root'), usually without children. The attribute list contains all input parameters, e.g. URI or source file name, encoding preferences, ... If the source name is empty, the input is read from standard input. The source is transformed into an absolute URI. If the source is a relative URI, or a file name, it is expanded into an absolute URI with respect to the current base URI. The default base URI is of protocol \"file\" and points to the current working directory. The currently supported protocols are \"http\", \"file\", \"stdin\" and \"string\". The latter two are internal protocols. An uri of the form \"stdin:\" stands for the content of the standard input stream. \"string:some text\" means, that \"some text\" is taken as input. This internal protocol is used for reading from normal 'String' values. -} getXmlContents :: IOStateArrow s XmlTree XmlTree getXmlContents = getXmlContents' parseXmlDocEncodingSpec >>> setBaseURIFromDoc getXmlEntityContents :: IOStateArrow s XmlTree XmlTree getXmlEntityContents = traceMsg 2 "getXmlEntityContents" >>> addAttr transferMimeType text_xml_external_parsed_entity -- the default transfer mimetype >>> getXmlContents' parseXmlEntityEncodingSpec >>> addAttr transferMimeType text_xml_external_parsed_entity >>> processChildren ( removeEncodingSpec >>> changeText normalizeNL -- newline normalization must be done here ) -- the following calls of the parsers don't do this >>> setBaseURIFromDoc >>> traceMsg 2 "getXmlEntityContents done" getXmlContents' :: IOStateArrow s XmlTree XmlTree -> IOStateArrow s XmlTree XmlTree getXmlContents' parseEncodingSpec = ( getContentsFromString -- no decoding done for string: protocol `orElse` ( getContentsFromDoc >>> choiceA [ isXmlHtmlDoc :-> ( parseEncodingSpec >>> filterErrorMsg >>> decodeDocument ) , isTextDoc :-> decodeDocument , this :-> this ] >>> perform ( getAttrValue transferURI >>> traceValue 1 (("getXmlContents: content read and decoded for " ++) . show) ) >>> traceDoc "getXmlContents'" ) ) `when` isRoot isMimeDoc :: (String -> Bool) -> IOStateArrow s XmlTree XmlTree isMimeDoc isMT = fromLA $ ( ( getAttrValue transferMimeType >>^ stringToLower ) >>> isA (\ t -> null t || isMT t) ) `guards` this isTextDoc, isXmlHtmlDoc :: IOStateArrow s XmlTree XmlTree isTextDoc = isMimeDoc isTextMimeType isXmlHtmlDoc = isMimeDoc (\ mt -> isHtmlMimeType mt || isXmlMimeType mt) -- ------------------------------------------------------------ getEncoding :: IOStateArrow s XmlTree String getEncoding = catA [ xshow getChildren -- 1. guess: guess encoding by looking at the first few bytes >>> arr guessEncoding , getAttrValue transferEncoding -- 2. guess: take the transfer encoding , getAttrValue a_encoding -- 3. guess: take encoding parameter in root node , getSysVar theInputEncoding -- 4. guess: take encoding parameter in global state , constA utf8 -- default : utf8 ] >. (head . filter (not . null)) -- make the filter deterministic: take 1. entry from list of guesses getTextEncoding :: IOStateArrow s XmlTree String getTextEncoding = catA [ getAttrValue transferEncoding -- 1. guess: take the transfer encoding , getAttrValue a_encoding -- 2. guess: take encoding parameter in root node , getSysVar theInputEncoding -- 3. guess: take encoding parameter in global state , constA isoLatin1 -- default : no encoding ] >. (head . filter (not . null)) -- make the filter deterministic: take 1. entry from list of guesses decodeDocument :: IOStateArrow s XmlTree XmlTree decodeDocument = choiceA [ ( isRoot >>> isXmlHtmlDoc ) :-> ( decodeX $< getSysVar theExpat) , ( isRoot >>> isTextDoc ) :-> ( decodeArr $< getTextEncoding ) , this :-> this ] where decodeX :: Bool -> IOStateArrow s XmlTree XmlTree decodeX False = decodeArr $< getEncoding decodeX True = noDecode $< getEncoding -- parse with expat noDecode enc = traceMsg 2 ("no decoding (done by expat): encoding is " ++ show enc) >>> addAttr transferEncoding enc decodeArr :: String -> IOStateArrow s XmlTree XmlTree decodeArr enc = maybe notFound found . getDecodingFct $ enc where found df = traceMsg 2 ("decodeDocument: encoding is " ++ show enc) >>> ( decodeText df $< getSysVar theEncodingErrors ) >>> addAttr transferEncoding enc notFound = issueFatal ("encoding scheme not supported: " ++ show enc) >>> setDocumentStatusFromSystemState "decoding document" {- just for performance test decodeText _ _ = this -} decodeText df withEncErrors = processChildren ( getText -- get the document content -- the following 3 lines -- don't seem to raise the space problem in decodeText -- space is allocated in blobToString and in parsec >>> arr df -- decode the text, result is (string, [errMsg]) >>> ( ( fst ^>> mkText ) -- take decoded string and build text node <+> ( if withEncErrors then ( arrL snd -- take the error messages >>> arr ((enc ++) . (" encoding error" ++)) -- prefix with enc error >>> applyA (arr issueErr) -- build issueErr arrow and apply >>> none -- neccessary for type match with <+> ) else none ) ) ) -- ------------------------------------------------------------ hxt-9.3.1.18/src/Text/XML/HXT/Arrow/DocumentOutput.hs0000644000000000000000000002350312474566610020225 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Text.XML.HXT.Arrow.DocumentOutput Copyright : Copyright (C) 2005-9 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : experimental Portability: portable State arrows for document output -} -- ------------------------------------------------------------ module Text.XML.HXT.Arrow.DocumentOutput ( putXmlDocument , putXmlTree , putXmlSource , encodeDocument , encodeDocument' ) where import Control.Arrow import Control.Arrow.ArrowExc import Control.Arrow.ArrowIf import Control.Arrow.ArrowIO import Control.Arrow.ArrowList import Control.Arrow.ArrowTree import Control.Arrow.ListArrow import qualified Data.ByteString.Lazy as BS import Data.Maybe import Data.String.Unicode (getOutputEncodingFct') import Text.XML.HXT.DOM.Interface import qualified Text.XML.HXT.DOM.ShowXml as XS import Text.XML.HXT.Arrow.Edit (addHeadlineToXmlDoc, addXmlPi, addXmlPiEncoding, escapeHtmlRefs, escapeXmlRefs, indentDoc, numberLinesInXmlDoc, treeRepOfXmlDoc) import Text.XML.HXT.Arrow.XmlArrow import Text.XML.HXT.Arrow.XmlState import Text.XML.HXT.Arrow.XmlState.TypeDefs import System.IO (Handle, IOMode (..), hClose, hPutStrLn, hSetBinaryMode, openBinaryFile, openFile, stdout) -- ------------------------------------------------------------ -- -- | Write the contents of a document tree into an output stream (file or stdout). -- -- If textMode is set, writing is done with Haskell string output, else (default) -- writing is done with lazy ByteString output putXmlDocument :: Bool -> String -> IOStateArrow s XmlTree XmlTree putXmlDocument textMode dst = perform putDoc where putDoc = ( if textMode then ( xshow getChildren >>> tryA (arrIO (\ s -> hPutDocument (\h -> hPutStrLn h s))) ) else ( xshowBlob getChildren >>> tryA (arrIO (\ s -> hPutDocument (\h -> do BS.hPutStr h s BS.hPutStr h (stringToBlob "\n") ) ) ) ) ) >>> ( ( traceMsg 1 ("io error, document not written to " ++ outFile) >>> arr show >>> mkError c_fatal >>> filterErrorMsg ) ||| ( traceMsg 2 ("document written to " ++ outFile ++ ", textMode = " ++ show textMode) >>> none ) ) where isStdout = null dst || dst == "-" outFile = if isStdout then "stdout" else show dst hPutDocument :: (Handle -> IO ()) -> IO () hPutDocument action | isStdout = do hSetBinaryMode stdout (not textMode) action stdout hSetBinaryMode stdout False | otherwise = do handle <- ( if textMode then openFile else openBinaryFile ) dst WriteMode action handle hClose handle -- | -- write the tree representation of a document to a file putXmlTree :: String -> IOStateArrow s XmlTree XmlTree putXmlTree dst = perform ( treeRepOfXmlDoc >>> addHeadlineToXmlDoc >>> putXmlDocument True dst ) -- | -- write a document with indentaion and line numers putXmlSource :: String -> IOStateArrow s XmlTree XmlTree putXmlSource dst = perform ( (this ) `whenNot` isRoot >>> indentDoc >>> numberLinesInXmlDoc >>> addHeadlineToXmlDoc >>> putXmlDocument True dst ) -- ------------------------------------------------------------ getEncodingParam :: IOStateArrow s XmlTree String getEncodingParam = catA [ getSysVar theOutputEncoding -- 4. guess: take output encoding parameter from global state , getSysVar theInputEncoding -- 5. guess: take encoding parameter from global state , constA utf8 -- default : utf8 ] >. (head . filter (not . null)) getOutputEncoding :: String -> IOStateArrow s XmlTree String getOutputEncoding defaultEnc = getEC $< getEncodingParam where getEC enc' = fromLA $ getOutputEncoding' defaultEnc enc' encodeDocument :: Bool -> Bool -> String -> IOStateArrow s XmlTree XmlTree encodeDocument quoteXml supressXmlPi defaultEnc = encode $< getOutputEncoding defaultEnc where encode enc = traceMsg 2 ("encodeDocument: encoding is " ++ show enc) >>> ( encodeDocument' quoteXml supressXmlPi enc `orElse` ( issueFatal ("encoding scheme not supported: " ++ show enc) >>> setDocumentStatusFromSystemState "encoding document" ) ) -- ------------------------------------------------------------ isBinaryDoc :: LA XmlTree XmlTree isBinaryDoc = ( ( getAttrValue transferMimeType >>^ stringToLower ) >>> isA (\ t -> not (null t || isTextMimeType t || isXmlMimeType t)) ) `guards` this getOutputEncoding' :: String -> String -> LA XmlTree String getOutputEncoding' defaultEnc defaultEnc2 = catA [ isBinaryDoc >>> -- 0. guess: binary data found: no encoding at all constA isoLatin1 -- the content should usually be a blob -- this handling is like the decoding in DocumentInput, -- there nothing is decoded for non text or non xml contents , getChildren -- 1. guess: evaluate >>> ( ( isPi >>> hasName t_xml ) `guards` getAttrValue a_encoding ) , constA defaultEnc -- 2. guess: explicit parameter, may be "" , getAttrValue a_output_encoding -- 3. guess: take output encoding parameter in root node , constA defaultEnc2 -- default : UNICODE or utf8 ] >. (head . filter (not . null)) -- make the filter deterministic: take 1. entry from list of guesses encodeDocument' :: ArrowXml a => Bool -> Bool -> String -> a XmlTree XmlTree encodeDocument' quoteXml supressXmlPi defaultEnc = fromLA (encode $< getOutputEncoding' defaultEnc utf8) where encode :: String -> LA XmlTree XmlTree encode encodingScheme | encodingScheme == unicodeString = replaceChildren ( (getChildren >. XS.xshow'' cQuot aQuot) >>> mkText ) | isNothing encodeFct = none | otherwise = ( if supressXmlPi then processChildren (none `when` isXmlPi) else ( addXmlPi >>> addXmlPiEncoding encodingScheme ) ) >>> ( isLatin1Blob `orElse` encodeDoc (fromJust encodeFct) ) >>> addAttr a_output_encoding encodingScheme where (cQuot, aQuot) | quoteXml = escapeXmlRefs | otherwise = escapeHtmlRefs encodeFct = getOutputEncodingFct' encodingScheme encodeDoc ef = replaceChildren ( xshowBlobWithEnc cQuot aQuot ef getChildren >>> mkBlob ) xshowBlobWithEnc cenc aenc enc f = f >. XS.xshow' cenc aenc enc -- if encoding scheme is isolatin1 and the contents is a single blob (bytestring) -- the encoding is the identity. -- This optimization enables processing (copying) of none XML contents -- without any conversions from and to strings isLatin1Blob | encodingScheme /= isoLatin1 = none | otherwise = childIsSingleBlob `guards` this where childIsSingleBlob = listA getChildren >>> isA (length >>> (== 1)) >>> unlistA >>> isBlob -- ------------------------------------------------------------ hxt-9.3.1.18/src/Text/XML/HXT/Arrow/Edit.hs0000644000000000000000000006120412474566610016113 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Text.XML.HXT.Arrow.Edit Copyright : Copyright (C) 2011 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : stable Portability: portable common edit arrows -} -- ------------------------------------------------------------ module Text.XML.HXT.Arrow.Edit ( canonicalizeAllNodes , canonicalizeForXPath , canonicalizeContents , collapseAllXText , collapseXText , xshowEscapeXml , escapeXmlRefs , escapeHtmlRefs , haskellRepOfXmlDoc , treeRepOfXmlDoc , addHeadlineToXmlDoc , indentDoc , numberLinesInXmlDoc , preventEmptyElements , removeComment , removeAllComment , removeWhiteSpace , removeAllWhiteSpace , removeDocWhiteSpace , transfCdata , transfAllCdata , transfCharRef , transfAllCharRef , substAllXHTMLEntityRefs , substXHTMLEntityRef , rememberDTDAttrl , addDefaultDTDecl , hasXmlPi , addXmlPi , addXmlPiEncoding , addDoctypeDecl , addXHtmlDoctypeStrict , addXHtmlDoctypeTransitional , addXHtmlDoctypeFrameset ) where import Control.Arrow import Control.Arrow.ArrowIf import Control.Arrow.ArrowList import Control.Arrow.ArrowTree import Control.Arrow.ListArrow import Control.Arrow.NTreeEdit import Data.Char.Properties.XMLCharProps (isXmlSpaceChar) import Text.XML.HXT.Arrow.XmlArrow import Text.XML.HXT.DOM.FormatXmlTree (formatXmlTree) import Text.XML.HXT.DOM.Interface import qualified Text.XML.HXT.DOM.ShowXml as XS import qualified Text.XML.HXT.DOM.XmlNode as XN import Text.XML.HXT.Parser.HtmlParsec (emptyHtmlTags) import Text.XML.HXT.Parser.XhtmlEntities (xhtmlEntities) import Text.XML.HXT.Parser.XmlEntities (xmlEntities) import Data.List (isPrefixOf) import qualified Data.Map as M import Data.Maybe -- ------------------------------------------------------------ -- | -- Applies some "Canonical XML" rules to a document tree. -- -- The rules differ slightly for canonical XML and XPath in handling of comments -- -- Note: This is not the whole canonicalization as it is specified by the W3C -- Recommendation. Adding attribute defaults or sorting attributes in lexicographic -- order is done by the @transform@ function of module @Text.XML.HXT.Validator.Validation@. -- Replacing entities or line feed normalization is done by the parser. -- -- -- Not implemented yet: -- -- - Whitespace within start and end tags is normalized -- -- - Special characters in attribute values and character content are replaced by character references -- -- see 'canonicalizeAllNodes' and 'canonicalizeForXPath' canonicalizeTree' :: LA XmlTree XmlTree -> LA XmlTree XmlTree canonicalizeTree' toBeRemoved = ( processChildren ( (none `when` (isText <+> isXmlPi)) -- remove XML PI and all text around XML root element >>> (deep isPi `when` isDTD) -- remove DTD parts, except PIs whithin DTD ) `when` isRoot ) >>> canonicalizeNodes toBeRemoved canonicalizeNodes :: LA XmlTree XmlTree -> LA XmlTree XmlTree canonicalizeNodes toBeRemoved = editNTreeA $ [ toBeRemoved :-> none , ( isElem >>> getAttrl >>> getChildren >>> isCharRef ) -- canonicalize attribute list :-> ( processAttrl ( processChildren transfCharRef >>> collapseXText' -- combine text in attribute values ) >>> ( collapseXText' -- and combine text in content `when` (getChildren >>. has2XText) ) ) , ( isElem >>> (getChildren >>. has2XText) ) :-> collapseXText' -- combine text in content , isCharRef :-> ( getCharRef >>> arr (\ i -> [toEnum i]) >>> mkText ) , isCdata :-> ( getCdata >>> mkText ) ] -- | -- Applies some "Canonical XML" rules to a document tree. -- -- The rule differ slightly for canonical XML and XPath in handling of comments -- -- Note: This is not the whole canonicalization as it is specified by the W3C -- Recommendation. Adding attribute defaults or sorting attributes in lexicographic -- order is done by the @transform@ function of module @Text.XML.HXT.Validator.Validation@. -- Replacing entities or line feed normalization is done by the parser. -- -- Rules: remove DTD parts, processing instructions, comments and substitute char refs in attribute -- values and text -- -- Not implemented yet: -- -- - Whitespace within start and end tags is normalized -- -- - Special characters in attribute values and character content are replaced by character references canonicalizeAllNodes :: ArrowList a => a XmlTree XmlTree canonicalizeAllNodes = fromLA $ canonicalizeTree' isCmt -- remove comment {-# INLINE canonicalizeAllNodes #-} -- | -- Canonicalize a tree for XPath -- Like 'canonicalizeAllNodes' but comment nodes are not removed -- -- see 'canonicalizeAllNodes' canonicalizeForXPath :: ArrowList a => a XmlTree XmlTree canonicalizeForXPath = fromLA $ canonicalizeTree' none -- comment remains there {-# INLINE canonicalizeForXPath #-} -- | -- Canonicalize the contents of a document -- -- substitutes all char refs in text and attribute values, -- removes CDATA section and combines all sequences of resulting text -- nodes into a single text node -- -- see 'canonicalizeAllNodes' canonicalizeContents :: ArrowList a => a XmlTree XmlTree canonicalizeContents = fromLA $ canonicalizeNodes none {-# INLINE canonicalizeContents #-} -- ------------------------------------------------------------ has2XText :: XmlTrees -> XmlTrees has2XText ts0@(t1 : ts1@(t2 : ts2)) | XN.isText t1 = if XN.isText t2 then ts0 else has2XText ts2 | otherwise = has2XText ts1 has2XText _ = [] collapseXText' :: LA XmlTree XmlTree collapseXText' = replaceChildren ( listA getChildren >>> arrL (foldr mergeText' []) ) where mergeText' :: XmlTree -> XmlTrees -> XmlTrees mergeText' t1 (t2 : ts2) | XN.isText t1 && XN.isText t2 = let s1 = fromJust . XN.getText $ t1 s2 = fromJust . XN.getText $ t2 t = XN.mkText (s1 ++ s2) in t : ts2 mergeText' t1 ts = t1 : ts -- | -- Collects sequences of text nodes in the list of children of a node into one single text node. -- This is useful, e.g. after char and entity reference substitution collapseXText :: ArrowList a => a XmlTree XmlTree collapseXText = fromLA collapseXText' -- | -- Applies collapseXText recursively. -- -- -- see also : 'collapseXText' collapseAllXText :: ArrowList a => a XmlTree XmlTree collapseAllXText = fromLA $ processBottomUp collapseXText' -- ------------------------------------------------------------ -- | apply an arrow to the input and convert the resulting XML trees into an XML escaped string -- -- This is a save variant for converting a tree into an XML string representation -- that is parsable with 'Text.XML.HXT.Arrow.ReadDocument'. -- It is implemented with 'Text.XML.HXT.Arrow.XmlArrow.xshow', -- but xshow does no XML escaping. The XML escaping is done with -- 'Text.XML.HXT.Arrow.Edit.escapeXmlDoc' before xshow is applied. -- -- So the following law holds -- -- > xshowEscapeXml f >>> xread == f xshowEscapeXml :: ArrowXml a => a n XmlTree -> a n String xshowEscapeXml f = f >. (uncurry XS.xshow'' escapeXmlRefs) -- ------------------------------------------------------------ -- | -- escape XmlText, -- transform all special XML chars into char- or entity- refs type EntityRefTable = M.Map Int String xmlEntityRefTable , xhtmlEntityRefTable :: EntityRefTable xmlEntityRefTable = buildEntityRefTable $ xmlEntities xhtmlEntityRefTable = buildEntityRefTable $ xhtmlEntities buildEntityRefTable :: [(String, Int)] -> EntityRefTable buildEntityRefTable = M.fromList . map (\ (x,y) -> (y,x) ) type EntitySubstTable = M.Map String String xhtmlEntitySubstTable :: EntitySubstTable xhtmlEntitySubstTable = M.fromList . map (second $ (:[]) . toEnum) $ xhtmlEntities -- ------------------------------------------------------------ substXHTMLEntityRef :: LA XmlTree XmlTree substXHTMLEntityRef = ( getEntityRef >>> arrL subst >>> mkText ) `orElse` this where subst name = maybe [] (:[]) $ M.lookup name xhtmlEntitySubstTable substAllXHTMLEntityRefs :: ArrowXml a => a XmlTree XmlTree substAllXHTMLEntityRefs = fromLA $ processBottomUp substXHTMLEntityRef -- ------------------------------------------------------------ escapeXmlRefs :: (Char -> String -> String, Char -> String -> String) escapeXmlRefs = (cquote, aquote) where cquote c | c `elem` "<&" = ('&' :) . ((lookupRef c xmlEntityRefTable) ++) . (';' :) | otherwise = (c :) aquote c | c `elem` "<>\"\'&\n\r\t" = ('&' :) . ((lookupRef c xmlEntityRefTable) ++) . (';' :) | otherwise = (c :) escapeHtmlRefs :: (Char -> String -> String, Char -> String -> String) escapeHtmlRefs = (cquote, aquote) where cquote c | isHtmlTextEsc c = ('&' :) . ((lookupRef c xhtmlEntityRefTable) ++) . (';' :) | otherwise = (c :) aquote c | isHtmlAttrEsc c = ('&' :) . ((lookupRef c xhtmlEntityRefTable) ++) . (';' :) | otherwise = (c :) isHtmlTextEsc c = c >= toEnum(128) || ( c `elem` "<&" ) isHtmlAttrEsc c = c >= toEnum(128) || ( c `elem` "<>\"\'&\n\r\t" ) lookupRef :: Char -> EntityRefTable -> String lookupRef c = fromMaybe ('#' : show (fromEnum c)) . M.lookup (fromEnum c) {-# INLINE lookupRef #-} -- ------------------------------------------------------------ preventEmptyElements :: ArrowList a => [String] -> Bool -> a XmlTree XmlTree preventEmptyElements ns isHtml = fromLA $ editNTreeA [ ( isElem >>> isNoneEmpty >>> neg getChildren ) :-> replaceChildren (txt "") ] where isNoneEmpty | not (null ns) = hasNameWith (localPart >>> (`elem` ns)) | isHtml = hasNameWith (localPart >>> (`notElem` emptyHtmlTags)) | otherwise = this -- ------------------------------------------------------------ -- | -- convert a document into a Haskell representation (with show). -- -- Useful for debugging and trace output. -- see also : 'treeRepOfXmlDoc', 'numberLinesInXmlDoc' haskellRepOfXmlDoc :: ArrowList a => a XmlTree XmlTree haskellRepOfXmlDoc = fromLA $ root [getAttrl] [show ^>> mkText] -- | -- convert a document into a text and add line numbers to the text representation. -- -- Result is a root node with a single text node as child. -- Useful for debugging and trace output. -- see also : 'haskellRepOfXmlDoc', 'treeRepOfXmlDoc' numberLinesInXmlDoc :: ArrowList a => a XmlTree XmlTree numberLinesInXmlDoc = fromLA $ processChildren (changeText numberLines) where numberLines :: String -> String numberLines str = concat $ zipWith (\ n l -> lineNr n ++ l ++ "\n") [1..] (lines str) where lineNr :: Int -> String lineNr n = (reverse (take 6 (reverse (show n) ++ replicate 6 ' '))) ++ " " -- | -- convert a document into a text representation in tree form. -- -- Useful for debugging and trace output. -- see also : 'haskellRepOfXmlDoc', 'numberLinesInXmlDoc' treeRepOfXmlDoc :: ArrowList a => a XmlTree XmlTree treeRepOfXmlDoc = fromLA $ root [getAttrl] [formatXmlTree ^>> mkText] addHeadlineToXmlDoc :: ArrowXml a => a XmlTree XmlTree addHeadlineToXmlDoc = fromLA $ ( addTitle $< (getAttrValue a_source >>^ formatTitle) ) where addTitle str = replaceChildren ( txt str <+> getChildren <+> txt "\n" ) formatTitle str = "\n" ++ headline ++ "\n" ++ underline ++ "\n\n" where headline = "content of: " ++ str underline = map (const '=') headline -- ------------------------------------------------------------ -- | -- remove a Comment node removeComment :: ArrowXml a => a XmlTree XmlTree removeComment = none `when` isCmt -- | -- remove all comments in a tree recursively removeAllComment :: ArrowXml a => a XmlTree XmlTree removeAllComment = fromLA $ editNTreeA [isCmt :-> none] -- ------------------------------------------------------------ -- | -- simple filter for removing whitespace. -- -- no check on sigificant whitespace, e.g. in HTML \-elements, is done. -- -- -- see also : 'removeAllWhiteSpace', 'removeDocWhiteSpace' removeWhiteSpace :: ArrowXml a => a XmlTree XmlTree removeWhiteSpace = fromLA $ none `when` isWhiteSpace -- | -- simple recursive filter for removing all whitespace. -- -- removes all text nodes in a tree that consist only of whitespace. -- -- -- see also : 'removeWhiteSpace', 'removeDocWhiteSpace' removeAllWhiteSpace :: ArrowXml a => a XmlTree XmlTree removeAllWhiteSpace = fromLA $ editNTreeA [isWhiteSpace :-> none] -- fromLA $ processBottomUp removeWhiteSpace' -- less efficient -- ------------------------------------------------------------ -- | -- filter for removing all not significant whitespace. -- -- the tree traversed for removing whitespace between elements, -- that was inserted for indentation and readability. -- whitespace is only removed at places, where it's not significat -- preserving whitespace may be controlled in a document tree -- by a tag attribute @xml:space@ -- -- allowed values for this attribute are @default | preserve@ -- -- input is root node of the document to be cleaned up, -- output the semantically equivalent simplified tree -- -- -- see also : 'indentDoc', 'removeAllWhiteSpace' removeDocWhiteSpace :: ArrowXml a => a XmlTree XmlTree removeDocWhiteSpace = fromLA $ removeRootWhiteSpace removeRootWhiteSpace :: LA XmlTree XmlTree removeRootWhiteSpace = processChildren processRootElement `when` isRoot where processRootElement :: LA XmlTree XmlTree processRootElement = removeWhiteSpace >>> processChild where processChild = choiceA [ isDTD :-> removeAllWhiteSpace -- whitespace in DTD is redundant , this :-> replaceChildren ( getChildren >>. indentTrees insertNothing False 1 ) ] -- ------------------------------------------------------------ -- | -- filter for indenting a document tree for pretty printing. -- -- the tree is traversed for inserting whitespace for tag indentation. -- -- whitespace is only inserted or changed at places, where it isn't significant, -- is's not inserted between tags and text containing non whitespace chars. -- -- whitespace is only inserted or changed at places, where it's not significant. -- preserving whitespace may be controlled in a document tree -- by a tag attribute @xml:space@ -- -- allowed values for this attribute are @default | preserve@. -- -- input is a complete document tree or a document fragment -- result is the semantically equivalent formatted tree. -- -- -- see also : 'removeDocWhiteSpace' indentDoc :: ArrowXml a => a XmlTree XmlTree indentDoc = fromLA $ ( ( isRoot `guards` indentRoot ) `orElse` (root [] [this] >>> indentRoot >>> getChildren) ) -- ------------------------------------------------------------ indentRoot :: LA XmlTree XmlTree indentRoot = processChildren indentRootChildren where indentRootChildren = removeText >>> indentChild >>> insertNL where removeText = none `when` isText insertNL = this <+> txt "\n" indentChild = ( replaceChildren ( getChildren >>. indentTrees (insertIndentation 2) False 1 ) `whenNot` isDTD ) -- ------------------------------------------------------------ -- -- copied from EditFilter and rewritten for arrows -- to remove dependency to the filter module indentTrees :: (Int -> LA XmlTree XmlTree) -> Bool -> Int -> XmlTrees -> XmlTrees indentTrees _ _ _ [] = [] indentTrees indentFilter preserveSpace level ts = runLAs lsf ls ++ indentRest rs where runLAs f l = runLA (constL l >>> f) undefined (ls, rs) = break XN.isElem ts isSignificant :: Bool isSignificant = preserveSpace || (not . null . runLAs isSignificantPart) ls isSignificantPart :: LA XmlTree XmlTree isSignificantPart = catA [ isText `guards` neg isWhiteSpace , isCdata , isCharRef , isEntityRef ] lsf :: LA XmlTree XmlTree lsf | isSignificant = this | otherwise = (none `when` isWhiteSpace) >>> (indentFilter level <+> this) indentRest :: XmlTrees -> XmlTrees indentRest [] | isSignificant = [] | otherwise = runLA (indentFilter (level - 1)) undefined indentRest (t':ts') = runLA ( ( indentElem >>> lsf ) `when` isElem ) t' ++ ( if null ts' then indentRest else indentTrees indentFilter preserveSpace level ) ts' where indentElem = replaceChildren ( getChildren >>. indentChildren ) xmlSpaceAttrValue :: String xmlSpaceAttrValue = concat . runLA (getAttrValue "xml:space") $ t' preserveSpace' :: Bool preserveSpace' = ( fromMaybe preserveSpace . lookup xmlSpaceAttrValue ) [ ("preserve", True) , ("default", False) ] indentChildren :: XmlTrees -> XmlTrees indentChildren cs' | all (maybe False (all isXmlSpaceChar) . XN.getText) cs' = [] | otherwise = indentTrees indentFilter preserveSpace' (level + 1) cs' -- filter for indenting elements insertIndentation :: Int -> Int -> LA a XmlTree insertIndentation indentWidth level = txt ('\n' : replicate (level * indentWidth) ' ') -- filter for removing all whitespace insertNothing :: Int -> LA a XmlTree insertNothing _ = none -- ------------------------------------------------------------ -- | -- converts a CDATA section into normal text nodes transfCdata :: ArrowXml a => a XmlTree XmlTree transfCdata = fromLA $ (getCdata >>> mkText) `when` isCdata -- | -- converts CDATA sections in whole document tree into normal text nodes transfAllCdata :: ArrowXml a => a XmlTree XmlTree transfAllCdata = fromLA $ editNTreeA [isCdata :-> (getCdata >>> mkText)] -- | -- converts a character reference to normal text transfCharRef :: ArrowXml a => a XmlTree XmlTree transfCharRef = fromLA $ ( getCharRef >>> arr (\ i -> [toEnum i]) >>> mkText ) `when` isCharRef -- | -- recursively converts all character references to normal text transfAllCharRef :: ArrowXml a => a XmlTree XmlTree transfAllCharRef = fromLA $ editNTreeA [isCharRef :-> (getCharRef >>> arr (\ i -> [toEnum i]) >>> mkText)] -- ------------------------------------------------------------ rememberDTDAttrl :: ArrowList a => a XmlTree XmlTree rememberDTDAttrl = fromLA $ ( ( addDTDAttrl $< ( getChildren >>> isDTDDoctype >>> getDTDAttrl ) ) `orElse` this ) where addDTDAttrl al = seqA . map (uncurry addAttr) . map (first (dtdPrefix ++)) $ al addDefaultDTDecl :: ArrowList a => a XmlTree XmlTree addDefaultDTDecl = fromLA $ ( addDTD $< listA (getAttrl >>> (getName &&& xshow getChildren) >>> hasDtdPrefix) ) where hasDtdPrefix = isA (fst >>> (dtdPrefix `isPrefixOf`)) >>> arr (first (drop (length dtdPrefix))) addDTD [] = this addDTD al = replaceChildren ( mkDTDDoctype al none <+> txt "\n" <+> ( getChildren >>> (none `when` isDTDDoctype) ) -- remove old DTD decl ) -- ------------------------------------------------------------ hasXmlPi :: ArrowXml a => a XmlTree XmlTree hasXmlPi = fromLA ( getChildren >>> isPi >>> hasName t_xml ) -- | add an \ processing instruction -- if it's not already there addXmlPi :: ArrowXml a => a XmlTree XmlTree addXmlPi = fromLA ( insertChildrenAt 0 ( ( mkPi (mkName t_xml) none >>> addAttr a_version "1.0" ) <+> txt "\n" ) `whenNot` hasXmlPi ) -- | add an encoding spec to the \ processing instruction addXmlPiEncoding :: ArrowXml a => String -> a XmlTree XmlTree addXmlPiEncoding enc = fromLA $ processChildren ( addAttr a_encoding enc `when` ( isPi >>> hasName t_xml ) ) -- | add an XHTML strict doctype declaration to a document addXHtmlDoctypeStrict , addXHtmlDoctypeTransitional , addXHtmlDoctypeFrameset :: ArrowXml a => a XmlTree XmlTree -- | add an XHTML strict doctype declaration to a document addXHtmlDoctypeStrict = addDoctypeDecl "html" "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd" -- | add an XHTML transitional doctype declaration to a document addXHtmlDoctypeTransitional = addDoctypeDecl "html" "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd" -- | add an XHTML frameset doctype declaration to a document addXHtmlDoctypeFrameset = addDoctypeDecl "html" "-//W3C//DTD XHTML 1.0 Frameset//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd" -- | add a doctype declaration to a document -- -- The arguments are the root element name, the PUBLIC id and the SYSTEM id addDoctypeDecl :: ArrowXml a => String -> String -> String -> a XmlTree XmlTree addDoctypeDecl rootElem public system = fromLA $ replaceChildren ( mkDTDDoctype ( ( if null public then id else ( (k_public, public) : ) ) . ( if null system then id else ( (k_system, system) : ) ) $ [ (a_name, rootElem) ] ) none <+> txt "\n" <+> getChildren ) -- ------------------------------------------------------------ hxt-9.3.1.18/src/Text/XML/HXT/Arrow/GeneralEntitySubstitution.hs0000644000000000000000000003154712474566610022444 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Text.XML.HXT.Arrow.GeneralEntitySubstitution Copyright : Copyright (C) 2005 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : experimental Portability: portable general entity substitution -} -- ------------------------------------------------------------ module Text.XML.HXT.Arrow.GeneralEntitySubstitution ( processGeneralEntities ) where import Control.Arrow -- arrow classes import Control.Arrow.ArrowList import Control.Arrow.ArrowIf import Control.Arrow.ArrowTree import Text.XML.HXT.DOM.Interface import Text.XML.HXT.Arrow.XmlArrow import Text.XML.HXT.Arrow.XmlState import Text.XML.HXT.Arrow.ParserInterface ( parseXmlEntityValueAsAttrValue , parseXmlEntityValueAsContent ) import Text.XML.HXT.Arrow.Edit ( transfCharRef ) import Text.XML.HXT.Arrow.DocumentInput ( getXmlEntityContents ) import qualified Data.Map as M ( Map , empty , lookup , insert ) -- ------------------------------------------------------------ data GEContext = ReferenceInContent | ReferenceInAttributeValue | ReferenceInEntityValue -- or OccursInAttributeValue -- not used during substitution but during validation -- or ReferenceInDTD -- not used: syntax check detects errors type GESubstArrow = GEContext -> RecList -> GEArrow XmlTree XmlTree type GEArrow b c = IOStateArrow GEEnv b c type RecList = [String] -- ------------------------------------------------------------ newtype GEEnv = GEEnv (M.Map String GESubstArrow) emptyGeEnv :: GEEnv emptyGeEnv = GEEnv M.empty lookupGeEnv :: String -> GEEnv -> Maybe GESubstArrow lookupGeEnv k (GEEnv env) = M.lookup k env addGeEntry :: String -> GESubstArrow -> GEEnv -> GEEnv addGeEntry k a (GEEnv env) = GEEnv $ M.insert k a env -- ------------------------------------------------------------ -- | -- substitution of general entities -- -- input: a complete document tree including root node processGeneralEntities :: IOStateArrow s XmlTree XmlTree processGeneralEntities = ( traceMsg 1 "processGeneralEntities: collect and substitute general entities" >>> withOtherUserState emptyGeEnv (processChildren (processGeneralEntity ReferenceInContent [])) >>> setDocumentStatusFromSystemState "in general entity processing" >>> traceTree >>> traceSource ) `when` documentStatusOk processGeneralEntity :: GESubstArrow processGeneralEntity context recl = choiceA [ isElem :-> ( processAttrl (processChildren substEntitiesInAttrValue) >>> processChildren (processGeneralEntity context recl) ) , isEntityRef :-> substEntityRef , isDTDDoctype :-> processChildren (processGeneralEntity context recl) , isDTDEntity :-> addEntityDecl , isDTDAttlist :-> substEntitiesInAttrDefaultValue , this :-> this ] where addEntityDecl :: GEArrow XmlTree XmlTree addEntityDecl = perform ( choiceA [ isIntern :-> addInternalEntity -- don't change sequence of cases , isExtern :-> addExternalEntity , isUnparsed :-> addUnparsedEntity ] ) where isIntern = none `when` hasDTDAttr k_system isExtern = none `when` hasDTDAttr k_ndata isUnparsed = this addInternalEntity :: GEArrow XmlTree b addInternalEntity = insertInternal $<< ( ( getDTDAttrValue a_name >>> traceValue 2 (("processGeneralEntity: general entity definition for " ++) . show) ) &&& xshow (getChildren >>> isText) ) where insertInternal entity contents = insertEntity (substInternal contents) entity >>> none addExternalEntity :: GEArrow XmlTree b addExternalEntity = insertExternal $<< ( ( getDTDAttrValue a_name >>> traceValue 2 (("processGeneralEntity: external entity definition for " ++) . show) ) &&& getDTDAttrValue a_url -- the absolute URL, not the relative in attr: k_system ) where insertExternal entity uri = insertEntity (substExternalParsed1Time uri) entity >>> none addUnparsedEntity :: GEArrow XmlTree b addUnparsedEntity = getDTDAttrValue a_name >>> traceValue 2 (("processGeneralEntity: unparsed entity definition for " ++) . show) >>> applyA (arr (insertEntity substUnparsed)) >>> none insertEntity :: (String -> GESubstArrow) -> String -> GEArrow b b insertEntity fct entity = ( getUserState >>> applyA (arr checkDefined) ) `guards` addEntity fct entity where checkDefined geEnv = maybe ok alreadyDefined . lookupGeEnv entity $ geEnv where ok = this alreadyDefined _ = issueWarn ("entity " ++ show entity ++ " already defined, repeated definition ignored") >>> none addEntity :: (String -> GESubstArrow) -> String -> GEArrow b b addEntity fct entity = changeUserState ins where ins _ geEnv = addGeEntry entity (fct entity) geEnv substEntitiesInAttrDefaultValue :: GEArrow XmlTree XmlTree substEntitiesInAttrDefaultValue = applyA ( xshow ( getDTDAttrValue a_default -- parse the default value >>> -- substitute entities mkText -- and convert value into a string >>> parseXmlEntityValueAsAttrValue "default value of attribute" >>> filterErrorMsg >>> substEntitiesInAttrValue ) >>> arr (setDTDAttrValue a_default) ) `when` hasDTDAttr a_default substEntitiesInAttrValue :: GEArrow XmlTree XmlTree substEntitiesInAttrValue = ( processGeneralEntity ReferenceInAttributeValue recl `when` isEntityRef ) >>> changeText normalizeWhiteSpace >>> transfCharRef where normalizeWhiteSpace = map ( \c -> if c `elem` "\n\t\r" then ' ' else c ) substEntityRef :: GEArrow XmlTree XmlTree substEntityRef = applyA ( ( ( getEntityRef -- get the entity name and the env >>> -- and compute the arrow to be applied traceValue 2 (("processGeneralEntity: entity reference for entity " ++) . show) >>> traceMsg 3 ("recursion list = " ++ show recl) ) &&& getUserState ) >>> arr2 substA ) where substA :: String -> GEEnv -> GEArrow XmlTree XmlTree substA entity geEnv = maybe entityNotFound entityFound . lookupGeEnv entity $ geEnv where errMsg msg = issueErr msg entityNotFound = errMsg ("general entity reference \"&" ++ entity ++ ";\" not processed, no definition found, (forward reference?)") entityFound fct | entity `elem` recl = errMsg ("general entity reference \"&" ++ entity ++ ";\" not processed, cyclic definition") | otherwise = fct context recl substExternalParsed1Time :: String -> String -> GESubstArrow substExternalParsed1Time uri entity cx rl = perform ( traceMsg 2 ("substExternalParsed1Time: read and parse external parsed entity " ++ show entity) >>> runInLocalURIContext ( root [sattr a_source uri] [] -- uri must be an absolute uri >>> -- abs uri is computed during parameter entity handling getXmlEntityContents >>> processExternalEntityContents ) >>> applyA ( arr $ \ s -> addEntity (substExternalParsed s) entity ) ) >>> processGeneralEntity cx rl where processExternalEntityContents :: IOStateArrow s XmlTree String processExternalEntityContents = ( ( ( documentStatusOk -- reading entity succeeded >>> -- with content stored in a text node (getChildren >>> isText) ) `guards` this ) `orElse` issueErr ("illegal value for external parsed entity " ++ show entity) ) >>> xshow (getChildren >>> isText) substExternalParsed :: String -> String -> GESubstArrow substExternalParsed s entity ReferenceInContent rl = includedIfValidating s rl entity substExternalParsed _ entity ReferenceInAttributeValue _ = forbidden entity "external parsed general" "in attribute value" substExternalParsed _ _ ReferenceInEntityValue _ = bypassed substInternal :: String -> String -> GESubstArrow substInternal s entity ReferenceInContent rl = included s rl entity substInternal s entity ReferenceInAttributeValue rl = includedInLiteral s rl entity substInternal _ _ ReferenceInEntityValue _ = bypassed substUnparsed :: String -> GESubstArrow substUnparsed entity ReferenceInContent _ = forbidden entity "unparsed" "content" substUnparsed entity ReferenceInAttributeValue _ = forbidden entity "unparsed" "attribute value" substUnparsed entity ReferenceInEntityValue _ = forbidden entity "unparsed" "entity value" -- XML 1.0 chapter 4.4.2 included :: String -> RecList -> String -> GEArrow XmlTree XmlTree included s rl entity = traceMsg 3 ("substituting general entity " ++ show entity ++ " with value " ++ show s) >>> txt s >>> parseXmlEntityValueAsContent ("substituting general entity " ++ show entity ++ " in contents") >>> filterErrorMsg >>> processGeneralEntity context (entity : rl) -- XML 1.0 chapter 4.4.3 includedIfValidating :: String -> RecList -> String -> GEArrow XmlTree XmlTree includedIfValidating = included -- XML 1.0 chapter 4.4.4 forbidden :: String -> String -> String -> GEArrow XmlTree XmlTree forbidden entity msg cx = issueErr ("reference of " ++ msg ++ show entity ++ " forbidden in " ++ cx) -- XML 1.0 chapter 4.4.5 includedInLiteral :: String -> RecList -> String -> GEArrow XmlTree XmlTree includedInLiteral s rl entity = txt s >>> parseXmlEntityValueAsAttrValue ("substituting general entity " ++ show entity ++ " in attribute value") >>> filterErrorMsg >>> processGeneralEntity context (entity : rl) -- XML 1.0 chapter 4.4.7 bypassed :: GEArrow XmlTree XmlTree bypassed = this -- ------------------------------------------------------------ hxt-9.3.1.18/src/Text/XML/HXT/Arrow/Namespace.hs0000644000000000000000000004052512474566610017125 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Text.XML.HXT.Arrow.Namespace Copyright : Copyright (C) 2005-2008 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : experimental Portability: portable namespace specific arrows -} -- ------------------------------------------------------------ module Text.XML.HXT.Arrow.Namespace ( attachNsEnv , cleanupNamespaces , collectNamespaceDecl , collectPrefixUriPairs , isNamespaceDeclAttr , getNamespaceDecl , processWithNsEnv , processWithNsEnvWithoutAttrl , propagateNamespaces , uniqueNamespaces , uniqueNamespacesFromDeclAndQNames , validateNamespaces ) where import Control.Arrow -- arrow classes import Control.Arrow.ArrowList import Control.Arrow.ArrowIf import Control.Arrow.ArrowTree import Control.Arrow.ListArrow import Text.XML.HXT.DOM.Interface import Text.XML.HXT.Arrow.XmlArrow import Data.Maybe ( isNothing , fromJust ) import Data.List ( nub ) -- ------------------------------------------------------------ -- | test whether an attribute node contains an XML Namespace declaration isNamespaceDeclAttr :: ArrowXml a => a XmlTree XmlTree isNamespaceDeclAttr = fromLA $ (getAttrName >>> isA isNameSpaceName) `guards` this {-# INLINE isNamespaceDeclAttr #-} -- | get the namespace prefix and the namespace URI out of -- an attribute tree with a namespace declaration (see 'isNamespaceDeclAttr') -- for all other nodes this arrow fails getNamespaceDecl :: ArrowXml a => a XmlTree (String, String) getNamespaceDecl = fromLA $ isNamespaceDeclAttr >>> ( ( getAttrName >>> arr getNsPrefix ) &&& xshow getChildren ) where getNsPrefix = drop 6 . qualifiedName -- drop "xmlns:" -- ------------------------------------------------------------ -- | collect all namespace declarations contained in a document -- -- apply 'getNamespaceDecl' to a whole XmlTree collectNamespaceDecl :: LA XmlTree (String, String) collectNamespaceDecl = multi getAttrl >>> getNamespaceDecl -- | collect all (namePrefix, namespaceUri) pairs from a tree -- -- all qualified names are inspected, whether a namespace uri is defined, -- for these uris the prefix and uri is returned. This arrow is useful for -- namespace cleanup, e.g. for documents generated with XSLT. It can be used -- together with 'collectNamespaceDecl' to 'cleanupNamespaces' collectPrefixUriPairs :: LA XmlTree (String, String) collectPrefixUriPairs = multi (isElem <+> getAttrl <+> isPi) >>> getQName >>> arrL getPrefixUri where getPrefixUri :: QName -> [(String, String)] getPrefixUri n | null uri = [] | px == a_xmlns || px == a_xml = [] -- these ones are reserved an predefined | otherwise = [(namePrefix n, uri)] where uri = namespaceUri n px = namePrefix n -- ------------------------------------------------------------ -- | generate unique namespaces and add all namespace declarations to all top nodes containing a namespace declaration -- Usually the top node containing namespace declarations is the root node, but this isn't mandatory. -- -- Calls 'cleanupNamespaces' with 'collectNamespaceDecl' uniqueNamespaces :: ArrowXml a => a XmlTree XmlTree uniqueNamespaces = fromLA $ cleanupNamespaces' collectNamespaceDecl -- | generate unique namespaces and add all namespace declarations for all prefix-uri pairs in all qualified names -- -- useful for cleanup of namespaces in generated documents. -- Calls 'cleanupNamespaces' with @ collectNamespaceDecl \<+> collectPrefixUriPairs @ uniqueNamespacesFromDeclAndQNames :: ArrowXml a => a XmlTree XmlTree uniqueNamespacesFromDeclAndQNames = fromLA $ cleanupNamespaces' ( collectNamespaceDecl <+> collectPrefixUriPairs ) cleanupNamespaces' :: LA XmlTree (String, String) -> LA XmlTree XmlTree cleanupNamespaces' collectNamespaces = processTopDownUntil ( hasNamespaceDecl `guards` cleanupNamespaces collectNamespaces ) where hasNamespaceDecl = isElem >>> getAttrl >>> isNamespaceDeclAttr -- | does the real work for namespace cleanup. -- -- The parameter is used for collecting namespace uris and prefixes from the input tree cleanupNamespaces :: LA XmlTree (String, String) -> LA XmlTree XmlTree cleanupNamespaces collectNamespaces = renameNamespaces $< (listA collectNamespaces >>^ (toNsEnv >>> nub)) where renameNamespaces :: NsEnv -> LA XmlTree XmlTree renameNamespaces env = processBottomUp ( processAttrl ( ( none `when` isNamespaceDeclAttr ) -- remove all namespace declarations >>> changeQName renamePrefix -- update namespace prefix of attribute names, if namespace uri is set ) >>> changeQName renamePrefix -- update namespace prefix of element names ) >>> attachEnv env1 -- add all namespaces as attributes to the root node attribute list where renamePrefix :: QName -> QName renamePrefix n | isNullXName uri = n | isNothing newPx = n | otherwise = setNamePrefix' (fromJust newPx) n where uri = namespaceUri' n newPx = lookup uri revEnv1 revEnv1 = map (\ (x, y) -> (y, x)) env1 env1 :: NsEnv env1 = newEnv [] uris uris :: [XName] uris = nub . map snd $ env genPrefixes :: [XName] genPrefixes = map (newXName . ("ns" ++) . show) [(0::Int)..] newEnv :: NsEnv -> [XName] -> NsEnv newEnv env' [] = env' newEnv env' (uri:rest) = newEnv env'' rest where env'' = (prefix, uri) : env' prefix = head (filter notAlreadyUsed $ preferedPrefixes ++ genPrefixes) preferedPrefixes = map fst . filter ((==uri).snd) $ env notAlreadyUsed s = isNothing . lookup s $ env' -- ------------------------------------------------------------ -- | auxiliary arrow for processing with a namespace environment -- -- process a document tree with an arrow, containing always the -- valid namespace environment as extra parameter. -- The namespace environment is implemented as a 'Data.AssocList.AssocList'. -- Processing of attributes can be controlled by a boolean parameter processWithNsEnv1 :: ArrowXml a => Bool -> (NsEnv -> a XmlTree XmlTree) -> NsEnv -> a XmlTree XmlTree processWithNsEnv1 withAttr f env = ifA isElem -- the test is just an optimization ( processWithExtendedEnv $< arr (extendEnv env) ) -- only element nodes contain namespace declarations ( processWithExtendedEnv env ) where processWithExtendedEnv env' = f env' -- apply the env filter >>> ( ( if withAttr then processAttrl (f env') -- apply the env to all attributes else this ) >>> processChildren (processWithNsEnv f env') -- apply the env recursively to all children ) `when` isElem -- attrl and children only need processing for elem nodes extendEnv :: NsEnv -> XmlTree -> NsEnv extendEnv env' t' = addEntries (toNsEnv newDecls) env' where newDecls = runLA ( getAttrl >>> getNamespaceDecl ) t' -- ------------------------------------------------------------ -- | process a document tree with an arrow, containing always the -- valid namespace environment as extra parameter. -- -- The namespace environment is implemented as a 'Data.AssocList.AssocList' processWithNsEnv :: ArrowXml a => (NsEnv -> a XmlTree XmlTree) -> NsEnv -> a XmlTree XmlTree processWithNsEnv = processWithNsEnv1 True -- | process all element nodes of a document tree with an arrow, containing always the -- valid namespace environment as extra parameter. Attribute lists are not processed. -- -- See also: 'processWithNsEnv' processWithNsEnvWithoutAttrl :: ArrowXml a => (NsEnv -> a XmlTree XmlTree) -> NsEnv -> a XmlTree XmlTree processWithNsEnvWithoutAttrl = processWithNsEnv1 False -- ----------------------------------------------------------------------------- -- | attach all valid namespace declarations to the attribute list of element nodes. -- -- This arrow is useful for document processing, that requires access to all namespace -- declarations at any element node, but which cannot be done with a simple 'processWithNsEnv'. attachNsEnv :: ArrowXml a => NsEnv -> a XmlTree XmlTree attachNsEnv initialEnv = fromLA $ processWithNsEnvWithoutAttrl attachEnv initialEnv where attachEnv :: NsEnv -> LA XmlTree XmlTree attachEnv env = ( processAttrl (none `when` isNamespaceDeclAttr) >>> addAttrl (catA nsAttrl) ) `when` isElem where nsAttrl :: [LA XmlTree XmlTree] nsAttrl = map nsDeclToAttr env nsDeclToAttr :: (XName, XName) -> LA XmlTree XmlTree nsDeclToAttr (n, uri) = mkAttr qn (txt (unXN uri)) where qn :: QName qn | isNullXName n = newQName xmlnsXName nullXName xmlnsNamespaceXName | otherwise = newQName n xmlnsXName xmlnsNamespaceXName -- ----------------------------------------------------------------------------- -- | -- propagate all namespace declarations \"xmlns:ns=...\" to all element and attribute nodes of a document. -- -- This arrow does not check for illegal use of namespaces. -- The real work is done by 'propagateNamespaceEnv'. -- -- The arrow may be applied repeatedly if neccessary. propagateNamespaces :: ArrowXml a => a XmlTree XmlTree propagateNamespaces = fromLA $ propagateNamespaceEnv [ (xmlXName, xmlNamespaceXName) , (xmlnsXName, xmlnsNamespaceXName) ] -- | -- attaches the namespace info given by the namespace table -- to a tag node and its attributes and children. propagateNamespaceEnv :: NsEnv -> LA XmlTree XmlTree propagateNamespaceEnv = processWithNsEnv addNamespaceUri where addNamespaceUri :: NsEnv -> LA XmlTree XmlTree addNamespaceUri env' = choiceA [ isElem :-> changeElemName (setNamespace env') , isAttr :-> attachNamespaceUriToAttr env' , isPi :-> changePiName (setNamespace env') , this :-> this ] attachNamespaceUriToAttr :: NsEnv -> LA XmlTree XmlTree attachNamespaceUriToAttr attrEnv = ( ( getQName >>> isA (not . null . namePrefix) ) `guards` changeAttrName (setNamespace attrEnv) ) `orElse` ( changeAttrName (const xmlnsQN) `when` hasName a_xmlns ) -- ----------------------------------------------------------------------------- -- | -- validate the namespace constraints in a whole tree. -- -- Result is the list of errors concerning namespaces. -- Predicates 'isWellformedQName', 'isWellformedQualifiedName', 'isDeclaredNamespace' -- and 'isWellformedNSDecl' are applied to the appropriate elements and attributes. validateNamespaces :: ArrowXml a => a XmlTree XmlTree validateNamespaces = fromLA validateNamespaces1 validateNamespaces1 :: LA XmlTree XmlTree validateNamespaces1 = choiceA [ isRoot :-> ( getChildren >>> validateNamespaces1 ) -- root is correct by definition , this :-> multi validate1Namespaces ] -- | -- a single node for namespace constrains. validate1Namespaces :: LA XmlTree XmlTree validate1Namespaces = choiceA [ isElem :-> catA [ ( getQName >>> isA ( not . isWellformedQName ) ) `guards` nsError (\ n -> "element name " ++ show n ++ " is not a wellformed qualified name" ) , ( getQName >>> isA ( not . isDeclaredNamespace ) ) `guards` nsError (\ n -> "namespace for prefix in element name " ++ show n ++ " is undefined" ) , doubleOcc $< ( (getAttrl >>> getUniversalName) >>. doubles ) , getAttrl >>> validate1Namespaces ] , isAttr :-> catA [ ( getQName >>> isA ( not . isWellformedQName ) ) `guards` nsError (\ n -> "attribute name " ++ show n ++ " is not a wellformed qualified name" ) , ( getQName >>> isA ( not . isDeclaredNamespace ) ) `guards` nsError (\ n -> "namespace for prefix in attribute name " ++ show n ++ " is undefined" ) , ( hasNamePrefix a_xmlns >>> xshow getChildren >>> isA null ) `guards` nsError (\ n -> "namespace value of namespace declaration for " ++ show n ++ " has no value" ) , ( getQName >>> isA (not . isWellformedNSDecl ) ) `guards` nsError (\ n -> "illegal namespace declaration for name " ++ show n ++ " starting with reserved prefix " ++ show "xml" ) ] , isDTD :-> catA [ isDTDDoctype <+> isDTDAttlist <+> isDTDElement <+> isDTDName >>> getDTDAttrValue a_name >>> ( isA (not . isWellformedQualifiedName) `guards` nsErr (\ n -> "a DTD part contains a not wellformed qualified Name: " ++ show n) ) , isDTDAttlist >>> getDTDAttrValue a_value >>> ( isA (not . isWellformedQualifiedName) `guards` nsErr (\ n -> "an ATTLIST declaration contains as attribute name a not wellformed qualified Name: " ++ show n) ) , isDTDEntity <+> isDTDPEntity <+> isDTDNotation >>> getDTDAttrValue a_name >>> ( isA (not . isNCName) `guards` nsErr (\ n -> "an entity or notation declaration contains a not wellformed NCName: " ++ show n) ) ] , isPi :-> catA [ getName >>> ( isA (not . isNCName) `guards` nsErr (\ n -> "a PI contains a not wellformed NCName: " ++ show n) ) ] ] where nsError :: (QName -> String) -> LA XmlTree XmlTree nsError msg = getQName >>> nsErr msg nsErr :: (a -> String) -> LA a XmlTree nsErr msg = arr msg >>> mkError c_err doubleOcc :: String -> LA XmlTree XmlTree doubleOcc an = nsError (\ n -> "multiple occurences of universal name for attributes of tag " ++ show n ++ " : " ++ show an ) -- ------------------------------------------------------------ hxt-9.3.1.18/src/Text/XML/HXT/Arrow/ParserInterface.hs0000644000000000000000000000555212474566610020307 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Text.XML.HXT.Arrow.ParserInterface Copyright : Copyright (C) 2010 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : stable Portability: portable interface to the HXT XML and DTD parsers -} -- ------------------------------------------------------------ module Text.XML.HXT.Arrow.ParserInterface ( module Text.XML.HXT.Arrow.ParserInterface ) where import Control.Arrow.ArrowList import Text.XML.HXT.DOM.Interface import Text.XML.HXT.Arrow.XmlArrow import qualified Text.XML.HXT.Parser.HtmlParsec as HP import qualified Text.XML.HXT.Parser.XmlParsec as XP import qualified Text.XML.HXT.Parser.XmlDTDParser as DP -- ------------------------------------------------------------ parseXmlDoc :: ArrowXml a => a (String, String) XmlTree parseXmlDoc = arr2L XP.parseXmlDocument parseXmlDTDPart :: ArrowXml a => a (String, XmlTree) XmlTree parseXmlDTDPart = arr2L XP.parseXmlDTDPart xreadCont :: ArrowXml a => a String XmlTree xreadCont = arrL XP.xread xreadDoc :: ArrowXml a => a String XmlTree xreadDoc = arrL XP.xreadDoc parseXmlEntityEncodingSpec , parseXmlDocEncodingSpec , removeEncodingSpec :: ArrowXml a => a XmlTree XmlTree parseXmlDocEncodingSpec = arrL XP.parseXmlDocEncodingSpec parseXmlEntityEncodingSpec = arrL XP.parseXmlEntityEncodingSpec removeEncodingSpec = arrL XP.removeEncodingSpec parseXmlDTDdeclPart :: ArrowXml a => a XmlTree XmlTree parseXmlDTDdeclPart = arrL DP.parseXmlDTDdeclPart parseXmlDTDdecl :: ArrowXml a => a XmlTree XmlTree parseXmlDTDdecl = arrL DP.parseXmlDTDdecl parseXmlDTDEntityValue :: ArrowXml a => a XmlTree XmlTree parseXmlDTDEntityValue = arrL DP.parseXmlDTDEntityValue parseXmlEntityValueAsContent :: ArrowXml a => String -> a XmlTree XmlTree parseXmlEntityValueAsContent = arrL . XP.parseXmlEntityValueAsContent parseXmlEntityValueAsAttrValue :: ArrowXml a => String -> a XmlTree XmlTree parseXmlEntityValueAsAttrValue = arrL . XP.parseXmlEntityValueAsAttrValue -- ------------------------------------------------------------ parseHtmlDoc :: ArrowList a => a (String, String) XmlTree parseHtmlDoc = arr2L HP.parseHtmlDocument hread :: ArrowList a => a String XmlTree hread = arrL HP.parseHtmlContent hreadDoc :: ArrowList a => a String XmlTree hreadDoc = arrL $ HP.parseHtmlDocument "string" -- ------------------------------------------------------------ hxt-9.3.1.18/src/Text/XML/HXT/Arrow/Pickle.hs0000644000000000000000000002172013506133461016423 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Text.XML.HXT.Arrow.Pickle Copyright : Copyright (C) 2005 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : experimental Portability: portable Pickler functions for converting between user defined data types and XmlTree data. Usefull for persistent storage and retreival of arbitray data as XML documents This module is an adaptation of the pickler combinators developed by Andrew Kennedy ( https:\/\/www.microsoft.com\/en-us\/research\/wp-content\/uploads\/2004\/01\/picklercombinators.pdf ) The difference to Kennedys approach is that the target is not a list of Chars but a list of XmlTrees. The basic picklers will convert data into XML text nodes. New are the picklers for creating elements and attributes. One extension was neccessary: The unpickling may fail. Therefore the unpickler has a Maybe result type. Failure is used to unpickle optional elements (Maybe data) and lists of arbitray length There is an example program demonstrating the use of the picklers for a none trivial data structure. (see \"examples\/arrows\/pickle\" directory) -} -- ------------------------------------------------------------ module Text.XML.HXT.Arrow.Pickle ( xpickleDocument -- from this module Text.XML.HXT.Arrow.Pickle , xunpickleDocument , xpickleWriteDTD , xpickleDTD , checkPickler , xpickleVal , xunpickleVal , thePicklerDTD , a_addDTD -- from Text.XML.HXT.Arrow.Pickle.Xml , pickleDoc , unpickleDoc , unpickleDoc' , showPickled , PU(..) , XmlPickler(..) , xp4Tuple , xp5Tuple , xp6Tuple , xp7Tuple , xp8Tuple , xp9Tuple , xp10Tuple , xp11Tuple , xp12Tuple , xp13Tuple , xp14Tuple , xp15Tuple , xp16Tuple , xp17Tuple , xp18Tuple , xp19Tuple , xp20Tuple , xp21Tuple , xp22Tuple , xp23Tuple , xp24Tuple , xpAddFixedAttr , xpAddNSDecl , xpAlt , xpAttr , xpAttrFixed , xpAttrImplied , xpAttrNS , xpCheckEmpty , xpCheckEmptyAttributes , xpCheckEmptyContents , xpTextAttr , xpChoice , xpDefault , xpElem , xpElemNS , xpElemWithAttrValue , xpFilterAttr , xpFilterCont , xpInt , xpLift , xpLiftEither , xpLiftMaybe , xpList , xpList1 , xpMap , xpOption , xpPair , xpPrim , xpSeq , xpSeq' , xpText , xpText0 , xpTextDT , xpText0DT , xpTree , xpTrees , xpTriple , xpUnit , xpWrap , xpWrapEither , xpWrapMaybe , xpXmlText , xpZero -- from Text.XML.HXT.Arrow.Pickle.Schema , Schema , Schemas , DataTypeDescr ) where import Control.Arrow.ListArrows import Text.XML.HXT.DOM.Interface import Text.XML.HXT.Arrow.ReadDocument import Text.XML.HXT.Arrow.WriteDocument import Text.XML.HXT.Arrow.XmlArrow import Text.XML.HXT.Arrow.XmlState import Text.XML.HXT.Arrow.XmlState.TypeDefs import Text.XML.HXT.Arrow.Pickle.Xml import Text.XML.HXT.Arrow.Pickle.Schema import Text.XML.HXT.Arrow.Pickle.DTD -- ------------------------------------------------------------ -- the arrow interface for pickling and unpickling -- | store an arbitray value in a persistent XML document -- -- The pickler converts a value into an XML tree, this is written out with -- 'Text.XML.HXT.Arrow.writeDocument'. The option list is passed to 'Text.XML.HXT.Arrow.writeDocument' -- -- An option evaluated by this arrow is 'a_addDTD'. -- If 'a_addDTD' is set ('v_1'), the pickler DTD is added as an inline DTD into the document. xpickleDocument :: PU a -> SysConfigList -> String -> IOStateArrow s a XmlTree xpickleDocument xp config dest = localSysEnv $ configSysVars config >>> xpickleVal xp >>> traceMsg 1 "xpickleVal applied" >>> ifA ( getSysAttr a_addDTD >>> isA (== v_1) ) ( replaceChildren ( (constA undefined >>> xpickleDTD xp >>> getChildren) <+> getChildren ) ) this >>> writeDocument [] dest -- | Option for generating and adding DTD when document is pickled a_addDTD :: String a_addDTD = "addDTD" -- | read an arbitray value from an XML document -- -- The document is read with 'Text.XML.HXT.Arrow.readDocument'. Options are passed -- to 'Text.XML.HXT.Arrow.readDocument'. The conversion from XmlTree is done with the -- pickler. -- -- @ xpickleDocument xp al dest >>> xunpickleDocument xp al' dest @ is the identity arrow -- when applied with the appropriate options. When during pickling indentation is switched on, -- the whitespace must be removed during unpickling. xunpickleDocument :: PU a -> SysConfigList -> String -> IOStateArrow s b a xunpickleDocument xp conf src = readDocument conf src >>> traceMsg 1 ("xunpickleVal for " ++ show src ++ " started") >>> xunpickleVal xp >>> traceMsg 1 ("xunpickleVal for " ++ show src ++ " finished") -- | Write out the DTD generated out of a pickler. Calls 'xpicklerDTD' xpickleWriteDTD :: PU b -> SysConfigList -> String -> IOStateArrow s b XmlTree xpickleWriteDTD xp config dest = xpickleDTD xp >>> writeDocument config dest -- | The arrow for generating the DTD out of a pickler -- -- A DTD is generated from a pickler and check for consistency. -- Errors concerning the DTD are issued. xpickleDTD :: PU b -> IOStateArrow s b XmlTree xpickleDTD xp = root [] [ constL (thePicklerDTD xp) >>> filterErrorMsg ] -- | An arrow for checking picklers -- -- A value is transformed into an XML document by a given pickler, -- the associated DTD is extracted from the pickler and checked, -- the document including the DTD is tranlated into a string, -- this string is read and validated against the included DTD, -- and unpickled. -- The last step is the equality with the input. -- -- If the check succeeds, the arrow works like this, else it fails. checkPickler :: Eq a => PU a -> IOStateArrow s a a checkPickler xp = ( ( ( ( xpickleVal xp >>> replaceChildren ( (constA undefined >>> xpickleDTD xp >>> getChildren) <+> getChildren ) >>> writeDocumentToString [] >>> readFromString [withValidate True] >>> xunpickleVal xp ) &&& this ) >>> isA (uncurry (==)) ) `guards` this ) `orElse` issueErr "pickle/unpickle combinators failed" -- | The arrow version of the pickler function xpickleVal :: ArrowXml a => PU b -> a b XmlTree xpickleVal xp = arr (pickleDoc xp) -- | The arrow version of the unpickler function {- old version, runs outside IO xunpickleVal :: ArrowXml a => PU b -> a XmlTree b xunpickleVal xp = ( processChildren (none `whenNot` isElem) -- remove all stuff surrounding the root element `when` isRoot ) >>> arrL (maybeToList . unpickleDoc xp) -- -} xunpickleVal :: PU b -> IOStateArrow s XmlTree b xunpickleVal xp = ( processChildren (none `whenNot` isElem) -- remove all stuff surrounding the root element `when` isRoot ) >>> arr (unpickleDoc' xp) >>> ( ( (issueFatal $< arr ("document unpickling failed\n" ++)) >>> none ) ||| this ) -- | Compute the associated DTD of a pickler thePicklerDTD :: PU b -> XmlTrees thePicklerDTD = dtdDescrToXml . dtdDescr . theSchema -- ------------------------------------------------------------ hxt-9.3.1.18/src/Text/XML/HXT/Arrow/Pickle/DTD.hs0000644000000000000000000002667212474566610017062 0ustar0000000000000000{-# OPTIONS_GHC -fno-warn-incomplete-patterns #-} -- ------------------------------------------------------------ {- | Module : Text.XML.HXT.Arrow.Pickle.DTD Copyright : Copyright (C) 2005 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : experimental Portability: portable Version : $Id$ Functions for converting a pickler schema into a DTD -} -- ------------------------------------------------------------ module Text.XML.HXT.Arrow.Pickle.DTD where import Data.Maybe import qualified Text.XML.HXT.DOM.XmlNode as XN import Text.XML.HXT.DOM.Interface import Text.XML.HXT.Arrow.Pickle.Schema import Text.XML.HXT.XMLSchema.DataTypeLibW3CNames -- ------------------------------------------------------------ data DTDdescr = DTDdescr Name Schemas [(Name,Schemas)] instance Show DTDdescr where show (DTDdescr n es as) = "root element: " ++ n ++ "\n" ++ "elements:\n" ++ concatMap ((++ "\n") .show) es ++ "attributes:\n" ++ concatMap ((++ "\n") . showAttr) as where showAttr (n1, sc) = n1 ++ ": " ++ show sc -- ------------------------------------------------------------ -- | convert a DTD descr into XmlTrees dtdDescrToXml :: DTDdescr -> XmlTrees dtdDescrToXml (DTDdescr rt es as) = checkErr (null rt) "no unique root element found in pickler DTD, add an \"xpElem\" pickler" ++ concatMap (checkErr True . ("no element decl found in: " ++) . show) (filter (not . isScElem) es) ++ concatMap (uncurry checkContentModell . \ (Element n sc) -> (n,sc)) es1 ++ concatMap (uncurry checkAttrModell) as ++ [ XN.mkDTDElem DOCTYPE docAttrs ( concatMap elemDTD es1 ++ concatMap (uncurry attrDTDs) as ) ] where es1 = filter isScElem es docAttrs = [(a_name, if null rt then "no-unique-root-element-found" else rt)] elemDTD (Element n sc) | lookup1 a_type al == "unknown" = cl | otherwise = [ XN.mkDTDElem ELEMENT ((a_name, n) : al) cl ] where (al, cl) = scContToXml sc elemDTD _ = error "illegal case in elemDTD" attrDTDs en = concatMap (attrDTD en) attrDTD en (Attribute an sc) = [ XN.mkDTDElem ATTLIST ((a_name, en) : (a_value, an) : al) cl ] where (al, cl) = scAttrToXml sc attrDTD _ _ = error "illegal case in attrDTD" checkAttrModell :: Name -> Schemas -> XmlTrees checkAttrModell n = concatMap (checkAM n) checkAM :: Name -> Schema -> XmlTrees checkAM en (Attribute an sc) = checkAMC en an sc checkAM _ _ = [] checkAMC :: Name -> Name -> Schema -> XmlTrees checkAMC _en _an (CharData _) = [] checkAMC en an sc | isScCharData sc = [] | isScList sc && (sc_1 sc == scNmtoken) = [] | isScOpt sc = checkAMC en an (sc_1 sc) | otherwise = foundErr ( "weird attribute type found for attribute " ++ show an ++ " for element " ++ show en ++ "\n\t(internal structure: " ++ show sc ++ ")" ++ "\n\thint: create an element instead of an attribute for " ++ show an ) -- checkContentModell1 n sc = foundErr (n ++ " : " ++ show sc) ++ checkContentModell n sc checkContentModell :: Name -> Schema -> XmlTrees checkContentModell _ Any = [] checkContentModell _ (ElemRef _) = [] checkContentModell _ (CharData _) = [] checkContentModell _ (Seq []) = [] checkContentModell n (Seq scs) = checkErr pcDataInCM ( "PCDATA found in a sequence spec in the content modell for " ++ show n ++ "\n\thint: create an element for this data" ) ++ checkErr somethingElseInCM ( "something weired found in a sequence spec in the content modell for " ++ show n ) ++ concatMap (checkContentModell n) scs where pcDataInCM = any isScCharData scs somethingElseInCM = any (\ sc -> not (isScSARE sc) && not (isScCharData sc)) scs checkContentModell n (Alt scs) = checkErr mixedCM ( "PCDATA mixed up with illegal content spec in mixed contents for " ++ show n ++ "\n\thint: create an element for this data" ) ++ concatMap (checkContentModell n) scs where mixedCM | any isScCharData scs = any (not . isScElemRef) . filter (not . isScCharData) $ scs | otherwise = False checkContentModell _ (Rep _ _ (ElemRef _)) = [] checkContentModell n (Rep _ _ sc@(Seq _)) = checkContentModell n sc checkContentModell n (Rep _ _ sc@(Alt _)) = checkContentModell n sc checkContentModell n (Rep _ _ _) = foundErr ( "illegal content spec found for " ++ show n ) checkContentModell _ _ = [] scContToXml :: Schema -> (Attributes, XmlTrees) scContToXml Any = ( [(a_type, v_any)], [] ) scContToXml (CharData _) = ( [(a_type, v_pcdata)], [] ) scContToXml (Seq []) = ( [(a_type, v_empty)], [] ) scContToXml sc@(ElemRef _) = scContToXml (Seq [sc]) scContToXml sc@(Seq _) = ( [(a_type, v_children)] , scCont [] sc ) scContToXml sc@(Alt sc1) | isMixed sc1 = ( [(a_type, v_mixed)] , scCont [ (a_modifier, "*") ] sc ) | otherwise = ( [(a_type, v_children)] , scCont [] sc ) where isMixed = not . null . filter isScCharData scContToXml sc@(Rep _ _ _) = ( [(a_type, v_children)] , scCont [] sc ) scContToXml _sc = ( [(a_type, v_any)] -- default: everything is allowed , [] ) scWrap :: Schema -> Schema scWrap sc@(Alt _) = sc scWrap sc@(Seq _) = sc scWrap sc@(Rep _ _ _) = sc scWrap sc = Seq [sc] scCont :: Attributes -> Schema -> XmlTrees scCont al (Seq scs) = scConts ((a_kind, v_seq ) : al) scs scCont al (Alt scs) = scConts ((a_kind, v_choice) : al) scs scCont al (Rep 0 (-1) sc) = scCont ((a_modifier, "*") : al) (scWrap sc) scCont al (Rep 1 (-1) sc) = scCont ((a_modifier, "+") : al) (scWrap sc) scCont al (Rep 0 1 sc) = scCont ((a_modifier, "?") : al) (scWrap sc) scCont al (ElemRef n) = [XN.mkDTDElem NAME ((a_name, n) : al) []] scCont _ (CharData _) = [XN.mkDTDElem NAME [(a_name, "#PCDATA")] []] scCont _ _sc = [XN.mkDTDElem NAME [(a_name, "bad-content-spec")] []] -- error case scConts :: Attributes -> Schemas -> XmlTrees scConts al scs = [XN.mkDTDElem CONTENT al (concatMap (scCont []) scs)] scAttrToXml :: Schema -> (Attributes, XmlTrees) scAttrToXml sc | isScFixed sc = ( [ (a_kind, k_fixed) , (a_type, k_cdata) , (a_default, (xsdParam xsd_enumeration sc)) ] , []) | isScEnum sc = ( [ (a_kind, k_required) , (a_type, k_enumeration) ] , map (\ n -> XN.mkDTDElem NAME [(a_name, n)] []) enums ) | isScCharData sc = ( [ (a_kind, k_required) , (a_type, d_type) ] , []) | isScOpt sc = (addEntry a_kind k_implied al, cl) | isScList sc = (addEntry a_type k_nmtokens al, cl) | otherwise = ( [ (a_kind, k_fixed) , (a_default, "bad-attribute-type: " ++ show sc) ] , [] ) where (al, cl) = scAttrToXml (sc_1 sc) d_type | sc == scNmtoken = k_nmtoken | otherwise = k_cdata enums = words . xsdParam xsd_enumeration $ sc checkErr :: Bool -> String -> XmlTrees checkErr True s = [XN.mkError c_err s] checkErr _ _ = [] foundErr :: String -> XmlTrees foundErr = checkErr True -- ------------------------------------------------------------ -- | convert a pickler schema into a DTD descr dtdDescr :: Schema -> DTDdescr dtdDescr sc = DTDdescr rt es1 as where es = elementDeclarations sc es1 = map remAttrDec es as = filter (not. null . snd) . concatMap attrDec $ es rt = fromMaybe "" . elemName $ sc elementDeclarations :: Schema -> Schemas elementDeclarations sc = elemRefs . elementDecs [] $ [sc] elementDecs :: Schemas -> Schemas -> Schemas elementDecs es [] = es elementDecs es (s:ss) = elementDecs (elemDecs s) ss where elemDecs (Seq scs) = elementDecs es scs elemDecs (Alt scs) = elementDecs es scs elemDecs (Rep _ _ sc) = elemDecs sc elemDecs e@(Element n sc) | n `elem` elemNames es = es | otherwise = elementDecs (e:es) [sc] elemDecs _ = es elemNames :: Schemas -> [Name] elemNames = concatMap (maybeToList . elemName) elemName :: Schema -> Maybe Name elemName (Element n _) = Just n elemName _ = Nothing elemRefs :: Schemas -> Schemas elemRefs = map elemRef where elemRef (Element n sc) = Element n (pruneElem sc) elemRef sc = sc pruneElem (Element n _) = ElemRef n pruneElem (Seq scs) = Seq (map pruneElem scs) pruneElem (Alt scs) = Alt (map pruneElem scs) pruneElem (Rep l u sc) = Rep l u (pruneElem sc) pruneElem sc = sc attrDec :: Schema -> [(Name, Schemas)] attrDec (Element n sc) = [(n, attrDecs sc)] where attrDecs a@(Attribute _ _) = [a] attrDecs (Seq scs) = concatMap attrDecs scs attrDecs _ = [] attrDec _ = [] remAttrDec :: Schema -> Schema remAttrDec (Element n sc) = Element n (remA sc) where remA (Attribute _ _) = scEmpty remA (Seq scs) = scSeqs . map remA $ scs remA sc1 = sc1 remAttrDec _ = error "illegal case in remAttrDec" -- ------------------------------------------------------------ hxt-9.3.1.18/src/Text/XML/HXT/Arrow/Pickle/Schema.hs0000644000000000000000000001615212474566610017637 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Text.XML.HXT.Arrow.Pickle.Schema Copyright : Copyright (C) 2005 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : experimental Portability: portable Version : $Id$ Datatypes and functions for building a content model for XML picklers. A schema is part of every pickler and can be used to derive a corrensponding DTD (or Relax NG schema). This schema further enables checking the picklers. -} -- ------------------------------------------------------------ module Text.XML.HXT.Arrow.Pickle.Schema where import Text.XML.HXT.DOM.TypeDefs import Text.XML.HXT.XMLSchema.DataTypeLibW3CNames import Data.List ( sort ) -- ------------------------------------------------------------ -- | The datatype for modelling the structure of an data Schema = Any | Seq { sc_l :: [Schema] } | Alt { sc_l :: [Schema] } | Rep { sc_lb :: Int , sc_ub :: Int , sc_1 :: Schema } | Element { sc_n :: Name , sc_1 :: Schema } | Attribute { sc_n :: Name , sc_1 :: Schema } | ElemRef { sc_n :: Name } | CharData { sc_dt :: DataTypeDescr } deriving (Eq, Show) type Name = String type Schemas = [Schema] data DataTypeDescr = DTDescr { dtLib :: String , dtName :: String , dtParams :: Attributes } deriving (Show) instance Eq DataTypeDescr where x1 == x2 = dtLib x1 == dtLib x2 && dtName x1 == dtName x2 && sort (dtParams x1) == sort (dtParams x2) -- ------------------------------------------------------------ -- | test: is schema a simple XML Schema datatype isScXsd :: (String -> Bool) -> Schema -> Bool isScXsd p (CharData (DTDescr lib n _ps)) = lib == w3cNS && p n isScXsd _ _ = False -- | test: is type a fixed value attribute type isScFixed :: Schema -> Bool isScFixed sc = isScXsd (== xsd_string) sc && ((== 1) . length . words . xsdParam xsd_enumeration) sc isScEnum :: Schema -> Bool isScEnum sc = isScXsd (== xsd_string) sc && (not . null . xsdParam xsd_enumeration) sc isScElem :: Schema -> Bool isScElem (Element _ _) = True isScElem _ = False isScAttr :: Schema -> Bool isScAttr (Attribute _ _)= True isScAttr _ = False isScElemRef :: Schema -> Bool isScElemRef (ElemRef _) = True isScElemRef _ = False isScCharData :: Schema -> Bool isScCharData (CharData _)= True isScCharData _ = False isScSARE :: Schema -> Bool isScSARE (Seq _) = True isScSARE (Alt _) = True isScSARE (Rep _ _ _) = True isScSARE (ElemRef _) = True isScSARE _ = False isScList :: Schema -> Bool isScList (Rep 0 (-1) _) = True isScList _ = False isScOpt :: Schema -> Bool isScOpt (Rep 0 1 _) = True isScOpt _ = False -- | access an attribute of a descr of an atomic type xsdParam :: String -> Schema -> String xsdParam n (CharData dtd) = lookup1 n (dtParams dtd) xsdParam _ _ = "" -- ------------------------------------------------------------ -- smart constructors for Schema datatype -- ------------------------------------------------------------ -- -- predefined xsd data types for representation of DTD types scDT :: String -> String -> Attributes -> Schema scDT l n rl = CharData $ DTDescr l n rl scDTxsd :: String -> Attributes -> Schema scDTxsd = scDT w3cNS scString :: Schema scString = scDTxsd xsd_string [] scString1 :: Schema scString1 = scDTxsd xsd_string [(xsd_minLength, "1")] scFixed :: String -> Schema scFixed v = scDTxsd xsd_string [(xsd_enumeration, v)] scEnum :: [String] -> Schema scEnum vs = scFixed (unwords vs) scNmtoken :: Schema scNmtoken = scDTxsd xsd_NCName [] scNmtokens :: Schema scNmtokens = scList scNmtoken -- ------------------------------------------------------------ scEmpty :: Schema scEmpty = Seq [] scSeq :: Schema -> Schema -> Schema scSeq (Seq []) sc2 = sc2 scSeq sc1 (Seq []) = sc1 scSeq (Seq scs1) (Seq scs2) = Seq (scs1 ++ scs2) -- prevent nested Seq expr scSeq (Seq scs1) sc2 = Seq (scs1 ++ [sc2]) scSeq sc1 (Seq scs2) = Seq (sc1 : scs2) scSeq sc1 sc2 = Seq [sc1,sc2] scSeqs :: [Schema] -> Schema scSeqs = foldl scSeq scEmpty scNull :: Schema scNull = Alt [] scAlt :: Schema -> Schema -> Schema scAlt (Alt []) sc2 = sc2 scAlt sc1 (Alt []) = sc1 scAlt (Alt scs1) (Alt scs2) = Alt (scs1 ++ scs2) -- prevent nested Alt expr scAlt (Alt scs1) sc2 = Alt (scs1 ++ [sc2]) scAlt sc1 (Alt scs2) = Alt (sc1 : scs2) scAlt sc1 sc2 = Alt [sc1,sc2] scAlts :: [Schema] -> Schema scAlts = foldl scAlt scNull scOption :: Schema -> Schema scOption (Seq []) = scEmpty scOption (Attribute n sc2) = Attribute n (scOption sc2) scOption sc1 | sc1 == scString1 = scString | otherwise = scOpt sc1 scList :: Schema -> Schema scList = scRep 0 (-1) scList1 :: Schema -> Schema scList1 = scRep 1 (-1) scOpt :: Schema -> Schema scOpt = scRep 0 1 scRep :: Int -> Int -> Schema -> Schema scRep l u sc1 = Rep l u sc1 scElem :: String -> Schema -> Schema scElem n sc1 = Element n sc1 scAttr :: String -> Schema -> Schema scAttr n sc1 = Attribute n sc1 -- ------------------------------------------------------------ hxt-9.3.1.18/src/Text/XML/HXT/Arrow/Pickle/Xml.hs0000644000000000000000000015237013506133732017172 0ustar0000000000000000{-# OPTIONS_GHC -fno-warn-name-shadowing #-} {-# LANGUAGE CPP #-} {-# LANGUAGE MultiParamTypeClasses #-} {-# LANGUAGE TypeSynonymInstances #-} -- ------------------------------------------------------------ {- | Module : Text.XML.HXT.Arrow.Pickle.Xml Copyright : Copyright (C) 2005-2012 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : stable Portability: portable Pickler functions for converting between user defined data types and XmlTree data. Usefull for persistent storage and retreival of arbitray data as XML documents. This module is an adaptation of the pickler combinators developed by Andrew Kennedy ( https:\/\/www.microsoft.com\/en-us\/research\/wp-content\/uploads\/2004\/01\/picklercombinators.pdf ) The difference to Kennedys approach is that the target is not a list of Chars but a list of XmlTrees. The basic picklers will convert data into XML text nodes. New are the picklers for creating elements and attributes. One extension was neccessary: The unpickling may fail. Old: Therefore the unpickler has a Maybe result type. Failure is used to unpickle optional elements (Maybe data) and lists of arbitray length. Since hxt-9.2.0: The unpicklers are implemented as a parser monad with an Either err val result type. This enables appropriate error messages , when unpickling XML stuff, that is not generated with the picklers and which contains some elements and/or attributes that are not handled when unpickling. There is an example program demonstrating the use of the picklers for a none trivial data structure. (see \"examples\/arrows\/pickle\" directory in the hxt distribution) -} -- ------------------------------------------------------------ module Text.XML.HXT.Arrow.Pickle.Xml where #if MIN_VERSION_base(4,8,0) #else import Control.Applicative (Applicative (..)) #endif import Control.Arrow.ArrowList import Control.Arrow.ListArrows import Control.Monad () #if MIN_VERSION_mtl(2,2,0) import Control.Monad.Except (MonadError (..)) #else import Control.Monad.Error (MonadError (..)) #endif import Control.Monad.State (MonadState (..), gets, modify) import Data.Char (isDigit) import Data.List (foldl') import Data.Map (Map) import qualified Data.Map as M import Data.Maybe (fromJust, fromMaybe) import Text.XML.HXT.Arrow.Edit (xshowEscapeXml) import Text.XML.HXT.Arrow.Pickle.Schema import Text.XML.HXT.Arrow.ReadDocument (xread) import Text.XML.HXT.Arrow.WriteDocument (writeDocumentToString) import Text.XML.HXT.Arrow.XmlState import Text.XML.HXT.DOM.Interface import qualified Text.XML.HXT.DOM.ShowXml as XN import qualified Text.XML.HXT.DOM.XmlNode as XN {- just for embedded test cases, prefix with -- to activate import Text.XML.HXT.Arrow.XmlArrow import qualified Control.Arrow.ListArrows as X -- -} {- debug code import qualified Debug.Trace as T -- -} -- ------------------------------------------------------------ data St = St { attributes :: [XmlTree] , contents :: [XmlTree] , nesting :: Int -- the remaining 3 fields are used only for unpickling , pname :: QName -- to generate appropriate error messages , pelem :: Bool } deriving (Show) data PU a = PU { appPickle :: Pickler a -- (a, St) -> St , appUnPickle :: Unpickler a , theSchema :: Schema } -- -------------------- -- -- The pickler type Pickler a = a -> St -> St -- -------------------- -- -- The unpickler monad, a combination of state and error monad newtype Unpickler a = UP { runUP :: St -> (UnpickleVal a, St) } type UnpickleVal a = Either UnpickleErr a type UnpickleErr = (String, St) instance Functor Unpickler where fmap f u = UP $ \ st -> let (r, st') = runUP u st in (fmap f r, st') instance Applicative Unpickler where pure a = UP $ \ st -> (Right a, st) uf <*> ua = UP $ \ st -> let (f, st') = runUP uf st in case f of Left err -> (Left err, st') Right f' -> runUP (fmap f' ua) st' instance Monad Unpickler where return = pure u >>= f = UP $ \ st -> let (r, st') = runUP u st in case r of Left err -> (Left err, st') Right v -> runUP (f v) st' instance MonadState St Unpickler where get = UP $ \ st -> (Right st, st) put st = UP $ \ _ -> (Right (), st) instance MonadError UnpickleErr Unpickler where throwError err = UP $ \ st -> (Left err, st) -- redundant, not (yet) used catchError u handler = UP $ \ st -> let (r, st') = runUP u st in case r of Left err -> runUP (handler err) st -- not st', state will be reset in error case _ -> (r, st') throwMsg :: String -> Unpickler a throwMsg msg = UP $ \ st -> (Left (msg, st), st) -- | Choice combinator for unpickling -- -- first 2 arguments are applied sequentially, but if the 1. one fails the -- 3. arg is applied mchoice :: Unpickler a -> (a -> Unpickler b) -> Unpickler b -> Unpickler b mchoice u f v = UP $ \ st -> let (r, st') = runUP u st in case r of Right x -> runUP (f x) st' -- success Left e@(_msg, st'') -> if nesting st'' == nesting st -- true: failure in parsing curr contents then runUP v st -- try the alternative unpickler else (Left e, st') -- false: failure in unpickling a subtree of -- the current contents, so the whole unpickler -- must fail -- | Lift a Maybe value into the Unpickler monad. -- -- The 1. arg is the attached error message liftMaybe :: String -> Maybe a -> Unpickler a liftMaybe e v = case v of Nothing -> throwMsg e Just x -> return x -- | Lift an Either value into the Unpickler monad liftUnpickleVal :: UnpickleVal a -> Unpickler a liftUnpickleVal v = UP $ \ st -> (v, st) -- -------------------- getCont :: Unpickler XmlTree getCont = do cs <- gets contents case cs of [] -> throwMsg "no more contents to be read" (x : xs) -> do modify (\ s -> s {contents = xs}) return x getAtt :: QName -> Unpickler XmlTree getAtt qn = do as <- gets attributes case findAtt as of Nothing -> throwMsg $ "no attribute value found for " ++ show qn Just (a, as') -> do modify (\ s -> s {attributes = as'}) return $ nonEmptyVal a where findAtt = findElem (maybe False (== qn) . XN.getAttrName) nonEmptyVal a' | null (XN.getChildren a') = XN.setChildren [et] a' | otherwise = a' where et = XN.mkText "" getNSAtt :: String -> Unpickler () getNSAtt ns = do as <- gets attributes case findNS as of Nothing -> throwMsg $ "no namespace declaration found for namespace " ++ show ns Just (_a, as') -> do modify (\ s -> s {attributes = as'}) return () where isNS t = (fromMaybe False . fmap isNameSpaceName . XN.getAttrName $ t) && XN.xshow (XN.getChildren t) == ns findNS = findElem isNS -- -------------------- emptySt :: St emptySt = St { attributes = [] , contents = [] , nesting = 0 , pname = mkName "/" , pelem = True } putAtt :: QName -> [XmlTree] -> St -> St putAtt qn v s = s {attributes = x : attributes s} where x = XN.mkAttr qn v {-# INLINE putAtt #-} putCont :: XmlTree -> St -> St putCont x s = s {contents = x : contents s} {-# INLINE putCont #-} -- -------------------- -- -- generally useful function for splitting a value from a list findElem :: (a -> Bool) -> [a] -> Maybe (a, [a]) findElem p = find' id where find' _ [] = Nothing find' prefix (x : xs) | p x = Just (x, prefix xs) | otherwise = find' (prefix . (x:)) xs -- ------------------------------------------------------------ -- -- | Format the context of an error message. formatSt :: St -> String formatSt st = fcx ++ fa (attributes st) ++ fc (contents st) where fcx = "\n" ++ "context: " ++ ( if pelem st then "element" else "attribute" ) ++ " " ++ show (pname st) fc [] = "" fc cs = "\n" ++ "contents: " ++ formatXML cs fa [] = "" fa as = "\n" ++ "attributes: " ++ formatXML as formatXML = format 80 . showXML showXML = concat . runLA ( xshowEscapeXml unlistA ) format n s = let s' = take (n + 1) s in if length s' <= n then s' else take n s ++ "..." -- ------------------------------------------------------------ -- | conversion of an arbitrary value into an XML document tree. -- -- The pickler, first parameter, controls the conversion process. -- Result is a complete document tree including a root node pickleDoc :: PU a -> a -> XmlTree pickleDoc p v = XN.mkRoot (attributes st) (contents st) where st = appPickle p v emptySt -- | Conversion of an XML document tree into an arbitrary data type -- -- The inverse of 'pickleDoc'. -- This law should hold for all picklers: @ unpickle px . pickle px $ v == Just v @. -- Not every possible combination of picklers does make sense. -- For reconverting a value from an XML tree, is becomes neccessary, -- to introduce \"enough\" markup for unpickling the value unpickleDoc :: PU a -> XmlTree -> Maybe a unpickleDoc p = either (const Nothing) Just . unpickleDoc' p -- | Like unpickleDoc but with a (sometimes) useful error message, when unpickling failed. unpickleDoc' :: PU a -> XmlTree -> Either String a unpickleDoc' p t | XN.isRoot t = mapErr $ unpickleElem' p 0 t | otherwise = unpickleDoc' p (XN.mkRoot [] [t]) where mapErr = either ( Left . \ (msg, st) -> msg ++ formatSt st ) Right -- | The main entry for unpickling, called by unpickleDoc unpickleElem' :: PU a -> Int -> XmlTree -> UnpickleVal a unpickleElem' p l t = -- T.trace ("unpickleElem': " ++ show t) $ ( fst . runUP (appUnPickle p) ) $ St { attributes = fromMaybe [] . XN.getAttrl $ t , contents = XN.getChildren t , nesting = l , pname = fromJust . XN.getName $ t , pelem = XN.isElem t } -- ------------------------------------------------------------ -- | Pickles a value, then writes the document to a string. showPickled :: (XmlPickler a) => SysConfigList -> a -> String showPickled a = concat . (pickleDoc xpickle >>> runLA (writeDocumentToString a)) -- ------------------------------------------------------------ -- | The zero pickler -- -- Encodes nothing, fails always during unpickling xpZero :: String -> PU a xpZero err = PU { appPickle = const id , appUnPickle = throwMsg err , theSchema = scNull } -- | unit pickler xpUnit :: PU () xpUnit = xpLift () -- | Check EOF pickler. -- -- When pickling, this behaves like the unit pickler. -- The unpickler fails, when there is some unprocessed XML contents left. xpCheckEmptyContents :: PU a -> PU a xpCheckEmptyContents pa = PU { appPickle = appPickle pa , appUnPickle = do res <- appUnPickle pa cs <- gets contents if null cs then return res else contentsLeft , theSchema = scNull } where contentsLeft = throwMsg "xpCheckEmptyContents: unprocessed XML content detected" -- | Like xpCheckEmptyContents, but checks the attribute list xpCheckEmptyAttributes :: PU a -> PU a xpCheckEmptyAttributes pa = PU { appPickle = appPickle pa , appUnPickle = do res <- appUnPickle pa as <- gets attributes if null as then return res else attributesLeft , theSchema = scNull } where attributesLeft = throwMsg "xpCheckEmptyAttributes: unprocessed XML attribute(s) detected" -- | Composition of xpCheckEmptyContents and xpCheckAttributes xpCheckEmpty :: PU a -> PU a xpCheckEmpty = xpCheckEmptyAttributes . xpCheckEmptyContents xpLift :: a -> PU a xpLift x = PU { appPickle = const id , appUnPickle = return x , theSchema = scEmpty } -- | Lift a Maybe value to a pickler. -- -- @Nothing@ is mapped to the zero pickler, @Just x@ is pickled with @xpLift x@. xpLiftMaybe :: Maybe a -> PU a xpLiftMaybe v = (xpLiftMaybe'' v) { theSchema = scOption scEmpty } where xpLiftMaybe'' Nothing = xpZero "xpLiftMaybe: got Nothing" xpLiftMaybe'' (Just x) = xpLift x xpLiftEither :: Either String a -> PU a xpLiftEither v = (xpLiftEither'' v) { theSchema = scOption scEmpty } where xpLiftEither'' (Left err) = xpZero err xpLiftEither'' (Right x) = xpLift x -- | Combine two picklers sequentially. -- -- If the first fails during -- unpickling, the whole unpickler fails xpSeq :: (b -> a) -> PU a -> (a -> PU b) -> PU b xpSeq f pa k = PU { appPickle = ( \ b -> let a = f b in appPickle pa a . appPickle (k a) b ) , appUnPickle = appUnPickle pa >>= (appUnPickle . k) , theSchema = undefined } -- | First apply a fixed pickler/unpickler, then a 2. one -- -- If the first fails during unpickling, the whole pickler fails. -- This can be used to check some properties of the input, e.g. whether -- a given fixed attribute or a namespace declaration exists ('xpAddFixedAttr', 'xpAddNSDecl') -- or to filter the input, e.g. to ignore some elements or attributes ('xpFilterCont', 'xpFilterAttr'). -- -- When pickling, this can be used to insert some fixed XML pieces, e.g. namespace declarations, -- class attributes or other stuff. xpSeq' :: PU () -> PU a -> PU a xpSeq' pa = xpWrap ( snd , \ y -> ((), y) ) . xpPair pa -- | combine two picklers with a choice -- -- Run two picklers in sequence like with xpSeq. -- If during unpickling the first one fails, -- an alternative pickler (first argument) is applied. -- This pickler is only used as combinator for unpickling. xpChoice :: PU b -> PU a -> (a -> PU b) -> Unpickler b xpChoice pb pa k = mchoice (appUnPickle pa) (appUnPickle . k) (appUnPickle pb) -- | map value into another domain and apply pickler there -- -- One of the most often used picklers. xpWrap :: (a -> b, b -> a) -> PU a -> PU b xpWrap (i, j) pa = (xpSeq j pa (xpLift . i)) { theSchema = theSchema pa } -- | like 'xpWrap', but if the inverse mapping is undefined, the unpickler fails -- -- Map a value into another domain. If the inverse mapping is -- undefined (Nothing), the unpickler fails -- -- Deprecated: Use xpWrapEither, this gives better error messages xpWrapMaybe :: (a -> Maybe b, b -> a) -> PU a -> PU b xpWrapMaybe (i, j) pa = (xpSeq j pa (xpLiftMaybe . i)) { theSchema = theSchema pa } -- | like 'xpWrap', but if the inverse mapping is undefined, the unpickler fails -- -- Map a value into another domain. If the inverse mapping is -- undefined, the unpickler fails with an error message in the Left component xpWrapEither :: (a -> Either String b, b -> a) -> PU a -> PU b xpWrapEither (i, j) pa = (xpSeq j pa (xpLiftEither . i)) { theSchema = theSchema pa } -- ------------------------------------------------------------ -- | pickle a pair of values sequentially -- -- Used for pairs or together with wrap for pickling -- algebraic data types with two components xpPair :: PU a -> PU b -> PU (a, b) xpPair pa pb = ( xpSeq fst pa (\ a -> xpSeq snd pb (\ b -> xpLift (a,b))) ) { theSchema = scSeq (theSchema pa) (theSchema pb) } -- | Like 'xpPair' but for triples xpTriple :: PU a -> PU b -> PU c -> PU (a, b, c) xpTriple pa pb pc = xpWrap (toTriple, fromTriple) (xpPair pa (xpPair pb pc)) where toTriple ~(a, ~(b, c)) = (a, b, c ) fromTriple ~(a, b, c ) = (a, (b, c)) -- | Like 'xpPair' and 'xpTriple' but for 4-tuples xp4Tuple :: PU a -> PU b -> PU c -> PU d -> PU (a, b, c, d) xp4Tuple pa pb pc pd = xpWrap (toQuad, fromQuad) (xpPair pa (xpPair pb (xpPair pc pd))) where toQuad ~(a, ~(b, ~(c, d))) = (a, b, c, d ) fromQuad ~(a, b, c, d ) = (a, (b, (c, d))) -- | Like 'xpPair' and 'xpTriple' but for 5-tuples xp5Tuple :: PU a -> PU b -> PU c -> PU d -> PU e -> PU (a, b, c, d, e) xp5Tuple pa pb pc pd pe = xpWrap (toQuint, fromQuint) (xpPair pa (xpPair pb (xpPair pc (xpPair pd pe)))) where toQuint ~(a, ~(b, ~(c, ~(d, e)))) = (a, b, c, d, e ) fromQuint ~(a, b, c, d, e ) = (a, (b, (c, (d, e)))) -- | Like 'xpPair' and 'xpTriple' but for 6-tuples xp6Tuple :: PU a -> PU b -> PU c -> PU d -> PU e -> PU f -> PU (a, b, c, d, e, f) xp6Tuple pa pb pc pd pe pf = xpWrap (toSix, fromSix) (xpPair pa (xpPair pb (xpPair pc (xpPair pd (xpPair pe pf))))) where toSix ~(a, ~(b, ~(c, ~(d, ~(e, f))))) = (a, b, c, d, e, f ) fromSix ~(a, b, c, d, e, f) = (a, (b, (c, (d, (e, f))))) -- ------------------------------------------------------------ -- | Like 'xpPair' and 'xpTriple' but for 7-tuples -- -- Thanks to Tony Morris for doing xp7Tuple, ..., xp24Tuple. xp7Tuple :: PU a -> PU b -> PU c -> PU d -> PU e -> PU f -> PU g -> PU (a, b, c, d, e, f, g) xp7Tuple a b c d e f g = xpWrap ( \ (a, (b, c, d, e, f, g)) -> (a, b, c, d, e, f, g) , \ (a, b, c, d, e, f, g) -> (a, (b, c, d, e, f, g)) ) (xpPair a (xp6Tuple b c d e f g)) xp8Tuple :: PU a -> PU b -> PU c -> PU d -> PU e -> PU f -> PU g -> PU h -> PU (a, b, c, d, e, f, g, h) xp8Tuple a b c d e f g h = xpWrap ( \ ((a, b), (c, d, e, f, g, h)) -> (a, b, c, d, e, f, g, h) , \ (a, b, c, d, e, f, g, h) -> ((a, b), (c, d, e, f, g, h)) ) (xpPair (xpPair a b) (xp6Tuple c d e f g h)) xp9Tuple :: PU a -> PU b -> PU c -> PU d -> PU e -> PU f -> PU g -> PU h -> PU i -> PU (a, b, c, d, e, f, g, h, i) xp9Tuple a b c d e f g h i = xpWrap ( \ ((a, b, c), (d, e, f, g, h, i)) -> (a, b, c, d, e, f, g, h, i) , \ (a, b, c, d, e, f, g, h, i) -> ((a, b, c), (d, e, f, g, h, i)) ) (xpPair (xpTriple a b c) (xp6Tuple d e f g h i)) xp10Tuple :: PU a -> PU b -> PU c -> PU d -> PU e -> PU f -> PU g -> PU h -> PU i -> PU j -> PU (a, b, c, d, e, f, g, h, i, j) xp10Tuple a b c d e f g h i j = xpWrap ( \ ((a, b, c, d), (e, f, g, h, i, j)) -> (a, b, c, d, e, f, g, h, i, j) , \ (a, b, c, d, e, f, g, h, i, j) -> ((a, b, c, d), (e, f, g, h, i, j)) ) (xpPair (xp4Tuple a b c d) (xp6Tuple e f g h i j)) xp11Tuple :: PU a -> PU b -> PU c -> PU d -> PU e -> PU f -> PU g -> PU h -> PU i -> PU j -> PU k -> PU (a, b, c, d, e, f, g, h, i, j, k) xp11Tuple a b c d e f g h i j k = xpWrap ( \ ((a, b, c, d, e), (f, g, h, i, j, k)) -> (a, b, c, d, e, f, g, h, i, j, k) , \ (a, b, c, d, e, f, g, h, i, j, k) -> ((a, b, c, d, e), (f, g, h, i, j, k)) ) (xpPair (xp5Tuple a b c d e) (xp6Tuple f g h i j k)) xp12Tuple :: PU a -> PU b -> PU c -> PU d -> PU e -> PU f -> PU g -> PU h -> PU i -> PU j -> PU k -> PU l -> PU (a, b, c, d, e, f, g, h, i, j, k, l) xp12Tuple a b c d e f g h i j k l = xpWrap ( \ ((a, b, c, d, e, f), (g, h, i, j, k, l)) -> (a, b, c, d, e, f, g, h, i, j, k, l) , \ (a, b, c, d, e, f, g, h, i, j, k, l) -> ((a, b, c, d, e, f), (g, h, i, j, k, l)) ) (xpPair (xp6Tuple a b c d e f) (xp6Tuple g h i j k l)) xp13Tuple :: PU a -> PU b -> PU c -> PU d -> PU e -> PU f -> PU g -> PU h -> PU i -> PU j -> PU k -> PU l -> PU m -> PU (a, b, c, d, e, f, g, h, i, j, k, l, m) xp13Tuple a b c d e f g h i j k l m = xpWrap ( \ (a, (b, c, d, e, f, g), (h, i, j, k, l, m)) -> (a, b, c, d, e, f, g, h, i, j, k, l, m) , \ (a, b, c, d, e, f, g, h, i, j, k, l, m) -> (a, (b, c, d, e, f, g), (h, i, j, k, l, m)) ) (xpTriple a (xp6Tuple b c d e f g) (xp6Tuple h i j k l m)) xp14Tuple :: PU a -> PU b -> PU c -> PU d -> PU e -> PU f -> PU g -> PU h -> PU i -> PU j -> PU k -> PU l -> PU m -> PU n -> PU (a, b, c, d, e, f, g, h, i, j, k, l, m, n) xp14Tuple a b c d e f g h i j k l m n = xpWrap ( \ ((a, b), (c, d, e, f, g, h), (i, j, k, l, m, n)) -> (a, b, c, d, e, f, g, h, i, j, k, l, m, n) , \ (a, b, c, d, e, f, g, h, i, j, k, l, m, n) -> ((a, b), (c, d, e, f, g, h), (i, j, k, l, m, n)) ) (xpTriple (xpPair a b) (xp6Tuple c d e f g h) (xp6Tuple i j k l m n)) xp15Tuple :: PU a -> PU b -> PU c -> PU d -> PU e -> PU f -> PU g -> PU h -> PU i -> PU j -> PU k -> PU l -> PU m -> PU n -> PU o -> PU (a, b, c, d, e, f, g, h, i, j, k, l, m, n, o) xp15Tuple a b c d e f g h i j k l m n o = xpWrap ( \ ((a, b, c), (d, e, f, g, h, i), (j, k, l, m, n, o)) -> (a, b, c, d, e, f, g, h, i, j, k, l, m, n, o) , \ (a, b, c, d, e, f, g, h, i, j, k, l, m, n, o) -> ((a, b, c), (d, e, f, g, h, i), (j, k, l, m, n, o)) ) (xpTriple (xpTriple a b c) (xp6Tuple d e f g h i) (xp6Tuple j k l m n o)) xp16Tuple :: PU a -> PU b -> PU c -> PU d -> PU e -> PU f -> PU g -> PU h -> PU i -> PU j -> PU k -> PU l -> PU m -> PU n -> PU o -> PU p -> PU (a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p) xp16Tuple a b c d e f g h i j k l m n o p = xpWrap ( \ ((a, b, c, d), (e, f, g, h, i, j), (k, l, m, n, o, p)) -> (a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p) , \ (a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p) -> ((a, b, c, d), (e, f, g, h, i, j), (k, l, m, n, o, p)) ) (xpTriple (xp4Tuple a b c d) (xp6Tuple e f g h i j) (xp6Tuple k l m n o p)) xp17Tuple :: PU a -> PU b -> PU c -> PU d -> PU e -> PU f -> PU g -> PU h -> PU i -> PU j -> PU k -> PU l -> PU m -> PU n -> PU o -> PU p -> PU q -> PU (a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q) xp17Tuple a b c d e f g h i j k l m n o p q = xpWrap ( \ ((a, b, c, d, e), (f, g, h, i, j, k), (l, m, n, o, p, q)) -> (a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q) , \ (a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q) -> ((a, b, c, d, e), (f, g, h, i, j, k), (l, m, n, o, p, q)) ) (xpTriple (xp5Tuple a b c d e) (xp6Tuple f g h i j k) (xp6Tuple l m n o p q)) xp18Tuple :: PU a -> PU b -> PU c -> PU d -> PU e -> PU f -> PU g -> PU h -> PU i -> PU j -> PU k -> PU l -> PU m -> PU n -> PU o -> PU p -> PU q -> PU r -> PU (a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r) xp18Tuple a b c d e f g h i j k l m n o p q r = xpWrap ( \ ((a, b, c, d, e, f), (g, h, i, j, k, l), (m, n, o, p, q, r)) -> (a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r) , \ (a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r) -> ((a, b, c, d, e, f), (g, h, i, j, k, l), (m, n, o, p, q, r)) ) (xpTriple (xp6Tuple a b c d e f) (xp6Tuple g h i j k l) (xp6Tuple m n o p q r)) xp19Tuple :: PU a -> PU b -> PU c -> PU d -> PU e -> PU f -> PU g -> PU h -> PU i -> PU j -> PU k -> PU l -> PU m -> PU n -> PU o -> PU p -> PU q -> PU r -> PU s -> PU (a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s) xp19Tuple a b c d e f g h i j k l m n o p q r s = xpWrap ( \ (a, (b, c, d, e, f, g), (h, i, j, k, l, m), (n, o, p, q, r, s)) -> (a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s) , \ (a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s) -> (a, (b, c, d, e, f, g), (h, i, j, k, l, m), (n, o, p, q, r, s)) ) (xp4Tuple a (xp6Tuple b c d e f g) (xp6Tuple h i j k l m) (xp6Tuple n o p q r s)) xp20Tuple :: PU a -> PU b -> PU c -> PU d -> PU e -> PU f -> PU g -> PU h -> PU i -> PU j -> PU k -> PU l -> PU m -> PU n -> PU o -> PU p -> PU q -> PU r -> PU s -> PU t -> PU (a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t) xp20Tuple a b c d e f g h i j k l m n o p q r s t = xpWrap ( \ ((a, b), (c, d, e, f, g, h), (i, j, k, l, m, n), (o, p, q, r, s, t)) -> (a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t) , \ (a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t) -> ((a, b), (c, d, e, f, g, h), (i, j, k, l, m, n), (o, p, q, r, s, t)) ) (xp4Tuple (xpPair a b) (xp6Tuple c d e f g h) (xp6Tuple i j k l m n) (xp6Tuple o p q r s t)) xp21Tuple :: PU a -> PU b -> PU c -> PU d -> PU e -> PU f -> PU g -> PU h -> PU i -> PU j -> PU k -> PU l -> PU m -> PU n -> PU o -> PU p -> PU q -> PU r -> PU s -> PU t -> PU u -> PU (a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u) xp21Tuple a b c d e f g h i j k l m n o p q r s t u = xpWrap ( \ ((a, b, c), (d, e, f, g, h, i), (j, k, l, m, n, o), (p, q, r, s, t, u)) -> (a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u) , \ (a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u) -> ((a, b, c), (d, e, f, g, h, i), (j, k, l, m, n, o), (p, q, r, s, t, u)) ) (xp4Tuple (xpTriple a b c) (xp6Tuple d e f g h i) (xp6Tuple j k l m n o) (xp6Tuple p q r s t u)) xp22Tuple :: PU a -> PU b -> PU c -> PU d -> PU e -> PU f -> PU g -> PU h -> PU i -> PU j -> PU k -> PU l -> PU m -> PU n -> PU o -> PU p -> PU q -> PU r -> PU s -> PU t -> PU u -> PU v -> PU (a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v) xp22Tuple a b c d e f g h i j k l m n o p q r s t u v = xpWrap ( \ ((a, b, c, d), (e, f, g, h, i, j), (k, l, m, n, o, p), (q, r, s, t, u, v)) -> (a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v) , \ (a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v) -> ((a, b, c, d), (e, f, g, h, i, j), (k, l, m, n, o, p), (q, r, s, t, u, v)) ) (xp4Tuple (xp4Tuple a b c d) (xp6Tuple e f g h i j) (xp6Tuple k l m n o p) (xp6Tuple q r s t u v)) xp23Tuple :: PU a -> PU b -> PU c -> PU d -> PU e -> PU f -> PU g -> PU h -> PU i -> PU j -> PU k -> PU l -> PU m -> PU n -> PU o -> PU p -> PU q -> PU r -> PU s -> PU t -> PU u -> PU v -> PU w -> PU (a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v, w) xp23Tuple a b c d e f g h i j k l m n o p q r s t u v w = xpWrap ( \ ((a, b, c, d, e), (f, g, h, i, j, k), (l, m, n, o, p, q), (r, s, t, u, v, w)) -> (a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v, w) , \ (a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v, w) -> ((a, b, c, d, e), (f, g, h, i, j, k), (l, m, n, o, p, q), (r, s, t, u, v, w)) ) (xp4Tuple (xp5Tuple a b c d e) (xp6Tuple f g h i j k) (xp6Tuple l m n o p q) (xp6Tuple r s t u v w)) -- | Hopefully no one needs a xp25Tuple xp24Tuple :: PU a -> PU b -> PU c -> PU d -> PU e -> PU f -> PU g -> PU h -> PU i -> PU j -> PU k -> PU l -> PU m -> PU n -> PU o -> PU p -> PU q -> PU r -> PU s -> PU t -> PU u -> PU v -> PU w -> PU x -> PU (a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v, w, x) xp24Tuple a b c d e f g h i j k l m n o p q r s t u v w x = xpWrap ( \ ((a, b, c, d, e, f), (g, h, i, j, k, l), (m, n, o, p, q, r), (s, t, u, v, w, x)) -> (a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v, w, x) , \ (a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v, w, x) -> ((a, b, c, d, e, f), (g, h, i, j, k, l), (m, n, o, p, q, r), (s, t, u, v, w, x)) ) (xp4Tuple (xp6Tuple a b c d e f) (xp6Tuple g h i j k l) (xp6Tuple m n o p q r) (xp6Tuple s t u v w x)) -- ------------------------------------------------------------ -- | Pickle a string into an XML text node -- -- One of the most often used primitive picklers. Attention: -- For pickling empty strings use 'xpText0'. If the text has a more -- specific datatype than xsd:string, use 'xpTextDT' xpText :: PU String xpText = xpTextDT scString1 {-# INLINE xpText #-} -- | Pickle a string into an XML text node -- -- Text pickler with a description of the structure of the text -- by a schema. A schema for a data type can be defined by 'Text.XML.HXT.Arrow.Pickle.Schema.scDT'. -- In 'Text.XML.HXT.Arrow.Pickle.Schema' there are some more functions for creating -- simple datatype descriptions. xpTextDT :: Schema -> PU String xpTextDT sc = PU { appPickle = putCont . XN.mkText , appUnPickle = do t <- getCont liftMaybe "xpText: XML text expected" $ XN.getText t , theSchema = sc } -- | Pickle a possibly empty string into an XML node. -- -- Must be used in all places, where empty strings are legal values. -- If the content of an element can be an empty string, this string disapears -- during storing the DOM into a document and reparse the document. -- So the empty text node becomes nothing, and the pickler must deliver an empty string, -- if there is no text node in the document. xpText0 :: PU String xpText0 = xpText0DT scString1 {-# INLINE xpText0 #-} -- | Pickle a possibly empty string with a datatype description into an XML node. -- -- Like 'xpText0' but with extra Parameter for datatype description as in 'xpTextDT'. xpText0DT :: Schema -> PU String xpText0DT sc = xpWrap (fromMaybe "", emptyToNothing) $ xpOption $ xpTextDT sc where emptyToNothing "" = Nothing emptyToNothing x = Just x -- | Pickle an arbitrary value by applyling show during pickling -- and read during unpickling. -- -- Real pickling is then done with 'xpText'. -- One of the most often used pimitive picklers. Applicable for all -- types which are instances of @Read@ and @Show@ xpPrim :: (Read a, Show a) => PU a xpPrim = xpWrapEither (readMaybe, show) xpText where readMaybe :: Read a => String -> Either String a readMaybe str = val (reads str) where val [(x,"")] = Right x val _ = Left $ "xpPrim: reading string " ++ show str ++ " failed" -- | Pickle an Int xpInt :: PU Int xpInt = xpWrapEither (readMaybe, show) xpText where readMaybe xs@(_:_) | all isDigit xs = Right . foldl' (\ r c -> 10 * r + (fromEnum c - fromEnum '0')) 0 $ xs readMaybe ('-' : xs) = fmap (0 -) . readMaybe $ xs readMaybe ('+' : xs) = readMaybe $ xs readMaybe xs = Left $ "xpInt: reading an Int from string " ++ show xs ++ " failed" -- ------------------------------------------------------------ -- | Pickle an XmlTree by just adding it -- -- Usefull for components of type XmlTree in other data structures xpTree :: PU XmlTree xpTree = PU { appPickle = putCont , appUnPickle = getCont , theSchema = Any } -- | Pickle a whole list of XmlTrees by just adding the list, unpickle is done by taking all element contents. -- -- This pickler should always be combined with 'xpElem' for taking the whole contents of an element. xpTrees :: PU [XmlTree] xpTrees = (xpList xpTree) { theSchema = Any } -- | Pickle a string representing XML contents by inserting the tree representation into the XML document. -- -- Unpickling is done by converting the contents with -- 'Text.XML.HXT.Arrow.Edit.xshowEscapeXml' into a string, -- this function will escape all XML special chars, such that pickling the value back becomes save. -- Pickling is done with 'Text.XML.HXT.Arrow.ReadDocument.xread' xpXmlText :: PU String xpXmlText = xpWrap ( showXML, readXML ) $ xpTrees where showXML = concat . runLA ( xshowEscapeXml unlistA ) readXML = runLA xread -- ------------------------------------------------------------ -- | Encoding of optional data by ignoring the Nothing case during pickling -- and relying on failure during unpickling to recompute the Nothing case -- -- The default pickler for Maybe types xpOption :: PU a -> PU (Maybe a) xpOption pa = PU { appPickle = ( \ a -> case a of Nothing -> id Just x -> appPickle pa x ) , appUnPickle = xpChoice (xpLift Nothing) pa (xpLift . Just) , theSchema = scOption (theSchema pa) } -- | Optional conversion with default value -- -- The default value is not encoded in the XML document, -- during unpickling the default value is inserted if the pickler fails xpDefault :: (Eq a) => a -> PU a -> PU a xpDefault df = xpWrap ( fromMaybe df , \ x -> if x == df then Nothing else Just x ) . xpOption -- ------------------------------------------------------------ -- | Encoding of list values by pickling all list elements sequentially. -- -- Unpickler relies on failure for detecting the end of the list. -- The standard pickler for lists. Can also be used in combination with 'xpWrap' -- for constructing set and map picklers xpList :: PU a -> PU [a] xpList pa = PU { appPickle = ( \ a -> case a of [] -> id _:_ -> appPickle pc a ) , appUnPickle = xpChoice (xpLift []) pa (\ x -> xpSeq id (xpList pa) (\xs -> xpLift (x:xs))) , theSchema = scList (theSchema pa) } where pc = xpSeq head pa (\ x -> xpSeq tail (xpList pa) (\ xs -> xpLift (x:xs) )) -- | Encoding of a none empty list of values -- -- Attention: when calling this pickler with an empty list, -- an internal error \"head of empty list is raised\". xpList1 :: PU a -> PU [a] xpList1 pa = ( xpWrap (\ (x, xs) -> x : xs ,\ x -> (head x, tail x) ) $ xpPair pa (xpList pa) ) { theSchema = scList1 (theSchema pa) } -- ------------------------------------------------------------ -- | Standard pickler for maps -- -- This pickler converts a map into a list of pairs. -- All key value pairs are mapped to an element with name (1.arg), -- the key is encoded as an attribute named by the 2. argument, -- the 3. arg is the pickler for the keys, the last one for the values xpMap :: Ord k => String -> String -> PU k -> PU v -> PU (Map k v) xpMap en an xpk xpv = xpWrap ( M.fromList , M.toList ) $ xpList $ xpElem en $ xpPair ( xpAttr an $ xpk ) xpv -- ------------------------------------------------------------ -- | Pickler for sum data types. -- -- Every constructor is mapped to an index into the list of picklers. -- The index is used only during pickling, not during unpickling, there the 1. match is taken xpAlt :: (a -> Int) -> [PU a] -> PU a xpAlt tag ps = PU { appPickle = \ a -> appPickle (ps !! tag a) a , appUnPickle = case ps of [] -> throwMsg "xpAlt: no matching unpickler found for a sum datatype" pa:ps1 -> xpChoice (xpAlt tag ps1) pa xpLift , theSchema = scAlts (map theSchema ps) } -- ------------------------------------------------------------ -- | Pickler for wrapping\/unwrapping data into an XML element -- -- Extra parameter is the element name given as a QName. THE pickler for constructing -- nested structures -- -- Example: -- -- > xpElemQN (mkName "number") $ xpickle -- -- will map an (42::Int) onto -- -- > 42 xpElemQN :: QName -> PU a -> PU a xpElemQN qn pa = PU { appPickle = ( \ a -> let st' = appPickle pa a emptySt in putCont (XN.mkElement qn (attributes st') (contents st')) ) , appUnPickle = upElem , theSchema = scElem (qualifiedName qn) (theSchema pa) } where upElem = do t <- getCont n <- liftMaybe "xpElem: XML element expected" $ XN.getElemName t if n /= qn then throwMsg ("xpElem: got element name " ++ show n ++ ", but expected " ++ show qn) else do l <- gets nesting liftUnpickleVal $ unpickleElem' (xpCheckEmpty pa) (l + 1) t -- | convenient Pickler for xpElemQN -- -- > xpElem n = xpElemQN (mkName n) xpElem :: String -> PU a -> PU a xpElem = xpElemQN . mkName -- | convenient Pickler for xpElemQN -- for pickling elements with respect to namespaces -- -- > xpElemNS ns px lp = xpElemQN (mkQName px lp ns) xpElemNS :: String -> String -> String -> PU a -> PU a xpElemNS ns px lp = xpElemQN $ mkQName px lp ns -- ------------------------------------------------------------ -- | Pickler for wrapping\/unwrapping data into an XML element with an attribute with given value -- -- To make XML structures flexible but limit the number of different elements, it's sometimes -- useful to use a kind of generic element with a key value structure -- -- Example: -- -- > value1 -- > value2 -- > value3 -- -- the Haskell datatype may look like this -- -- > type T = T { key1 :: Int ; key2 :: String ; key3 :: Double } -- -- Then the picker for that type looks like this -- -- > xpT :: PU T -- > xpT = xpWrap ( uncurry3 T, \ t -> (key1 t, key2 t, key3 t) ) $ -- > xpTriple (xpElemWithAttrValue "attr" "name" "key1" $ xpickle) -- > (xpElemWithAttrValue "attr" "name" "key2" $ xpText0) -- > (xpElemWithAttrValue "attr" "name" "key3" $ xpickle) xpElemWithAttrValue :: String -> String -> String -> PU a -> PU a xpElemWithAttrValue name an av pa = xpElem name $ xpAddFixedAttr an av $ pa -- ------------------------------------------------------------ -- | Pickler for storing\/retreiving data into\/from an attribute value -- -- The attribute is inserted in the surrounding element constructed by the 'xpElem' pickler xpAttrQN :: QName -> PU a -> PU a xpAttrQN qn pa = PU { appPickle = ( \ a -> let st' = appPickle pa a emptySt in putAtt qn (contents st') ) , appUnPickle = upAttr , theSchema = scAttr (qualifiedName qn) (theSchema pa) } where upAttr = do a <- getAtt qn l <- gets nesting liftUnpickleVal $ unpickleElem' (xpCheckEmptyContents pa) l a -- | convenient Pickler for xpAttrQN -- -- > xpAttr n = xpAttrQN (mkName n) xpAttr :: String -> PU a -> PU a xpAttr = xpAttrQN . mkName -- | convenient Pickler for xpAttrQN -- -- > xpAttr ns px lp = xpAttrQN (mkQName px lp ns) xpAttrNS :: String -> String -> String -> PU a -> PU a xpAttrNS ns px lp = xpAttrQN (mkQName px lp ns) -- | A text attribute. xpTextAttr :: String -> PU String xpTextAttr = flip xpAttr xpText -- | Add an optional attribute for an optional value (Maybe a). xpAttrImplied :: String -> PU a -> PU (Maybe a) xpAttrImplied name pa = xpOption $ xpAttr name pa xpAttrFixed :: String -> String -> PU () xpAttrFixed name val = ( xpWrapEither ( \ v -> if v == val then Right () else Left ( "xpAttrFixed: value " ++ show val ++ " expected, but got " ++ show v ) , const val ) $ xpAttr name xpText ) { theSchema = scAttr name (scFixed val) } -- | Add/Check an attribute with a fixed value. -- xpAddFixedAttr :: String -> String -> PU a -> PU a xpAddFixedAttr name val = xpSeq' $ xpAttrFixed name val -- | Add a namespace declaration. -- -- When generating XML the namespace decl is added, -- when reading a document, the unpickler checks -- whether there is a namespace declaration for the given -- namespace URI (2. arg) xpAddNSDecl :: String -> String -> PU a -> PU a xpAddNSDecl name val = xpSeq' $ xpAttrNSDecl name' val where name' | null name = "xmlns" | otherwise = "xmlns:" ++ name xpAttrNSDecl :: String -> String -> PU () xpAttrNSDecl name ns = PU { appPickle = const $ putAtt (mkName name) [XN.mkText ns] , appUnPickle = getNSAtt ns , theSchema = scAttr name (scFixed ns) } -- ------------------------------------------------------------ xpIgnoreCont :: LA XmlTree XmlTree -> PU () xpIgnoreCont = xpIgnoreInput $ \ mf s -> s {contents = mf $ contents s} xpIgnoreAttr :: LA XmlTree XmlTree -> PU () xpIgnoreAttr = xpIgnoreInput $ \ mf s -> s {attributes = mf $ attributes s} -- | When unpickling, filter the contents of the element currently processed, -- before applying the pickler argument -- -- Maybe useful to ignore some stuff in the input, or to do some cleanup before unpickling. xpFilterCont :: LA XmlTree XmlTree -> PU a -> PU a xpFilterCont f = xpSeq' $ xpIgnoreCont f -- | Same as 'xpFilterCont' but for the attribute list of the element currently processed. -- -- Maybe useful to ignore some stuff in the input, e.g. class attributes, or to do some cleanup before unpickling. xpFilterAttr :: LA XmlTree XmlTree -> PU a -> PU a xpFilterAttr f = xpSeq' $ xpIgnoreAttr f xpIgnoreInput :: (([XmlTree] -> [XmlTree]) -> St -> St) -> LA XmlTree XmlTree -> PU () xpIgnoreInput m f = PU { appPickle = const id , appUnPickle = do modify (m filterCont) return () , theSchema = scNull } where filterCont = runLA (unlistA >>> f) -- ------------------------------------------------------------ -- | The class for overloading 'xpickle', the default pickler class XmlPickler a where xpickle :: PU a instance XmlPickler Int where xpickle = xpPrim instance XmlPickler Integer where xpickle = xpPrim {- no instance of XmlPickler Char because then every text would be encoded char by char, because of the instance for lists instance XmlPickler Char where xpickle = xpPrim -} instance XmlPickler () where xpickle = xpUnit instance (XmlPickler a, XmlPickler b) => XmlPickler (a,b) where xpickle = xpPair xpickle xpickle instance (XmlPickler a, XmlPickler b, XmlPickler c) => XmlPickler (a,b,c) where xpickle = xpTriple xpickle xpickle xpickle instance (XmlPickler a, XmlPickler b, XmlPickler c, XmlPickler d) => XmlPickler (a,b,c,d) where xpickle = xp4Tuple xpickle xpickle xpickle xpickle instance (XmlPickler a, XmlPickler b, XmlPickler c, XmlPickler d, XmlPickler e) => XmlPickler (a,b,c,d,e) where xpickle = xp5Tuple xpickle xpickle xpickle xpickle xpickle instance XmlPickler a => XmlPickler [a] where xpickle = xpList xpickle instance XmlPickler a => XmlPickler (Maybe a) where xpickle = xpOption xpickle -- ------------------------------------------------------------ {- begin embeded test cases -- ------------------------------------------------------------ -- -- a somewhat complex data structure -- for representing programs of a simple -- imperative language type Program = Stmt type StmtList = [Stmt] data Stmt = Assign Ident Expr | Stmts StmtList | If Expr Stmt (Maybe Stmt) | While Expr Stmt deriving (Eq, Show) type Ident = String data Expr = IntConst Int | BoolConst Bool | Var Ident | UnExpr UnOp Expr | BinExpr Op Expr Expr deriving (Eq, Show) data Op = Add | Sub | Mul | Div | Mod | Eq | Neq deriving (Eq, Ord, Enum, Show) data UnOp = UPlus | UMinus | Neg deriving (Eq, Ord, Read, Show) -- ------------------------------------------------------------ -- -- the pickler definition for the data types -- the main pickler xpProgram :: PU Program xpProgram = xpElem "program" $ xpAddNSDecl "" "program42" $ xpickle xpMissingRootElement :: PU Program xpMissingRootElement = xpickle instance XmlPickler UnOp where xpickle = xpPrim instance XmlPickler Op where xpickle = xpWrap (toEnum, fromEnum) xpPrim instance XmlPickler Expr where xpickle = xpAlt tag ps where tag (IntConst _ ) = 0 tag (BoolConst _ ) = 1 tag (Var _ ) = 2 tag (UnExpr _ _ ) = 3 tag (BinExpr _ _ _ ) = 4 ps = [ xpWrap ( IntConst , \ (IntConst i ) -> i ) $ ( xpElem "int" $ xpAttr "value" $ xpickle ) , xpWrap ( BoolConst , \ (BoolConst b) -> b ) $ ( xpElem "bool" $ xpAttr "value" $ xpWrap (toEnum, fromEnum) xpickle ) , xpWrap ( Var , \ (Var n) -> n ) $ ( xpElem "var" $ xpAttr "name" $ xpText ) , xpWrap ( uncurry UnExpr , \ (UnExpr op e) -> (op, e) ) $ ( xpElem "unex" $ xpPair (xpAttr "op" xpickle) xpickle ) , xpWrap ( uncurry3 $ BinExpr , \ (BinExpr op e1 e2) -> (op, e1, e2) ) $ ( xpElem "binex" $ xpTriple (xpAttr "op" xpickle) xpickle xpickle ) ] instance XmlPickler Stmt where xpickle = xpAlt tag ps where tag ( Assign _ _ ) = 0 tag ( Stmts _ ) = 1 tag ( If _ _ _ ) = 2 tag ( While _ _ ) = 3 ps = [ xpWrap ( uncurry Assign , \ (Assign n v) -> (n, v) ) $ ( xpElem "assign" $ xpFilterCont (neg $ hasName "comment" <+> isText) $ -- test case test7: remove uninteresting stuff xpPair (xpAttr "name" xpText) xpickle ) , xpWrap ( Stmts , \ (Stmts sl) -> sl ) $ ( xpElem "block" $ xpList xpickle ) , xpWrap ( uncurry3 If , \ (If c t e) -> (c, t, e) ) $ ( xpElem "if" $ xpTriple xpickle xpickle xpickle ) , xpWrap ( uncurry While , \ (While c b) -> (c, b) ) $ ( xpElem "while" $ xpPair xpickle xpickle ) ] -- ------------------------------------------------------------ -- -- example programs progs :: [Program] progs = [p0, p1, p2] p0, p1, p2 :: Program p0 = Stmts [] -- the empty program p1 = Stmts [ Assign i ( UnExpr UMinus ( IntConst (-22) ) ) , Assign j ( IntConst 20 ) , While ( BinExpr Neq ( Var i ) ( IntConst 0 ) ) ( Stmts [ Assign i ( BinExpr Sub ( Var i ) ( IntConst 1 ) ) , Assign j ( BinExpr Add ( Var j ) ( IntConst 1 ) ) , If ( IntConst 0 ) (Stmts []) Nothing ] ) ] where i = "i" j = "j" p2 = Stmts [ Assign x (IntConst 6) , Assign y (IntConst 7) , Assign p (IntConst 0) , While ( BinExpr Neq (Var x) (IntConst 0) ) ( If ( BinExpr Neq ( BinExpr Mod (Var x) (IntConst 2) ) (IntConst 0) ) ( Stmts [ Assign x ( BinExpr Sub (Var x) (IntConst 1) ) , Assign p ( BinExpr Add (Var p) (Var y) ) ] ) ( Just ( Stmts [ Assign x ( BinExpr Div (Var x) (IntConst 2) ) , Assign y ( BinExpr Mul (Var y) (IntConst 2) ) ] ) ) ) ] where x = "x" y = "y" p = "p" -- ------------------------------------------------------------ test0 = putStrLn . head . runLA ( xshow (arr (pickleDoc xpProgram) >>> getChildren ) ) test0' f = runLA ( xshow (arr (pickleDoc xpProgram) >>> getChildren ) >>> root [] [xread] >>> f ) test1' f = runLA ( xshow (arr (pickleDoc xpProgram) >>> getChildren ) >>> root [] [xread] >>> f >>> arr (unpickleDoc' xpProgram) ) test1 = test0' (processTopDown (setQName (mkName "real") `X.when` hasName "int")) test2 = test1' this test3 = test1' (processTopDown (setQName (mkName "real") `X.when` hasName "int")) test4 = test1' (processTopDown (setQName (mkName "xxx") `X.when` hasName "program")) test5 = test1' (processTopDown (setQName (mkName "xxx") `X.when` hasName "assign")) test6 = test1' (processTopDownWithAttrl (txt "xxx" `X.when` hasText (== "UMinus"))) test7 = test1' (processTopDown (insertComment `X.when` hasName "assign")) where insertComment = replaceChildren (getChildren <+> eelem "comment" <+> txt "zzz") -- ------------------------------------------------------------ -- end embeded test cases -} hxt-9.3.1.18/src/Text/XML/HXT/Arrow/ProcessDocument.hs0000644000000000000000000002741012474566610020344 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Text.XML.HXT.Arrow.ProcessDocument Copyright : Copyright (C) 2011 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : experimental Portability: portable Compound arrows for reading, parsing, validating and writing XML documents All arrows use IO and a global state for options, errorhandling, ... -} -- ------------------------------------------------------------ module Text.XML.HXT.Arrow.ProcessDocument ( parseXmlDocument , parseXmlDocumentWithExpat , parseHtmlDocument , validateDocument , propagateAndValidateNamespaces , andValidateNamespaces , getDocumentContents ) where import Control.Arrow import Control.Arrow.ArrowIf import Control.Arrow.ArrowList import Control.Arrow.ArrowTree import Control.Arrow.ListArrow (fromLA) import Control.Arrow.NTreeEdit import Text.XML.HXT.DOM.Interface import Text.XML.HXT.Arrow.XmlArrow import Text.XML.HXT.Arrow.XmlState import Text.XML.HXT.Arrow.XmlState.TypeDefs import Text.XML.HXT.Arrow.ParserInterface (parseHtmlDoc, parseXmlDoc) import Text.XML.HXT.Arrow.Edit (substAllXHTMLEntityRefs, transfAllCharRef) import Text.XML.HXT.Arrow.GeneralEntitySubstitution (processGeneralEntities) import Text.XML.HXT.Arrow.DTDProcessing (processDTD) import Text.XML.HXT.Arrow.DocumentInput (getXmlContents) import Text.XML.HXT.Arrow.Namespace (propagateNamespaces, validateNamespaces) import Text.XML.HXT.DTDValidation.Validation (generalEntitiesDefined, getDTDSubset, transform, validate) -- ------------------------------------------------------------ {- | XML parser Input tree must be a root tree with a text tree as child containing the document to be parsed. The parser generates from the input string a tree of a wellformed XML document, processes the DTD (parameter substitution, conditional DTD parts, ...) and substitutes all general entity references. Next step is character reference substitution. Last step is the document validation. Validation can be controlled by an extra parameter. Example: > parseXmlDocument True -- parse and validate document > > parseXmlDocument False -- only parse document, don't validate This parser is useful for applications processing correct XML documents. -} parseXmlDocument :: Bool -> Bool -> Bool -> Bool -> IOStateArrow s XmlTree XmlTree parseXmlDocument validateD substDTD substHTML validateRX = ( replaceChildren ( ( getAttrValue a_source &&& xshow getChildren ) >>> parseXmlDoc >>> filterErrorMsg ) >>> setDocumentStatusFromSystemState "parse XML document" >>> ( ifA (fromLA getDTDSubset) ( processDTDandEntities >>> ( if validate' -- validation only possible if there is a DTD then validateDocument else this ) ) ( if validate' -- validation only consists of checking -- for undefined entity refs -- predefined XML entity refs are substituted -- in the XML parser into char refs -- so there is no need for an entity substitution then traceMsg 2 "checkUndefinedEntityRefs: looking for undefined entity refs" >>> perform checkUndefinedEntityRefs >>> traceMsg 2 "checkUndefinedEntityRefs: looking for undefined entity refs done" >>> setDocumentStatusFromSystemState "decoding document" else this ) ) ) `when` documentStatusOk where validate' = validateD && not validateRX processDTDandEntities = ( if validateD || substDTD then processDTD else this ) >>> ( if substDTD then ( processGeneralEntities -- DTD contains general entity definitions `when` fromLA generalEntitiesDefined ) else if substHTML then substAllXHTMLEntityRefs else this ) >>> transfAllCharRef checkUndefinedEntityRefs :: IOStateArrow s XmlTree XmlTree checkUndefinedEntityRefs = deep isEntityRef >>> getEntityRef >>> arr (\ en -> "general entity reference \"&" ++ en ++ ";\" is undefined") >>> mkError c_err >>> filterErrorMsg -- ------------------------------------------------------------ parseXmlDocumentWithExpat :: IOStateArrow s XmlTree XmlTree parseXmlDocumentWithExpat = ( withoutUserState $< getSysVar theExpatParser ) `when` documentStatusOk -- ------------------------------------------------------------ {- | HTML parser Input tree must be a root tree with a text tree as child containing the document to be parsed. The parser tries to parse everything as HTML, if the HTML document is not wellformed XML or if errors occur, warnings are generated. The warnings can be issued, or suppressed. Example: @ parseHtmlDocument True @ : parse document and issue warnings This parser is useful for applications like web crawlers, where the pages may contain arbitray errors, but the application is only interested in parts of the document, e.g. the plain text. -} parseHtmlDocument :: IOStateArrow s XmlTree XmlTree parseHtmlDocument = ( perform ( getAttrValue a_source >>> traceValue 1 (("parseHtmlDoc: parse HTML document " ++) . show) ) >>> ( parseHtml $< getSysVar (theTagSoup .&&&. theExpat) ) >>> ( removeWarnings $< getSysVar (theWarnings .&&&. theTagSoup) ) >>> setDocumentStatusFromSystemState "parse HTML document" >>> traceTree >>> traceSource >>> perform ( getAttrValue a_source >>> traceValue 1 (\ src -> "parse HTML document " ++ show src ++ " finished") ) ) `when` documentStatusOk where parseHtml (withTagSoup', withExpat') | withExpat' = withoutUserState $< getSysVar theExpatParser | withTagSoup' = withoutUserState $< getSysVar theTagSoupParser | otherwise = traceMsg 1 ("parse document with parsec HTML parser") >>> replaceChildren ( ( getAttrValue a_source -- get source name &&& xshow getChildren ) -- get string to be parsed >>> parseHtmlDoc -- run parser, entity substituion is done in parser ) removeWarnings (warnings, withTagSoup') | warnings = processTopDownWithAttrl -- remove warnings inserted by parser and entity subst filterErrorMsg | withTagSoup' = this -- warnings are not generated in tagsoup | otherwise = fromLA $ editNTreeA [isError :-> none] -- remove all warnings from document -- ------------------------------------------------------------ {- | Document validation Input must be a complete document tree. The document is validated with respect to the DTD spec. Only useful for XML documents containing a DTD. If the document is valid, it is transformed with respect to the DTD, normalization of attribute values, adding default values, sorting attributes by name,... If no error was found, result is the normalized tree, else the error status is set in the list of attributes of the root node \"\/\" and the document content is removed from the tree. -} validateDocument :: IOStateArrow s XmlTree XmlTree validateDocument = ( traceMsg 1 "validating document" >>> perform ( validateDoc >>> filterErrorMsg ) >>> setDocumentStatusFromSystemState "document validation" >>> traceMsg 1 "document validated, transforming doc with respect to DTD" >>> transformDoc >>> traceMsg 1 "document transformed" >>> traceSource >>> traceTree ) `when` documentStatusOk -- ------------------------------------------------------------ {- | Namespace propagation Input must be a complete document tree. The namespace declarations are evaluated and all element and attribute names are processed by splitting the name into prefix, local part and namespace URI. Naames are checked with respect to the XML namespace definition If no error was found, result is the unchanged input tree, else the error status is set in the list of attributes of the root node \"\/\" and the document content is removed from the tree. -} propagateAndValidateNamespaces :: IOStateArrow s XmlTree XmlTree propagateAndValidateNamespaces = ( traceMsg 1 "propagating namespaces" >>> propagateNamespaces >>> traceDoc "propagating namespaces done" >>> andValidateNamespaces ) `when` documentStatusOk andValidateNamespaces :: IOStateArrow s XmlTree XmlTree andValidateNamespaces = ( traceMsg 1 "validating namespaces" >>> ( setDocumentStatusFromSystemState "namespace propagation" `when` ( validateNamespaces >>> perform filterErrorMsg ) ) >>> traceMsg 1 "namespace validation finished" ) `when` documentStatusOk -- ------------------------------------------------------------ {- | creates a new document root, adds all options as attributes to the document root and calls 'getXmlContents'. If the document name is the empty string, the document will be read from standard input. For supported protocols see 'Text.XML.HXT.Arrow.DocumentInput.getXmlContents' -} getDocumentContents :: String -> IOStateArrow s b XmlTree getDocumentContents src = root [] [] >>> addAttr a_source src >>> traceMsg 1 ("readDocument: start processing document " ++ show src) >>> getXmlContents -- ------------------------------------------------------------ validateDoc :: ArrowList a => a XmlTree XmlTree validateDoc = fromLA ( validate `when` getDTDSubset -- validate only when DTD decl is present ) transformDoc :: ArrowList a => a XmlTree XmlTree transformDoc = fromLA transform -- ------------------------------------------------------------ hxt-9.3.1.18/src/Text/XML/HXT/Arrow/ReadDocument.hs0000644000000000000000000005110612474566610017600 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Text.XML.HXT.Arrow.ReadDocument Copyright : Copyright (C) 2005-2013 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : stable Portability: portable Compound arrows for reading an XML\/HTML document or an XML\/HTML string -} -- ------------------------------------------------------------ module Text.XML.HXT.Arrow.ReadDocument ( readDocument , readFromDocument , readString , readFromString , hread , hreadDoc , xread , xreadDoc ) where import Control.Arrow.ListArrows import Data.Maybe ( fromMaybe ) import qualified Data.Map as M import Text.XML.HXT.DOM.Interface import Text.XML.HXT.Arrow.XmlArrow import Text.XML.HXT.Arrow.Edit ( canonicalizeAllNodes , canonicalizeForXPath , canonicalizeContents , rememberDTDAttrl , removeDocWhiteSpace ) import qualified Text.XML.HXT.Arrow.ParserInterface as PI import Text.XML.HXT.Arrow.ProcessDocument ( getDocumentContents , parseXmlDocument , parseXmlDocumentWithExpat , parseHtmlDocument , propagateAndValidateNamespaces , andValidateNamespaces ) import Text.XML.HXT.Arrow.XmlState import Text.XML.HXT.Arrow.XmlState.TypeDefs -- ------------------------------------------------------------ -- {- | the main document input filter this filter can be configured by a list of configuration options, a value of type 'Text.XML.HXT.XmlState.TypeDefs.SysConfig' for all available options see module 'Text.XML.HXT.Arrow.XmlState.SystemConfig' - @withValidate yes\/no@ : switch on\/off DTD validation. Only for XML parsed documents, not for HTML parsing. - @withSubstDTDEntities yes\/no@ : switch on\/off entity substitution for general entities defined in DTD validation. Default is @yes@. Switching this option and the validation off can lead to faster parsing, in that case reading the DTD documents is not longer necessary. Only used with XML parsed documents, not with HTML parsing. - @withSubstHTMLEntities yes\/no@ : switch on\/off entity substitution for general entities defined in HTML validation. Default is @no@. Switching this option on and the validation and substDTDEntities off can lead to faster parsing, in that case reading the DTD documents is not longer necessary, HTML general entities are still substituted. Only used with XML parsed documents, not with HTML parsing. - @withParseHTML yes\/no@ : switch on HTML parsing. - @withParseByMimeType yes\/no@ : select XML\/HTML parser by document mime type. text\/xml and text\/xhtml are parsed as XML, text\/html as HTML. - @withCheckNamespaces yes\/no@ : Switch on\/off namespace propagation and checking - @withInputEncoding \@ : Set default encoding. - @withTagSoup@ : use light weight and lazy parser based on tagsoup lib. This is only available when package hxt-tagsoup is installed and the source contains an @import Text.XML.HXT.TagSoup@. - @withRelaxNG \@ : validate document with Relax NG, the parameter is for the schema URI. This implies using XML parser, no validation against DTD, and canonicalisation. - @withCurl [\...]@ : Use the libCurl binding for HTTP access. This is only available when package hxt-curl is installed and the source contains an @import Text.XML.HXT.Curl@. - @withHTTP [\...]@ : Use the Haskell HTTP package for HTTP access. This is only available when package hxt-http is installed and the source contains an @import Text.XML.HXT.HTTP@. examples: > readDocument [] "test.xml" reads and validates a document \"test.xml\", no namespace propagation, only canonicalization is performed > ... > import Text.XML.HXT.Curl > ... > > readDocument [ withValidate no > , withInputEncoding isoLatin1 > , withParseByMimeType yes > , withCurl [] > ] "http://localhost/test.php" reads document \"test.php\", parses it as HTML or XML depending on the mimetype given from the server, but without validation, default encoding 'isoLatin1'. HTTP access is done via libCurl. > readDocument [ withParseHTML yes > , withInputEncoding isoLatin1 > ] "" reads a HTML document from standard input, no validation is done when parsing HTML, default encoding is 'isoLatin1', > readDocument [ withInputEncoding isoLatin1 > , withValidate no > , withMimeTypeFile "/etc/mime.types" > , withStrictInput yes > ] "test.svg" reads an SVG document from \"test.svg\", sets the mime type by looking in the system mimetype config file, default encoding is 'isoLatin1', > ... > import Text.XML.HXT.Curl > import Text.XML.HXT.TagSoup > ... > > readDocument [ withParseHTML yes > , withTagSoup > , withProxy "www-cache:3128" > , withCurl [] > , withWarnings no > ] "http://www.haskell.org/" reads Haskell homepage with HTML parser, ignoring any warnings (at the time of writing, there were some HTML errors), with http access via libCurl interface and proxy \"www-cache\" at port 3128, parsing is done with tagsoup HTML parser. This requires packages \"hxt-curl\" and \"hxt-tagsoup\" to be installed > readDocument [ withValidate yes > , withCheckNamespaces yes > , withRemoveWS yes > , withTrace 2 > , withHTTP [] > ] "http://www.w3c.org/" read w3c home page (xhtml), validate and check namespaces, remove whitespace between tags, trace activities with level 2. HTTP access is done with Haskell HTTP package > readDocument [ withValidate no > , withSubstDTDEntities no > ... > ] "http://www.w3c.org/" read w3c home page (xhtml), but without accessing the DTD given in that document. Only the predefined XML general entity refs are substituted. > readDocument [ withValidate no > , withSubstDTDEntities no > , withSubstHTMLEntities yes > ... > ] "http://www.w3c.org/" same as above, but with substituion of all general entity refs defined in XHTML. for minimal complete examples see 'Text.XML.HXT.Arrow.WriteDocument.writeDocument' and 'runX', the main starting point for running an XML arrow. -} readDocument :: SysConfigList -> String -> IOStateArrow s b XmlTree readDocument config src = localSysEnv $ readDocument' config src readDocument' :: SysConfigList -> String -> IOStateArrow s b XmlTree readDocument' config src = configSysVars config >>> readD $< getSysVar theWithCache where readD True = constA undefined -- just for generalizing the signature to: IOStateArrow s b XmlTree >>> -- instead of IOStateArrow s XmlTree XmlTree (withoutUserState $< (getSysVar theCacheRead >>^ ($ src))) readD False = readDocument'' src readDocument'' :: String -> IOStateArrow s b XmlTree readDocument'' src = getDocumentContents src >>> ( processDoc $<< ( getMimeType &&& getSysVar (theParseByMimeType .&&&. theParseHTML .&&&. theAcceptedMimeTypes .&&&. theRelaxValidate .&&&. theXmlSchemaValidate ) ) ) >>> traceMsg 1 ("readDocument: " ++ show src ++ " processed") >>> traceSource >>> traceTree where processNoneEmptyDoc p = ifA (fromLA hasEmptyBody) (replaceChildren none) p where hasEmptyBody = hasAttrValue transferStatus (/= "200") -- test on empty response body for not o.k. responses `guards` -- e.g. 3xx status values ( neg getChildren <+> ( getChildren >>> isWhiteSpace ) ) getMimeType = getAttrValue transferMimeType >>^ stringToLower applyMimeTypeHandler mt = withoutUserState (applyMTH $< getSysVar theMimeTypeHandlers) where applyMTH mtTable = fromMaybe none $ fmap (\ f -> processNoneEmptyDoc (traceMimeStart >>> f >>> traceMimeEnd) ) $ M.lookup mt mtTable traceMimeStart = traceMsg 2 $ "readDocument: calling user defined document parser" traceMimeEnd = traceMsg 2 $ "readDocument: user defined document parser finished" processDoc mimeType options = traceMsg 1 (unwords [ "readDocument:", show src , "(mime type:", show mimeType, ") will be processed" ] ) >>> ( applyMimeTypeHandler mimeType -- try user defined document handlers `orElse` processDoc' mimeType options ) processDoc' mimeType ( parseByMimeType , ( parseHtml , ( acceptedMimeTypes , ( validateWithRelax , validateWithXmlSchema )))) = ( if isAcceptedMimeType acceptedMimeTypes mimeType then ( processNoneEmptyDoc ( ( parse $< getSysVar (theValidate .&&&. theSubstDTDEntities .&&&. theSubstHTMLEntities .&&&. theIgnoreNoneXmlContents .&&&. theTagSoup .&&&. theExpat ) ) >>> ( if isXmlOrHtml then ( ( checknamespaces $< getSysVar (theCheckNamespaces .&&&. theTagSoup ) ) >>> rememberDTDAttrl >>> ( canonicalize $< getSysVar (thePreserveComment .&&&. theCanonicalize .&&&. theTagSoup ) ) >>> ( whitespace $< getSysVar (theRemoveWS .&&&. theTagSoup ) ) >>> relaxOrXmlSchema ) else this ) ) ) else ( traceMsg 1 (unwords [ "readDocument:", show src , "mime type:", show mimeType, "not accepted"]) >>> replaceChildren none -- remove contents of not accepted mimetype ) ) where isAcceptedMimeType :: [String] -> String -> Bool isAcceptedMimeType mts mt | null mts || null mt = True | otherwise = foldr (matchMt mt') False $ mts' where mt' = parseMt mt mts' = map parseMt $ mts parseMt = break (== '/') >>> second (drop 1) matchMt (ma,mi) (mas,mis) r = ( (ma == mas || mas == "*") && (mi == mis || mis == "*") ) || r parse ( validate , ( substDTD , ( substHTML , ( removeNoneXml , ( withTagSoup' , withExpat' ))))) | not isXmlOrHtml = if removeNoneXml then replaceChildren none -- don't parse, if mime type is not XML nor HTML else this -- but remove contents when option is set | isHtml || withTagSoup' = configSysVar (setS theLowerCaseNames isHtml) >>> parseHtmlDocument -- parse as HTML or with tagsoup XML | isXml = if withExpat' then parseXmlDocumentWithExpat else parseXmlDocument validate substDTD substHTML validateWithRelax -- parse as XML | otherwise = this -- suppress warning checknamespaces (withNamespaces, withTagSoup') | withNamespaces && withTagSoup' = andValidateNamespaces -- propagation is done in tagsoup | withNamespaces || validateWithRelax || validateWithXmlSchema = propagateAndValidateNamespaces -- RelaxNG and XML Schema require correct namespaces | otherwise = this canonicalize (preserveCmt, (canonicalize', withTagSoup')) | withTagSoup' = this -- tagsoup already removes redundant stuff | validateWithRelax || validateWithXmlSchema = canonicalizeAllNodes -- no comments in schema validation | canonicalize' && preserveCmt = canonicalizeForXPath | canonicalize' = canonicalizeAllNodes | otherwise = this relaxOrXmlSchema | validateWithXmlSchema = withoutUserState $< getSysVar theXmlSchemaValidator | validateWithRelax = withoutUserState $< getSysVar theRelaxValidator | otherwise = this whitespace (removeWS, withTagSoup') | ( removeWS || validateWithXmlSchema -- XML Schema does not like WS ) && not withTagSoup' = removeDocWhiteSpace -- tagsoup already removes whitespace | otherwise = this isHtml = ( not parseByMimeType && parseHtml ) -- force HTML || ( parseByMimeType && isHtmlMimeType mimeType ) isXml = ( not parseByMimeType && not parseHtml ) || ( parseByMimeType && ( isXmlMimeType mimeType || null mimeType ) -- mime type is XML or not known ) isXmlOrHtml = isHtml || isXml -- ------------------------------------------------------------ -- | -- the arrow version of 'readDocument', the arrow input is the source URI readFromDocument :: SysConfigList -> IOStateArrow s String XmlTree readFromDocument config = applyA ( arr $ readDocument config ) -- ------------------------------------------------------------ -- | -- read a document that is stored in a normal Haskell String -- -- the same function as readDocument, but the parameter forms the input. -- All options available for 'readDocument' are applicable for readString, -- except input encoding options. -- -- Encoding: No decoding is done, the String argument is taken as Unicode string -- All decoding must be done before calling readString, even if the -- XML document contains an encoding spec. readString :: SysConfigList -> String -> IOStateArrow s b XmlTree readString config content = readDocument config (stringProtocol ++ content) -- ------------------------------------------------------------ -- | -- the arrow version of 'readString', the arrow input is the source URI readFromString :: SysConfigList -> IOStateArrow s String XmlTree readFromString config = applyA ( arr $ readString config ) -- ------------------------------------------------------------ -- | -- parse a string as HTML content, substitute all HTML entity refs and canonicalize tree. -- (substitute char refs, ...). Errors are ignored. -- -- This arrow delegates all work to the parseHtmlContent parser in module HtmlParser. -- -- This is a simpler version of 'readFromString' without any options, -- but it does not run in the IO monad. hread :: ArrowXml a => a String XmlTree hread = fromLA $ PI.hread -- substHtmlEntityRefs is done in parser >>> -- as well as subst HTML char refs editNTreeA [isError :-> none] -- ignores all errors >>> canonicalizeContents -- combine text nodes, substitute char refs -- comments are not removed -- | like hread, but accepts a whole document, not a HTML content hreadDoc :: ArrowXml a => a String XmlTree hreadDoc = fromLA $ root [] [PI.hreadDoc] -- substHtmlEntityRefs is done in parser >>> -- as well as subst HTML char refs editNTreeA [isError :-> none] -- ignores all errors >>> canonicalizeForXPath -- remove DTD spec and text in content of root node -- and do a canonicalizeContents >>> getChildren -- ------------------------------------------------------------ -- | -- parse a string as XML CONTENT, (no xml decl or doctype decls are allowed), -- substitute all predefined XML entity refs and canonicalize tree -- This xread arrow delegates all work to the xread parser function in module XmlParsec xread :: ArrowXml a => a String XmlTree xread = PI.xreadCont -- | -- a more general version of xread which -- parses a whole document including a prolog -- (xml decl, doctype decl) and processing -- instructions. Doctype decls remain uninterpreted, -- but are in the list of results trees. xreadDoc :: ArrowXml a => a String XmlTree xreadDoc = PI.xreadDoc {- -- the old version, where the parser does not subst char refs and cdata xread = root [] [parseXmlContent] -- substXmlEntityRefs is done in parser >>> canonicalizeContents >>> getChildren -- -} -- ------------------------------------------------------------ hxt-9.3.1.18/src/Text/XML/HXT/Arrow/WriteDocument.hs0000644000000000000000000002273212474566610020022 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Text.XML.HXT.Arrow.WriteDocument Copyright : Copyright (C) 2005-9 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : experimental Portability: portable Compound arrow for writing XML documents -} -- ------------------------------------------------------------ module Text.XML.HXT.Arrow.WriteDocument ( writeDocument , writeDocument' , writeDocumentToString , prepareContents ) where import Control.Arrow -- arrow classes import Control.Arrow.ArrowList import Control.Arrow.ArrowIf import Control.Arrow.ArrowTree import Text.XML.HXT.DOM.Interface import Text.XML.HXT.Arrow.XmlArrow import Text.XML.HXT.Arrow.XmlState import Text.XML.HXT.Arrow.XmlState.TypeDefs import Text.XML.HXT.Arrow.XmlState.RunIOStateArrow ( initialSysState ) import Text.XML.HXT.Arrow.Edit ( haskellRepOfXmlDoc , indentDoc , addDefaultDTDecl , preventEmptyElements , removeDocWhiteSpace , treeRepOfXmlDoc ) import Text.XML.HXT.Arrow.DocumentOutput ( putXmlDocument , encodeDocument , encodeDocument' ) -- ------------------------------------------------------------ -- {- | the main filter for writing documents this filter can be configured by an option list like 'Text.XML.HXT.Arrow.ReadDocument.readDocument' usage: @ writeDocument optionList destination @ if @ destination @ is the empty string or \"-\", stdout is used as output device for available options see 'Text.XML.HXT.Arrow.XmlState.SystemConfig' - @withOutputXML@ : (default) issue XML: quote special XML chars \>,\<,\",\',& where neccessary add XML processing instruction and encode document with respect to output encoding, - @withOutputHTML@ : issue HTML: translate all special XML chars and all HTML chars with a corresponding entity reference into entity references. Do not generate empty elements, e.g. @@. The short form introduces trouble in various browsers. - @withOutputXHTML@ : same as @withOutputHTML@, but all none ASCII chars are substituted by char references. - @withOutputPLAIN@ : Do not substitute any chars. This is useful when generating something else than XML/HTML, e.g. Haskell source code. - @withXmlPi yes/no@ : Add a @@ processing instruction to the beginning of the document. Default is yes. - @withAddDefaultDTD@ : if the document to be written was build by reading another document containing a Document Type Declaration, this DTD is inserted into the output document (default: no insert) - @withShowTree yes/no@ : show DOM tree representation of document (for debugging) - @withShowHaskell yes/no@ : show Haskell representaion of document (for debugging) a minimal main program for copying a document has the following structure: > module Main > where > > import Text.XML.HXT.Core > > main :: IO () > main > = do > runX ( readDocument [] "hello.xml" > >>> > writeDocument [] "bye.xml" > ) > return () an example for copying a document from the web to standard output with global trace level 1, input trace level 2, output encoding isoLatin1, and evaluation of error code is: > module Main > where > > import Text.XML.HXT.Core > import Text.XML.HXT.Curl > -- or > -- import Text.XML.HXT.HTTP > import System.Exit > > main :: IO () > main > = do > [rc] <- runX > ( configSysVars [ withTrace 1 -- set the defaults for all read-, > , withCurl [] -- write- and other operations > -- or withHTTP [] > ] > >>> > readDocument [ withTrace 2 -- use these additional > , withParseHTML yes -- options only for this read > ] > "http://www.haskell.org/" > >>> > writeDocument [ withOutputEncoding isoLatin1 > ] > "" -- output to stdout > >>> > getErrStatus > ) > exitWith ( if rc >= c_err > then ExitFailure 1 > else ExitSuccess > ) -} writeDocument :: SysConfigList -> String -> IOStateArrow s XmlTree XmlTree writeDocument config dst = localSysEnv $ configSysVars config >>> perform ( (flip writeDocument') dst $< getSysVar theTextMode ) writeDocument' :: Bool -> String -> IOStateArrow s XmlTree XmlTree writeDocument' textMode dst = ( traceMsg 1 ("writeDocument: destination is " ++ show dst) >>> ( (flip prepareContents) encodeDocument $< getSysVar idS ) >>> traceDoc "document after encoding" >>> putXmlDocument textMode dst >>> traceMsg 1 "writeDocument: finished" ) `when` documentStatusOk -- ------------------------------------------------------------ -- | -- Convert a document into a string. Formating is done the same way -- and with the same options as in 'writeDocument'. Default output encoding is -- no encoding, that means the result is a normal unicode encode haskell string. -- The default may be overwritten with the 'Text.XML.HXT.Arrow.XmlState.SystemConfig.withOutputEncoding' option. -- The XML PI can be suppressed by the 'Text.XML.HXT.XmlKeywords.a_no_xml_pi' option. -- -- This arrow fails, when the encoding scheme is not supported. -- The arrow is pure, it does not run in the IO monad. -- The XML PI is suppressed, if not explicitly turned on with an -- option @ (a_no_xml_pi, v_0) @ writeDocumentToString :: ArrowXml a => SysConfigList -> a XmlTree String writeDocumentToString config = prepareContents ( foldr (>>>) id (withOutputEncoding unicodeString : withXmlPi no : config ) $ initialSysState ) encodeDocument' >>> xshow getChildren -- ------------------------------------------------------------ -- | -- indent and format output prepareContents :: ArrowXml a => XIOSysState -> (Bool -> Bool -> String -> a XmlTree XmlTree) -> a XmlTree XmlTree prepareContents config encodeDoc = indent >>> addDtd >>> format where indent' = getS theIndent config removeWS' = getS theRemoveWS config showTree' = getS theShowTree config showHaskell' = getS theShowHaskell config outHtml' = getS theOutputFmt config == HTMLoutput outXhtml' = getS theOutputFmt config == XHTMLoutput outXml' = getS theOutputFmt config == XMLoutput noPi' = not $ getS theXmlPi config noEEsFor' = getS theNoEmptyElemFor config addDDTD' = getS theAddDefaultDTD config outEnc' = getS theOutputEncoding config addDtd | addDDTD' = addDefaultDTDecl | otherwise = this indent | indent' = indentDoc -- document indentation | removeWS' = removeDocWhiteSpace -- remove all whitespace between tags | otherwise = this format | showTree' = treeRepOfXmlDoc | showHaskell' = haskellRepOfXmlDoc | outHtml' = preventEmptyElements noEEsFor' True >>> encodeDoc -- convert doc into text with respect to output encoding with ASCII as default False noPi' ( if null outEnc' then usAscii else outEnc' ) | outXhtml' = preventEmptyElements noEEsFor' True >>> encodeDoc -- convert doc into text with respect to output encoding True noPi' outEnc' | outXml' = ( if null noEEsFor' then this else preventEmptyElements noEEsFor' False ) >>> encodeDoc -- convert doc into text with respect to output encoding True noPi' outEnc' | otherwise = this -- ------------------------------------------------------------ hxt-9.3.1.18/src/Text/XML/HXT/Arrow/XmlArrow.hs0000644000000000000000000006745313506133134017001 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Text.XML.HXT.Arrow.XmlArrow Copyright : Copyright (C) 2011 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : experimental Portability: portable Basic arrows for processing XML documents All arrows use IO and a global state for options, errorhandling, ... -} -- ------------------------------------------------------------ module Text.XML.HXT.Arrow.XmlArrow ( module Text.XML.HXT.Arrow.XmlArrow ) where import Control.Arrow -- classes import Control.Arrow.ArrowList import Control.Arrow.ArrowIf import Control.Arrow.ArrowTree import Control.Arrow.ListArrow -- arrow types import Control.Arrow.StateListArrow import Control.Arrow.IOListArrow import Control.Arrow.IOStateListArrow import Data.Char.Properties.XMLCharProps ( isXmlSpaceChar ) import Data.Maybe import Text.XML.HXT.DOM.Interface import qualified Text.XML.HXT.DOM.XmlNode as XN import qualified Text.XML.HXT.DOM.ShowXml as XS -- ------------------------------------------------------------ {- | Arrows for processing 'Text.XML.HXT.DOM.TypeDefs.XmlTree's These arrows can be grouped into predicates, selectors, constructors, and transformers. All predicates (tests) act like 'Control.Arrow.ArrowIf.none' for failure and 'Control.Arrow.ArrowIf.this' for success. A logical and can be formed by @ a1 >>> a2 @, a locical or by @ a1 \<+\> a2 @. Selector arrows will fail, when applied to wrong input, e.g. selecting the text of a node with 'getText' will fail when applied to a none text node. Edit arrows will remain the input unchanged, when applied to wrong argument, e.g. editing the content of a text node with 'changeText' applied to an element node will return the unchanged element node. -} infixl 7 += class (Arrow a, ArrowList a, ArrowTree a) => ArrowXml a where -- discriminating predicates -- | test for text nodes isText :: a XmlTree XmlTree isText = isA XN.isText {-# INLINE isText #-} isBlob :: a XmlTree XmlTree isBlob = isA XN.isBlob {-# INLINE isBlob #-} -- | test for char reference, used during parsing isCharRef :: a XmlTree XmlTree isCharRef = isA XN.isCharRef {-# INLINE isCharRef #-} -- | test for entity reference, used during parsing isEntityRef :: a XmlTree XmlTree isEntityRef = isA XN.isEntityRef {-# INLINE isEntityRef #-} -- | test for comment isCmt :: a XmlTree XmlTree isCmt = isA XN.isCmt {-# INLINE isCmt #-} -- | test for CDATA section, used during parsing isCdata :: a XmlTree XmlTree isCdata = isA XN.isCdata {-# INLINE isCdata #-} -- | test for processing instruction isPi :: a XmlTree XmlTree isPi = isA XN.isPi {-# INLINE isPi #-} -- | test for processing instruction \ isXmlPi :: a XmlTree XmlTree isXmlPi = isPi >>> hasName "xml" -- | test for element isElem :: a XmlTree XmlTree isElem = isA XN.isElem {-# INLINE isElem #-} -- | test for DTD part, used during parsing isDTD :: a XmlTree XmlTree isDTD = isA XN.isDTD {-# INLINE isDTD #-} -- | test for attribute tree isAttr :: a XmlTree XmlTree isAttr = isA XN.isAttr {-# INLINE isAttr #-} -- | test for error message isError :: a XmlTree XmlTree isError = isA XN.isError {-# INLINE isError #-} -- | test for root node (element with name \"\/\") isRoot :: a XmlTree XmlTree isRoot = isA XN.isRoot {-# INLINE isRoot #-} -- | test for text nodes with text, for which a predicate holds -- -- example: @hasText (all (\`elem\` \" \\t\\n\"))@ check for text nodes with only whitespace content hasText :: (String -> Bool) -> a XmlTree XmlTree hasText p = (isText >>> getText >>> isA p) `guards` this -- | test for text nodes with only white space -- -- implemented with 'hasTest' isWhiteSpace :: a XmlTree XmlTree isWhiteSpace = hasText (all isXmlSpaceChar) {-# INLINE isWhiteSpace #-} -- | -- test whether a node (element, attribute, pi) has a name with a special property hasNameWith :: (QName -> Bool) -> a XmlTree XmlTree hasNameWith p = (getQName >>> isA p) `guards` this {-# INLINE hasNameWith #-} -- | -- test whether a node (element, attribute, pi) has a specific qualified name -- useful only after namespace propagation hasQName :: QName -> a XmlTree XmlTree hasQName n = (getQName >>> isA (== n)) `guards` this {-# INLINE hasQName #-} -- | -- test whether a node has a specific name (prefix:localPart or localPart), -- generally useful, even without namespace handling hasName :: String -> a XmlTree XmlTree hasName n = (getName >>> isA (== n)) `guards` this {-# INLINE hasName #-} -- | -- test whether a node has a specific name as local part, -- useful only after namespace propagation hasLocalPart :: String -> a XmlTree XmlTree hasLocalPart n = (getLocalPart >>> isA (== n)) `guards` this {-# INLINE hasLocalPart #-} -- | -- test whether a node has a specific name prefix, -- useful only after namespace propagation hasNamePrefix :: String -> a XmlTree XmlTree hasNamePrefix n = (getNamePrefix >>> isA (== n)) `guards` this {-# INLINE hasNamePrefix #-} -- | -- test whether a node has a specific namespace URI -- useful only after namespace propagation hasNamespaceUri :: String -> a XmlTree XmlTree hasNamespaceUri n = (getNamespaceUri >>> isA (== n)) `guards` this {-# INLINE hasNamespaceUri #-} -- | -- test whether an element node has an attribute node with a specific name hasAttr :: String -> a XmlTree XmlTree hasAttr n = (getAttrl >>> hasName n) `guards` this {-# INLINE hasAttr #-} -- | -- test whether an element node has an attribute node with a specific qualified name hasQAttr :: QName -> a XmlTree XmlTree hasQAttr n = (getAttrl >>> hasQName n) `guards` this {-# INLINE hasQAttr #-} -- | -- test whether an element node has an attribute with a specific value hasAttrValue :: String -> (String -> Bool) -> a XmlTree XmlTree hasAttrValue n p = (getAttrl >>> hasName n >>> xshow getChildren >>> isA p) `guards` this -- | -- test whether an element node has an attribute with a qualified name and a specific value hasQAttrValue :: QName -> (String -> Bool) -> a XmlTree XmlTree hasQAttrValue n p = (getAttrl >>> hasQName n >>> xshow getChildren >>> isA p) `guards` this -- constructor arrows ------------------------------------------------------------ -- | text node construction arrow mkText :: a String XmlTree mkText = arr XN.mkText {-# INLINE mkText #-} -- | blob node construction arrow mkBlob :: a Blob XmlTree mkBlob = arr XN.mkBlob {-# INLINE mkBlob #-} -- | char reference construction arrow, useful for document output mkCharRef :: a Int XmlTree mkCharRef = arr XN.mkCharRef {-# INLINE mkCharRef #-} -- | entity reference construction arrow, useful for document output mkEntityRef :: a String XmlTree mkEntityRef = arr XN.mkEntityRef {-# INLINE mkEntityRef #-} -- | comment node construction, useful for document output mkCmt :: a String XmlTree mkCmt = arr XN.mkCmt {-# INLINE mkCmt #-} -- | CDATA construction, useful for document output mkCdata :: a String XmlTree mkCdata = arr XN.mkCdata {-# INLINE mkCdata #-} -- | error node construction, useful only internally mkError :: Int -> a String XmlTree mkError level = arr (XN.mkError level) -- | element construction: -- | the attributes and the content of the element are computed by applying arrows -- to the input mkElement :: QName -> a n XmlTree -> a n XmlTree -> a n XmlTree mkElement n af cf = (listA af &&& listA cf) >>> arr2 (\ al cl -> XN.mkElement n al cl) -- | attribute node construction: -- | the attribute value is computed by applying an arrow to the input mkAttr :: QName -> a n XmlTree -> a n XmlTree mkAttr qn f = listA f >>> arr (XN.mkAttr qn) -- | processing instruction construction: -- | the content of the processing instruction is computed by applying an arrow to the input mkPi :: QName -> a n XmlTree -> a n XmlTree mkPi qn f = listA f >>> arr (XN.mkPi qn) -- convenient arrows for constructors -------------------------------------------------- -- | convenient arrow for element construction, more comfortable variant of 'mkElement' -- -- example for simplifying 'mkElement' : -- -- > mkElement qn (a1 <+> ... <+> ai) (c1 <+> ... <+> cj) -- -- equals -- -- > mkqelem qn [a1,...,ai] [c1,...,cj] mkqelem :: QName -> [a n XmlTree] -> [a n XmlTree] -> a n XmlTree mkqelem n afs cfs = mkElement n (catA afs) (catA cfs) {-# INLINE mkqelem #-} -- | convenient arrow for element construction with strings instead of qualified names as element names, see also 'mkElement' and 'mkelem' mkelem :: String -> [a n XmlTree] -> [a n XmlTree] -> a n XmlTree mkelem n afs cfs = mkElement (mkName n) (catA afs) (catA cfs) {-# INLINE mkelem #-} -- | convenient arrow for element construction with attributes but without content, simple variant of 'mkelem' and 'mkElement' aelem :: String -> [a n XmlTree] -> a n XmlTree aelem n afs = catA afs >. \ al -> XN.mkElement (mkName n) al [] {-# INLINE aelem #-} -- | convenient arrow for simple element construction without attributes, simple variant of 'mkelem' and 'mkElement' selem :: String -> [a n XmlTree] -> a n XmlTree selem n cfs = catA cfs >. XN.mkElement (mkName n) [] {-# INLINE selem #-} -- | convenient arrow for construction of empty elements without attributes, simple variant of 'mkelem' and 'mkElement' eelem :: String -> a n XmlTree eelem n = constA (XN.mkElement (mkName n) [] []) {-# INLINE eelem #-} -- | construction of an element node with name \"\/\" for document roots root :: [a n XmlTree] -> [a n XmlTree] -> a n XmlTree root = mkelem t_root {-# INLINE root #-} -- | alias for 'mkAttr' qattr :: QName -> a n XmlTree -> a n XmlTree qattr = mkAttr {-# INLINE qattr #-} -- | convenient arrow for attribute construction, simple variant of 'mkAttr' attr :: String -> a n XmlTree -> a n XmlTree attr = mkAttr . mkName {-# INLINE attr #-} -- constant arrows (ignoring the input) for tree construction ------------------------------ -- | constant arrow for text nodes txt :: String -> a n XmlTree txt = constA . XN.mkText {-# INLINE txt #-} -- | constant arrow for blob nodes blb :: Blob -> a n XmlTree blb = constA . XN.mkBlob {-# INLINE blb #-} -- | constant arrow for char reference nodes charRef :: Int -> a n XmlTree charRef = constA . XN.mkCharRef {-# INLINE charRef #-} -- | constant arrow for entity reference nodes entityRef :: String -> a n XmlTree entityRef = constA . XN.mkEntityRef {-# INLINE entityRef #-} -- | constant arrow for comment cmt :: String -> a n XmlTree cmt = constA . XN.mkCmt {-# INLINE cmt #-} -- | constant arrow for warning warn :: String -> a n XmlTree warn = constA . (XN.mkError c_warn) {-# INLINE warn #-} -- | constant arrow for errors err :: String -> a n XmlTree err = constA . (XN.mkError c_err) {-# INLINE err #-} -- | constant arrow for fatal errors fatal :: String -> a n XmlTree fatal = constA . (XN.mkError c_fatal) {-# INLINE fatal #-} -- | constant arrow for simple processing instructions, see 'mkPi' spi :: String -> String -> a n XmlTree spi piName piCont = constA (XN.mkPi (mkName piName) [XN.mkAttr (mkName a_value) [XN.mkText piCont]]) {-# INLINE spi #-} -- | constant arrow for attribute nodes, attribute name is a qualified name and value is a text, -- | see also 'mkAttr', 'qattr', 'attr' sqattr :: QName -> String -> a n XmlTree sqattr an av = constA (XN.mkAttr an [XN.mkText av]) {-# INLINE sqattr #-} -- | constant arrow for attribute nodes, attribute name and value are -- | given by parameters, see 'mkAttr' sattr :: String -> String -> a n XmlTree sattr an av = constA (XN.mkAttr (mkName an) [XN.mkText av]) {-# INLINE sattr #-} -- selector arrows -------------------------------------------------- -- | select the text of a text node getText :: a XmlTree String getText = arrL (maybeToList . XN.getText) {-# INLINE getText #-} -- | select the value of a char reference getCharRef :: a XmlTree Int getCharRef = arrL (maybeToList . XN.getCharRef) {-# INLINE getCharRef #-} -- | select the name of a entity reference node getEntityRef :: a XmlTree String getEntityRef = arrL (maybeToList . XN.getEntityRef) {-# INLINE getEntityRef #-} -- | select the comment of a comment node getCmt :: a XmlTree String getCmt = arrL (maybeToList . XN.getCmt) {-# INLINE getCmt #-} -- | select the content of a CDATA node getCdata :: a XmlTree String getCdata = arrL (maybeToList . XN.getCdata) {-# INLINE getCdata #-} -- | select the name of a processing instruction getPiName :: a XmlTree QName getPiName = arrL (maybeToList . XN.getPiName) {-# INLINE getPiName #-} -- | select the content of a processing instruction getPiContent :: a XmlTree XmlTree getPiContent = arrL (fromMaybe [] . XN.getPiContent) {-# INLINE getPiContent #-} -- | select the name of an element node getElemName :: a XmlTree QName getElemName = arrL (maybeToList . XN.getElemName) {-# INLINE getElemName #-} -- | select the attribute list of an element node getAttrl :: a XmlTree XmlTree getAttrl = arrL (fromMaybe [] . XN.getAttrl) {-# INLINE getAttrl #-} -- | select the DTD type of a DTD node getDTDPart :: a XmlTree DTDElem getDTDPart = arrL (maybeToList . XN.getDTDPart) {-# INLINE getDTDPart #-} -- | select the DTD attributes of a DTD node getDTDAttrl :: a XmlTree Attributes getDTDAttrl = arrL (maybeToList . XN.getDTDAttrl) {-# INLINE getDTDAttrl #-} -- | select the name of an attribute getAttrName :: a XmlTree QName getAttrName = arrL (maybeToList . XN.getAttrName) {-# INLINE getAttrName #-} -- | select the error level (c_warn, c_err, c_fatal) from an error node getErrorLevel :: a XmlTree Int getErrorLevel = arrL (maybeToList . XN.getErrorLevel) {-# INLINE getErrorLevel #-} -- | select the error message from an error node getErrorMsg :: a XmlTree String getErrorMsg = arrL (maybeToList . XN.getErrorMsg) {-# INLINE getErrorMsg #-} -- | select the qualified name from an element, attribute or pi getQName :: a XmlTree QName getQName = arrL (maybeToList . XN.getName) {-# INLINE getQName #-} -- | select the prefix:localPart or localPart from an element, attribute or pi getName :: a XmlTree String getName = arrL (maybeToList . XN.getQualifiedName) {-# INLINE getName #-} -- | select the univeral name ({namespace URI} ++ localPart) getUniversalName :: a XmlTree String getUniversalName = arrL (maybeToList . XN.getUniversalName) {-# INLINE getUniversalName #-} -- | select the univeral name (namespace URI ++ localPart) getUniversalUri :: a XmlTree String getUniversalUri = arrL (maybeToList . XN.getUniversalUri) {-# INLINE getUniversalUri #-} -- | select the local part getLocalPart :: a XmlTree String getLocalPart = arrL (maybeToList . XN.getLocalPart) {-# INLINE getLocalPart #-} -- | select the name prefix getNamePrefix :: a XmlTree String getNamePrefix = arrL (maybeToList . XN.getNamePrefix) {-# INLINE getNamePrefix #-} -- | select the namespace URI getNamespaceUri :: a XmlTree String getNamespaceUri = arrL (maybeToList . XN.getNamespaceUri) {-# INLINE getNamespaceUri #-} -- | select the value of an attribute of an element node, -- always succeeds with empty string as default value \"\" getAttrValue :: String -> a XmlTree String getAttrValue n = xshow (getAttrl >>> hasName n >>> getChildren) -- | like 'getAttrValue', but fails if the attribute does not exist getAttrValue0 :: String -> a XmlTree String getAttrValue0 n = getAttrl >>> hasName n >>> xshow getChildren -- | like 'getAttrValue', but select the value of an attribute given by a qualified name, -- always succeeds with empty string as default value \"\" getQAttrValue :: QName -> a XmlTree String getQAttrValue n = xshow (getAttrl >>> hasQName n >>> getChildren) -- | like 'getQAttrValue', but fails if attribute does not exist getQAttrValue0 :: QName -> a XmlTree String getQAttrValue0 n = getAttrl >>> hasQName n >>> xshow getChildren -- edit arrows -------------------------------------------------- -- | edit the string of a text node changeText :: (String -> String) -> a XmlTree XmlTree changeText cf = arr (XN.changeText cf) `when` isText -- | edit the blob of a blob node changeBlob :: (Blob -> Blob) -> a XmlTree XmlTree changeBlob cf = arr (XN.changeBlob cf) `when` isBlob -- | edit the comment string of a comment node changeCmt :: (String -> String) -> a XmlTree XmlTree changeCmt cf = arr (XN.changeCmt cf) `when` isCmt -- | edit an element-, attribute- or pi- name changeQName :: (QName -> QName) -> a XmlTree XmlTree changeQName cf = arr (XN.changeName cf) `when` getQName -- | edit an element name changeElemName :: (QName -> QName) -> a XmlTree XmlTree changeElemName cf = arr (XN.changeElemName cf) `when` isElem -- | edit an attribute name changeAttrName :: (QName -> QName) -> a XmlTree XmlTree changeAttrName cf = arr (XN.changeAttrName cf) `when` isAttr -- | edit a pi name changePiName :: (QName -> QName) -> a XmlTree XmlTree changePiName cf = arr (XN.changePiName cf) `when` isPi -- | edit an attribute value changeAttrValue :: (String -> String) -> a XmlTree XmlTree changeAttrValue cf = replaceChildren ( xshow getChildren >>> arr cf >>> mkText ) `when` isAttr -- | edit an attribute list of an element node changeAttrl :: (XmlTrees -> XmlTrees -> XmlTrees) -> a XmlTree XmlTree -> a XmlTree XmlTree changeAttrl cf f = ( ( listA f &&& this ) >>> arr2 changeAL ) `when` ( isElem <+> isPi ) where changeAL as x = XN.changeAttrl (\ xs -> cf xs as) x -- | replace an element, attribute or pi name setQName :: QName -> a XmlTree XmlTree setQName n = changeQName (const n) {-# INLINE setQName #-} -- | replace an element name setElemName :: QName -> a XmlTree XmlTree setElemName n = changeElemName (const n) {-# INLINE setElemName #-} -- | replace an attribute name setAttrName :: QName -> a XmlTree XmlTree setAttrName n = changeAttrName (const n) {-# INLINE setAttrName #-} -- | replace an element name setPiName :: QName -> a XmlTree XmlTree setPiName n = changePiName (const n) {-# INLINE setPiName #-} -- | replace an atribute list of an element node setAttrl :: a XmlTree XmlTree -> a XmlTree XmlTree setAttrl = changeAttrl (const id) -- (\ x y -> y) {-# INLINE setAttrl #-} -- | add a list of attributes to an element addAttrl :: a XmlTree XmlTree -> a XmlTree XmlTree addAttrl = changeAttrl (XN.mergeAttrl) {-# INLINE addAttrl #-} -- | add (or replace) an attribute addAttr :: String -> String -> a XmlTree XmlTree addAttr an av = addAttrl (sattr an av) {-# INLINE addAttr #-} -- | remove an attribute removeAttr :: String -> a XmlTree XmlTree removeAttr an = processAttrl (none `when` hasName an) -- | remove an attribute with a qualified name removeQAttr :: QName -> a XmlTree XmlTree removeQAttr an = processAttrl (none `when` hasQName an) -- | process the attributes of an element node with an arrow processAttrl :: a XmlTree XmlTree -> a XmlTree XmlTree processAttrl f = setAttrl (getAttrl >>> f) -- | process a whole tree inclusive attribute list of element nodes -- see also: 'Control.Arrow.ArrowTree.processTopDown' processTopDownWithAttrl :: a XmlTree XmlTree -> a XmlTree XmlTree processTopDownWithAttrl f = processTopDown ( f >>> ( processAttrl (processTopDown f) `when` isElem)) -- | convenient op for adding attributes or children to a node -- -- usage: @ tf += cf @ -- -- the @tf@ arrow computes an element node, and all trees computed by @cf@ are -- added to this node, if a tree is an attribute, it is inserted in the attribute list -- else it is appended to the content list. -- -- attention: do not build long content list this way because '+=' is implemented by ++ -- -- examples: -- -- > eelem "a" -- > += sattr "href" "page.html" -- > += sattr "name" "here" -- > += txt "look here" -- -- is the same as -- -- > mkelem [ sattr "href" "page.html" -- > , sattr "name" "here" -- > ] -- > [ txt "look here" ] -- -- and results in the XML fragment: \look here\<\/a\> -- -- advantage of the '+=' operator is, that attributes and content can be added -- any time step by step. -- if @tf@ computes a whole list of trees, e.g. a list of \"td\" or \"tr\" elements, -- the attributes or content is added to all trees. useful for adding \"class\" or \"style\" attributes -- to table elements. (+=) :: a b XmlTree -> a b XmlTree -> a b XmlTree tf += cf = (tf &&& listA cf) >>> arr2 addChildren where addChildren :: XmlTree -> XmlTrees -> XmlTree addChildren t cs = foldl addChild t cs addChild :: XmlTree -> XmlTree -> XmlTree addChild t c | not (XN.isElem t) = t | XN.isAttr c = XN.changeAttrl (XN.addAttr c) t | otherwise = XN.changeChildren (++ [c]) t -- | apply an arrow to the input and convert the resulting XML trees into a string representation xshow :: a n XmlTree -> a n String xshow f = f >. XS.xshow {-# INLINE xshow #-} -- | apply an arrow to the input and convert the resulting XML trees into a string representation xshowBlob :: a n XmlTree -> a n Blob xshowBlob f = f >. XS.xshowBlob {-# INLINE xshowBlob #-} {- | Document Type Definition arrows These are separated, because they are not needed for document processing, only when processing the DTD, e.g. for generating access funtions for the toolbox from a DTD (se example DTDtoHaskell in the examples directory) -} class (ArrowXml a) => ArrowDTD a where isDTDDoctype :: a XmlTree XmlTree isDTDDoctype = isA (maybe False (== DOCTYPE ) . XN.getDTDPart) isDTDElement :: a XmlTree XmlTree isDTDElement = isA (maybe False (== ELEMENT ) . XN.getDTDPart) isDTDContent :: a XmlTree XmlTree isDTDContent = isA (maybe False (== CONTENT ) . XN.getDTDPart) isDTDAttlist :: a XmlTree XmlTree isDTDAttlist = isA (maybe False (== ATTLIST ) . XN.getDTDPart) isDTDEntity :: a XmlTree XmlTree isDTDEntity = isA (maybe False (== ENTITY ) . XN.getDTDPart) isDTDPEntity :: a XmlTree XmlTree isDTDPEntity = isA (maybe False (== PENTITY ) . XN.getDTDPart) isDTDNotation :: a XmlTree XmlTree isDTDNotation = isA (maybe False (== NOTATION) . XN.getDTDPart) isDTDCondSect :: a XmlTree XmlTree isDTDCondSect = isA (maybe False (== CONDSECT) . XN.getDTDPart) isDTDName :: a XmlTree XmlTree isDTDName = isA (maybe False (== NAME ) . XN.getDTDPart) isDTDPERef :: a XmlTree XmlTree isDTDPERef = isA (maybe False (== PEREF ) . XN.getDTDPart) hasDTDAttr :: String -> a XmlTree XmlTree hasDTDAttr n = isA (isJust . lookup n . fromMaybe [] . XN.getDTDAttrl) getDTDAttrValue :: String -> a XmlTree String getDTDAttrValue n = arrL (maybeToList . lookup n . fromMaybe [] . XN.getDTDAttrl) setDTDAttrValue :: String -> String -> a XmlTree XmlTree setDTDAttrValue n v = arr (XN.changeDTDAttrl (addEntry n v)) `when` isDTD mkDTDElem :: DTDElem -> Attributes -> a n XmlTree -> a n XmlTree mkDTDElem e al cf = listA cf >>> arr (XN.mkDTDElem e al) mkDTDDoctype :: Attributes -> a n XmlTree -> a n XmlTree mkDTDDoctype = mkDTDElem DOCTYPE mkDTDElement :: Attributes -> a n XmlTree mkDTDElement al = mkDTDElem ELEMENT al none mkDTDEntity :: Attributes -> a n XmlTree mkDTDEntity al = mkDTDElem ENTITY al none mkDTDPEntity :: Attributes -> a n XmlTree mkDTDPEntity al = mkDTDElem PENTITY al none instance ArrowXml LA instance ArrowXml (SLA s) instance ArrowXml IOLA instance ArrowXml (IOSLA s) instance ArrowDTD LA instance ArrowDTD (SLA s) instance ArrowDTD IOLA instance ArrowDTD (IOSLA s) -- ------------------------------------------------------------ hxt-9.3.1.18/src/Text/XML/HXT/Arrow/XmlOptions.hs0000644000000000000000000003303613205353551017333 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Text.XML.HXT.Arrow.XmlOptions Copyright : Copyright (C) 2010 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : stable Portability: portable system configuration and common options options -} -- ------------------------------------------------------------ module Text.XML.HXT.Arrow.XmlOptions where import Text.XML.HXT.DOM.Interface import Text.XML.HXT.Arrow.XmlState.TypeDefs import Text.XML.HXT.Arrow.XmlState.SystemConfig import Data.Maybe import System.Console.GetOpt -- ------------------------------------------------------------ -- -- | -- commonly useful options for XML input -- -- can be used for option definition with haskell getopt -- -- defines options: 'a_trace', 'a_proxy', 'a_use_curl', 'a_do_not_use_curl', 'a_options_curl', 'a_encoding', -- 'a_issue_errors', 'a_do_not_issue_errors', 'a_parse_html', 'a_parse_by_mimetype', 'a_issue_warnings', 'a_do_not_issue_warnings', -- 'a_parse_xml', 'a_validate', 'a_do_not_validate', 'a_canonicalize', 'a_do_not_canonicalize', --- 'a_preserve_comment', 'a_do_not_preserve_comment', 'a_check_namespaces', 'a_do_not_check_namespaces', -- 'a_remove_whitespace', 'a_do_not_remove_whitespace' inputOptions :: [OptDescr SysConfig] inputOptions = [ Option "t" [a_trace] (OptArg trc "LEVEL") "trace level (0-4), default 1" , Option "p" [a_proxy] (ReqArg withProxy "PROXY") "proxy for http access (e.g. \"www-cache:3128\")" , Option "" [a_redirect] (NoArg (withRedirect True)) "automatically follow redirected URIs" , Option "" [a_no_redirect] (NoArg (withRedirect False)) "switch off following redirected URIs" , Option "" [a_default_baseuri] (ReqArg withDefaultBaseURI "URI") "default base URI, default: \"file:////\"" , Option "e" [a_encoding] (ReqArg withInputEncoding "CHARSET") ( "default document encoding (" ++ utf8 ++ ", " ++ isoLatin1 ++ ", " ++ usAscii ++ ", ...)" ) , Option "" [a_mime_types] (ReqArg withMimeTypeFile "FILE") "set mime type configuration file, e.g. \"/etc/mime.types\"" , Option "" [a_issue_errors] (NoArg (withErrors True)) "issue all error messages on stderr (default)" , Option "" [a_do_not_issue_errors] (NoArg (withErrors False)) "ignore all error messages" , Option "" [a_ignore_encoding_errors] (NoArg (withEncodingErrors False)) "ignore encoding errors" , Option "" [a_ignore_none_xml_contents] (NoArg (withIgnoreNoneXmlContents True)) "discards all contents of none XML/HTML documents, only the meta info remains in the doc tree" , Option "" [a_accept_mimetypes] (ReqArg withMT "MIMETYPES") "only accept documents matching the given comma separated list of mimetype specs" , Option "H" [a_parse_html] (NoArg (withParseHTML True)) "parse input as HTML, try to interprete everything as HTML, no validation" , Option "M" [a_parse_by_mimetype] (NoArg (withParseByMimeType True)) "parse dependent on mime type: text/html as HTML, text/xml and text/xhtml and others as XML, else no parse" , Option "" [a_parse_xml] (NoArg (withParseHTML False)) "parse input as XML, (default)" , Option "" [a_strict_input] (NoArg (withStrictInput True)) "read input files strictly, this ensures closing the files correctly even if not read completely" , Option "" [a_issue_warnings] (NoArg (withWarnings True)) "issue warnings, when parsing HTML (default)" , Option "Q" [a_do_not_issue_warnings] (NoArg (withWarnings False)) "ignore warnings, when parsing HTML" , Option "" [a_validate] (NoArg (withValidate True)) "document validation when parsing XML (default)" , Option "w" [a_do_not_validate] (NoArg (withValidate False)) "only wellformed check, no validation" , Option "" [a_subst_dtd_entities] (NoArg (withSubstDTDEntities True)) "entities defined in DTD are substituted when parsing XML (default)" , Option "" [a_do_not_subst_dtd_entities] (NoArg (withSubstDTDEntities False)) "entities defined in DTD are NOT substituted when parsing XML" , Option "" [a_subst_html_entities] (NoArg (withSubstHTMLEntities True)) "entities defined in XHTML are substituted when parsing XML, only in effect when prev. option is switched off" , Option "" [a_do_not_subst_html_entities] (NoArg (withSubstHTMLEntities False)) "only entities predefined in XML are substituted when parsing XML (default)" , Option "" [a_canonicalize] (NoArg (withCanonicalize True)) "canonicalize document, remove DTD, comment, transform CDATA, CharRef's, ... (default)" , Option "c" [a_do_not_canonicalize] (NoArg (withCanonicalize False)) "do not canonicalize document, don't remove DTD, comment, don't transform CDATA, CharRef's, ..." , Option "C" [a_preserve_comment] (NoArg (withPreserveComment True)) "don't remove comments during canonicalisation" , Option "" [a_do_not_preserve_comment] (NoArg (withPreserveComment False)) "remove comments during canonicalisation (default)" , Option "n" [a_check_namespaces] (NoArg (withCheckNamespaces True)) "tag tree with namespace information and check namespaces" , Option "" [a_do_not_check_namespaces] (NoArg (withCheckNamespaces False)) "ignore namespaces (default)" , Option "r" [a_remove_whitespace] (NoArg (withRemoveWS True)) "remove redundant whitespace, simplifies tree and processing" , Option "" [a_do_not_remove_whitespace] (NoArg (withRemoveWS False)) "don't remove redundant whitespace (default)" ] where withMT = withAcceptedMimeTypes . words . map (\ x -> if x == ',' then ' ' else x) trc = withTrace . max 0 . min 9 . (read :: String -> Int) . ('0':) . filter (`elem` "0123456789") . fromMaybe v_1 -- | -- commonly useful options for XML output -- -- defines options: 'a_indent', 'a_output_encoding', 'a_output_html' and others outputOptions :: [OptDescr SysConfig] outputOptions = [ Option "f" [a_output_file] (ReqArg (withSysAttr a_output_file) "FILE") "output file for resulting document (default: stdout)" , Option "i" [a_indent] (NoArg (withIndent True)) "indent XML output for readability" , Option "o" [a_output_encoding] (ReqArg withOutputEncoding "CHARSET") ( "encoding of output (" ++ utf8 ++ ", " ++ isoLatin1 ++ ", " ++ usAscii ++ ")" ) , Option "" [a_output_xml] (NoArg withOutputXML ) "output of none ASCII chars as HTMl entity references" , Option "" [a_output_html] (NoArg withOutputHTML ) "output of none ASCII chars as HTMl entity references" , Option "" [a_output_xhtml] (NoArg withOutputXHTML ) "output of HTML elements with empty content (script, ...) done in format instead of " , Option "" [a_output_plain] (NoArg withOutputPLAIN ) "output of HTML elements with empty content (script, ...) done in format instead of " , Option "" [a_no_xml_pi] (NoArg (withXmlPi False)) ("output without processing instruction, useful in combination with --" ++ show a_output_html) , Option "" [a_no_empty_elem_for] (ReqArg (withNoEmptyElemFor . words') "NAMES") "output of empty elements done in format only for given list of element names" , Option "" [a_add_default_dtd] (NoArg (withAddDefaultDTD True)) "add the document type declaration given in the input document" , Option "" [a_text_mode] (NoArg (withTextMode True)) "output in text mode" ] where words' = words . map (\ c -> if c == ',' then ' ' else c) -- | -- commonly useful options -- -- defines options: 'a_verbose', 'a_help' generalOptions :: [OptDescr SysConfig] generalOptions = [ Option "v" [a_verbose] (NoArg (withSysAttr a_verbose v_1)) "verbose output" , Option "h?" [a_help] (NoArg (withSysAttr a_help v_1)) "this message" ] -- | -- defines 'a_version' option versionOptions :: [OptDescr SysConfig] versionOptions = [ Option "V" [a_version] (NoArg (withSysAttr a_version v_1)) "show program version" ] -- | -- debug output options showOptions :: [OptDescr SysConfig] showOptions = [ Option "" [a_show_tree] (NoArg (withShowTree True)) "output tree representation instead of document source" , Option "" [a_show_haskell] (NoArg (withShowHaskell True)) "output internal Haskell representation instead of document source" ] -- ------------------------------------------------------------ a_accept_mimetypes, a_add_default_dtd, a_canonicalize, a_check_namespaces, a_collect_errors, a_default_baseuri, a_do_not_canonicalize, a_do_not_check_namespaces, a_do_not_issue_errors, a_do_not_issue_warnings, a_do_not_preserve_comment, a_do_not_remove_whitespace, a_do_not_subst_dtd_entities, a_do_not_subst_html_entities, a_do_not_validate, a_error, a_error_log, a_help, a_if_modified_since, a_if_unmodified_since, a_ignore_encoding_errors, a_ignore_none_xml_contents, a_indent, a_issue_errors, a_issue_warnings, a_mime_types, a_no_empty_elements, a_no_empty_elem_for, a_no_redirect, a_no_xml_pi, a_output_file, a_output_xml, a_output_html, a_output_xhtml, a_output_plain, a_parse_by_mimetype, a_parse_html, a_parse_xml, a_preserve_comment, a_proxy, a_redirect, a_remove_whitespace, a_show_haskell, a_show_tree, a_strict_input, a_subst_dtd_entities, a_subst_html_entities, a_text_mode, a_trace, a_validate, a_verbose :: String a_accept_mimetypes = "accept-mimetypes" a_add_default_dtd = "add-default-dtd" a_canonicalize = "canonicalize" a_check_namespaces = "check-namespaces" a_collect_errors = "collect-errors" a_default_baseuri = "default-base-URI" a_do_not_canonicalize = "do-not-canonicalize" a_do_not_check_namespaces = "do-not-check-namespaces" a_do_not_issue_errors = "do-not-issue-errors" a_do_not_issue_warnings = "do-not-issue-warnings" a_do_not_preserve_comment = "do-not-preserve-comment" a_do_not_remove_whitespace = "do-not-remove-whitespace" a_do_not_subst_dtd_entities = "do-not-subst-dtd-entities" a_do_not_subst_html_entities = "do-not-subst-html-entities" a_do_not_validate = "do-not-validate" a_error = "error" a_error_log = "errorLog" a_help = "help" a_if_modified_since = "if-modified-since" a_if_unmodified_since = "if-unmodified-since" a_ignore_encoding_errors = "ignore-encoding-errors" a_ignore_none_xml_contents = "ignore-none-xml-contents" a_indent = "indent" a_issue_warnings = "issue-warnings" a_issue_errors = "issue-errors" a_mime_types = "mimetypes" a_no_empty_elements = "no-empty-elements" a_no_empty_elem_for = "no-empty-elem-for" a_no_redirect = "no-redirect" a_no_xml_pi = "no-xml-pi" a_output_file = "output-file" a_output_html = "output-html" a_output_xhtml = "output-xhtml" a_output_xml = "output-xml" a_output_plain = "output-plain" a_parse_by_mimetype = "parse-by-mimetype" a_parse_html = "parse-html" a_parse_xml = "parse-xml" a_preserve_comment = "preserve-comment" a_proxy = "proxy" a_redirect = "redirect" a_remove_whitespace = "remove-whitespace" a_show_haskell = "show-haskell" a_show_tree = "show-tree" a_strict_input = "strict-input" a_subst_dtd_entities = "subst-dtd-entities" a_subst_html_entities = "subst-html-entities" a_text_mode = "text-mode" a_trace = "trace" a_validate = "validate" a_verbose = "verbose" -- ------------------------------------------------------------ -- | -- select options from a predefined list of option descriptions selectOptions :: [String] -> [OptDescr a] -> [OptDescr a] selectOptions ol os = concat . map (\ on -> filter (\ (Option _ ons _ _) -> on `elem` ons) os) $ ol removeOptions :: [String] -> [OptDescr a] -> [OptDescr a] removeOptions ol os = filter (\ (Option _ ons _ _) -> not . any (`elem` ol) $ ons ) os -- ------------------------------------------------------------ hxt-9.3.1.18/src/Text/XML/HXT/Arrow/XmlRegex.hs0000644000000000000000000003236312474566610016765 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Text.XML.HXT.Arrow.XmlRegex Copyright : Copyright (C) 2008 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : experimental Portability: portable Regular Expression Matcher working on lists of XmlTrees It's intended to import this module with an explicit import declaration for not spoiling the namespace with these somewhat special arrows -} -- ------------------------------------------------------------ module Text.XML.HXT.Arrow.XmlRegex ( XmlRegex , mkZero , mkUnit , mkPrim , mkPrim' , mkPrimA , mkDot , mkStar , mkAlt , mkAlts , mkSeq , mkSeqs , mkRep , mkRng , mkOpt , mkPerm , mkPerms , mkMerge , nullable , delta , matchXmlRegex , splitXmlRegex , scanXmlRegex , matchRegexA , splitRegexA , scanRegexA ) where import Control.Arrow.ListArrows import Data.Maybe import Text.XML.HXT.DOM.Interface import Text.XML.HXT.DOM.ShowXml (xshow) -- ------------------------------------------------------------ -- the exported regex arrows -- | check whether a sequence of XmlTrees match an Xml regular expression -- -- The arrow for 'matchXmlRegex'. -- -- The expession is build up from simple arrows acting as predicate ('mkPrimA') for -- an XmlTree and of the usual cobinators for sequence ('mkSeq'), repetition -- ('mkStar', mkRep', 'mkRng') and choice ('mkAlt', 'mkOpt') matchRegexA :: XmlRegex -> LA XmlTree XmlTree -> LA XmlTree XmlTrees matchRegexA re ts = ts >>. (\ s -> maybe [s] (const []) . matchXmlRegex re $ s) -- | split the sequence of trees computed by the filter a into -- -- The arrow for 'splitXmlRegex'. -- -- a first part matching the regex and a rest, -- if a prefix of the input sequence does not match the regex, the arrow fails -- else the pair containing the result lists is returned splitRegexA :: XmlRegex -> LA XmlTree XmlTree -> LA XmlTree (XmlTrees, XmlTrees) splitRegexA re ts = ts >>. (maybeToList . splitXmlRegex re) -- | scan the input sequence with a regex and give the result as a list of lists of trees back -- the regex must at least match one input tree, so the empty sequence should not match the regex -- -- The arrow for 'scanXmlRegex'. scanRegexA :: XmlRegex -> LA XmlTree XmlTree -> LA XmlTree XmlTrees scanRegexA re ts = ts >>. (fromMaybe [] . scanXmlRegex re) -- ------------------------------------------------------------ data XmlRegex = Zero String | Unit | Sym (XmlTree -> Bool) String -- optional external repr. of predicate | Dot | Star XmlRegex | Alt XmlRegex XmlRegex | Seq XmlRegex XmlRegex | Rep Int XmlRegex -- 1 or more repetitions | Rng Int Int XmlRegex -- n..m repetitions | Perm XmlRegex XmlRegex | Merge XmlRegex XmlRegex -- ------------------------------------------------------------ {- just for documentation class Inv a where inv :: a -> Bool instance Inv XmlRegex where inv (Zero _) = True inv Unit = True inv (Sym p _) = p holds for some XmlTrees inv Dot = True inv (Star e) = inv e inv (Alt e1 e2) = inv e1 && inv e2 inv (Seq e1 e2) = inv e1 && inv e2 inv (Rep i e) = i > 0 && inv e inv (Rng i j e) = (i < j || (i == j && i > 1)) && inv e inv (Perm e1 e2) = inv e1 && inv e2 -} -- ------------------------------------------------------------ -- -- smart constructors mkZero :: String -> XmlRegex mkZero = Zero mkUnit :: XmlRegex mkUnit = Unit mkPrim :: (XmlTree -> Bool) -> XmlRegex mkPrim p = Sym p "" mkPrim' :: (XmlTree -> Bool) -> String -> XmlRegex mkPrim' = Sym mkPrimA :: LA XmlTree XmlTree -> XmlRegex mkPrimA a = mkPrim (not . null . runLA a) mkDot :: XmlRegex mkDot = Dot mkStar :: XmlRegex -> XmlRegex mkStar (Zero _) = mkUnit -- {}* == () mkStar e@Unit = e -- ()* == () mkStar e@(Star _e1) = e -- (r*)* == r* mkStar (Rep 1 e1) = mkStar e1 -- (r+)* == r* mkStar e@(Alt _ _) = Star (rmStar e) -- (a*|b)* == (a|b)* mkStar e = Star e rmStar :: XmlRegex -> XmlRegex rmStar (Alt e1 e2) = mkAlt (rmStar e1) (rmStar e2) rmStar (Star e1) = rmStar e1 rmStar (Rep 1 e1) = rmStar e1 rmStar e1 = e1 mkAlt :: XmlRegex -> XmlRegex -> XmlRegex mkAlt e1 (Zero _) = e1 -- e1 u {} = e1 mkAlt (Zero _) e2 = e2 -- {} u e2 = e2 mkAlt e1@(Star Dot) _e2 = e1 -- A* u e1 = A* mkAlt _e1 e2@(Star Dot) = e2 -- e1 u A* = A* mkAlt (Sym p1 e1) (Sym p2 e2) = mkPrim' (\ x -> p1 x || p2 x) (e e1 e2) -- melting of predicates where e "" x2 = x2 e x1 "" = x1 e x1 x2 = x1 ++ "|" ++ x2 mkAlt e1 e2@(Sym _ _) = mkAlt e2 e1 -- symmetry: predicates always first mkAlt e1@(Sym _ _) (Alt e2@(Sym _ _) e3) = mkAlt (mkAlt e1 e2) e3 -- prepare melting of predicates mkAlt (Alt e1 e2) e3 = mkAlt e1 (mkAlt e2 e3) -- associativity mkAlt e1 e2 = Alt e1 e2 mkAlts :: [XmlRegex] -> XmlRegex mkAlts = foldr mkAlt (mkZero "") mkSeq :: XmlRegex -> XmlRegex -> XmlRegex mkSeq e1@(Zero _) _e2 = e1 mkSeq _e1 e2@(Zero _) = e2 mkSeq Unit e2 = e2 mkSeq e1 Unit = e1 mkSeq (Seq e1 e2) e3 = mkSeq e1 (mkSeq e2 e3) mkSeq e1 e2 = Seq e1 e2 mkSeqs :: [XmlRegex] -> XmlRegex mkSeqs = foldr mkSeq mkUnit mkRep :: Int -> XmlRegex -> XmlRegex mkRep 0 e = mkStar e mkRep _ e@(Zero _) = e mkRep _ e@Unit = e mkRep i e = Rep i e mkRng :: Int -> Int -> XmlRegex -> XmlRegex mkRng 0 0 _e = mkUnit mkRng 1 1 e = e mkRng lb ub _e | lb > ub = Zero $ "illegal range " ++ show lb ++ ".." ++ show ub mkRng _l _u e@(Zero _) = e mkRng _l _u e@Unit = e mkRng lb ub e = Rng lb ub e mkOpt :: XmlRegex -> XmlRegex mkOpt = mkRng 0 1 mkPerm :: XmlRegex -> XmlRegex -> XmlRegex mkPerm e1@(Zero _) _ = e1 mkPerm _ e2@(Zero _) = e2 mkPerm Unit e2 = e2 mkPerm e1 Unit = e1 mkPerm e1 e2 = Perm e1 e2 mkPerms :: [XmlRegex] -> XmlRegex mkPerms = foldr mkPerm mkUnit mkMerge :: XmlRegex -> XmlRegex -> XmlRegex mkMerge e1@(Zero _) _ = e1 mkMerge _ e2@(Zero _) = e2 mkMerge Unit e2 = e2 mkMerge e1 Unit = e1 mkMerge e1 e2 = Merge e1 e2 -- ------------------------------------------------------------ instance Show XmlRegex where show (Zero s) = "{err:" ++ s ++ "}" show Unit = "()" show (Sym _p "") = "" show (Sym _p r ) = r show Dot = "." show (Star e) = "(" ++ show e ++ ")*" show (Alt e1 e2) = "(" ++ show e1 ++ "|" ++ show e2 ++ ")" show (Seq e1 e2) = show e1 ++ show e2 show (Rep 1 e) = "(" ++ show e ++ ")+" show (Rep i e) = "(" ++ show e ++ "){" ++ show i ++ ",}" show (Rng 0 1 e) = "(" ++ show e ++ ")?" show (Rng i j e) = "(" ++ show e ++ "){" ++ show i ++ "," ++ show j ++ "}" show (Perm e1 e2) = "(" ++ show e1 ++ show e2 ++ "|" ++ show e2 ++ show e1 ++ ")" show (Merge e1 e2) = "(" ++ show e1 ++ "&" ++ show e2 ++ ")" -- ------------------------------------------------------------ unexpected :: XmlTree -> String -> String unexpected t e = emsg e ++ (cut 80 . xshow) [t] where emsg "" = "unexpected: " emsg s = "expected: " ++ s ++ ", but got: " cut n s | null rest = s' | otherwise = s' ++ "..." where (s', rest) = splitAt n s -- ------------------------------------------------------------ nullable :: XmlRegex -> Bool nullable (Zero _) = False nullable Unit = True nullable (Sym _p _) = False -- assumption: p holds for at least one tree nullable Dot = False nullable (Star _) = True nullable (Alt e1 e2) = nullable e1 || nullable e2 nullable (Seq e1 e2) = nullable e1 && nullable e2 nullable (Rep _i e) = nullable e nullable (Rng i _ e) = i == 0 || nullable e nullable (Perm e1 e2) = nullable e1 && nullable e2 nullable (Merge e1 e2) = nullable e1 && nullable e2 -- ------------------------------------------------------------ delta :: XmlRegex -> XmlTree -> XmlRegex delta e@(Zero _) _ = e delta Unit c = mkZero $ unexpected c "" delta (Sym p e) c | p c = mkUnit | otherwise = mkZero $ unexpected c e delta Dot _ = mkUnit delta e@(Star e1) c = mkSeq (delta e1 c) e delta (Alt e1 e2) c = mkAlt (delta e1 c) (delta e2 c) delta (Seq e1 e2) c | nullable e1 = mkAlt (mkSeq (delta e1 c) e2) (delta e2 c) | otherwise = mkSeq (delta e1 c) e2 delta (Rep i e) c = mkSeq (delta e c) (mkRep (i-1) e) delta (Rng i j e) c = mkSeq (delta e c) (mkRng ((i-1) `max` 0) (j-1) e) delta (Perm e1 e2) c = case e1' of (Zero _) -> mkPerm e1 (delta e2 c) _ -> mkPerm e1' e2 where e1' = delta e1 c delta (Merge e1 e2) c = mkAlt (mkMerge (delta e1 c) e2) (mkMerge e1 (delta e2 c)) -- ------------------------------------------------------------ delta' :: XmlRegex -> XmlTrees -> XmlRegex delta' = foldl delta -- | match a sequence of XML trees with a regular expression over trees -- -- If the input matches, the result is Nothing, else Just an error message is returned matchXmlRegex :: XmlRegex -> XmlTrees -> Maybe String matchXmlRegex e = res . delta' e where res (Zero er) = Just er res re | nullable re = Nothing -- o.k. | otherwise = Just $ "input does not match " ++ show e -- ------------------------------------------------------------ -- | split a sequence of XML trees into a pair of a a matching prefix and a rest -- -- If there is no matching prefix, Nothing is returned splitXmlRegex :: XmlRegex -> XmlTrees -> Maybe (XmlTrees, XmlTrees) splitXmlRegex re = splitXmlRegex' re [] splitXmlRegex' :: XmlRegex -> XmlTrees -> XmlTrees -> Maybe (XmlTrees, XmlTrees) splitXmlRegex' re res [] | nullable re = Just (reverse res, []) | otherwise = Nothing splitXmlRegex' (Zero _) _ _ = Nothing splitXmlRegex' re res xs@(x:xs') | isJust res' = res' | nullable re = Just (reverse res, xs) | otherwise = Nothing where re' = delta re x res' = splitXmlRegex' re' (x:res) xs' -- ------------------------------------------------------------ -- | scan a sequence of XML trees and split it into parts matching the given regex -- -- If the parts cannot be split because of a missing match, or because of the -- empty sequence as match, Nothing is returned scanXmlRegex :: XmlRegex -> XmlTrees -> Maybe [XmlTrees] scanXmlRegex re ts = scanXmlRegex' re (splitXmlRegex re ts) scanXmlRegex' :: XmlRegex -> Maybe (XmlTrees, XmlTrees) -> Maybe [XmlTrees] scanXmlRegex' _ Nothing = Nothing scanXmlRegex' _ (Just (rs, [])) = Just [rs] scanXmlRegex' _ (Just ([], _)) = Nothing -- re is nullable (the empty word matches), nothing split off -- would give infinite list of empty lists scanXmlRegex' re (Just (rs, rest)) | isNothing res = Nothing | otherwise = Just (rs : fromJust res) where res = scanXmlRegex' re (splitXmlRegex re rest) -- ------------------------------------------------------------ hxt-9.3.1.18/src/Text/XML/HXT/Arrow/XmlState.hs0000644000000000000000000000675412474566610017000 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Text.XML.HXT.Arrow.XmlState Copyright : Copyright (C) 2010 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : stable Portability: portable the interface for the basic state maipulation functions -} -- ------------------------------------------------------------ module Text.XML.HXT.Arrow.XmlState ( -- * Data Types XIOState , XIOSysState , IOStateArrow , IOSArrow , SysConfig , SysConfigList , -- * User State Manipulation getUserState , setUserState , changeUserState , withExtendedUserState , withOtherUserState , withoutUserState , -- * Run IO State arrows runX , -- * Global System State Configuration and Access configSysVars , setSysAttr , unsetSysAttr , getSysAttr , getAllSysAttrs , setSysAttrString , setSysAttrInt , getSysAttrInt , getConfigAttr , -- * Error Handling clearErrStatus , setErrStatus , getErrStatus , setErrMsgStatus , setErrorMsgHandler , errorMsgStderr , errorMsgCollect , errorMsgStderrAndCollect , errorMsgIgnore , getErrorMessages , filterErrorMsg , issueWarn , issueErr , issueFatal , issueExc , setDocumentStatus , setDocumentStatusFromSystemState , documentStatusOk , -- * Tracing setTraceLevel , getTraceLevel , withTraceLevel , setTraceCmd , getTraceCmd , trace , traceMsg , traceValue , traceString , traceSource , traceTree , traceDoc , -- * Document Base setBaseURI , getBaseURI , changeBaseURI , setDefaultBaseURI , getDefaultBaseURI , runInLocalURIContext , -- * URI Manipulation expandURIString , expandURI , mkAbsURI , getFragmentFromURI , getPathFromURI , getPortFromURI , getQueryFromURI , getRegNameFromURI , getSchemeFromURI , getUserInfoFromURI , -- * Mime Type Handling getMimeTypeTable , setMimeTypeTable , setMimeTypeTableFromFile , -- * System Configuration and Options yes , no , withAcceptedMimeTypes , withAddDefaultDTD , withSysAttr , withCanonicalize , withCompression , withCheckNamespaces , withDefaultBaseURI , withStrictDeserialize , withEncodingErrors , withErrors , withFileMimeType , withIgnoreNoneXmlContents , withIndent , withInputEncoding , withInputOption , withInputOptions , withMimeTypeFile , withMimeTypeHandler , withNoEmptyElemFor , withXmlPi , withOutputEncoding , withOutputXML , withOutputHTML , withOutputXHTML , withOutputPLAIN , withParseByMimeType , withParseHTML , withPreserveComment , withProxy , withRedirect , withRemoveWS , withShowHaskell , withShowTree , withStrictInput , withSubstDTDEntities , withSubstHTMLEntities , withTextMode , withTrace , withValidate , withWarnings ) where import Text.XML.HXT.Arrow.XmlState.ErrorHandling import Text.XML.HXT.Arrow.XmlState.MimeTypeTable import Text.XML.HXT.Arrow.XmlState.RunIOStateArrow import Text.XML.HXT.Arrow.XmlState.SystemConfig import Text.XML.HXT.Arrow.XmlState.TraceHandling import Text.XML.HXT.Arrow.XmlState.TypeDefs import Text.XML.HXT.Arrow.XmlState.URIHandling -- ------------------------------------------------------------ hxt-9.3.1.18/src/Text/XML/HXT/Arrow/XmlState/ErrorHandling.hs0000644000000000000000000002172612474566610021532 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Text.XML.HXT.Arrow.XmlState.ErrorHandling Copyright : Copyright (C) 2010 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : stable Portability: portable the basic state arrows for XML processing A state is needed for global processing options, like encoding options, document base URI, trace levels and error message handling The state is separated into a user defined state and a system state. The system state contains variables for error message handling, for tracing, for the document base for accessing XML documents with relative references, e.g. DTDs, and a global key value store. This assoc list has strings as keys and lists of XmlTrees as values. It is used to store arbitrary XML and text values, e.g. user defined global options. The user defined part of the store is in the default case empty, defined as (). It can be extended with an arbitray data type -} -- ------------------------------------------------------------ module Text.XML.HXT.Arrow.XmlState.ErrorHandling where import Control.Arrow -- arrow classes import Control.Arrow.ArrowList import Control.Arrow.ArrowIf import Control.Arrow.ArrowTree import Control.Arrow.ArrowIO import Control.Exception ( SomeException ) import Data.Maybe import Text.XML.HXT.DOM.Interface import Text.XML.HXT.Arrow.XmlArrow import Text.XML.HXT.Arrow.XmlState.TypeDefs import System.IO ( hPutStrLn , hFlush , stderr ) -- ------------------------------------------------------------ changeErrorStatus :: (Int -> Int -> Int) -> IOStateArrow s Int Int changeErrorStatus f = chgSysVar theErrorStatus f -- | reset global error variable clearErrStatus :: IOStateArrow s b b clearErrStatus = configSysVar $ setS theErrorStatus 0 -- | set global error variable setErrStatus :: IOStateArrow s Int Int setErrStatus = changeErrorStatus max -- | read current global error status getErrStatus :: IOStateArrow s XmlTree Int getErrStatus = getSysVar theErrorStatus -- ------------------------------------------------------------ -- | raise the global error status level to that of the input tree setErrMsgStatus :: IOStateArrow s XmlTree XmlTree setErrMsgStatus = perform ( getErrorLevel >>> setErrStatus ) -- | set the error message handler and the flag for collecting the errors setErrorMsgHandler :: Bool -> (String -> IO ()) -> IOStateArrow s b b setErrorMsgHandler c f = configSysVar $ setS (theErrorMsgCollect .&&&. theErrorMsgHandler) (c, f) -- | error message handler for output to stderr sysErrorMsg :: IOStateArrow s XmlTree XmlTree sysErrorMsg = perform ( getErrorLevel &&& getErrorMsg >>> arr formatErrorMsg >>> getSysVar theErrorMsgHandler &&& this >>> arrIO (\ (h, msg) -> h msg) ) where formatErrorMsg (level, msg) = "\n" ++ errClass level ++ ": " ++ msg errClass l = fromMaybe "fatal error" . lookup l $ msgList where msgList = [ (c_ok, "no error") , (c_warn, "warning") , (c_err, "error") , (c_fatal, "fatal error") ] -- | the default error message handler: error output to stderr errorMsgStderr :: IOStateArrow s b b errorMsgStderr = setErrorMsgHandler False (\ x -> do hPutStrLn stderr x hFlush stderr ) -- | error message handler for collecting errors errorMsgCollect :: IOStateArrow s b b errorMsgCollect = setErrorMsgHandler True (const $ return ()) -- | error message handler for output to stderr and collecting errorMsgStderrAndCollect :: IOStateArrow s b b errorMsgStderrAndCollect = setErrorMsgHandler True (hPutStrLn stderr) -- | error message handler for ignoring errors errorMsgIgnore :: IOStateArrow s b b errorMsgIgnore = setErrorMsgHandler False (const $ return ()) -- | -- if error messages are collected by the error handler for -- processing these messages by the calling application, -- this arrow reads the stored messages and clears the error message store getErrorMessages :: IOStateArrow s b XmlTree getErrorMessages = getSysVar theErrorMsgList >>> configSysVar (setS theErrorMsgList []) >>> arrL reverse addToErrorMsgList :: IOStateArrow s XmlTree XmlTree addToErrorMsgList = chgSysVar ( theErrorMsgCollect .&&&. theErrorMsgList ) ( \ e (cs, es) -> (cs, if cs then e : es else es) ) -- ------------------------------------------------------------ -- | -- filter error messages from input trees and issue errors filterErrorMsg :: IOStateArrow s XmlTree XmlTree filterErrorMsg = ( setErrMsgStatus >>> sysErrorMsg >>> addToErrorMsgList >>> none ) `when` isError -- | generate a warnig message issueWarn :: String -> IOStateArrow s b b issueWarn msg = perform (warn msg >>> filterErrorMsg) -- | generate an error message issueErr :: String -> IOStateArrow s b b issueErr msg = perform (err msg >>> filterErrorMsg) -- | generate a fatal error message, e.g. document not found issueFatal :: String -> IOStateArrow s b b issueFatal msg = perform (fatal msg >>> filterErrorMsg) -- | Default exception handler: issue a fatal error message and fail. -- -- The parameter can be used to specify where the error occured issueExc :: String -> IOStateArrow s SomeException b issueExc m = ( issueFatal $< arr ((msg ++) . show) ) >>> none where msg | null m = "Exception: " | otherwise = "Exception in " ++ m ++ ": " -- | -- add the error level and the module where the error occured -- to the attributes of a document root node and remove the children when level is greater or equal to 'c_err'. -- called by 'setDocumentStatusFromSystemState' when the system state indicates an error setDocumentStatus :: Int -> String -> IOStateArrow s XmlTree XmlTree setDocumentStatus level msg = ( addAttrl ( sattr a_status (show level) <+> sattr a_module msg ) >>> ( if level >= c_err then setChildren [] else this ) ) `when` isRoot -- | -- check whether the error level attribute in the system state -- is set to error, in this case the children of the document root are -- removed and the module name where the error occured and the error level are added as attributes with 'setDocumentStatus' -- else nothing is changed setDocumentStatusFromSystemState :: String -> IOStateArrow s XmlTree XmlTree setDocumentStatusFromSystemState msg = setStatus $< getErrStatus where setStatus level | level <= c_warn = this | otherwise = setDocumentStatus level msg -- | -- check whether tree is a document root and the status attribute has a value less than 'c_err' documentStatusOk :: ArrowXml a => a XmlTree XmlTree documentStatusOk = isRoot >>> ( (getAttrValue a_status >>> isA (\ v -> null v || ((read v)::Int) <= c_warn) ) `guards` this ) -- ------------------------------------------------------------ errorOutputToStderr :: String -> IO () errorOutputToStderr msg = do hPutStrLn stderr msg hFlush stderr -- ------------------------------------------------------------ hxt-9.3.1.18/src/Text/XML/HXT/Arrow/XmlState/MimeTypeTable.hs0000644000000000000000000000431712474566610021472 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Text.XML.HXT.Arrow.XmlState.MimeTypeTable Copyright : Copyright (C) 2010 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : stable Portability: portable the mime type configuration functions -} -- ------------------------------------------------------------ module Text.XML.HXT.Arrow.XmlState.MimeTypeTable where import Control.Arrow -- arrow classes import Control.Arrow.ArrowList import Control.Arrow.ArrowIO import Text.XML.HXT.DOM.Interface import Text.XML.HXT.Arrow.XmlState.TypeDefs -- ------------------------------------------------------------ -- | set the table mapping of file extensions to mime types in the system state -- -- Default table is defined in 'Text.XML.HXT.DOM.MimeTypeDefaults'. -- This table is used when reading loacl files, (file: protocol) to determine the mime type setMimeTypeTable :: MimeTypeTable -> IOStateArrow s b b setMimeTypeTable mtt = configSysVar $ setS (theMimeTypes .&&&. theMimeTypeFile) (mtt, "") -- | set the table mapping of file extensions to mime types by an external config file -- -- The config file must follow the conventions of /etc/mime.types on a debian linux system, -- that means all empty lines and all lines starting with a # are ignored. The other lines -- must consist of a mime type followed by a possible empty list of extensions. -- The list of extenstions and mime types overwrites the default list in the system state -- of the IOStateArrow setMimeTypeTableFromFile :: FilePath -> IOStateArrow s b b setMimeTypeTableFromFile file = configSysVar $ setS theMimeTypeFile file -- | read the system mimetype table getMimeTypeTable :: IOStateArrow s b MimeTypeTable getMimeTypeTable = getMime $< getSysVar (theMimeTypes .&&&. theMimeTypeFile) where getMime (mtt, "") = constA mtt getMime (_, mtf) = perform (setMimeTypeTable $< arrIO0 ( readMimeTypeTable mtf)) >>> getMimeTypeTable -- ------------------------------------------------------------ hxt-9.3.1.18/src/Text/XML/HXT/Arrow/XmlState/RunIOStateArrow.hs0000644000000000000000000002716412474566610022006 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Text.XML.HXT.Arrow.XmlState.RunIOStateArrow Copyright : Copyright (C) 2010 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : stable Portability: portable run an io state arrow -} -- ------------------------------------------------------------ module Text.XML.HXT.Arrow.XmlState.RunIOStateArrow where import Control.Arrow -- arrow classes import Control.Arrow.ArrowList import Control.Arrow.IOStateListArrow import Data.Map ( empty ) import Text.XML.HXT.DOM.Interface import Text.XML.HXT.Arrow.XmlArrow import Text.XML.HXT.Arrow.XmlState.ErrorHandling import Text.XML.HXT.Arrow.XmlState.TraceHandling import Text.XML.HXT.Arrow.XmlState.TypeDefs -- ------------------------------------------------------------ -- | -- apply an 'IOSArrow' to an empty root node with 'initialState' () as initial state -- -- the main entry point for running a state arrow with IO -- -- when running @ runX f@ an empty XML root node is applied to @f@. -- usually @f@ will start with a constant arrow (ignoring the input), e.g. a 'Text.XML.HXT.Arrow.ReadDocument.readDocument' arrow. -- -- for usage see examples with 'Text.XML.HXT.Arrow.WriteDocument.writeDocument' -- -- if input has to be feed into the arrow use 'Control.Arrow.IOStateListArrow.runIOSLA' like in @ runIOSLA f emptyX inputDoc @ runX :: IOSArrow XmlTree c -> IO [c] runX = runXIOState (initialState ()) runXIOState :: XIOState s -> IOStateArrow s XmlTree c -> IO [c] runXIOState s0 f = do (_finalState, res) <- runIOSLA (emptyRoot >>> f) s0 undefined return res where emptyRoot = root [] [] -- | the default global state, used as initial state when running an 'IOSArrow' with 'runIOSLA' or -- 'runX' initialState :: us -> XIOState us initialState s = XIOState { xioSysState = initialSysState , xioUserState = s } -- ------------------------------------------------------------ initialSysState :: XIOSysState initialSysState = XIOSys { xioSysWriter = initialSysWriter , xioSysEnv = initialSysEnv } initialSysWriter :: XIOSysWriter initialSysWriter = XIOwrt { xioErrorStatus = c_ok , xioErrorMsgList = [] , xioExpatErrors = none , xioRelaxNoOfErrors = 0 , xioRelaxDefineId = 0 , xioRelaxAttrList = [] } initialSysEnv :: XIOSysEnv initialSysEnv = XIOEnv { xioTraceLevel = 0 , xioTraceCmd = traceOutputToStderr , xioErrorMsgHandler = errorOutputToStderr , xioErrorMsgCollect = False , xioBaseURI = "" , xioDefaultBaseURI = "" , xioAttrList = [] , xioInputConfig = initialInputConfig , xioParseConfig = initialParseConfig , xioOutputConfig = initialOutputConfig , xioRelaxConfig = initialRelaxConfig , xioXmlSchemaConfig = initialXmlSchemaConfig , xioCacheConfig = initialCacheConfig } initialInputConfig :: XIOInputConfig initialInputConfig = XIOIcgf { xioStrictInput = False , xioEncodingErrors = True , xioInputEncoding = "" , xioHttpHandler = dummyHTTPHandler , xioInputOptions = [] , xioRedirect = False , xioProxy = "" } initialParseConfig :: XIOParseConfig initialParseConfig = XIOPcfg { xioMimeTypes = defaultMimeTypeTable , xioMimeTypeHandlers = empty , xioMimeTypeFile = "" , xioAcceptedMimeTypes = [] , xioFileMimeType = "" , xioWarnings = True , xioRemoveWS = False , xioParseByMimeType = False , xioParseHTML = False , xioLowerCaseNames = False , xioTagSoup = False , xioPreserveComment = False , xioValidate = True , xioSubstDTDEntities = True , xioSubstHTMLEntities = False , xioCheckNamespaces = False , xioCanonicalize = True , xioIgnoreNoneXmlContents = False , xioTagSoupParser = dummyTagSoupParser , xioExpat = False , xioExpatParser = dummyExpatParser } initialOutputConfig :: XIOOutputConfig initialOutputConfig = XIOOcfg { xioIndent = False , xioOutputEncoding = "" , xioOutputFmt = XMLoutput , xioXmlPi = True , xioNoEmptyElemFor = [] , xioAddDefaultDTD = False , xioTextMode = False , xioShowTree = False , xioShowHaskell = False } initialRelaxConfig :: XIORelaxConfig initialRelaxConfig = XIORxc { xioRelaxValidate = False , xioRelaxSchema = "" , xioRelaxCheckRestr = True , xioRelaxValidateExtRef = True , xioRelaxValidateInclude = True , xioRelaxCollectErrors = True , xioRelaxValidator = dummyRelaxValidator } initialXmlSchemaConfig :: XIOXmlSchemaConfig initialXmlSchemaConfig = XIOScc { xioXmlSchemaValidate = False , xioXmlSchemaSchema = "" , xioXmlSchemaValidator = dummyXmlSchemaValidator } initialCacheConfig :: XIOCacheConfig initialCacheConfig = XIOCch { xioBinaryCompression = id , xioBinaryDeCompression = id , xioWithCache = False , xioCacheDir = "" , xioDocumentAge = 0 , xioCache404Err = False , xioCacheRead = dummyCacheRead , xioStrictDeserialize = False } -- ------------------------------------------------------------ dummyHTTPHandler :: IOSArrow XmlTree XmlTree dummyHTTPHandler = ( issueFatal $ unlines $ [ "HTTP handler not configured," , "please install package hxt-curl and use 'withCurl' config option" , "or install package hxt-http and use 'withHTTP' config option" ] ) >>> addAttr transferMessage "HTTP handler not configured" >>> addAttr transferStatus "999" dummyTagSoupParser :: IOSArrow b b dummyTagSoupParser = issueFatal $ unlines $ [ "TagSoup parser not configured," , "please install package hxt-tagsoup" , " and use 'withTagSoup' parser config option from this package" ] dummyExpatParser :: IOSArrow b b dummyExpatParser = issueFatal $ unlines $ [ "Expat parser not configured," , "please install package hxt-expat" , " and use 'withExpat' parser config option from this package" ] dummyRelaxValidator :: IOSArrow b b dummyRelaxValidator = issueFatal $ unlines $ [ "RelaxNG validator not configured," , "please install package hxt-relaxng" , " and use 'withRelaxNG' config option from this package" ] dummyXmlSchemaValidator :: IOSArrow b b dummyXmlSchemaValidator = issueFatal $ unlines $ [ "XML Schema validator not configured," , "please install package hxt-xmlschema" , " and use 'withXmlSchema' config option from this package" ] dummyCacheRead :: String -> IOSArrow b b dummyCacheRead = const $ issueFatal $ unlines $ [ "Document cache not configured," , "please install package hxt-cache and use 'withCache' config option" ] -- ------------------------------------------------------------ getConfigAttr :: String -> SysConfigList -> String getConfigAttr n c = lookup1 n $ tl where s = (foldr (>>>) id c) initialSysState tl = getS theAttrList s -- ---------------------------------------- theSysConfigComp :: Selector XIOSysState a -> Selector SysConfig a theSysConfigComp sel = S { getS = \ cf -> getS sel (cf initialSysState) , setS = \ val cf -> setS sel val . cf } -- ------------------------------------------------------------ hxt-9.3.1.18/src/Text/XML/HXT/Arrow/XmlState/TraceHandling.hs0000644000000000000000000001240712474566610021473 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Text.XML.HXT.Arrow.XmlState.TraceHandling Copyright : Copyright (C) 2010 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : stable Portability: portable the trace arrows -} -- ------------------------------------------------------------ module Text.XML.HXT.Arrow.XmlState.TraceHandling where import Control.Arrow -- arrow classes import Control.Arrow.ArrowList import Control.Arrow.ArrowIf import Control.Arrow.ArrowTree import Control.Arrow.ArrowIO import System.IO ( hPutStrLn , hFlush , stderr ) import Text.XML.HXT.DOM.Interface import Text.XML.HXT.Arrow.XmlArrow import Text.XML.HXT.Arrow.XmlState.TypeDefs import Text.XML.HXT.Arrow.XmlState.SystemConfig import Text.XML.HXT.Arrow.Edit ( addHeadlineToXmlDoc , treeRepOfXmlDoc , indentDoc ) -- ------------------------------------------------------------ -- | set the global trace level setTraceLevel :: Int -> IOStateArrow s b b setTraceLevel l = configSysVar $ withTrace l -- | read the global trace level getTraceLevel :: IOStateArrow s b Int getTraceLevel = getSysVar theTraceLevel -- | set the global trace command. This command does the trace output setTraceCmd :: (Int -> String -> IO ()) -> IOStateArrow s b b setTraceCmd c = configSysVar $ setS theTraceCmd c -- | acces the command for trace output getTraceCmd :: IOStateArrow a b (Int -> String -> IO ()) getTraceCmd = getSysVar theTraceCmd -- | run an arrow with a given trace level, the old trace level is restored after the arrow execution withTraceLevel :: Int -> IOStateArrow s b c -> IOStateArrow s b c withTraceLevel level f = localSysEnv $ setTraceLevel level >>> f -- | apply a trace arrow and issue message to stderr trace :: Int -> IOStateArrow s b String -> IOStateArrow s b b trace level trc = perform ( trc >>> ( getTraceCmd &&& this ) >>> arrIO (\ (cmd, msg) -> cmd level msg) ) `when` ( getTraceLevel >>> isA (>= level) ) -- | trace the current value transfered in a sequence of arrows. -- -- The value is formated by a string conversion function. This is a substitute for -- the old and less general traceString function traceValue :: Int -> (b -> String) -> IOStateArrow s b b traceValue level trc = trace level (arr $ (('-' : "- (" ++ show level ++ ") ") ++) . trc) -- | an old alias for 'traceValue' traceString :: Int -> (b -> String) -> IOStateArrow s b b traceString = traceValue -- | issue a string message as trace traceMsg :: Int -> String -> IOStateArrow s b b traceMsg level msg = traceValue level (const msg) -- | issue the source representation of a document if trace level >= 3 -- -- for better readability the source is formated with indentDoc traceSource :: IOStateArrow s XmlTree XmlTree traceSource = trace 3 $ xshow $ choiceA [ isRoot :-> ( indentDoc >>> getChildren ) , isElem :-> ( root [] [this] >>> indentDoc >>> getChildren >>> isElem ) , this :-> this ] -- | issue the tree representation of a document if trace level >= 4 traceTree :: IOStateArrow s XmlTree XmlTree traceTree = trace 4 $ xshow $ treeRepOfXmlDoc >>> addHeadlineToXmlDoc >>> getChildren -- | trace a main computation step -- issue a message when trace level >= 1, issue document source if level >= 3, issue tree when level is >= 4 traceDoc :: String -> IOStateArrow s XmlTree XmlTree traceDoc msg = traceMsg 1 msg >>> traceSource >>> traceTree -- ---------------------------------------------------------- traceOutputToStderr :: Int -> String -> IO () traceOutputToStderr _level msg = do hPutStrLn stderr msg hFlush stderr -- ---------------------------------------------------------- hxt-9.3.1.18/src/Text/XML/HXT/Arrow/XmlState/TypeDefs.hs0000644000000000000000000011777612474566610020532 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Text.XML.HXT.Arrow.XmlState.TypeDefs Copyright : Copyright (C) 2010 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : stable Portability: portable the basic state arrows for XML processing A state is needed for global processing options, like encoding options, document base URI, trace levels and error message handling The state is separated into a user defined state and a system state. The system state contains variables for error message handling, for tracing, for the document base for accessing XML documents with relative references, e.g. DTDs, and a global key value store. This assoc list has strings as keys and lists of XmlTrees as values. It is used to store arbitrary XML and text values, e.g. user defined global options. The user defined part of the store is in the default case empty, defined as (). It can be extended with an arbitray data type -} -- ------------------------------------------------------------ module Text.XML.HXT.Arrow.XmlState.TypeDefs ( module Text.XML.HXT.Arrow.XmlState.TypeDefs , Selector(..) , chgS , idS , (.&&&.) ) where import Control.Arrow import Control.Arrow.ArrowList import Control.Arrow.IOStateListArrow import Control.DeepSeq import Data.ByteString.Lazy (ByteString) import Data.Char (isDigit) import Data.Function.Selector (Selector (..), chgS, idS, (.&&&.)) import qualified Data.Map as M import Text.XML.HXT.DOM.Interface -- ------------------------------------------------------------ {- datatypes -} -- | -- state datatype consists of a system state and a user state -- the user state is not fixed data XIOState us = XIOState { xioSysState :: ! XIOSysState , xioUserState :: ! us } instance (NFData us) => NFData (XIOState us) where rnf (XIOState sys usr) = rnf sys `seq` rnf usr -- | -- The arrow type for stateful arrows type IOStateArrow s b c = IOSLA (XIOState s) b c -- | -- The arrow for stateful arrows with no user defined state type IOSArrow b c = IOStateArrow () b c -- ------------------------------------------------------------ -- user state functions -- | read the user defined part of the state getUserState :: IOStateArrow s b s getUserState = IOSLA $ \ s _ -> return (s, [xioUserState s]) -- | change the user defined part of the state changeUserState :: (b -> s -> s) -> IOStateArrow s b b changeUserState cf = IOSLA $ \ s v -> let s' = s { xioUserState = cf v (xioUserState s) } in return (s', [v]) -- | set the user defined part of the state setUserState :: IOStateArrow s s s setUserState = changeUserState const -- | extend user state -- -- Run an arrow with an extended user state component, The old component -- is stored together with a new one in a pair, the arrow is executed with this -- extended state, and the augmented state component is removed form the state -- when the arrow has finished its execution withExtendedUserState :: s1 -> IOStateArrow (s1, s0) b c -> IOStateArrow s0 b c withExtendedUserState initS1 f = IOSLA $ \ s0 x -> do ~(finalS, res) <- runIOSLA f ( XIOState { xioSysState = xioSysState s0 , xioUserState = (initS1, xioUserState s0) } ) x return ( XIOState { xioSysState = xioSysState finalS , xioUserState = snd (xioUserState finalS) } , res ) -- | change the type of user state -- -- This conversion is useful, when running a state arrow with another -- structure of the user state, e.g. with () when executing some IO arrows withOtherUserState :: s1 -> IOStateArrow s1 b c -> IOStateArrow s0 b c withOtherUserState s1 f = IOSLA $ \ s x -> do (s', res) <- runIOSLA f ( XIOState { xioSysState = xioSysState s , xioUserState = s1 } ) x return ( XIOState { xioSysState = xioSysState s' , xioUserState = xioUserState s } , res ) withoutUserState :: IOSArrow b c -> IOStateArrow s0 b c withoutUserState = withOtherUserState () -- ------------------------------------------------------------ -- system state structure and acces functions -- | -- predefined system state data type with all components for the -- system functions, like trace, error handling, ... data XIOSysState = XIOSys { xioSysWriter :: ! XIOSysWriter , xioSysEnv :: ! XIOSysEnv } instance NFData XIOSysState where rnf x = seq x () -- all fields of interest are strict data XIOSysWriter = XIOwrt { xioErrorStatus :: ! Int , xioErrorMsgList :: ! XmlTrees , xioExpatErrors :: IOSArrow XmlTree XmlTree , xioRelaxNoOfErrors :: ! Int , xioRelaxDefineId :: ! Int , xioRelaxAttrList :: AssocList String XmlTrees } data XIOSysEnv = XIOEnv { xioTraceLevel :: ! Int , xioTraceCmd :: Int -> String -> IO () , xioErrorMsgHandler :: String -> IO () , xioErrorMsgCollect :: ! Bool , xioBaseURI :: ! String , xioDefaultBaseURI :: ! String , xioAttrList :: ! Attributes , xioInputConfig :: ! XIOInputConfig , xioParseConfig :: ! XIOParseConfig , xioOutputConfig :: ! XIOOutputConfig , xioRelaxConfig :: ! XIORelaxConfig , xioXmlSchemaConfig :: ! XIOXmlSchemaConfig , xioCacheConfig :: ! XIOCacheConfig } data XIOInputConfig = XIOIcgf { xioStrictInput :: ! Bool , xioEncodingErrors :: ! Bool , xioInputEncoding :: String , xioHttpHandler :: IOSArrow XmlTree XmlTree , xioInputOptions :: ! Attributes , xioRedirect :: ! Bool , xioProxy :: String } data XIOParseConfig = XIOPcfg { xioMimeTypes :: MimeTypeTable , xioMimeTypeHandlers :: MimeTypeHandlers , xioMimeTypeFile :: String , xioAcceptedMimeTypes :: [String] , xioFileMimeType :: String , xioWarnings :: ! Bool , xioRemoveWS :: ! Bool , xioParseByMimeType :: ! Bool , xioParseHTML :: ! Bool , xioLowerCaseNames :: ! Bool , xioPreserveComment :: ! Bool , xioValidate :: ! Bool , xioSubstDTDEntities :: ! Bool , xioSubstHTMLEntities :: ! Bool , xioCheckNamespaces :: ! Bool , xioCanonicalize :: ! Bool , xioIgnoreNoneXmlContents :: ! Bool , xioTagSoup :: ! Bool , xioTagSoupParser :: IOSArrow XmlTree XmlTree , xioExpat :: ! Bool , xioExpatParser :: IOSArrow XmlTree XmlTree } data XIOOutputConfig = XIOOcfg { xioIndent :: ! Bool , xioOutputEncoding :: ! String , xioOutputFmt :: ! XIOXoutConfig , xioXmlPi :: ! Bool , xioNoEmptyElemFor :: ! [String] , xioAddDefaultDTD :: ! Bool , xioTextMode :: ! Bool , xioShowTree :: ! Bool , xioShowHaskell :: ! Bool } data XIOXoutConfig = XMLoutput | XHTMLoutput | HTMLoutput | PLAINoutput deriving (Eq) data XIORelaxConfig = XIORxc { xioRelaxValidate :: ! Bool , xioRelaxSchema :: String , xioRelaxCheckRestr :: ! Bool , xioRelaxValidateExtRef :: ! Bool , xioRelaxValidateInclude :: ! Bool , xioRelaxCollectErrors :: ! Bool , xioRelaxValidator :: IOSArrow XmlTree XmlTree } data XIOXmlSchemaConfig = XIOScc { xioXmlSchemaValidate :: ! Bool , xioXmlSchemaSchema :: String , xioXmlSchemaValidator :: IOSArrow XmlTree XmlTree } data XIOCacheConfig = XIOCch { xioBinaryCompression :: CompressionFct , xioBinaryDeCompression :: DeCompressionFct , xioWithCache :: ! Bool , xioCacheDir :: ! String , xioDocumentAge :: ! Int , xioCache404Err :: ! Bool , xioCacheRead :: String -> IOSArrow XmlTree XmlTree , xioStrictDeserialize :: ! Bool } type MimeTypeHandlers = M.Map String (IOSArrow XmlTree XmlTree) type CompressionFct = ByteString -> ByteString type DeCompressionFct = ByteString -> ByteString type SysConfig = XIOSysState -> XIOSysState type SysConfigList = [SysConfig] -- ---------------------------------------- theSysState :: Selector (XIOState us) XIOSysState theSysState = S { getS = xioSysState , setS = \ x s -> s { xioSysState = x} } theUserState :: Selector (XIOState us) us theUserState = S { getS = xioUserState , setS = \ x s -> s { xioUserState = x} } -- ---------------------------------------- theSysWriter :: Selector XIOSysState XIOSysWriter theSysWriter = S { getS = xioSysWriter , setS = \ x s -> s { xioSysWriter = x} } theErrorStatus :: Selector XIOSysState Int theErrorStatus = theSysWriter >>> S { getS = xioErrorStatus , setS = \ x s -> s { xioErrorStatus = x } } theErrorMsgList :: Selector XIOSysState XmlTrees theErrorMsgList = theSysWriter >>> S { getS = xioErrorMsgList , setS = \ x s -> s { xioErrorMsgList = x } } theRelaxNoOfErrors :: Selector XIOSysState Int theRelaxNoOfErrors = theSysWriter >>> S { getS = xioRelaxNoOfErrors , setS = \ x s -> s { xioRelaxNoOfErrors = x} } theRelaxDefineId :: Selector XIOSysState Int theRelaxDefineId = theSysWriter >>> S { getS = xioRelaxDefineId , setS = \ x s -> s { xioRelaxDefineId = x} } theRelaxAttrList :: Selector XIOSysState (AssocList String XmlTrees) theRelaxAttrList = theSysWriter >>> S { getS = xioRelaxAttrList , setS = \ x s -> s { xioRelaxAttrList = x} } -- ---------------------------------------- theSysEnv :: Selector XIOSysState XIOSysEnv theSysEnv = S { getS = xioSysEnv , setS = \ x s -> s { xioSysEnv = x} } theInputConfig :: Selector XIOSysState XIOInputConfig theInputConfig = theSysEnv >>> S { getS = xioInputConfig , setS = \ x s -> s { xioInputConfig = x} } theStrictInput :: Selector XIOSysState Bool theStrictInput = theInputConfig >>> S { getS = xioStrictInput , setS = \ x s -> s { xioStrictInput = x} } theEncodingErrors :: Selector XIOSysState Bool theEncodingErrors = theInputConfig >>> S { getS = xioEncodingErrors , setS = \ x s -> s { xioEncodingErrors = x} } theInputEncoding :: Selector XIOSysState String theInputEncoding = theInputConfig >>> S { getS = xioInputEncoding , setS = \ x s -> s { xioInputEncoding = x} } theHttpHandler :: Selector XIOSysState (IOSArrow XmlTree XmlTree) theHttpHandler = theInputConfig >>> S { getS = xioHttpHandler , setS = \ x s -> s { xioHttpHandler = x} } theInputOptions :: Selector XIOSysState Attributes theInputOptions = theInputConfig >>> S { getS = xioInputOptions , setS = \ x s -> s { xioInputOptions = x} } theRedirect :: Selector XIOSysState Bool theRedirect = theInputConfig >>> S { getS = xioRedirect , setS = \ x s -> s { xioRedirect = x} } theProxy :: Selector XIOSysState String theProxy = theInputConfig >>> S { getS = xioProxy , setS = \ x s -> s { xioProxy = x} } -- ---------------------------------------- theOutputConfig :: Selector XIOSysState XIOOutputConfig theOutputConfig = theSysEnv >>> S { getS = xioOutputConfig , setS = \ x s -> s { xioOutputConfig = x} } theIndent :: Selector XIOSysState Bool theIndent = theOutputConfig >>> S { getS = xioIndent , setS = \ x s -> s { xioIndent = x} } theOutputEncoding :: Selector XIOSysState String theOutputEncoding = theOutputConfig >>> S { getS = xioOutputEncoding , setS = \ x s -> s { xioOutputEncoding = x} } theOutputFmt :: Selector XIOSysState XIOXoutConfig theOutputFmt = theOutputConfig >>> S { getS = xioOutputFmt , setS = \ x s -> s { xioOutputFmt = x} } theXmlPi :: Selector XIOSysState Bool theXmlPi = theOutputConfig >>> S { getS = xioXmlPi , setS = \ x s -> s { xioXmlPi = x} } theNoEmptyElemFor :: Selector XIOSysState [String] theNoEmptyElemFor = theOutputConfig >>> S { getS = xioNoEmptyElemFor , setS = \ x s -> s { xioNoEmptyElemFor = x} } theAddDefaultDTD :: Selector XIOSysState Bool theAddDefaultDTD = theOutputConfig >>> S { getS = xioAddDefaultDTD , setS = \ x s -> s { xioAddDefaultDTD = x} } theTextMode :: Selector XIOSysState Bool theTextMode = theOutputConfig >>> S { getS = xioTextMode , setS = \ x s -> s { xioTextMode = x} } theShowTree :: Selector XIOSysState Bool theShowTree = theOutputConfig >>> S { getS = xioShowTree , setS = \ x s -> s { xioShowTree = x} } theShowHaskell :: Selector XIOSysState Bool theShowHaskell = theOutputConfig >>> S { getS = xioShowHaskell , setS = \ x s -> s { xioShowHaskell = x} } -- ---------------------------------------- theRelaxConfig :: Selector XIOSysState XIORelaxConfig theRelaxConfig = theSysEnv >>> S { getS = xioRelaxConfig , setS = \ x s -> s { xioRelaxConfig = x} } theRelaxValidate :: Selector XIOSysState Bool theRelaxValidate = theRelaxConfig >>> S { getS = xioRelaxValidate , setS = \ x s -> s { xioRelaxValidate = x} } theRelaxSchema :: Selector XIOSysState String theRelaxSchema = theRelaxConfig >>> S { getS = xioRelaxSchema , setS = \ x s -> s { xioRelaxSchema = x} } theRelaxCheckRestr :: Selector XIOSysState Bool theRelaxCheckRestr = theRelaxConfig >>> S { getS = xioRelaxCheckRestr , setS = \ x s -> s { xioRelaxCheckRestr = x} } theRelaxValidateExtRef :: Selector XIOSysState Bool theRelaxValidateExtRef = theRelaxConfig >>> S { getS = xioRelaxValidateExtRef , setS = \ x s -> s { xioRelaxValidateExtRef = x} } theRelaxValidateInclude :: Selector XIOSysState Bool theRelaxValidateInclude = theRelaxConfig >>> S { getS = xioRelaxValidateInclude , setS = \ x s -> s { xioRelaxValidateInclude = x} } theRelaxCollectErrors :: Selector XIOSysState Bool theRelaxCollectErrors = theRelaxConfig >>> S { getS = xioRelaxCollectErrors , setS = \ x s -> s { xioRelaxCollectErrors = x} } theRelaxValidator :: Selector XIOSysState (IOSArrow XmlTree XmlTree) theRelaxValidator = theRelaxConfig >>> S { getS = xioRelaxValidator , setS = \ x s -> s { xioRelaxValidator = x} } -- ---------------------------------------- theXmlSchemaConfig :: Selector XIOSysState XIOXmlSchemaConfig theXmlSchemaConfig = theSysEnv >>> S { getS = xioXmlSchemaConfig , setS = \ x s -> s { xioXmlSchemaConfig = x} } theXmlSchemaValidate :: Selector XIOSysState Bool theXmlSchemaValidate = theXmlSchemaConfig >>> S { getS = xioXmlSchemaValidate , setS = \ x s -> s { xioXmlSchemaValidate = x} } theXmlSchemaSchema :: Selector XIOSysState String theXmlSchemaSchema = theXmlSchemaConfig >>> S { getS = xioXmlSchemaSchema , setS = \ x s -> s { xioXmlSchemaSchema = x} } theXmlSchemaValidator :: Selector XIOSysState (IOSArrow XmlTree XmlTree) theXmlSchemaValidator = theXmlSchemaConfig >>> S { getS = xioXmlSchemaValidator , setS = \ x s -> s { xioXmlSchemaValidator = x} } -- ---------------------------------------- theParseConfig :: Selector XIOSysState XIOParseConfig theParseConfig = theSysEnv >>> S { getS = xioParseConfig , setS = \ x s -> s { xioParseConfig = x} } theErrorMsgHandler :: Selector XIOSysState (String -> IO ()) theErrorMsgHandler = theSysEnv >>> S { getS = xioErrorMsgHandler , setS = \ x s -> s { xioErrorMsgHandler = x } } theErrorMsgCollect :: Selector XIOSysState Bool theErrorMsgCollect = theSysEnv >>> S { getS = xioErrorMsgCollect , setS = \ x s -> s { xioErrorMsgCollect = x } } theBaseURI :: Selector XIOSysState String theBaseURI = theSysEnv >>> S { getS = xioBaseURI , setS = \ x s -> s { xioBaseURI = x } } theDefaultBaseURI :: Selector XIOSysState String theDefaultBaseURI = theSysEnv >>> S { getS = xioDefaultBaseURI , setS = \ x s -> s { xioDefaultBaseURI = x } } theTraceLevel :: Selector XIOSysState Int theTraceLevel = theSysEnv >>> S { getS = xioTraceLevel , setS = \ x s -> s { xioTraceLevel = x } } theTraceCmd :: Selector XIOSysState (Int -> String -> IO ()) theTraceCmd = theSysEnv >>> S { getS = xioTraceCmd , setS = \ x s -> s { xioTraceCmd = x } } theTrace :: Selector XIOSysState (Int, Int -> String -> IO ()) theTrace = theTraceLevel .&&&. theTraceCmd theAttrList :: Selector XIOSysState Attributes theAttrList = theSysEnv >>> S { getS = xioAttrList , setS = \ x s -> s { xioAttrList = x } } theMimeTypes :: Selector XIOSysState MimeTypeTable theMimeTypes = theParseConfig >>> S { getS = xioMimeTypes , setS = \ x s -> s { xioMimeTypes = x } } theMimeTypeHandlers :: Selector XIOSysState MimeTypeHandlers theMimeTypeHandlers = theParseConfig >>> S { getS = xioMimeTypeHandlers , setS = \ x s -> s { xioMimeTypeHandlers = x } } theMimeTypeFile :: Selector XIOSysState String theMimeTypeFile = theParseConfig >>> S { getS = xioMimeTypeFile , setS = \ x s -> s { xioMimeTypeFile = x } } theAcceptedMimeTypes :: Selector XIOSysState [String] theAcceptedMimeTypes = theParseConfig >>> S { getS = xioAcceptedMimeTypes , setS = \ x s -> s { xioAcceptedMimeTypes = x } } theFileMimeType :: Selector XIOSysState String theFileMimeType = theParseConfig >>> S { getS = xioFileMimeType , setS = \ x s -> s { xioFileMimeType = x } } theWarnings :: Selector XIOSysState Bool theWarnings = theParseConfig >>> S { getS = xioWarnings , setS = \ x s -> s { xioWarnings = x } } theRemoveWS :: Selector XIOSysState Bool theRemoveWS = theParseConfig >>> S { getS = xioRemoveWS , setS = \ x s -> s { xioRemoveWS = x } } thePreserveComment :: Selector XIOSysState Bool thePreserveComment = theParseConfig >>> S { getS = xioPreserveComment , setS = \ x s -> s { xioPreserveComment = x } } theParseByMimeType :: Selector XIOSysState Bool theParseByMimeType = theParseConfig >>> S { getS = xioParseByMimeType , setS = \ x s -> s { xioParseByMimeType = x } } theParseHTML :: Selector XIOSysState Bool theParseHTML = theParseConfig >>> S { getS = xioParseHTML , setS = \ x s -> s { xioParseHTML = x } } theLowerCaseNames :: Selector XIOSysState Bool theLowerCaseNames = theParseConfig >>> S { getS = xioLowerCaseNames , setS = \ x s -> s { xioLowerCaseNames = x } } theValidate :: Selector XIOSysState Bool theValidate = theParseConfig >>> S { getS = xioValidate , setS = \ x s -> s { xioValidate = x } } theSubstDTDEntities :: Selector XIOSysState Bool theSubstDTDEntities = theParseConfig >>> S { getS = xioSubstDTDEntities , setS = \ x s -> s { xioSubstDTDEntities = x } } theSubstHTMLEntities :: Selector XIOSysState Bool theSubstHTMLEntities = theParseConfig >>> S { getS = xioSubstHTMLEntities , setS = \ x s -> s { xioSubstHTMLEntities = x } } theCheckNamespaces :: Selector XIOSysState Bool theCheckNamespaces = theParseConfig >>> S { getS = xioCheckNamespaces , setS = \ x s -> s { xioCheckNamespaces = x } } theCanonicalize :: Selector XIOSysState Bool theCanonicalize = theParseConfig >>> S { getS = xioCanonicalize , setS = \ x s -> s { xioCanonicalize = x } } theIgnoreNoneXmlContents :: Selector XIOSysState Bool theIgnoreNoneXmlContents = theParseConfig >>> S { getS = xioIgnoreNoneXmlContents , setS = \ x s -> s { xioIgnoreNoneXmlContents = x } } theTagSoup :: Selector XIOSysState Bool theTagSoup = theParseConfig >>> S { getS = xioTagSoup , setS = \ x s -> s { xioTagSoup = x } } theTagSoupParser :: Selector XIOSysState (IOSArrow XmlTree XmlTree) theTagSoupParser = theParseConfig >>> S { getS = xioTagSoupParser , setS = \ x s -> s { xioTagSoupParser = x } } theExpat :: Selector XIOSysState Bool theExpat = theParseConfig >>> S { getS = xioExpat , setS = \ x s -> s { xioExpat = x } } theExpatParser :: Selector XIOSysState (IOSArrow XmlTree XmlTree) theExpatParser = theParseConfig >>> S { getS = xioExpatParser , setS = \ x s -> s { xioExpatParser = x } } theExpatErrors :: Selector XIOSysState (IOSArrow XmlTree XmlTree) theExpatErrors = theSysWriter >>> S { getS = xioExpatErrors , setS = \ x s -> s { xioExpatErrors = x } } -- ---------------------------------------- theCacheConfig :: Selector XIOSysState XIOCacheConfig theCacheConfig = theSysEnv >>> S { getS = xioCacheConfig , setS = \ x s -> s { xioCacheConfig = x} } theBinaryCompression :: Selector XIOSysState (ByteString -> ByteString) theBinaryCompression = theCacheConfig >>> S { getS = xioBinaryCompression , setS = \ x s -> s { xioBinaryCompression = x} } theBinaryDeCompression :: Selector XIOSysState (ByteString -> ByteString) theBinaryDeCompression = theCacheConfig >>> S { getS = xioBinaryDeCompression , setS = \ x s -> s { xioBinaryDeCompression = x} } theWithCache :: Selector XIOSysState Bool theWithCache = theCacheConfig >>> S { getS = xioWithCache , setS = \ x s -> s { xioWithCache = x} } theCacheDir :: Selector XIOSysState String theCacheDir = theCacheConfig >>> S { getS = xioCacheDir , setS = \ x s -> s { xioCacheDir = x} } theDocumentAge :: Selector XIOSysState Int theDocumentAge = theCacheConfig >>> S { getS = xioDocumentAge , setS = \ x s -> s { xioDocumentAge = x} } theCache404Err :: Selector XIOSysState Bool theCache404Err = theCacheConfig >>> S { getS = xioCache404Err , setS = \ x s -> s { xioCache404Err = x} } theCacheRead :: Selector XIOSysState (String -> IOSArrow XmlTree XmlTree) theCacheRead = theCacheConfig >>> S { getS = xioCacheRead , setS = \ x s -> s { xioCacheRead = x} } theStrictDeserialize :: Selector XIOSysState Bool theStrictDeserialize = theCacheConfig >>> S { getS = xioStrictDeserialize , setS = \ x s -> s { xioStrictDeserialize = x} } -- ------------------------------------------------------------ getSysVar :: Selector XIOSysState c -> IOStateArrow s b c getSysVar sel = IOSLA $ \ s _x -> return (s, (:[]) . getS (theSysState >>> sel) $ s) setSysVar :: Selector XIOSysState c -> IOStateArrow s c c setSysVar sel = (\ v -> configSysVar $ setS sel v) $< this chgSysVar :: Selector XIOSysState c -> (b -> c -> c) -> IOStateArrow s b b chgSysVar sel op = (\ v -> configSysVar $ chgS sel (op v)) $< this configSysVar :: SysConfig -> IOStateArrow s c c configSysVar cf = IOSLA $ \ s v -> return (chgS theSysState cf s, [v]) configSysVars :: SysConfigList -> IOStateArrow s c c configSysVars cfs = configSysVar $ foldr (>>>) id $ cfs localSysVar :: Selector XIOSysState c -> IOStateArrow s a b -> IOStateArrow s a b localSysVar sel f = IOSLA $ \ s0 v -> let sel' = theSysState >>> sel in let c0 = getS sel' s0 in do (s1, res) <- runIOSLA f s0 v return (setS sel' c0 s1, res) localSysEnv :: IOStateArrow s a b -> IOStateArrow s a b localSysEnv = localSysVar theSysEnv incrSysVar :: Selector XIOSysState Int -> IOStateArrow s a Int incrSysVar cnt = getSysVar cnt >>> arr (+1) >>> setSysVar cnt >>> arr (\ x -> x - 1) -- ------------------------------ -- | store a string in global state under a given attribute name setSysAttr :: String -> IOStateArrow s String String setSysAttr n = chgSysVar theAttrList (addEntry n) -- | remove an entry in global state, arrow input remains unchanged unsetSysAttr :: String -> IOStateArrow s b b unsetSysAttr n = configSysVar $ chgS theAttrList (delEntry n) -- | read an attribute value from global state getSysAttr :: String -> IOStateArrow s b String getSysAttr n = getSysVar theAttrList >>^ lookup1 n -- | read all attributes from global state getAllSysAttrs :: IOStateArrow s b Attributes getAllSysAttrs = getSysVar theAttrList setSysAttrString :: String -> String -> IOStateArrow s b b setSysAttrString n v = perform ( constA v >>> setSysAttr n ) -- | store an int value in global state setSysAttrInt :: String -> Int -> IOStateArrow s b b setSysAttrInt n v = setSysAttrString n (show v) -- | read an int value from global state -- -- > getSysAttrInt 0 myIntAttr getSysAttrInt :: Int -> String -> IOStateArrow s b Int getSysAttrInt def n = getSysAttr n >>^ toInt def toInt :: Int -> String -> Int toInt def s | not (null s) && all isDigit s = read s | otherwise = def -- ------------------------------------------------------------ hxt-9.3.1.18/src/Text/XML/HXT/Arrow/XmlState/URIHandling.hs0000644000000000000000000002107312474566610021073 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Text.XML.HXT.Arrow.XmlState.URIHandling Copyright : Copyright (C) 2010 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : stable Portability: portable the basic state arrows for URI handling -} -- ------------------------------------------------------------ module Text.XML.HXT.Arrow.XmlState.URIHandling where import Control.Arrow -- arrow classes import Control.Arrow.ArrowList import Control.Arrow.ArrowIf import Control.Arrow.ArrowIO import Control.Monad ( mzero , mplus ) import Text.XML.HXT.Arrow.XmlArrow import Text.XML.HXT.Arrow.XmlState.TypeDefs import Text.XML.HXT.Arrow.XmlState.TraceHandling import Data.Maybe import Network.URI ( URI , escapeURIChar , isUnescapedInURI , nonStrictRelativeTo , parseURIReference , uriAuthority , uriFragment , uriPath , uriPort , uriQuery , uriRegName , uriScheme , uriUserInfo ) import System.Directory ( getCurrentDirectory ) -- ------------------------------------------------------------ -- | set the base URI of a document, used e.g. for reading includes, e.g. external entities, -- the input must be an absolute URI setBaseURI :: IOStateArrow s String String setBaseURI = setSysVar theBaseURI >>> traceValue 2 (("setBaseURI: new base URI is " ++) . show) -- | read the base URI from the globale state getBaseURI :: IOStateArrow s b String getBaseURI = getSysVar theBaseURI >>> ( ( getDefaultBaseURI >>> setBaseURI >>> getBaseURI ) `when` isA null -- set and get it, if not yet done ) -- | change the base URI with a possibly relative URI, can be used for -- evaluating the xml:base attribute. Returns the new absolute base URI. -- Fails, if input is not parsable with parseURIReference -- -- see also: 'setBaseURI', 'mkAbsURI' changeBaseURI :: IOStateArrow s String String changeBaseURI = mkAbsURI >>> setBaseURI -- | set the default base URI, if parameter is null, the system base (@ file:\/\/\/\\/ @) is used, -- else the parameter, must be called before any document is read setDefaultBaseURI :: String -> IOStateArrow s b String setDefaultBaseURI base = ( if null base then arrIO getDir else constA base ) >>> setSysVar theDefaultBaseURI >>> traceValue 2 (("setDefaultBaseURI: new default base URI is " ++) . show) where getDir _ = do cwd <- getCurrentDirectory return ("file://" ++ normalize cwd ++ "/") -- under Windows getCurrentDirectory returns something like: "c:\path\to\file" -- backslaches are not allowed in URIs and paths must start with a / -- so this is transformed into "/c:/path/to/file" normalize wd'@(d : ':' : _) | d `elem` ['A'..'Z'] || d `elem` ['a'..'z'] = '/' : concatMap win32ToUriChar wd' normalize wd' = concatMap escapeNonUriChar wd' win32ToUriChar '\\' = "/" win32ToUriChar c = escapeNonUriChar c escapeNonUriChar c = escapeURIChar isUnescapedInURI c -- from Network.URI -- | get the default base URI getDefaultBaseURI :: IOStateArrow s b String getDefaultBaseURI = getSysVar theDefaultBaseURI -- read default uri in system state >>> ( ( setDefaultBaseURI "" -- set the default uri in system state >>> getDefaultBaseURI ) `when` isA null ) -- when uri not yet set -- ------------------------------------------------------------ -- | remember base uri, run an arrow and restore the base URI, used with external entity substitution runInLocalURIContext :: IOStateArrow s b c -> IOStateArrow s b c runInLocalURIContext f = localSysVar theBaseURI f -- ---------------------------------------------------------- -- | parse a URI reference, in case of a failure, -- try to escape unescaped chars, convert backslashes to slashes for windows paths, -- and try parsing again parseURIReference' :: String -> Maybe URI parseURIReference' uri = parseURIReference uri `mplus` ( if unesc then parseURIReference uri' else mzero ) where unesc = not . all isUnescapedInURI $ uri escape '\\' = "/" escape c = escapeURIChar isUnescapedInURI c uri' = concatMap escape uri -- | compute the absolut URI for a given URI and a base URI expandURIString :: String -> String -> Maybe String expandURIString uri base = do base' <- parseURIReference' base uri' <- parseURIReference' uri -- abs' <- nonStrictRelativeTo uri' base' let abs' = nonStrictRelativeTo uri' base' return $ show abs' -- | arrow variant of 'expandURIString', fails if 'expandURIString' returns Nothing expandURI :: ArrowXml a => a (String, String) String expandURI = arrL (maybeToList . uncurry expandURIString) -- | arrow for expanding an input URI into an absolute URI using global base URI, fails if input is not a legal URI mkAbsURI :: IOStateArrow s String String mkAbsURI = ( this &&& getBaseURI ) >>> expandURI -- | arrow for selecting the scheme (protocol) of the URI, fails if input is not a legal URI. -- -- See Network.URI for URI components getSchemeFromURI :: ArrowList a => a String String getSchemeFromURI = getPartFromURI scheme where scheme = init . uriScheme -- | arrow for selecting the registered name (host) of the URI, fails if input is not a legal URI getRegNameFromURI :: ArrowList a => a String String getRegNameFromURI = getPartFromURI host where host = maybe "" uriRegName . uriAuthority -- | arrow for selecting the port number of the URI without leading \':\', fails if input is not a legal URI getPortFromURI :: ArrowList a => a String String getPortFromURI = getPartFromURI port where port = dropWhile (==':') . maybe "" uriPort . uriAuthority -- | arrow for selecting the user info of the URI without trailing \'\@\', fails if input is not a legal URI getUserInfoFromURI :: ArrowList a => a String String getUserInfoFromURI = getPartFromURI ui where ui = reverse . dropWhile (=='@') . reverse . maybe "" uriUserInfo . uriAuthority -- | arrow for computing the path component of an URI, fails if input is not a legal URI getPathFromURI :: ArrowList a => a String String getPathFromURI = getPartFromURI uriPath -- | arrow for computing the query component of an URI, fails if input is not a legal URI getQueryFromURI :: ArrowList a => a String String getQueryFromURI = getPartFromURI uriQuery -- | arrow for computing the fragment component of an URI, fails if input is not a legal URI getFragmentFromURI :: ArrowList a => a String String getFragmentFromURI = getPartFromURI uriFragment -- | arrow for computing the path component of an URI, fails if input is not a legal URI getPartFromURI :: ArrowList a => (URI -> String) -> a String String getPartFromURI sel = arrL (maybeToList . getPart) where getPart s = do uri <- parseURIReference' s return (sel uri) -- ------------------------------------------------------------ hxt-9.3.1.18/src/Text/XML/HXT/Arrow/XmlState/SystemConfig.hs0000644000000000000000000002510513205353551021370 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Text.XML.HXT.Arrow.XmlState.SystemConfig Copyright : Copyright (C) 2010 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : stable Portability: portable system configuration and common options options -} -- ------------------------------------------------------------ module Text.XML.HXT.Arrow.XmlState.SystemConfig where import Control.Arrow import Data.Map ( insert ) import Text.XML.HXT.DOM.Interface import Text.XML.HXT.Arrow.XmlState.ErrorHandling import Text.XML.HXT.Arrow.XmlState.TypeDefs -- ------------------------------ -- config options -- | @withTrace level@ : system option, set the trace level, (0..4) withTrace :: Int -> SysConfig withTrace = setS theTraceLevel -- | @withSysAttr key value@ : store an arbitrary key value pair in system state withSysAttr :: String -> String -> SysConfig withSysAttr n v = chgS theAttrList (addEntry n v) -- | Specify the set of accepted mime types. -- -- All contents of documents for which the mime type is not found in this list -- are discarded. withAcceptedMimeTypes :: [String] -> SysConfig withAcceptedMimeTypes = setS theAcceptedMimeTypes -- | Specify a content handler for documents of a given mime type withMimeTypeHandler :: String -> IOSArrow XmlTree XmlTree -> SysConfig withMimeTypeHandler mt pa = chgS theMimeTypeHandlers $ insert mt pa -- | @withMimeTypeFile filename@ : input option, -- set the mime type table for @file:@ documents by given file. -- The format of this config file must be in the syntax of a debian linux \"mime.types\" config file withMimeTypeFile :: String -> SysConfig withMimeTypeFile = setS theMimeTypeFile -- | Force a given mime type for all file contents. -- -- The mime type for file access will then not be computed by looking into a mime.types file withFileMimeType :: String -> SysConfig withFileMimeType = setS theFileMimeType -- | @withWarnings yes/no@ : system option, issue warnings during reading, HTML parsing and processing, -- default is 'yes' withWarnings :: Bool -> SysConfig withWarnings = setS theWarnings -- | @withErrors yes/no@ : system option for suppressing error messages, default is 'no' withErrors :: Bool -> SysConfig withErrors b = setS theErrorMsgHandler h where h | b = errorOutputToStderr | otherwise = const $ return () -- | @withRemoveWS yes/no@ : read and write option, remove all whitespace, used for document indentation, default is 'no' withRemoveWS :: Bool -> SysConfig withRemoveWS = setS theRemoveWS -- | @withPreserveComment yes/no@ : read option, preserve comments during canonicalization, default is 'no' withPreserveComment :: Bool -> SysConfig withPreserveComment = setS thePreserveComment -- | @withParseByMimeType yes/no@ : read option, select the parser by the mime type of the document -- (pulled out of the HTTP header). -- -- When the mime type is set to \"text\/html\" -- the configured HTML parser is taken, when it\'s set to -- \"text\/xml\" or \"text\/xhtml\" the configured XML parser is taken. -- If the mime type is something else, no further processing is performed, -- the contents is given back to the application in form of a single text node. -- If the default document encoding is set to isoLatin1, this even enables processing -- of arbitray binary data. withParseByMimeType :: Bool -> SysConfig withParseByMimeType = setS theParseByMimeType -- | @withParseHTML yes/no@: read option, use HTML parser, default is 'no' (use XML parser) withParseHTML :: Bool -> SysConfig withParseHTML = setS theParseHTML -- | @withValidate yes/no@: read option, validate document against DTD, default is 'yes' withValidate :: Bool -> SysConfig withValidate = setS theValidate -- | @withSubstDTDEntities yes/no@: read option, substitute general entities defined in DTD, default is 'yes'. -- switching this option and the validate option off can lead to faster parsing, because then -- there is no need to access the DTD withSubstDTDEntities :: Bool -> SysConfig withSubstDTDEntities = setS theSubstDTDEntities -- | @withSubstHTMLEntities yes/no@: read option, substitute general entities defined in HTML DTD, default is 'no'. -- switching this option on and the substDTDEntities and validate options off can lead to faster parsing -- because there is no need to access a DTD, but still the HTML general entities are substituted withSubstHTMLEntities :: Bool -> SysConfig withSubstHTMLEntities = setS theSubstHTMLEntities -- | @withCheckNamespaces yes/no@: read option, check namespaces, default is 'no' withCheckNamespaces :: Bool -> SysConfig withCheckNamespaces = setS theCheckNamespaces -- | @withCanonicalize yes/no@ : read option, canonicalize document, default is 'yes' withCanonicalize :: Bool -> SysConfig withCanonicalize = setS theCanonicalize -- | @withIgnoreNoneXmlContents yes\/no@ : input option, ignore document contents of none XML\/HTML documents. -- -- This option can be useful for implementing crawler like applications, e.g. an URL checker. -- In those cases net traffic can be reduced. withIgnoreNoneXmlContents :: Bool -> SysConfig withIgnoreNoneXmlContents = setS theIgnoreNoneXmlContents -- ------------------------------------------------------------ -- | @withStrictInput yes/no@ : input option, input of file and HTTP contents is read eagerly, default is 'no' withStrictInput :: Bool -> SysConfig withStrictInput = setS theStrictInput -- | @withEncodingErrors yes/no@ : input option, ignore all encoding errors, default is 'no' withEncodingErrors :: Bool -> SysConfig withEncodingErrors = setS theEncodingErrors -- | @withInputEncoding encodingName@ : input option -- -- Set default document encoding ('utf8', 'isoLatin1', 'usAscii', 'iso8859_2', ... , 'iso8859_16', ...). -- Only XML, HTML and text documents are decoded, -- default decoding for XML\/HTML is utf8, for text iso latin1 (no decoding). withInputEncoding :: String -> SysConfig withInputEncoding = setS theInputEncoding -- | @withDefaultBaseURI URI@ , input option, set the default base URI -- -- This option can be useful when parsing documents from stdin or contained in a string, and interpreting -- relative URIs within the document withDefaultBaseURI :: String -> SysConfig withDefaultBaseURI = setS theDefaultBaseURI withInputOption :: String -> String -> SysConfig withInputOption n v = chgS theInputOptions (addEntry n v) withInputOptions :: Attributes -> SysConfig withInputOptions = foldr (>>>) id . map (uncurry withInputOption) -- | @withRedirect yes/no@ : input option, automatically follow redirected URIs, default is 'yes' withRedirect :: Bool -> SysConfig withRedirect = setS theRedirect -- | @withProxy \"host:port\"@ : input option, configure a proxy for HTTP access, e.g. www-cache:3128 withProxy :: String -> SysConfig withProxy = setS theProxy -- ------------------------------------------------------------ -- | @withIndent yes/no@ : output option, indent document before output, default is 'no' withIndent :: Bool -> SysConfig withIndent = setS theIndent -- | @withOutputEncoding encoding@ , output option, -- default is the default input encoding or utf8, if input encoding is not set withOutputEncoding :: String -> SysConfig withOutputEncoding = setS theOutputEncoding -- | @withOutputXML@ : output option, default writing -- -- Default is writing XML: quote special XML chars \>,\<,\",\',& where neccessary, -- add XML processing instruction -- and encode document with respect to 'withOutputEncoding' withOutputXML :: SysConfig withOutputXML = setS theOutputFmt XMLoutput -- | Write XHTML: quote all special XML chars, use HTML entity refs or char refs for none ASCII chars withOutputHTML :: SysConfig withOutputHTML = setS theOutputFmt HTMLoutput -- | Write XML: quote only special XML chars, don't substitute chars by HTML entities, -- and don\'t generate empty elements for HTML elements, -- which may contain any contents, e.g. @@ instead of @