hxt-9.3.1.1/0000755000000000000000000000000012036750467010666 5ustar0000000000000000hxt-9.3.1.1/LICENSE0000644000000000000000000000212012036750467011666 0ustar0000000000000000The MIT License Copyright (c) 2005 Uwe Schmidt, Martin Schmidt, Torben Kuseler Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. hxt-9.3.1.1/Setup.lhs0000644000000000000000000000015712036750467012501 0ustar0000000000000000#!/usr/bin/runhaskell > module Main where > import Distribution.Simple > main :: IO () > main = defaultMain hxt-9.3.1.1/hxt.cabal0000644000000000000000000001707112036750467012463 0ustar0000000000000000-- arch-tag: Haskell XML Toolbox main description file Name: hxt Version: 9.3.1.1 Synopsis: A collection of tools for processing XML with Haskell. Description: The Haskell XML Toolbox bases on the ideas of HaXml and HXML, but introduces a more general approach for processing XML with Haskell. The Haskell XML Toolbox uses a generic data model for representing XML documents, including the DTD subset and the document subset, in Haskell. It contains a validating XML parser, a HTML parser, namespace support, an XPath expression evaluator, an XSLT library, a RelaxNG schema validator and funtions for serialization and deserialization of user defined data. The library makes extensive use of the arrow approach for processing XML. Since version 9 the toolbox is partitioned into various (sub-)packages. This package contains the core functionality, hxt-curl, hxt-tagsoup, hxt-relaxng, hxt-xpath, hxt-xslt, hxt-regex-xmlschema contain the extensions. hxt-unicode contains encoding and decoding functions, hxt-charproperties char properties for unicode and XML. . Changes from 9.3.1.0: in readString all input decoding switched off . Changes from 9.3.0.1: lower bound for network set to be >= 2.4 . Changes from 9.3.0: upper bound for network set to be < 2.4 (URI signatures changed in 2.4) . Changes from 9.2.2: XMLSchema validation integrated . Changes from 9.2.1: user defined mime type handlers added . Changes from 9.2.0: New warnings from ghc-7.4 removed License: MIT License-file: LICENSE Author: Uwe Schmidt, Martin Schmidt, Torben Kuseler Maintainer: Uwe Schmidt Stability: Stable Category: XML Homepage: http://www.fh-wedel.de/~si/HXmlToolbox/index.html Copyright: Copyright (c) 2005-2012 Uwe Schmidt Build-type: Simple Cabal-version: >=1.8 extra-source-files: examples/arrows/absurls/AbsURIs.hs examples/arrows/absurls/lousy.html examples/arrows/absurls/Makefile examples/arrows/absurls/ProcessDocument.hs examples/arrows/AGentleIntroductionToHXT/.ghci examples/arrows/AGentleIntroductionToHXT/Makefile examples/arrows/AGentleIntroductionToHXT/PicklerExample/Baseball.hs examples/arrows/AGentleIntroductionToHXT/PicklerExample/Makefile examples/arrows/AGentleIntroductionToHXT/PicklerExample/new-simple2.xml examples/arrows/AGentleIntroductionToHXT/PicklerExample/simple2.xml examples/arrows/AGentleIntroductionToHXT/SimpleExamples.hs examples/arrows/dtd2hxt/DTDtoHXT.hs examples/arrows/dtd2hxt/.ghci examples/arrows/dtd2hxt/Makefile examples/arrows/HelloWorld/bye.xml examples/arrows/HelloWorld/HelloWorld.hs examples/arrows/HelloWorld/hello.xml examples/arrows/HelloWorld/Makefile examples/arrows/HelloWorld/Mini.hs examples/arrows/hparser/emptyElements.html examples/arrows/hparser/example1.xml examples/arrows/hparser/example1CRLF.xml examples/arrows/hparser/HXmlParser.hs examples/arrows/hparser/invalid1.xml examples/arrows/hparser/invalid2.rng examples/arrows/hparser/invalid3.rng examples/arrows/hparser/invalid.xml examples/arrows/hparser/lousy.html examples/arrows/hparser/Makefile examples/arrows/hparser/namespace0.xml examples/arrows/hparser/namespace1.xml examples/arrows/hparser/valid1.rng examples/arrows/hparser/valid1.xml examples/arrows/performance/GenDoc.hs examples/arrows/performance/Makefile examples/arrows/pickle/Makefile examples/arrows/pickle/PickleTest.hs examples/xhtml/tmp.xml examples/xhtml/xhtml1-frameset.dtd examples/xhtml/xhtml1-strict.dtd examples/xhtml/xhtml1-transitional.dtd examples/xhtml/xhtml-lat1.ent examples/xhtml/xhtml-special.ent examples/xhtml/xhtml-symbol.ent examples/xhtml/xhtml.xml library exposed-modules: Control.Arrow.ArrowExc, Control.Arrow.ArrowIO, Control.Arrow.ArrowIf, Control.Arrow.ArrowList, Control.Arrow.ArrowNF, Control.Arrow.ArrowNavigatableTree, Control.Arrow.ArrowState, Control.Arrow.ArrowTree, Control.Arrow.IOListArrow, Control.Arrow.IOStateListArrow, Control.Arrow.ListArrow, Control.Arrow.ListArrows, Control.Arrow.NTreeEdit, Control.Arrow.StateListArrow, Control.FlatSeq, Data.AssocList, Data.Atom, Data.Function.Selector, Data.Tree.Class, Data.Tree.NTree.TypeDefs, Data.Tree.NTree.Edit, Data.Tree.NTree.Zipper.TypeDefs, Data.Tree.NavigatableTree.Class, Data.Tree.NavigatableTree.XPathAxis, Text.XML.HXT.Arrow.Binary, Text.XML.HXT.Arrow.DTDProcessing, Text.XML.HXT.Arrow.DocumentInput, Text.XML.HXT.Arrow.DocumentOutput, Text.XML.HXT.Arrow.Edit, Text.XML.HXT.Arrow.GeneralEntitySubstitution, Text.XML.HXT.Arrow.Namespace, Text.XML.HXT.Arrow.ParserInterface, Text.XML.HXT.Arrow.Pickle, Text.XML.HXT.Arrow.Pickle.DTD, Text.XML.HXT.Arrow.Pickle.Schema, Text.XML.HXT.Arrow.Pickle.Xml, Text.XML.HXT.Arrow.ProcessDocument, Text.XML.HXT.Arrow.ReadDocument, Text.XML.HXT.Arrow.WriteDocument, Text.XML.HXT.Arrow.XmlArrow, Text.XML.HXT.Arrow.XmlOptions, Text.XML.HXT.Arrow.XmlRegex, Text.XML.HXT.Arrow.XmlState, Text.XML.HXT.Arrow.XmlState.ErrorHandling, Text.XML.HXT.Arrow.XmlState.MimeTypeTable, Text.XML.HXT.Arrow.XmlState.RunIOStateArrow, Text.XML.HXT.Arrow.XmlState.TraceHandling, Text.XML.HXT.Arrow.XmlState.TypeDefs, Text.XML.HXT.Arrow.XmlState.URIHandling, Text.XML.HXT.Arrow.XmlState.SystemConfig, Text.XML.HXT.Core, Text.XML.HXT.DOM.FormatXmlTree, Text.XML.HXT.DOM.Interface, Text.XML.HXT.DOM.MimeTypeDefaults, Text.XML.HXT.DOM.MimeTypes, Text.XML.HXT.DOM.QualifiedName, Text.XML.HXT.DOM.ShowXml, Text.XML.HXT.DOM.TypeDefs, Text.XML.HXT.DOM.Util, Text.XML.HXT.DOM.XmlKeywords, Text.XML.HXT.DOM.XmlNode, Text.XML.HXT.DTDValidation.AttributeValueValidation, Text.XML.HXT.DTDValidation.DTDValidation, Text.XML.HXT.DTDValidation.DocTransformation, Text.XML.HXT.DTDValidation.DocValidation, Text.XML.HXT.DTDValidation.IdValidation, Text.XML.HXT.DTDValidation.RE, Text.XML.HXT.DTDValidation.TypeDefs, Text.XML.HXT.DTDValidation.Validation, Text.XML.HXT.DTDValidation.XmlRE, Text.XML.HXT.IO.GetFILE, Text.XML.HXT.Parser.HtmlParsec, Text.XML.HXT.Parser.ProtocolHandlerUtil, Text.XML.HXT.Parser.XhtmlEntities, Text.XML.HXT.Parser.XmlCharParser, Text.XML.HXT.Parser.XmlDTDParser, Text.XML.HXT.Parser.XmlDTDTokenParser, Text.XML.HXT.Parser.XmlEntities, Text.XML.HXT.Parser.XmlParsec, Text.XML.HXT.Parser.XmlTokenParser, Text.XML.HXT.XMLSchema.DataTypeLibW3CNames, Text.XML.HXT.Version hs-source-dirs: src ghc-options: -Wall ghc-prof-options: -auto-all -caf-all extensions: MultiParamTypeClasses DeriveDataTypeable FunctionalDependencies FlexibleInstances build-depends: base >= 4 && < 5, containers >= 0.2 && < 1, directory >= 1 && < 2, filepath >= 1 && < 2, parsec >= 2.1 && < 4, HUnit >= 1.2 && < 2, mtl >= 2 && < 3, network >= 2.4 && < 3, deepseq >= 1.1 && < 2, bytestring >= 0.9 && < 1, binary >= 0.5 && < 1, hxt-charproperties >= 9.1 && < 10, hxt-unicode >= 9.0.1 && < 10, hxt-regex-xmlschema >= 9 && < 10 Source-Repository head Type: git Location: git://github.com/UweSchmidt/hxt.git hxt-9.3.1.1/src/0000755000000000000000000000000012036750467011455 5ustar0000000000000000hxt-9.3.1.1/src/Control/0000755000000000000000000000000012036750467013075 5ustar0000000000000000hxt-9.3.1.1/src/Control/FlatSeq.hs0000644000000000000000000000502512036750467014772 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Control.FlatSeq Copyright : Copyright (C) 2011 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe\@fh-wedel.de) Stability : experimental Portability: portable Force evaluation like deepseq in Control.DeepSeq, but control the depth of evaluation. flatseq may evaluate more than seq but less than deepseq -} -- ------------------------------------------------------------ module Control.FlatSeq where import Data.Word -- ------------------------------------------------------------ infixr 0 $!! ($!!) :: WNFData a => (a -> b) -> a -> b f $!! x = rwnf x `seq` f x {-# INLINE ($!!) #-} flatseq :: WNFData a => a -> b -> b flatseq a b = rwnf a `seq` b {-# INLINE flatseq #-} rlnf :: (a -> ()) -> [a] -> () rlnf _ [] = () rlnf r (x:xs) = r x `seq` rlnf r xs {-# INLINE rlnf #-} -- | A class of types that can be partially evaluated, but evaluation can be propagated deeper than WHNF class WNFData a where -- | Default for rwnf is reduction to WHNF rwnf :: a -> () rwnf a = a `seq` () {-# INLINE rwnf #-} -- | Default for rwnf2 is rwnf rwnf2 :: a -> () rwnf2 = rwnf {-# INLINE rwnf2 #-} instance WNFData Int instance WNFData Integer instance WNFData Float instance WNFData Double instance WNFData Char instance WNFData Bool instance WNFData () instance WNFData Word instance WNFData Word8 instance WNFData Word16 instance WNFData Word32 instance WNFData Word64 instance WNFData a => WNFData [a] where rwnf [] = () rwnf (x:xs) = x `seq` rwnf xs {-# INLINE rwnf #-} instance (WNFData a, WNFData b) => WNFData (a,b) where rwnf (x,y) = rwnf x `seq` rwnf y {-# INLINE rwnf #-} instance (WNFData a, WNFData b, WNFData c) => WNFData (a,b,c) where rwnf (x,y,z) = rwnf x `seq` rwnf y `seq` rwnf z {-# INLINE rwnf #-} instance (WNFData a, WNFData b, WNFData c, WNFData d) => WNFData (a,b,c,d) where rwnf (x1,x2,x3,x4) = rwnf x1 `seq` rwnf x2 `seq` rwnf x3 `seq` rwnf x4 {-# INLINE rwnf #-} -- ------------------------------------------------------------ hxt-9.3.1.1/src/Control/Arrow/0000755000000000000000000000000012036750467014167 5ustar0000000000000000hxt-9.3.1.1/src/Control/Arrow/ListArrows.hs0000644000000000000000000000321012036750467016630 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Control.Arrow.ListArrows Copyright : Copyright (C) 2005 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe\@fh-wedel.de) Stability : experimental Portability: portable Module for importing all list arrows -} -- ------------------------------------------------------------ module Control.Arrow.ListArrows ( module Control.Arrow -- arrow classes , module Control.Arrow.ArrowExc , module Control.Arrow.ArrowIf , module Control.Arrow.ArrowIO , module Control.Arrow.ArrowList , module Control.Arrow.ArrowNavigatableTree , module Control.Arrow.ArrowNF , module Control.Arrow.ArrowState , module Control.Arrow.ArrowTree , module Control.Arrow.ListArrow -- arrow types , module Control.Arrow.StateListArrow , module Control.Arrow.IOListArrow , module Control.Arrow.IOStateListArrow , module Control.Arrow.NTreeEdit -- extra arrows ) where import Control.Arrow -- arrow classes import Control.Arrow.ArrowExc import Control.Arrow.ArrowList import Control.Arrow.ArrowIf import Control.Arrow.ArrowNavigatableTree import Control.Arrow.ArrowNF import Control.Arrow.ArrowState import Control.Arrow.ArrowTree import Control.Arrow.ArrowIO import Control.Arrow.ListArrow -- arrow types import Control.Arrow.StateListArrow import Control.Arrow.IOListArrow import Control.Arrow.IOStateListArrow import Control.Arrow.NTreeEdit -- extra arrows -- ------------------------------------------------------------ hxt-9.3.1.1/src/Control/Arrow/ArrowState.hs0000644000000000000000000000456612036750467016631 0ustar0000000000000000{-# LANGUAGE MultiParamTypeClasses, FunctionalDependencies #-} -- ------------------------------------------------------------ {- | Module : Control.Arrow.ArrowState Copyright : Copyright (C) 2005 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe\@fh-wedel.de) Stability : experimental Portability: multi parameter classes and functional depenedencies required Arrows for managing an explicit state State arrows work similar to state monads. A state value is threaded through the application of arrows. -} -- ------------------------------------------------------------ module Control.Arrow.ArrowState ( ArrowState(..) ) where import Control.Arrow -- | The interface for accessing and changing the state component. -- -- Multi parameter classes and functional dependencies are required. class Arrow a => ArrowState s a | a -> s where -- | change the state of a state arrow by applying a function -- for computing a new state from the old and the arrow input. -- Result is the arrow input changeState :: (s -> b -> s) -> a b b -- | access the state with a function using the arrow input -- as data for selecting state components. accessState :: (s -> b -> c) -> a b c -- | read the complete state, ignore arrow input -- -- definition: @ getState = accessState (\\ s x -> s) @ getState :: a b s getState = accessState (\ s _x -> s) {-# INLINE getState #-} -- | overwrite the old state -- -- definition: @ setState = changeState (\\ s x -> x) @ setState :: a s s setState = changeState (\ _s x -> x) -- changeState (const id) {-# INLINE setState #-} -- | change state (and ignore input) and return new state -- -- convenience function, -- usefull for generating e.g. unique identifiers: -- -- example with SLA state list arrows -- -- > newId :: SLA Int b String -- > newId = nextState (+1) -- > >>> -- > arr (('#':) . show) -- > -- > runSLA 0 (newId <+> newId <+> newId) undefined -- > = ["#1", "#2", "#3"] nextState :: (s -> s) -> a b s nextState sf = changeState (\s -> const (sf s)) >>> getState -- ------------------------------------------------------------ hxt-9.3.1.1/src/Control/Arrow/NTreeEdit.hs0000644000000000000000000000245312036750467016352 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Control.Arrow.NTreeEdit Copyright : Copyright (C) 2011 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe\@fh-wedel.de) Stability : experimental Portability: portable arrows for efficient editing of rose trees -} -- ------------------------------------------------------------ module Control.Arrow.NTreeEdit where import Control.Arrow import Control.Arrow.ArrowIf import Control.Arrow.ArrowList import Control.Arrow.ListArrow import Data.Maybe import Data.Tree.NTree.TypeDefs import Data.Tree.NTree.Edit -- ------------------------------------------------------------ -- | Edit parts of a rose tree -- -- The subtrees to be modified are selected by the first part of the IfThen pairs -- The modification by the second part editNTreeA :: [IfThen (LA (NTree b) c) (LA (NTree b) (NTree b))] -> LA (NTree b) (NTree b) editNTreeA cs = arrL $ editNTreeBottomUp ef where ef = listToMaybe . (runLA . foldr (\ (g :-> h) -> ifA g (listA h)) none $ cs) fmapNTreeA :: (b -> Maybe b) -> LA (NTree b) (NTree b) fmapNTreeA f = arr $ mapNTree' f -- eof ------------------------------------------------------------ hxt-9.3.1.1/src/Control/Arrow/ArrowList.hs0000644000000000000000000002775112036750467016465 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Control.Arrow.ArrowList Copyright : Copyright (C) 2005 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe\@fh-wedel.de) Stability : experimental Portability: portable The list arrow class This module defines the interface for list arrows. A list arrow is a function, that gives a list of results for a given argument. A single element result represents a normal function. An empty list oven indicates, the function is undefined for the given argument. The empty list may also represent False, none empty lists True. A list with more than one element gives all results for a nondeterministic function. -} -- ------------------------------------------------------------ module Control.Arrow.ArrowList ( ArrowList(..) ) where import Control.Arrow infixl 8 >>., >. infixl 2 $<, $<<, $<<<, $<<<< infixl 2 $<$ -- ------------------------------------------------------------ -- | The interface for list arrows -- -- Only 'mkA', 'isA' '(>>.)' don't have default implementations class (Arrow a, ArrowPlus a, ArrowZero a, ArrowApply a) => ArrowList a where -- | construction of a 2 argument arrow from a binary function -- | -- | example: @ a1 &&& a2 >>> arr2 f @ arr2 :: (b1 -> b2 -> c) -> a (b1, b2) c arr2 = arr . uncurry {-# INLINE arr2 #-} -- | construction of a 3 argument arrow from a 3-ary function -- | -- | example: @ a1 &&& a2 &&& a3 >>> arr3 f @ arr3 :: (b1 -> b2 -> b3 -> c) -> a (b1, (b2, b3)) c arr3 f = arr (\ ~(x1, ~(x2, x3)) -> f x1 x2 x3) {-# INLINE arr3 #-} -- | construction of a 4 argument arrow from a 4-ary function -- | -- | example: @ a1 &&& a2 &&& a3 &&& a4 >>> arr4 f @ arr4 :: (b1 -> b2 -> b3 -> b4 -> c) -> a (b1, (b2, (b3, b4))) c arr4 f = arr (\ ~(x1, ~(x2, ~(x3, x4))) -> f x1 x2 x3 x4) {-# INLINE arr4 #-} -- | construction of a 2 argument arrow from a singe argument arrow arr2A :: (b -> a c d) -> a (b, c) d arr2A f = first (arr f) >>> app {-# INLINE arr2A #-} -- | constructor for a list arrow from a function with a list as result arrL :: (b -> [c]) -> a b c -- | constructor for a list arrow with 2 arguments arr2L :: (b -> c -> [d]) -> a (b, c) d arr2L = arrL . uncurry {-# INLINE arr2L #-} -- | constructor for a const arrow: @ constA = arr . const @ constA :: c -> a b c constA = arr . const {-# INLINE constA #-} -- | constructor for a const arrow: @ constL = arrL . const @ constL :: [c] -> a b c constL = arrL . const {-# INLINE constL #-} -- | builds an arrow from a predicate. -- If the predicate holds, the single list containing the input is returned, else the empty list isA :: (b -> Bool) -> a b b -- | combinator for converting the result of a list arrow into another list -- -- example: @ foo >>. reverse @ reverses the the result of foo -- -- example: @ foo >>. take 1 @ constructs a deterministic version of foo by deleting all further results (>>.) :: a b c -> ([c] -> [d]) -> a b d -- | combinator for converting the result of an arrow into a single element result (>.) :: a b c -> ([c] -> d ) -> a b d af >. f = af >>. ((:[]) . f) {-# INLINE (>.) #-} -- | combinator for converting an arrow into a determinstic version with all results collected in a single element list -- -- @ listA af = af >>. (:[]) @ -- -- this is useful when the list of results computed by an arrow must be manipulated (e.g. sorted) -- -- example for sorting the results of a filter -- -- > collectAndSort :: a b c -> a b c -- > -- > collectAndSort collect = listA collect >>> arrL sort listA :: a b c -> a b [c] listA af = af >>. (:[]) {-# INLINE listA #-} -- | the inverse of 'listA' -- -- @ listA af >>> unlistA = af @ -- -- unlistA is defined as @ arrL id @ unlistA :: a [b] b unlistA = arrL id {-# INLINE unlistA #-} -- | the identity arrow, alias for returnA this :: a b b this = returnA {-# INLINE this #-} -- | the zero arrow, alias for zeroArrow none :: a b c none = zeroArrow {-# INLINE none #-} -- | converts an arrow, that may fail, into an arrow that always succeeds -- -- example: @ withDefault none \"abc\" @ is equivalent to @ constA \"abc\" @ withDefault :: a b c -> c -> a b c withDefault a d = a >>. \ x -> if null x then [d] else x {-# INLINE withDefault #-} -- | makes a list arrow deterministic, the number of results is at most 1 -- -- definition -- -- > single f = f >>. take 1 -- -- examples with strings: -- -- > runLA ( single none ) "x" == [] -- > runLA ( single this ) "x" == ["x"] -- > runLA ( single -- > (constA "y" -- > <+> this ) ) "x" == ["y"] single :: a b c -> a b c single f = f >>. take 1 -- | compute an arrow from the input and apply the arrow to this input -- -- definition: @ (f &&& this) >>> app @ -- -- in a point free style, there is no way to use an argument in 2 places, -- this is a combinator for simulating this. first the argument is used to compute an arrow, -- then this new arrow is applied to the input -- -- applyA coresponds to: @ apply f x = let g = f x in g x @ -- -- see also: '$<', '$<<', '$<<<', '$<<<<', '$<$' applyA :: a b (a b c) -> a b c applyA f = (f &&& this) >>> app -- | compute the parameter for an arrow with extra parameters from the input -- and apply the arrow for all parameter values to the input -- -- a kind of \"function call\" for arrows, useful for joining arrows -- -- > infixl 2 ($<) -- -- definition: -- -- > g $< f = applyA (f >>> arr g) -- -- if @f@ fails, the whole arrow fails, e.g. @ g \$\< none == none @ -- -- if @f@ computes n values and @g@ is deterministic, the whole arrow computes n values -- -- examples with simple list arrows with strings -- -- > prefixString :: String -> a String String -- > prefixString s = arr (s++) -- > -- > runLA ( prefixString $< none ) "x" == [] -- > runLA ( prefixString $< constA "y" ) "x" == ["yx"] -- > runLA ( prefixString $< this ) "x" == ["xx"] -- > runLA ( prefixString $< constA "y" -- > <+> constA "z" ) "x" == ["yx","zx"] -- > runLA ( prefixString $< constA "y" -- > <+> this -- > <+> constA "z" ) "x" == ["yx","xx","zx"] -- -- see also: 'applyA', '$<<', '$<<<', '$<<<<', '$<$' ($<) :: (c -> a b d) -> a b c -> a b d g $< f = applyA (f >>> arr g) -- | binary version of '$<' -- -- example with simple list arrows with strings -- -- > infixString :: String -> String -> a String String -- > infixString s1 s2 -- > = arr (\ s -> s1 ++ s ++ s2) -- > -- > runLA ( infixString $<< constA "y" &&& constA "z" ) "x" = ["yxz"] -- > runLA ( infixString $<< this &&& this ) "x" = ["xxx"] -- > runLA ( infixString $<< constA "y" -- > &&& (constA "z" <+> this) ) "x" = ["yxz", "yxx"] ($<<) :: (c1 -> c2 -> a b d) -> a b (c1, c2) -> a b d f $<< g = applyA (g >>> arr2 f) -- | version of '$<' for arrows with 3 extra parameters -- -- typical usage -- -- > f $<<< g1 &&& g2 &&& g3 ($<<<) :: (c1 -> c2 -> c3 -> a b d) -> a b (c1, (c2, c3)) -> a b d f $<<< g = applyA (g >>> arr3 f) -- | version of '$<' for arrows with 4 extra parameters -- -- typical usage -- -- > f $<<<< g1 &&& g2 &&& g3 &&& g4 ($<<<<) :: (c1 -> c2 -> c3 -> c4 -> a b d) -> a b (c1, (c2, (c3, c4))) -> a b d f $<<<< g = applyA (g >>> arr4 f) -- | compute the parameter for an arrow @f@ with an extra parameter by an arrow @g@ -- and apply all the results from @g@ sequentially to the input -- -- > infixl 2 ($<$) -- -- typical usage: -- -- > g :: a b c -- > g = ... -- > -- > f :: c -> a b b -- > f x = ... x ... -- > -- > f $<$ g -- -- @f@ computes the extra parameters for @g@ from the input of type @b@ and @g@ is applied with this -- parameter to the input. This allows programming in a point wise style in @g@, which becomes -- neccessary, when a value is needed more than once. -- -- this combinator is useful, when transforming a single value (document) step by step, -- with @g@ for collecting the data for all steps, and @f@ for transforming the input step by step -- -- if @g@ is deterministic (computes exactly one result), -- @ g $\<$ f == g $\< f @ holds -- -- if @g@ fails, @ f $<$ g == this @ -- -- if @g@ computes more than one result, @f@ is applied sequentially to the input for every result from @g@ -- -- examples with simple list arrows with strings -- -- > prefixString :: String -> a String String -- > prefixString s = arr (s++) -- > -- > runLA ( prefixString $<$ none ) "x" == ["x"] -- > runLA ( prefixString $<$ constA "y" ) "x" == ["yx"] -- > runLA ( prefixString $<$ constA "y" <+> constA "z" ) "x" == ["zyx"] -- > runLA ( prefixString $<$ constA "y" <+> this -- > <+> constA "z" ) "x" == ["zxyx"] -- -- example with two extra parameter -- -- > g1 :: a b c1 -- > g2 :: a b c2 -- > -- > f :: (c1, c2) -> a b b -- > f (x1, x2) = ... x1 ... x2 ... -- > -- > f $<$ g1 &&& g2 -- -- see also: 'applyA', '$<' ($<$) :: (c -> (a b b)) -> a b c -> a b b g $<$ f = applyA (listA (f >>> arr g) >>> arr seqA) -- | merge the result pairs of an arrow with type @a a1 (b1, b2)@ -- by combining the tuple components with the @op@ arrow -- -- examples with simple list arrows working on strings and XmlTrees -- -- > a1 :: a String (XmlTree, XmlTree) -- > a1 = selem "foo" [this >>> mkText] -- > &&& -- > selem "bar" [arr (++"0") >>> mkText] -- > -- > runLA (a1 >>> mergeA (<+>) >>> xshow this) "42" == ["42","420"] -- > runLA (a1 >>> mergeA (+=) >>> xshow this) "42" == ["42420"] -- -- see also: 'applyA', '$<' and '+=' in class 'Text.XML.HXT.Arrow.ArrowXml' mergeA :: (a (a1, b1) a1 -> a (a1, b1) b1 -> a (a1, b1) c) -> a (a1, b1) c mergeA op = (\ x -> arr fst `op` constA (snd x)) $< this -- | useful only for arrows with side effects: perform applies an arrow to the input -- ignores the result and returns the input -- -- example: @ ... >>> perform someTraceArrow >>> ... @ perform :: a b c -> a b b perform f = listA f &&& this >>> arr snd {-# INLINE perform #-} -- | generalization of arrow combinator '<+>' -- -- definition: @ catA = foldl (\<+\>) none @ catA :: [a b c] -> a b c catA = foldl (<+>) none {-# INLINE catA #-} -- | generalization of arrow combinator '>>>' -- -- definition: @ seqA = foldl (>>>) this @ seqA :: [a b b] -> a b b seqA = foldl (>>>) this {-# INLINE seqA #-} -- ------------------------------------------------------------ hxt-9.3.1.1/src/Control/Arrow/IOStateListArrow.hs0000644000000000000000000002160112036750467017702 0ustar0000000000000000{-# LANGUAGE MultiParamTypeClasses, FlexibleInstances #-} -- ------------------------------------------------------------ {- | Module : Control.Arrow.IOStateListArrow Copyright : Copyright (C) 2005-8 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe\@fh-wedel.de) Stability : experimental Portability: portable Implementation of arrows with IO and a state -} -- ------------------------------------------------------------ module Control.Arrow.IOStateListArrow ( IOSLA(..) , liftSt , runSt ) where import Prelude hiding (id, (.)) import Control.Category import Control.Arrow import Control.Arrow.ArrowExc import Control.Arrow.ArrowIf import Control.Arrow.ArrowIO import Control.Arrow.ArrowList import Control.Arrow.ArrowNF import Control.Arrow.ArrowTree import Control.Arrow.ArrowNavigatableTree import Control.Arrow.ArrowState import Control.DeepSeq import Control.Exception ( SomeException , try ) {- import qualified Debug.Trace as T -} -- ------------------------------------------------------------ -- | list arrow combined with a state and the IO monad newtype IOSLA s a b = IOSLA { runIOSLA :: s -> a -> IO (s, [b]) } instance Category (IOSLA s) where id = IOSLA $ \ s x -> return (s, [x]) -- don't defined id = arr id, this gives loops during optimization {-# INLINE id #-} IOSLA g . IOSLA f = IOSLA $ \ s x -> do (s1, ys) <- f s x sequence' s1 ys where sequence' s' [] = return (s', []) sequence' s' (x':xs') = do (s1', ys') <- g s' x' (s2', zs') <- sequence' s1' xs' return (s2', ys' ++ zs') instance Arrow (IOSLA s) where arr f = IOSLA $ \ s x -> return (s, [f x]) {-# INLINE arr #-} first (IOSLA f) = IOSLA $ \ s (x1, x2) -> do (s', ys1) <- f s x1 return (s', [ (y1, x2) | y1 <- ys1 ]) -- just for efficiency second (IOSLA g) = IOSLA $ \ s (x1, x2) -> do (s', ys2) <- g s x2 return (s', [ (x1, y2) | y2 <- ys2 ]) -- just for efficiency IOSLA f *** IOSLA g = IOSLA $ \ s (x1, x2) -> do (s1, ys1) <- f s x1 (s2, ys2) <- g s1 x2 return (s2, [ (y1, y2) | y1 <- ys1, y2 <- ys2 ]) -- just for efficiency IOSLA f &&& IOSLA g = IOSLA $ \ s x -> do (s1, ys1) <- f s x (s2, ys2) <- g s1 x return (s2, [ (y1, y2) | y1 <- ys1, y2 <- ys2 ]) instance ArrowZero (IOSLA s) where zeroArrow = IOSLA $ \ s -> const (return (s, [])) {-# INLINE zeroArrow #-} instance ArrowPlus (IOSLA s) where IOSLA f <+> IOSLA g = IOSLA $ \ s x -> do (s1, rs1) <- f s x (s2, rs2) <- g s1 x return (s2, rs1 ++ rs2) instance ArrowChoice (IOSLA s) where left (IOSLA f) = IOSLA $ \ s -> either (\ x -> do (s1, y) <- f s x return (s1, map Left y) ) (\ x -> return (s, [Right x])) right (IOSLA f) = IOSLA $ \ s -> either (\ x -> return (s, [Left x])) (\ x -> do (s1, y) <- f s x return (s1, map Right y) ) instance ArrowApply (IOSLA s) where app = IOSLA $ \ s (IOSLA f, x) -> f s x {-# INLINE app #-} instance ArrowList (IOSLA s) where arrL f = IOSLA $ \ s x -> return (s, (f x)) {-# INLINE arrL #-} arr2A f = IOSLA $ \ s (x, y) -> runIOSLA (f x) s y {-# INLINE arr2A #-} constA c = IOSLA $ \ s -> const (return (s, [c])) {-# INLINE constA #-} isA p = IOSLA $ \ s x -> return (s, if p x then [x] else []) {-# INLINE isA #-} IOSLA f >>. g = IOSLA $ \ s x -> do (s1, ys) <- f s x return (s1, g ys) {-# INLINE (>>.) #-} -- just for efficency perform (IOSLA f) = IOSLA $ \ s x -> do (s1, _ys) <- f s x return (s1, [x]) {-# INLINE perform #-} instance ArrowIf (IOSLA s) where ifA (IOSLA p) ta ea = IOSLA $ \ s x -> do (s1, res) <- p s x runIOSLA ( if null res then ea else ta ) s1 x (IOSLA f) `orElse` g = IOSLA $ \ s x -> do r@(s1, res) <- f s x if null res then runIOSLA g s1 x else return r instance ArrowIO (IOSLA s) where arrIO cmd = IOSLA $ \ s x -> do res <- cmd x return (s, [res]) {-# INLINE arrIO #-} instance ArrowExc (IOSLA s) where tryA f = IOSLA $ \ s x -> do res <- try' $ runIOSLA f s x return $ case res of Left er -> (s, [Left er]) Right (s1, ys) -> (s1, [Right x' | x' <- ys]) where try' :: IO a -> IO (Either SomeException a) try' = try instance ArrowIOIf (IOSLA s) where isIOA p = IOSLA $ \ s x -> do res <- p x return (s, if res then [x] else []) {-# INLINE isIOA #-} instance ArrowState s (IOSLA s) where changeState cf = IOSLA $ \ s x -> let s' = cf s x in return (seq s' s', [x]) {-# INLINE changeState #-} accessState af = IOSLA $ \ s x -> return (s, [af s x]) {-# INLINE accessState #-} -- ------------------------------------------------------------ -- | -- lift the state of an IOSLA arrow to a state with an additional component. -- -- This is uesful, when running predefined IO arrows, e.g. for document input, -- in a context with a more complex state component. liftSt :: IOSLA s1 b c -> IOSLA (s1, s2) b c liftSt (IOSLA f) = IOSLA $ \ (s1, s2) x -> do (s1', ys) <- f s1 x return ((s1', s2), ys) -- | -- run an arrow with augmented state in the context of a simple state arrow. -- An initial value for the new state component is needed. -- -- This is useful, when running an arrow with an extra environment component, e.g. -- for namespace handling in XML. runSt :: s2 -> IOSLA (s1, s2) b c -> IOSLA s1 b c runSt s2 (IOSLA f) = IOSLA $ \ s1 x -> do ((s1', _s2'), ys) <- f (s1, s2) x return (s1', ys) -- ------------------------------------------------------------ instance ArrowTree (IOSLA s) instance ArrowNavigatableTree (IOSLA s) instance ArrowNF (IOSLA s) where rnfA (IOSLA f) = IOSLA $ \ s x -> do res <- f s x ( -- T.trace "start rnfA for IOSLA" $ snd res ) `deepseq` return ( -- T.trace "end rnfA for IOSLA" $ res ) instance ArrowWNF (IOSLA s) -- ------------------------------------------------------------ hxt-9.3.1.1/src/Control/Arrow/ArrowIf.hs0000644000000000000000000001253612036750467016103 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Control.Arrow.ArrowIf Copyright : Copyright (C) 2005 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe\@fh-wedel.de) Stability : experimental Portability: portable Conditionals for List Arrows This module defines conditional combinators for list arrows. The empty list as result represents False, none empty lists True. -} -- ------------------------------------------------------------ module Control.Arrow.ArrowIf ( module Control.Arrow.ArrowIf ) where import Control.Arrow import Control.Arrow.ArrowList import Data.List ( partition ) -- ------------------------------------------------------------ -- | The interface for arrows as conditionals. -- -- Requires list arrows because False is represented as empty list, True as none empty lists. -- -- Only 'ifA' and 'orElse' don't have default implementations class ArrowList a => ArrowIf a where -- | if lifted to arrows ifA :: a b c -> a b d -> a b d -> a b d -- | shortcut: @ ifP p = ifA (isA p) @ ifP :: (b -> Bool) -> a b d -> a b d -> a b d ifP p = ifA (isA p) {-# INLINE ifP #-} -- | negation: @ neg f = ifA f none this @ neg :: a b c -> a b b neg f = ifA f none this {-# INLINE neg #-} -- | @ f \`when\` g @ : when the predicate g holds, f is applied, else the identity filter this when :: a b b -> a b c -> a b b f `when` g = ifA g f this {-# INLINE when #-} -- | shortcut: @ f \`whenP\` p = f \`when\` (isA p) @ whenP :: a b b -> (b -> Bool) -> a b b f `whenP` g = ifP g f this {-# INLINE whenP #-} -- | @ f \`whenNot\` g @ : when the predicate g does not hold, f is applied, else the identity filter this whenNot :: a b b -> a b c -> a b b f `whenNot` g = ifA g this f {-# INLINE whenNot #-} -- | like 'whenP' whenNotP :: a b b -> (b -> Bool) -> a b b f `whenNotP` g = ifP g this f {-# INLINE whenNotP #-} -- | @ g \`guards\` f @ : when the predicate g holds, f is applied, else none guards :: a b c -> a b d -> a b d f `guards` g = ifA f g none {-# INLINE guards #-} -- | like 'whenP' guardsP :: (b -> Bool) -> a b d -> a b d f `guardsP` g = ifP f g none {-# INLINE guardsP #-} -- | shortcut for @ f `guards` this @ filterA :: a b c -> a b b filterA f = ifA f this none {-# INLINE filterA #-} -- | @ f \`containing\` g @ : keep only those results from f for which g holds -- -- definition: @ f \`containing\` g = f >>> g \`guards\` this @ containing :: a b c -> a c d -> a b c f `containing` g = f >>> g `guards` this {-# INLINE containing #-} -- | @ f \`notContaining\` g @ : keep only those results from f for which g does not hold -- -- definition: @ f \`notContaining\` g = f >>> ifA g none this @ notContaining :: a b c -> a c d -> a b c f `notContaining` g = f >>> ifA g none this {-# INLINE notContaining #-} -- | @ f \`orElse\` g @ : directional choice: if f succeeds, the result of f is the result, else g is applied orElse :: a b c -> a b c -> a b c -- | generalisation of 'orElse' for multi way branches like in case expressions. -- -- An auxiliary data type 'IfThen' with an infix constructor ':->' is used for writing multi way branches -- -- example: @ choiceA [ p1 :-> e1, p2 :-> e2, this :-> default ] @ choiceA :: [IfThen (a b c) (a b d)] -> a b d choiceA = foldr ifA' none where ifA' (g :-> f) = ifA g f -- | tag a value with Left or Right, if arrow has success, input is tagged with Left, else with Right tagA :: a b c -> a b (Either b b) tagA p = ifA p (arr Left) (arr Right) -- | split a list value with an arrow and returns a pair of lists. -- This is the arrow version of 'span'. The arrow is deterministic. -- -- example: @ runLA (spanA (isA (\/= \'-\'))) \"abc-def\" @ gives @ [(\"abc\",\"-def\")] @ as result spanA :: a b b -> a [b] ([b],[b]) spanA p = ifA ( arrL (take 1) >>> p ) ( arr head &&& (arr tail >>> spanA p) >>> arr (\ ~(x, ~(xs,ys)) -> (x : xs, ys)) ) ( arr (\ l -> ([],l)) ) -- | partition a list of values into a pair of lists -- -- This is the arrow Version of 'Data.List.partition' partitionA :: a b b -> a [b] ([b],[b]) partitionA p = listA ( arrL id >>> tagA p ) >>^ ( (\ ~(l1, l2) -> (unTag l1, unTag l2) ) . partition (isLeft) ) where isLeft (Left _) = True isLeft _ = False unTag = map (either id id) -- ------------------------------------------------------------ -- | an auxiliary data type for 'choiceA' data IfThen a b = a :-> b -- ------------------------------------------------------------ hxt-9.3.1.1/src/Control/Arrow/ArrowExc.hs0000644000000000000000000000166712036750467016267 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Control.Arrow.ArrowExc Copyright : Copyright (C) 2010 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe\@fh-wedel.de) Stability : experimental Portability: not portable The exception arrow class -} -- ------------------------------------------------------------ module Control.Arrow.ArrowExc ( ArrowExc(..) ) where import Control.Arrow import Control.Arrow.ArrowIO import Control.Exception ( SomeException ) class (Arrow a, ArrowChoice a, ArrowZero a, ArrowIO a) => ArrowExc a where tryA :: a b c -> a b (Either SomeException c) catchA :: a b c -> a SomeException c -> a b c catchA f h = tryA f >>> ( h ||| returnA ) -- ------------------------------------------------------------ hxt-9.3.1.1/src/Control/Arrow/ArrowIO.hs0000644000000000000000000000420212036750467016043 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Control.Arrow.ArrowIO Copyright : Copyright (C) 2005 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe\@fh-wedel.de) Stability : experimental Portability: portable Lifting of IO actions to arrows -} -- ------------------------------------------------------------ module Control.Arrow.ArrowIO ( ArrowIO(..) , ArrowIOIf(..) ) where import Control.Arrow -- | the interface for converting an IO action into an arrow class Arrow a => ArrowIO a where -- | construct an arrow from an IO action arrIO :: (b -> IO c) -> a b c -- | construct an arrow from an IO action without any parameter arrIO0 :: IO c -> a b c arrIO0 f = arrIO (const f) {-# INLINE arrIO0 #-} -- | construction of a 2 argument arrow from a binary IO action -- | -- | example: @ a1 &&& a2 >>> arr2 f @ arrIO2 :: (b1 -> b2 -> IO c) -> a (b1, b2) c arrIO2 f = arrIO (\ ~(x1, x2) -> f x1 x2) {-# INLINE arrIO2 #-} -- | construction of a 3 argument arrow from a 3-ary IO action -- | -- | example: @ a1 &&& a2 &&& a3 >>> arr3 f @ arrIO3 :: (b1 -> b2 -> b3 -> IO c) -> a (b1, (b2, b3)) c arrIO3 f = arrIO (\ ~(x1, ~(x2, x3)) -> f x1 x2 x3) {-# INLINE arrIO3 #-} -- | construction of a 4 argument arrow from a 4-ary IO action -- | -- | example: @ a1 &&& a2 &&& a3 &&& a4 >>> arr4 f @ arrIO4 :: (b1 -> b2 -> b3 -> b4 -> IO c) -> a (b1, (b2, (b3, b4))) c arrIO4 f = arrIO (\ ~(x1, ~(x2, ~(x3, x4))) -> f x1 x2 x3 x4) {-# INLINE arrIO4 #-} -- | the interface for converting an IO predicate into a list arrow class (Arrow a, ArrowIO a) => ArrowIOIf a where -- | builds an arrow from an IO predicate -- -- if the predicate holds, the single list containing the input is returned, else the empty list, -- similar to 'Control.Arrow.ArrowList.isA' isIOA :: (b -> IO Bool) -> a b b -- ------------------------------------------------------------ hxt-9.3.1.1/src/Control/Arrow/StateListArrow.hs0000644000000000000000000001607512036750467017463 0ustar0000000000000000{-# LANGUAGE MultiParamTypeClasses, FlexibleInstances #-} -- ------------------------------------------------------------ {- | Module : Control.Arrow.StateListArrow Copyright : Copyright (C) 2010 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe\@fh-wedel.de) Stability : experimental Portability: portable Implementation of list arrows with a state -} -- ------------------------------------------------------------ module Control.Arrow.StateListArrow ( SLA(..) , fromSLA ) where import Prelude hiding (id, (.)) import Control.Category import Control.Arrow import Control.Arrow.ArrowIf import Control.Arrow.ArrowList import Control.Arrow.ArrowNF import Control.Arrow.ArrowState import Control.Arrow.ArrowTree import Control.Arrow.ArrowNavigatableTree import Control.DeepSeq -- ------------------------------------------------------------ -- | list arrow combined with a state newtype SLA s a b = SLA { runSLA :: s -> a -> (s, [b]) } instance Category (SLA s) where id = SLA $ \ s x -> (s, [x]) {-# INLINE id #-} SLA g . SLA f = SLA $ \ s x -> let ~(s1, ys) = f s x sequence' s' [] = (s', []) sequence' s' (x':xs') = let ~(s1', ys') = g s' x' ~(s2', zs') = sequence' s1' xs' in (s2', ys' ++ zs') in sequence' s1 ys instance Arrow (SLA s) where arr f = SLA $ \ s x -> (s, [f x]) {-# INLINE arr #-} first (SLA f) = SLA $ \ s ~(x1, x2) -> let ~(s', ys1) = f s x1 in (s', [ (y1, x2) | y1 <- ys1 ]) -- just for efficiency second (SLA g) = SLA $ \ s ~(x1, x2) -> let ~(s', ys2) = g s x2 in (s', [ (x1, y2) | y2 <- ys2 ]) -- just for efficiency SLA f *** SLA g = SLA $ \ s ~(x1, x2) -> let ~(s1, ys1) = f s x1 ~(s2, ys2) = g s1 x2 in (s2, [ (y1, y2) | y1 <- ys1, y2 <- ys2 ]) -- just for efficiency SLA f &&& SLA g = SLA $ \ s x -> let ~(s1, ys1) = f s x ~(s2, ys2) = g s1 x in (s2, [ (y1, y2) | y1 <- ys1, y2 <- ys2 ]) instance ArrowZero (SLA s) where zeroArrow = SLA $ \ s -> const (s, []) {-# INLINE zeroArrow #-} instance ArrowPlus (SLA s) where SLA f <+> SLA g = SLA $ \ s x -> let ~(s1, rs1) = f s x ~(s2, rs2) = g s1 x in (s2, rs1 ++ rs2) instance ArrowChoice (SLA s) where left (SLA f) = SLA $ \ s -> let lf x = (s1, map Left y) where ~(s1, y) = f s x rf x = (s, [Right x]) in either lf rf right (SLA f) = SLA $ \ s -> let lf x = (s, [Left x]) rf x = (s1, map Right y) where ~(s1, y) = f s x in either lf rf instance ArrowApply (SLA s) where app = SLA $ \ s (SLA f, x) -> f s x {-# INLINE app #-} instance ArrowList (SLA s) where arrL f = SLA $ \ s x -> (s, (f x)) {-# INLINE arrL #-} arr2A f = SLA $ \ s ~(x, y) -> runSLA (f x) s y {-# INLINE arr2A #-} constA c = SLA $ \ s -> const (s, [c]) {-# INLINE constA #-} isA p = SLA $ \ s x -> (s, if p x then [x] else []) {-# INLINE isA #-} SLA f >>. g = SLA $ \ s x -> let ~(s1, ys) = f s x in (s1, g ys) {-# INLINE (>>.) #-} -- just for efficency perform (SLA f) = SLA $ \ s x -> let ~(s1, _ys) = f s x in (s1, [x]) {-# INLINE perform #-} instance ArrowIf (SLA s) where ifA (SLA p) ta ea = SLA $ \ s x -> let ~(s1, res) = p s x in runSLA ( if null res then ea else ta ) s1 x (SLA f) `orElse` g = SLA $ \ s x -> let r@(s1, res) = f s x in if null res then runSLA g s1 x else r instance ArrowState s (SLA s) where changeState cf = SLA $ \ s x -> (cf s x, [x]) {-# INLINE changeState #-} accessState af = SLA $ \ s x -> (s, [af s x]) {-# INLINE accessState #-} instance ArrowTree (SLA s) instance ArrowNavigatableTree (SLA s) instance ArrowNF (SLA s) where rnfA (SLA f) = SLA $ \ s x -> let res = f s x in snd res `deepseq` res instance ArrowWNF (SLA s) -- ------------------------------------------------------------ -- | conversion of state list arrows into arbitray other -- list arrows. -- -- allows running a state list arrow within another arrow: -- -- example: -- -- > ... >>> fromSLA 0 (... setState ... getState ... ) >>> ... -- -- runs a state arrow with initial state 0 (e..g. an Int) within -- another arrow sequence fromSLA :: ArrowList a => s -> SLA s b c -> a b c fromSLA s f = arrL (snd . (runSLA f s)) {-# INLINE fromSLA #-} -- ------------------------------------------------------------ hxt-9.3.1.1/src/Control/Arrow/ArrowTree.hs0000644000000000000000000003336012036750467016442 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Control.Arrow.ArrowTree Copyright : Copyright (C) 2010 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe\@fh-wedel.de) Stability : stable Portability: portable List arrows for tree processing. Trees that implement the "Data.Tree.Class" interface, can be processed with these arrows. -} -- ------------------------------------------------------------ module Control.Arrow.ArrowTree ( ArrowTree(..) , Tree ) where import Data.Tree.Class (Tree) import qualified Data.Tree.Class as T hiding (Tree) import Control.Arrow import Control.Arrow.ArrowList import Control.Arrow.ArrowIf infixl 5 />, //>, ArrowTree a where -- | construct a leaf mkLeaf :: Tree t => b -> a c (t b) mkLeaf = constA . T.mkLeaf {-# INLINE mkLeaf #-} -- | construct an inner node mkTree :: Tree t => b -> [t b] -> a c (t b) mkTree n = constA . T.mkTree n {-# INLINE mkTree #-} -- | select the children of the root of a tree getChildren :: Tree t => a (t b) (t b) getChildren = arrL T.getChildren {-# INLINE getChildren #-} -- | select the node info of the root of a tree getNode :: Tree t => a (t b) b getNode = arr T.getNode {-# INLINE getNode #-} -- | select the attribute of the root of a tree hasNode :: Tree t => (b -> Bool) -> a (t b) (t b) hasNode p = (getNode >>> isA p) `guards` this {-# INLINE hasNode #-} -- | substitute the children of the root of a tree setChildren :: Tree t => [t b] -> a (t b) (t b) setChildren cs = arr (T.setChildren cs) {-# INLINE setChildren #-} -- | substitute the attribute of the root of a tree setNode :: Tree t => b -> a (t b) (t b) setNode n = arr (T.setNode n) {-# INLINE setNode #-} -- | edit the children of the root of a tree changeChildren :: Tree t => ([t b] -> [t b]) -> a (t b) (t b) changeChildren csf = arr (T.changeChildren csf) {-# INLINE changeChildren #-} -- | edit the attribute of the root of a tree changeNode :: Tree t => (b -> b) -> a (t b) (t b) changeNode nf = arr (T.changeNode nf) {-# INLINE changeNode #-} -- compound arrows -- | apply an arrow element wise to all children of the root of a tree -- collect these results and substitute the children with this result -- -- example: @ processChildren isText @ deletes all subtrees, for which isText does not hold -- -- example: @ processChildren (none \`when\` isCmt) @ removes all children, for which isCmt holds processChildren :: Tree t => a (t b) (t b) -> a (t b) (t b) processChildren f = arr T.getNode &&& listA (arrL T.getChildren >>> f) -- new children, deterministic filter: single element result >>> arr2 T.mkTree -- | similar to processChildren, but the new children are computed by processing -- the whole input tree -- -- example: @ replaceChildren (deep isText) @ selects all subtrees for which isText holds -- and substitutes the children component of the root node with this list replaceChildren :: Tree t => a (t b) (t b) -> a (t b) (t b) replaceChildren f = arr T.getNode &&& listA f -- compute new children >>> arr2 T.mkTree -- | -- pronounced \"slash\", meaning g inside f -- -- defined as @ f \/> g = f >>> getChildren >>> g @ -- -- example: @ hasName \"html\" \/> hasName \"body\" \/> hasName \"h1\" @ -- -- This expression selects -- all \"h1\" elements in the \"body\" element of an \"html\" element, an expression, that -- corresponds 1-1 to the XPath selection path \"html\/body\/h1\" (/>) :: Tree t => a b (t c) -> a (t c) d -> a b d f /> g = f >>> getChildren >>> g {-# INLINE (/>) #-} -- | -- pronounced \"double slash\", meaning g arbitrarily deep inside f -- -- defined as @ f \/\/> g = f >>> getChildren >>> deep g @ -- -- example: @ hasName \"html\" \/\/> hasName \"table\" @ -- -- This expression selects -- all top level \"table\" elements within an \"html\" element, an expression. -- Attantion: This does not correspond -- to the XPath selection path \"html\/\/table\". The latter on matches all table elements -- even nested ones, but @\/\/>@ gives in many cases the appropriate functionality. (//>) :: Tree t => a b (t c) -> a (t c) d -> a b d f //> g = f >>> getChildren >>> deep g {-# INLINE (//>) #-} -- | -- pronounced \"outside\" meaning f containing g -- -- defined as @ f \<\/ g = f \`containing\` (getChildren >>> g) @ ( a (t b) (t b) -> a (t b) (t b) -> a (t b) (t b) f >> g) {-# INLINE ( a (t b) c -> a (t b) c deep f = f -- success when applying f `orElse` (getChildren >>> deep f) -- seach children -- | recursively searches a whole tree for subrees, for which a predicate holds. -- The search is performed bottom up. -- -- example: @ deepest isHtmlTable @ selects all innermost table elements in a document -- but no table elements containing tables. See 'deep' and 'multi' for other search strategies. deepest :: Tree t => a (t b) c -> a (t b) c deepest f = (getChildren >>> deepest f) -- seach children `orElse` f -- no success: apply f to root -- | recursively searches a whole tree for subtrees, for which a predicate holds. -- The search is performed top down. All nodes of the tree are searched, even within the -- subtrees of trees for which the predicate holds. -- -- example: @ multi isHtmlTable @ selects all table elements, even nested ones. multi :: Tree t => a (t b) c -> a (t b) c multi f = f -- combine result for root <+> (getChildren >>> multi f) -- with result for all descendants -- | recursively transforms a whole tree by applying an arrow to all subtrees, -- this is done bottom up depth first, leaves first, root as last tree -- -- example: @ processBottomUp (getChildren \`when\` isHtmlFont) @ removes all font tags in a HTML document, even nested ones -- (with an appropriate definition of isHtmlFont) processBottomUp :: Tree t => a (t b) (t b) -> a (t b) (t b) processBottomUp f = processChildren (processBottomUp f) -- process all descendants first >>> f -- then process root -- | similar to 'processBottomUp', but recursively transforms a whole tree by applying an arrow to all subtrees -- with a top down depth first traversal strategie. In many cases 'processBottomUp' and 'processTopDown' -- give same results. processTopDown :: Tree t => a (t b) (t b) -> a (t b) (t b) processTopDown f = f -- first process root >>> processChildren (processTopDown f) -- then process all descendants of new root -- | recursively transforms a whole tree by applying an arrow to all subtrees, -- but transformation stops when a predicte does not hold for a subtree, -- leaves are transformed first processBottomUpWhenNot :: Tree t => a (t b) (t b) -> a (t b) (t b) -> a (t b) (t b) processBottomUpWhenNot f p = ( processChildren (processBottomUpWhenNot f p) >>> f ) `whenNot` p -- | recursively transforms a whole tree by applying an arrow to all subtrees, -- but transformation stops when a tree is successfully transformed. -- the transformation is done top down -- -- example: @ processTopDownUntil (isHtmlTable \`guards\` tranformTable) @ -- transforms all top level table elements into something else, but inner tables remain unchanged processTopDownUntil :: Tree t => a (t b) (t b) -> a (t b) (t b) processTopDownUntil f = f `orElse` processChildren (processTopDownUntil f) -- | computes a list of trees by applying an arrow to the input -- and inserts this list in front of index i in the list of children -- -- example: @ insertChildrenAt 0 (deep isCmt) @ selects all subtrees for which isCmt holds -- and copies theses in front of the existing children insertChildrenAt :: Tree t => Int -> a (t b) (t b) -> a (t b) (t b) insertChildrenAt i f = listA f &&& this >>> arr2 insertAt where insertAt newcs = T.changeChildren (\ cs -> let (cs1, cs2) = splitAt i cs in cs1 ++ newcs ++ cs2 ) -- | similar to 'insertChildrenAt', but the insertion position is searched with a predicate insertChildrenAfter :: Tree t => a (t b) (t b) -> a (t b) (t b) -> a (t b) (t b) insertChildrenAfter p f = replaceChildren ( ( ( listA getChildren >>> spanA p ) &&& listA f ) >>> arr2L (\ (xs1, xs2) xs -> xs1 ++ xs ++ xs2) ) -- | an arrow for inserting a whole subtree with some holes in it (a template) -- into a document. The holes can be filled with contents from the input. -- -- Example -- -- > insertTreeTemplateTest :: ArrowXml a => a b XmlTree -- > insertTreeTemplateTest -- > = doc -- > >>> -- > insertTreeTemplate template pattern -- > where -- > doc -- the input data -- > = constA "The TitleThe content" -- > >>> xread -- > template -- the output template with 2 holes: xxx and yyy -- > = constA "xxx

yyy

" -- > >>> xread -- > pattern -- > = [ hasText (== "xxx") -- fill the xxx hole with the input contents from element "x/y" -- > :-> ( getChildren >>> hasName "y" >>> deep isText ) -- > -- > , hasText (== "yyy") -- fill the yyy hole with the input contents from element "x/z" -- > :-> ( getChildren >>> hasName "z" >>> getChildren ) -- > ] -- -- computes the XML tree for the following document -- -- > "The Title

The content

" insertTreeTemplate :: Tree t => a (t b) (t b) -> -- the the template [IfThen (a (t b) c) (a (t b) (t b))] -> -- the list of nodes in the template to be substituted a (t b) (t b) insertTreeTemplate template choices = insertTree $< this where insertTree t = template -- swap input and template >>> processTemplate where processTemplate = choiceA choices' -- check whether node is a "hole" within the template `orElse` processChildren processTemplate -- else descent into template tree choices' = map feedTree choices -- modify choices, such that the input is feed into the action arrows feedTree (cond :-> action) = cond :-> (constA t >>> action) -- the real input becomes the input at the holes hxt-9.3.1.1/src/Control/Arrow/ArrowNavigatableTree.hs0000644000000000000000000002640212036750467020577 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Control.Arrow.ArrowNavigatableTree Copyright : Copyright (C) 2010 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe\@fh-wedel.de) Stability : experimental Portability: portable List arrows for navigatable trees Trees that implement the "Data.Tree.NavigatableTree.Class" interface, can be processed with these arrows. -} -- ------------------------------------------------------------ module Control.Arrow.ArrowNavigatableTree where import Control.Arrow import Control.Arrow.ArrowList import Control.Arrow.ArrowIf import Data.Maybe import Data.Tree.NavigatableTree.Class ( NavigatableTree , NavigatableTreeToTree , NavigatableTreeModify ) import qualified Data.Tree.NavigatableTree.Class as T import qualified Data.Tree.NavigatableTree.XPathAxis as T -- ------------------------------------------------------------ -- | The interface for navigatable tree arrows -- -- all functions have default implementations class (ArrowList a) => ArrowNavigatableTree a where -- move one step towards the root moveUp :: NavigatableTree t => a (t b) (t b) moveUp = arrL $ maybeToList . T.mvUp -- descend one step to the leftmost child moveDown :: NavigatableTree t => a (t b) (t b) moveDown = arrL $ maybeToList . T.mvDown -- move to the left neighbour moveLeft :: NavigatableTree t => a (t b) (t b) moveLeft = arrL $ maybeToList . T.mvLeft -- move to the right neighbour moveRight :: NavigatableTree t => a (t b) (t b) moveRight = arrL $ maybeToList . T.mvRight -- derived functions parentAxis :: (ArrowList a, NavigatableTree t) => a (t b) (t b) parentAxis = arrL T.parentAxis -- | XPath axis: ancestor ancestorAxis :: (ArrowList a, NavigatableTree t) => a (t b) (t b) ancestorAxis = arrL T.ancestorAxis -- | XPath axis: ancestor or self ancestorOrSelfAxis :: (ArrowList a, NavigatableTree t) => a (t b) (t b) ancestorOrSelfAxis = arrL T.ancestorOrSelfAxis -- | XPath axis: child childAxis :: (ArrowList a, NavigatableTree t) => a (t b) (t b) childAxis = arrL T.childAxis -- | XPath axis: descendant descendantAxis :: (ArrowList a, NavigatableTree t) => a (t b) (t b) descendantAxis = arrL T.descendantAxis -- | XPath axis: descendant or self descendantOrSelfAxis :: (ArrowList a, NavigatableTree t) => a (t b) (t b) descendantOrSelfAxis = arrL T.descendantOrSelfAxis -- | not an XPath axis but useful: descendant or following descendantOrFollowingAxis :: (ArrowList a, NavigatableTree t) => a (t b) (t b) descendantOrFollowingAxis = descendantAxis <+> followingAxis -- | not an official XPath axis but useful: reverse descendant or self, used in preceding axis revDescendantOrSelfAxis :: (ArrowList a, NavigatableTree t) => a (t b) (t b) revDescendantOrSelfAxis = arrL T.revDescendantOrSelfAxis -- | XPath axis: following sibling followingSiblingAxis :: (ArrowList a, NavigatableTree t) => a (t b) (t b) followingSiblingAxis = arrL T.followingSiblingAxis -- | XPath axis: preceeding sibling precedingSiblingAxis :: (ArrowList a, NavigatableTree t) => a (t b) (t b) precedingSiblingAxis = arrL T.precedingSiblingAxis -- | XPath axis: self selfAxis :: (ArrowList a, NavigatableTree t) => a (t b) (t b) selfAxis = arrL T.selfAxis -- | XPath axis: following followingAxis :: (ArrowList a, NavigatableTree t) => a (t b) (t b) followingAxis = arrL T.followingAxis -- | XPath axis: preceding precedingAxis :: (ArrowList a, NavigatableTree t) => a (t b) (t b) precedingAxis = arrL T.precedingAxis -- ------------------------------------------------------------ -- | move to the root moveToRoot :: (Arrow a, NavigatableTree t) => a (t b) (t b) moveToRoot = arr T.mvToRoot isAtRoot :: (ArrowList a, NavigatableTree t) => a (t b) (t b) isAtRoot = isA (null . T.ancestorAxis) -- ------------------------------------------------------------ -- | Conversion from a tree into a navigatable tree addNav :: ( ArrowList a , NavigatableTreeToTree nt t ) => a (t b) (nt b) addNav = arr T.fromTree -- | Conversion from a navigatable tree into an ordinary tree remNav :: ( ArrowList a , NavigatableTreeToTree nt t ) => a (nt b) (t b) remNav = arr T.toTree -- | apply an operation using navigation to an ordinary tree -- -- This root and all children may be visited in arbitrary order withNav :: ( ArrowList a , NavigatableTreeToTree nt t ) => a (nt b) (nt c) -> a (t b) (t c) withNav f = addNav >>> f >>> remNav -- | apply a simple operation without use of navigation to a navigatable tree -- -- This enables to apply arbitrary tree operations to navigatable trees withoutNav :: ( ArrowList a , NavigatableTreeToTree nt t , NavigatableTreeModify nt t ) => a (t b) (t b) -> a (nt b) (nt b) withoutNav f = ( (remNav >>> f) -- apply the simple arrow to the tree &&& this -- remember the navigation context ) >>> arr (uncurry T.substThisTree) -- resore the context -- ------------------------------------------------------------ -- | Filter an axis with an ordinary tree predicate -- -- Example: In a tree of Ints find all nodes in the subtrees (in preorder) that have label 42 -- -- > descendantAxis >>> filterAxis (hasNode (== 42)) -- -- Example: In an XML Tree find the following nodes of a node with attribute id and value 42 -- -- > descendantAxis >>> filterAxis (hasAttrValue "id" (=="42")) >>> followingAxis filterAxis :: ( ArrowIf a , NavigatableTreeToTree nt t ) => a (t b) c -> a (nt b) (nt b) filterAxis p = (remNav >>> p) `guards` this {-# INLINE filterAxis #-} -- | Move to the next tree on a given axis. Deterministic arrow -- -- Example: Move to the next node in a preorder visit: next child or else next following -- -- > moveOn descendantOrFollowingAxis moveOn :: ( ArrowList a , NavigatableTree t ) => a (t b) (t b) -> a (t b) (t b) moveOn axis = single $ axis {-# INLINE moveOn #-} -- ------------------------------------------------------------ -- | Change the current subtree of a navigatable tree. -- -- The arrow for computing the changes should be deterministic. If it fails -- nothing is changed. changeThisTree :: ( ArrowList a , ArrowIf a , NavigatableTreeToTree nt t , NavigatableTreeModify nt t ) => a (t b) (t b) -> a (nt b) (nt b) changeThisTree cf = withoutNav $ single cf `orElse` this -- | Substitute the current subtree of a navigatable tree by a given tree substThisTree :: ( ArrowList a , ArrowIf a , NavigatableTreeToTree nt t , NavigatableTreeModify nt t ) => t b -> a (nt b) (nt b) substThisTree t = changeThisTree (constA t) -- ------------------------------------------------------------ -- | apply an ordinary arrow to the current subtree of a navigatabe tree and add the result trees in front of the current tree. -- -- If this arrow is applied to the root, it will fail, because we want a tree as result, not a forest. addToTheLeft :: ( ArrowList a , NavigatableTreeToTree nt t , NavigatableTreeModify nt t ) => a (t b) (t b) -> a (nt b) (nt b) addToTheLeft = addToOneSide $ foldl (\ acc t -> acc >>= T.addTreeLeft t) {-# INLINE addToTheLeft #-} -- | apply an ordinary arrow to the current subtree of a navigatabe tree and add the result trees behind the current tree. -- -- If this arrow is applied to the root, it will fail, because we want a tree as result, not a forest. addToTheRight :: ( ArrowList a , NavigatableTreeToTree nt t , NavigatableTreeModify nt t ) => a (t b) (t b) -> a (nt b) (nt b) addToTheRight = addToOneSide $ foldr (\ t acc -> acc >>= T.addTreeRight t) {-# INLINE addToTheRight #-} -- | addToOneSide does the real work for 'addToTheLeft' and 'addToTheRight' addToOneSide :: ( ArrowList a , NavigatableTreeToTree nt t , NavigatableTreeModify nt t ) => ( Maybe (nt b) -> [t b] -> Maybe (nt b) ) -> a (t b) (t b) -> a (nt b) (nt b) addToOneSide side f = ( ( remNav >>> listA f ) &&& this ) >>> arrL ( uncurry (\ ts nt -> side (Just nt) ts) >>> maybeToList ) -- ------------------------------------------------------------ -- | drop the direct left sibling tree of the given navigatable tree -- -- If this arrow is applied to the root or a leftmost tree, it will fail, because there is nothing to remove dropFromTheLeft :: ( ArrowList a -- , NavigatableTreeToTree nt t , NavigatableTreeModify nt t ) => a (nt b) (nt b) dropFromTheLeft = arrL $ T.dropTreeLeft >>> maybeToList {-# INLINE dropFromTheLeft #-} -- | drop the direct left sibling tree of the given navigatable tree -- -- If this arrow is applied to the root or a rightmost tree, it will fail, because there is nothing to remove dropFromTheRight :: ( ArrowList a , NavigatableTreeModify nt t ) => a (nt b) (nt b) dropFromTheRight = arrL $ T.dropTreeRight >>> maybeToList {-# INLINE dropFromTheRight #-} -- ------------------------------------------------------------ hxt-9.3.1.1/src/Control/Arrow/ArrowNF.hs0000644000000000000000000000351112036750467016041 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Control.Arrow.ArrowNF Copyright : Copyright (C) 2011 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe\@fh-wedel.de) Stability : experimental Portability: non-portable Arrows for evaluation of normal form results -} -- ------------------------------------------------------------ module Control.Arrow.ArrowNF where import Control.Arrow import Control.Arrow.ArrowList import Control.DeepSeq import Control.FlatSeq -- | -- complete evaluation of an arrow result using 'Control.DeepSeq' -- -- this is sometimes useful for preventing space leaks, especially after reading -- and validation of a document, all DTD stuff is not longer in use and can be -- recycled by the GC. strictA :: (Arrow a, NFData b) => a b b strictA = arr $ \ x -> deepseq x x class (Arrow a) => ArrowNF a where rnfA :: (NFData c) => a b c -> a b c rnfA f = f >>^ (\ x -> deepseq x x) {-# INLINE rnfA #-} -- | -- partial evaluation of an arrow result using 'Control.FlatSeq' -- -- There are tow arrows with force the partial evaluation. By convention -- the 2. should be less lazy than the 1. -- -- These arrows are sometimes useful for preventing space leaks, especially when parsing -- complex data structures. In many cases the evaluated AST is more space efficient -- than the unevaluaded with a lot of closures. class (Arrow a, ArrowList a) => ArrowWNF a where rwnfA :: (WNFData c) => a b c -> a b c rwnfA f = f >>. \ x -> rlnf rwnf x `seq` x {-# INLINE rwnfA #-} rwnf2A :: (WNFData c) => a b c -> a b c rwnf2A f = f >>. \ x -> rlnf rwnf2 x `seq` x {-# INLINE rwnf2A #-} -- ------------------------------------------------------------ hxt-9.3.1.1/src/Control/Arrow/ListArrow.hs0000644000000000000000000000750612036750467016461 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Control.Arrow.ListArrow Copyright : Copyright (C) 2005 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe\@fh-wedel.de) Stability : experimental Portability: portable Implementation of pure list arrows -} -- ------------------------------------------------------------ module Control.Arrow.ListArrow ( LA(..) , fromLA ) where import Prelude hiding (id, (.)) import Control.Category import Control.Arrow import Control.Arrow.ArrowIf import Control.Arrow.ArrowList import Control.Arrow.ArrowNF import Control.Arrow.ArrowTree import Control.Arrow.ArrowNavigatableTree import Control.DeepSeq import Data.List ( partition ) -- ------------------------------------------------------------ -- | pure list arrow data type newtype LA a b = LA { runLA :: a -> [b] } instance Category LA where id = LA $ (:[]) {-# INLINE id #-} LA g . LA f = LA $ concatMap g . f {-# INLINE (.) #-} instance Arrow LA where arr f = LA $ \ x -> [f x] {-# INLINE arr #-} first (LA f) = LA $ \ ~(x1, x2) -> [ (y1, x2) | y1 <- f x1 ] -- just for efficiency second (LA g) = LA $ \ ~(x1, x2) -> [ (x1, y2) | y2 <- g x2 ] LA f *** LA g = LA $ \ ~(x1, x2) -> [ (y1, y2) | y1 <- f x1, y2 <- g x2] LA f &&& LA g = LA $ \ x -> [ (y1, y2) | y1 <- f x , y2 <- g x ] instance ArrowZero LA where zeroArrow = LA $ const [] {-# INLINE zeroArrow #-} instance ArrowPlus LA where LA f <+> LA g = LA $ \ x -> f x ++ g x {-# INLINE (<+>) #-} instance ArrowChoice LA where left (LA f) = LA $ either (map Left . f) ((:[]) . Right) right (LA f) = LA $ either ((:[]) . Left) (map Right . f) LA f +++ LA g = LA $ either (map Left . f) (map Right . g) LA f ||| LA g = LA $ either f g instance ArrowApply LA where app = LA $ \ (LA f, x) -> f x {-# INLINE app #-} instance ArrowList LA where arrL = LA {-# INLINE arrL #-} arr2A f = LA $ \ ~(x, y) -> runLA (f x) y {-# INLINE arr2A #-} isA p = LA $ \ x -> if p x then [x] else [] {-# INLINE isA #-} LA f >>. g = LA $ g . f {-# INLINE (>>.) #-} withDefault a d = a >>. \ x -> if null x then [d] else x instance ArrowIf LA where ifA (LA p) t e = LA $ \ x -> runLA ( if null (p x) then e else t ) x {-# INLINE ifA #-} (LA f) `orElse` (LA g) = LA $ \ x -> ( let res = f x in if null res then g x else res ) {-# INLINE orElse #-} spanA p = LA $ (:[]) . span (not . null . runLA p) partitionA p = LA $ (:[]) . partition (not . null . runLA p) instance ArrowTree LA instance ArrowNavigatableTree LA instance ArrowNF LA where rnfA (LA f) = LA $ \ x -> let res = f x in res `deepseq` res instance ArrowWNF LA -- ------------------------------------------------------------ -- | conversion of pure list arrows into other possibly more complex -- list arrows fromLA :: ArrowList a => LA b c -> a b c fromLA f = arrL (runLA f) {-# INLINE fromLA #-} -- ------------------------------------------------------------ hxt-9.3.1.1/src/Control/Arrow/IOListArrow.hs0000644000000000000000000001216212036750467016703 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Control.Arrow.IOListArrow Copyright : Copyright (C) 2005 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe\@fh-wedel.de) Stability : experimental Portability: portable Implementation of pure list arrows with IO -} -- ------------------------------------------------------------ module Control.Arrow.IOListArrow ( IOLA(..) ) where import Prelude hiding (id, (.)) import Control.Category import Control.Arrow import Control.Arrow.ArrowExc import Control.Arrow.ArrowIf import Control.Arrow.ArrowIO import Control.Arrow.ArrowList import Control.Arrow.ArrowNF import Control.Arrow.ArrowTree import Control.Arrow.ArrowNavigatableTree import Control.DeepSeq import Control.Exception ( SomeException , try ) -- ------------------------------------------------------------ -- | list arrow combined with IO monad newtype IOLA a b = IOLA { runIOLA :: a -> IO [b] } instance Category IOLA where id = IOLA $ return . (:[]) IOLA g . IOLA f = IOLA $ \ x -> do ys <- f x zs <- sequence . map g $ ys return (concat zs) instance Arrow IOLA where arr f = IOLA $ \ x -> return [f x] first (IOLA f) = IOLA $ \ ~(x1, x2) -> do ys1 <- f x1 return [ (y1, x2) | y1 <- ys1 ] -- just for efficiency second (IOLA g) = IOLA $ \ ~(x1, x2) -> do ys2 <- g x2 return [ (x1, y2) | y2 <- ys2 ] -- just for efficiency IOLA f *** IOLA g = IOLA $ \ ~(x1, x2) -> do ys1 <- f x1 ys2 <- g x2 return [ (y1, y2) | y1 <- ys1, y2 <- ys2 ] -- just for efficiency IOLA f &&& IOLA g = IOLA $ \ x -> do ys1 <- f x ys2 <- g x return [ (y1, y2) | y1 <- ys1, y2 <- ys2 ] instance ArrowZero IOLA where zeroArrow = IOLA $ const (return []) instance ArrowPlus IOLA where IOLA f <+> IOLA g = IOLA $ \ x -> do rs1 <- f x rs2 <- g x return (rs1 ++ rs2) instance ArrowChoice IOLA where left (IOLA f) = IOLA $ either (\ x -> f x >>= (\ y -> return (map Left y))) (return . (:[]) . Right) right (IOLA f) = IOLA $ either (return . (:[]) . Left) (\ x -> f x >>= (\ y -> return (map Right y))) instance ArrowApply IOLA where app = IOLA $ \ (IOLA f, x) -> f x instance ArrowList IOLA where arrL f = IOLA $ \ x -> return (f x) arr2A f = IOLA $ \ ~(x, y) -> runIOLA (f x) y constA c = IOLA $ const (return [c]) isA p = IOLA $ \x -> return (if p x then [x] else []) IOLA f >>. g = IOLA $ \x -> do ys <- f x return (g ys) instance ArrowIf IOLA where ifA (IOLA p) ta ea = IOLA $ \x -> do res <- p x runIOLA (if null res then ea else ta) x (IOLA f) `orElse` g = IOLA $ \x -> do res <- f x if null res then runIOLA g x else return res instance ArrowIO IOLA where arrIO cmd = IOLA $ \x -> do res <- cmd x return [res] instance ArrowExc IOLA where tryA f = IOLA $ \ x -> do res <- try' $ runIOLA f x return $ case res of Left er -> [Left er] Right ys -> [Right x' | x' <- ys] where try' :: IO a -> IO (Either SomeException a) try' = try instance ArrowIOIf IOLA where isIOA p = IOLA $ \x -> do res <- p x return (if res then [x] else []) instance ArrowTree IOLA instance ArrowNavigatableTree IOLA instance ArrowNF IOLA where rnfA (IOLA f) = IOLA $ \ x -> do res <- f x res `deepseq` return res instance ArrowWNF IOLA -- ------------------------------------------------------------ hxt-9.3.1.1/src/Text/0000755000000000000000000000000012036750467012401 5ustar0000000000000000hxt-9.3.1.1/src/Text/XML/0000755000000000000000000000000012036750467013041 5ustar0000000000000000hxt-9.3.1.1/src/Text/XML/HXT/0000755000000000000000000000000012036750467013504 5ustar0000000000000000hxt-9.3.1.1/src/Text/XML/HXT/Core.hs0000644000000000000000000000422612036750467014734 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Text.XML.HXT.Core Copyright : Copyright (C) 2006-2010 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : stable Portability: portable The HXT arrow interface The application programming interface to the arrow modules of the Haskell XML Toolbox. This module exports all important arrows for input, output, parsing, validating and transforming XML. It also exports all basic datatypes and functions of the toolbox. -} -- ------------------------------------------------------------ module Text.XML.HXT.Core ( module Control.Arrow.ListArrows , module Text.XML.HXT.DOM.Interface , module Text.XML.HXT.Arrow.XmlArrow , module Text.XML.HXT.Arrow.XmlState , module Text.XML.HXT.Arrow.DocumentInput , module Text.XML.HXT.Arrow.DocumentOutput , module Text.XML.HXT.Arrow.Edit , module Text.XML.HXT.Arrow.GeneralEntitySubstitution , module Text.XML.HXT.Arrow.Namespace , module Text.XML.HXT.Arrow.Pickle , module Text.XML.HXT.Arrow.ProcessDocument , module Text.XML.HXT.Arrow.ReadDocument , module Text.XML.HXT.Arrow.WriteDocument , module Text.XML.HXT.Arrow.Binary , module Text.XML.HXT.Arrow.XmlOptions , module Text.XML.HXT.Version ) where import Control.Arrow.ListArrows -- arrow classes import Data.Atom () -- import this explicitly import Text.XML.HXT.DOM.Interface import Text.XML.HXT.Arrow.DocumentInput import Text.XML.HXT.Arrow.DocumentOutput import Text.XML.HXT.Arrow.Edit import Text.XML.HXT.Arrow.GeneralEntitySubstitution import Text.XML.HXT.Arrow.Namespace import Text.XML.HXT.Arrow.Pickle import Text.XML.HXT.Arrow.ProcessDocument import Text.XML.HXT.Arrow.ReadDocument import Text.XML.HXT.Arrow.WriteDocument import Text.XML.HXT.Arrow.XmlArrow import Text.XML.HXT.Arrow.XmlOptions import Text.XML.HXT.Arrow.XmlState import Text.XML.HXT.Arrow.XmlRegex () -- import this explicitly import Text.XML.HXT.Arrow.Binary import Text.XML.HXT.Version -- ------------------------------------------------------------ hxt-9.3.1.1/src/Text/XML/HXT/Version.hs0000644000000000000000000000011612036750467015463 0ustar0000000000000000module Text.XML.HXT.Version where hxt_version :: String hxt_version = "9.1.0" hxt-9.3.1.1/src/Text/XML/HXT/Arrow/0000755000000000000000000000000012036750467014576 5ustar0000000000000000hxt-9.3.1.1/src/Text/XML/HXT/Arrow/XmlRegex.hs0000644000000000000000000003227112036750467016672 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Text.XML.HXT.Arrow.XmlRegex Copyright : Copyright (C) 2008 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : experimental Portability: portable Regular Expression Matcher working on lists of XmlTrees It's intended to import this module with an explicit import declaration for not spoiling the namespace with these somewhat special arrows -} -- ------------------------------------------------------------ module Text.XML.HXT.Arrow.XmlRegex ( XmlRegex , mkZero , mkUnit , mkPrim , mkPrim' , mkPrimA , mkDot , mkStar , mkAlt , mkAlts , mkSeq , mkSeqs , mkRep , mkRng , mkOpt , mkPerm , mkPerms , mkMerge , nullable , delta , matchXmlRegex , splitXmlRegex , scanXmlRegex , matchRegexA , splitRegexA , scanRegexA ) where import Control.Arrow.ListArrows import Data.Maybe import Text.XML.HXT.DOM.Interface import Text.XML.HXT.DOM.ShowXml ( xshow ) -- ------------------------------------------------------------ -- the exported regex arrows -- | check whether a sequence of XmlTrees match an Xml regular expression -- -- The arrow for 'matchXmlRegex'. -- -- The expession is build up from simple arrows acting as predicate ('mkPrimA') for -- an XmlTree and of the usual cobinators for sequence ('mkSeq'), repetition -- ('mkStar', mkRep', 'mkRng') and choice ('mkAlt', 'mkOpt') matchRegexA :: XmlRegex -> LA XmlTree XmlTree -> LA XmlTree XmlTrees matchRegexA re ts = ts >>. (\ s -> maybe [s] (const []) . matchXmlRegex re $ s) -- | split the sequence of trees computed by the filter a into -- -- The arrow for 'splitXmlRegex'. -- -- a first part matching the regex and a rest, -- if a prefix of the input sequence does not match the regex, the arrow fails -- else the pair containing the result lists is returned splitRegexA :: XmlRegex -> LA XmlTree XmlTree -> LA XmlTree (XmlTrees, XmlTrees) splitRegexA re ts = ts >>. (maybeToList . splitXmlRegex re) -- | scan the input sequence with a regex and give the result as a list of lists of trees back -- the regex must at least match one input tree, so the empty sequence should not match the regex -- -- The arrow for 'scanXmlRegex'. scanRegexA :: XmlRegex -> LA XmlTree XmlTree -> LA XmlTree XmlTrees scanRegexA re ts = ts >>. (fromMaybe [] . scanXmlRegex re) -- ------------------------------------------------------------ data XmlRegex = Zero String | Unit | Sym (XmlTree -> Bool) String -- optional external repr. of predicate | Dot | Star XmlRegex | Alt XmlRegex XmlRegex | Seq XmlRegex XmlRegex | Rep Int XmlRegex -- 1 or more repetitions | Rng Int Int XmlRegex -- n..m repetitions | Perm XmlRegex XmlRegex | Merge XmlRegex XmlRegex -- ------------------------------------------------------------ {- just for documentation class Inv a where inv :: a -> Bool instance Inv XmlRegex where inv (Zero _) = True inv Unit = True inv (Sym p _) = p holds for some XmlTrees inv Dot = True inv (Star e) = inv e inv (Alt e1 e2) = inv e1 && inv e2 inv (Seq e1 e2) = inv e1 && inv e2 inv (Rep i e) = i > 0 && inv e inv (Rng i j e) = (i < j || (i == j && i > 1)) && inv e inv (Perm e1 e2) = inv e1 && inv e2 -} -- ------------------------------------------------------------ -- -- smart constructors mkZero :: String -> XmlRegex mkZero = Zero mkUnit :: XmlRegex mkUnit = Unit mkPrim :: (XmlTree -> Bool) -> XmlRegex mkPrim p = Sym p "" mkPrim' :: (XmlTree -> Bool) -> String -> XmlRegex mkPrim' = Sym mkPrimA :: LA XmlTree XmlTree -> XmlRegex mkPrimA a = mkPrim (not . null . runLA a) mkDot :: XmlRegex mkDot = Dot mkStar :: XmlRegex -> XmlRegex mkStar (Zero _) = mkUnit -- {}* == () mkStar e@Unit = e -- ()* == () mkStar e@(Star _e1) = e -- (r*)* == r* mkStar (Rep 1 e1) = mkStar e1 -- (r+)* == r* mkStar e@(Alt _ _) = Star (rmStar e) -- (a*|b)* == (a|b)* mkStar e = Star e rmStar :: XmlRegex -> XmlRegex rmStar (Alt e1 e2) = mkAlt (rmStar e1) (rmStar e2) rmStar (Star e1) = rmStar e1 rmStar (Rep 1 e1) = rmStar e1 rmStar e1 = e1 mkAlt :: XmlRegex -> XmlRegex -> XmlRegex mkAlt e1 (Zero _) = e1 -- e1 u {} = e1 mkAlt (Zero _) e2 = e2 -- {} u e2 = e2 mkAlt e1@(Star Dot) _e2 = e1 -- A* u e1 = A* mkAlt _e1 e2@(Star Dot) = e2 -- e1 u A* = A* mkAlt (Sym p1 e1) (Sym p2 e2) = mkPrim' (\ x -> p1 x || p2 x) (e e1 e2) -- melting of predicates where e "" x2 = x2 e x1 "" = x1 e x1 x2 = x1 ++ "|" ++ x2 mkAlt e1 e2@(Sym _ _) = mkAlt e2 e1 -- symmetry: predicates always first mkAlt e1@(Sym _ _) (Alt e2@(Sym _ _) e3) = mkAlt (mkAlt e1 e2) e3 -- prepare melting of predicates mkAlt (Alt e1 e2) e3 = mkAlt e1 (mkAlt e2 e3) -- associativity mkAlt e1 e2 = Alt e1 e2 mkAlts :: [XmlRegex] -> XmlRegex mkAlts = foldr mkAlt (mkZero "") mkSeq :: XmlRegex -> XmlRegex -> XmlRegex mkSeq e1@(Zero _) _e2 = e1 mkSeq _e1 e2@(Zero _) = e2 mkSeq Unit e2 = e2 mkSeq e1 Unit = e1 mkSeq (Seq e1 e2) e3 = mkSeq e1 (mkSeq e2 e3) mkSeq e1 e2 = Seq e1 e2 mkSeqs :: [XmlRegex] -> XmlRegex mkSeqs = foldr mkSeq mkUnit mkRep :: Int -> XmlRegex -> XmlRegex mkRep 0 e = mkStar e mkRep _ e@(Zero _) = e mkRep _ e@Unit = e mkRep i e = Rep i e mkRng :: Int -> Int -> XmlRegex -> XmlRegex mkRng 0 0 _e = mkUnit mkRng 1 1 e = e mkRng lb ub _e | lb > ub = Zero $ "illegal range " ++ show lb ++ ".." ++ show ub mkRng _l _u e@(Zero _) = e mkRng _l _u e@Unit = e mkRng lb ub e = Rng lb ub e mkOpt :: XmlRegex -> XmlRegex mkOpt = mkRng 0 1 mkPerm :: XmlRegex -> XmlRegex -> XmlRegex mkPerm e1@(Zero _) _ = e1 mkPerm _ e2@(Zero _) = e2 mkPerm Unit e2 = e2 mkPerm e1 Unit = e1 mkPerm e1 e2 = Perm e1 e2 mkPerms :: [XmlRegex] -> XmlRegex mkPerms = foldr mkPerm mkUnit mkMerge :: XmlRegex -> XmlRegex -> XmlRegex mkMerge e1@(Zero _) _ = e1 mkMerge _ e2@(Zero _) = e2 mkMerge Unit e2 = e2 mkMerge e1 Unit = e1 mkMerge e1 e2 = Merge e1 e2 -- ------------------------------------------------------------ instance Show XmlRegex where show (Zero s) = "{err:" ++ s ++ "}" show Unit = "()" show (Sym _p "") = "" show (Sym _p r ) = r show Dot = "." show (Star e) = "(" ++ show e ++ ")*" show (Alt e1 e2) = "(" ++ show e1 ++ "|" ++ show e2 ++ ")" show (Seq e1 e2) = show e1 ++ show e2 show (Rep 1 e) = "(" ++ show e ++ ")+" show (Rep i e) = "(" ++ show e ++ "){" ++ show i ++ ",}" show (Rng 0 1 e) = "(" ++ show e ++ ")?" show (Rng i j e) = "(" ++ show e ++ "){" ++ show i ++ "," ++ show j ++ "}" show (Perm e1 e2) = "(" ++ show e1 ++ show e2 ++ "|" ++ show e2 ++ show e1 ++ ")" show (Merge e1 e2) = "(" ++ show e1 ++ "&" ++ show e2 ++ ")" -- ------------------------------------------------------------ unexpected :: XmlTree -> String -> String unexpected t e = emsg e ++ (cut 80 . xshow) [t] where emsg "" = "unexpected: " emsg s = "expected: " ++ s ++ ", but got: " cut n s | null rest = s' | otherwise = s' ++ "..." where (s', rest) = splitAt n s -- ------------------------------------------------------------ nullable :: XmlRegex -> Bool nullable (Zero _) = False nullable Unit = True nullable (Sym _p _) = False -- assumption: p holds for at least one tree nullable Dot = False nullable (Star _) = True nullable (Alt e1 e2) = nullable e1 || nullable e2 nullable (Seq e1 e2) = nullable e1 && nullable e2 nullable (Rep _i e) = nullable e nullable (Rng i _ e) = i == 0 || nullable e nullable (Perm e1 e2) = nullable e1 && nullable e2 nullable (Merge e1 e2) = nullable e1 && nullable e2 -- ------------------------------------------------------------ delta :: XmlRegex -> XmlTree -> XmlRegex delta e@(Zero _) _ = e delta Unit c = mkZero $ unexpected c "" delta (Sym p e) c | p c = mkUnit | otherwise = mkZero $ unexpected c e delta Dot _ = mkUnit delta e@(Star e1) c = mkSeq (delta e1 c) e delta (Alt e1 e2) c = mkAlt (delta e1 c) (delta e2 c) delta (Seq e1 e2) c | nullable e1 = mkAlt (mkSeq (delta e1 c) e2) (delta e2 c) | otherwise = mkSeq (delta e1 c) e2 delta (Rep i e) c = mkSeq (delta e c) (mkRep (i-1) e) delta (Rng i j e) c = mkSeq (delta e c) (mkRng ((i-1) `max` 0) (j-1) e) delta (Perm e1 e2) c = case e1' of (Zero _) -> mkPerm e1 (delta e2 c) _ -> mkPerm e1' e2 where e1' = delta e1 c delta (Merge e1 e2) c = mkAlt (mkMerge (delta e1 c) e2) (mkMerge e1 (delta e2 c)) -- ------------------------------------------------------------ delta' :: XmlRegex -> XmlTrees -> XmlRegex delta' = foldl delta -- | match a sequence of XML trees with a regular expression over trees -- -- If the input matches, the result is Nothing, else Just an error message is returned matchXmlRegex :: XmlRegex -> XmlTrees -> Maybe String matchXmlRegex e = res . delta' e where res (Zero er) = Just er res re | nullable re = Nothing -- o.k. | otherwise = Just $ "input does not match " ++ show e -- ------------------------------------------------------------ -- | split a sequence of XML trees into a pair of a a matching prefix and a rest -- -- If there is no matching prefix, Nothing is returned splitXmlRegex :: XmlRegex -> XmlTrees -> Maybe (XmlTrees, XmlTrees) splitXmlRegex re = splitXmlRegex' re [] splitXmlRegex' :: XmlRegex -> XmlTrees -> XmlTrees -> Maybe (XmlTrees, XmlTrees) splitXmlRegex' re res [] | nullable re = Just (reverse res, []) | otherwise = Nothing splitXmlRegex' (Zero _) _ _ = Nothing splitXmlRegex' re res xs@(x:xs') | isJust res' = res' | nullable re = Just (reverse res, xs) | otherwise = Nothing where re' = delta re x res' = splitXmlRegex' re' (x:res) xs' -- ------------------------------------------------------------ -- | scan a sequence of XML trees and split it into parts matching the given regex -- -- If the parts cannot be split because of a missing match, or because of the -- empty sequence as match, Nothing is returned scanXmlRegex :: XmlRegex -> XmlTrees -> Maybe [XmlTrees] scanXmlRegex re ts = scanXmlRegex' re (splitXmlRegex re ts) scanXmlRegex' :: XmlRegex -> Maybe (XmlTrees, XmlTrees) -> Maybe [XmlTrees] scanXmlRegex' _ Nothing = Nothing scanXmlRegex' _ (Just (rs, [])) = Just [rs] scanXmlRegex' _ (Just ([], _)) = Nothing -- re is nullable (the empty word matches), nothing split off -- would give infinite list of empty lists scanXmlRegex' re (Just (rs, rest)) | isNothing res = Nothing | otherwise = Just (rs : fromJust res) where res = scanXmlRegex' re (splitXmlRegex re rest) -- ------------------------------------------------------------ hxt-9.3.1.1/src/Text/XML/HXT/Arrow/DocumentInput.hs0000644000000000000000000003446412036750467017743 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Text.XML.HXT.Arrow.DocumentInput Copyright : Copyright (C) 2005 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : experimental Portability: portable State arrows for document input -} -- ------------------------------------------------------------ module Text.XML.HXT.Arrow.DocumentInput ( getXmlContents , getXmlEntityContents , getEncoding , getTextEncoding , decodeDocument , addInputError ) where import Control.Arrow -- arrow classes import Control.Arrow.ArrowList import Control.Arrow.ArrowIf import Control.Arrow.ArrowTree import Control.Arrow.ArrowIO import Control.Arrow.ListArrow import Data.List ( isPrefixOf ) import Data.String.Unicode ( getDecodingFct , guessEncoding , normalizeNL ) import System.FilePath ( takeExtension ) import qualified Text.XML.HXT.IO.GetFILE as FILE import Text.XML.HXT.DOM.Interface import Text.XML.HXT.Arrow.ParserInterface ( parseXmlDocEncodingSpec , parseXmlEntityEncodingSpec , removeEncodingSpec ) import Text.XML.HXT.Arrow.XmlArrow import Text.XML.HXT.Arrow.XmlState import Text.XML.HXT.Arrow.XmlState.TypeDefs -- ---------------------------------------------------------- protocolHandlers :: AssocList String (IOStateArrow s XmlTree XmlTree) protocolHandlers = [ ("file", getFileContents) , ("http", getHttpContents) , ("stdin", getStdinContents) ] getProtocolHandler :: IOStateArrow s String (IOStateArrow s XmlTree XmlTree) getProtocolHandler = arr (\ s -> lookupDef getUnsupported s protocolHandlers) getUnsupported :: IOStateArrow s XmlTree XmlTree getUnsupported = perform ( getAttrValue a_source >>> arr (("unsupported protocol in URI " ++) . show) >>> applyA (arr issueFatal) ) >>> setDocumentStatusFromSystemState "accessing documents" getStringContents :: IOStateArrow s XmlTree XmlTree getStringContents = setCont $< getAttrValue a_source >>> addAttr transferMessage "OK" >>> addAttr transferStatus "200" where setCont contents = replaceChildren (txt contents') >>> addAttr transferURI (take 7 contents) -- the "string:" prefix is stored, this is required by setBaseURIFromDoc >>> addAttr a_source (show . prefix 48 $ contents') -- a quoted prefix of the content, max 48 chars is taken as source name where contents' = drop (length stringProtocol) contents prefix l s | length s' > l = take (l - 3) s' ++ "..." | otherwise = s' where s' = take (l + 1) s getFileContents :: IOStateArrow s XmlTree XmlTree getFileContents = applyA ( ( getSysVar theStrictInput &&& ( getAttrValue transferURI >>> getPathFromURI ) ) >>> traceValue 2 (\ (b, f) -> "read file " ++ show f ++ " (strict input = " ++ show b ++ ")") >>> arrIO (uncurry FILE.getCont) >>> ( arr (uncurry addInputError) -- io error occured ||| arr addTxtContent -- content read ) ) >>> addMimeType getStdinContents :: IOStateArrow s XmlTree XmlTree getStdinContents = applyA ( getSysVar theStrictInput >>> arrIO FILE.getStdinCont >>> ( arr (uncurry addInputError) -- io error occured ||| arr addTxtContent -- content read ) ) addInputError :: Attributes -> String -> IOStateArrow s XmlTree XmlTree addInputError al e = issueFatal e >>> seqA (map (uncurry addAttr) al) >>> setDocumentStatusFromSystemState "accessing documents" addMimeType :: IOStateArrow s XmlTree XmlTree addMimeType = addMime $< ( ( getSysVar theFileMimeType >>> isA (not . null) ) `orElse` ( getAttrValue transferURI >>> ( uriToMime $< getMimeTypeTable ) ) ) where addMime mt = addAttr transferMimeType mt uriToMime mtt = arr $ ( \ uri -> extensionToMimeType (drop 1 . takeExtension $ uri) mtt ) addTxtContent :: Blob -> IOStateArrow s XmlTree XmlTree addTxtContent bc = replaceChildren (blb bc) >>> addAttr transferMessage "OK" >>> addAttr transferStatus "200" getHttpContents :: IOStateArrow s XmlTree XmlTree getHttpContents = withoutUserState $ applyA $ getSysVar theHttpHandler getContentsFromString :: IOStateArrow s XmlTree XmlTree getContentsFromString = ( getAttrValue a_source >>> isA (isPrefixOf stringProtocol) ) `guards` getStringContents getContentsFromDoc :: IOStateArrow s XmlTree XmlTree getContentsFromDoc = ( ( addTransferURI $< getBaseURI >>> getCont ) `when` ( setAbsURI $< ( getAttrValue a_source >>^ ( \ src-> (if null src then "stdin:" else src) ) -- empty document name -> read from stdin ) ) ) >>> setDocumentStatusFromSystemState "getContentsFromDoc" where setAbsURI src = ifA ( constA src >>> changeBaseURI ) this ( issueFatal ("illegal URI : " ++ show src) ) addTransferURI uri = addAttr transferURI uri getCont = applyA ( getBaseURI -- compute the handler and call it >>> traceValue 2 (("getContentsFromDoc: reading " ++) . show) >>> getSchemeFromURI >>> getProtocolHandler ) `orElse` this -- don't change tree, when no handler can be found setBaseURIFromDoc :: IOStateArrow s XmlTree XmlTree setBaseURIFromDoc = perform ( getAttrValue transferURI >>> isA (isPrefixOf stringProtocol) -- do not change base URI when reading from a string >>> setBaseURI ) {- | Read the content of a document. This routine is usually called from 'Text.XML.HXT.Arrow.ProcessDocument.getDocumentContents'. The input must be a root node (constructed with 'Text.XML.HXT.Arrow.XmlArrow.root'), usually without children. The attribute list contains all input parameters, e.g. URI or source file name, encoding preferences, ... If the source name is empty, the input is read from standard input. The source is transformed into an absolute URI. If the source is a relative URI, or a file name, it is expanded into an absolut URI with respect to the current base URI. The default base URI is of protocol \"file\" and points to the current working directory. The currently supported protocols are \"http\", \"file\", \"stdin\" and \"string\". The latter two are internal protocols. An uri of the form \"stdin:\" stands for the content of the standard input stream. \"string:some text\" means, that \"some text\" is taken as input. This internal protocol is used for reading from normal 'String' values. -} getXmlContents :: IOStateArrow s XmlTree XmlTree getXmlContents = getXmlContents' parseXmlDocEncodingSpec >>> setBaseURIFromDoc getXmlEntityContents :: IOStateArrow s XmlTree XmlTree getXmlEntityContents = traceMsg 2 "getXmlEntityContents" >>> addAttr transferMimeType text_xml_external_parsed_entity -- the default transfer mimetype >>> getXmlContents' parseXmlEntityEncodingSpec >>> addAttr transferMimeType text_xml_external_parsed_entity >>> processChildren ( removeEncodingSpec >>> changeText normalizeNL -- newline normalization must be done here ) -- the following calls of the parsers don't do this >>> setBaseURIFromDoc >>> traceMsg 2 "getXmlEntityContents done" getXmlContents' :: IOStateArrow s XmlTree XmlTree -> IOStateArrow s XmlTree XmlTree getXmlContents' parseEncodingSpec = ( getContentsFromString -- no decoding done for string: protocol `orElse` ( getContentsFromDoc >>> choiceA [ isXmlHtmlDoc :-> ( parseEncodingSpec >>> filterErrorMsg >>> decodeDocument ) , isTextDoc :-> decodeDocument , this :-> this ] >>> perform ( getAttrValue transferURI >>> traceValue 1 (("getXmlContents: content read and decoded for " ++) . show) ) >>> traceDoc "getXmlContents'" ) ) `when` isRoot isMimeDoc :: (String -> Bool) -> IOStateArrow s XmlTree XmlTree isMimeDoc isMT = fromLA $ ( ( getAttrValue transferMimeType >>^ stringToLower ) >>> isA (\ t -> null t || isMT t) ) `guards` this isTextDoc, isXmlHtmlDoc :: IOStateArrow s XmlTree XmlTree isTextDoc = isMimeDoc isTextMimeType isXmlHtmlDoc = isMimeDoc (\ mt -> isHtmlMimeType mt || isXmlMimeType mt) -- ------------------------------------------------------------ getEncoding :: IOStateArrow s XmlTree String getEncoding = catA [ xshow getChildren -- 1. guess: guess encoding by looking at the first few bytes >>> arr guessEncoding , getAttrValue transferEncoding -- 2. guess: take the transfer encoding , getAttrValue a_encoding -- 3. guess: take encoding parameter in root node , getSysVar theInputEncoding -- 4. guess: take encoding parameter in global state , constA utf8 -- default : utf8 ] >. (head . filter (not . null)) -- make the filter deterministic: take 1. entry from list of guesses getTextEncoding :: IOStateArrow s XmlTree String getTextEncoding = catA [ getAttrValue transferEncoding -- 1. guess: take the transfer encoding , getAttrValue a_encoding -- 2. guess: take encoding parameter in root node , getSysVar theInputEncoding -- 3. guess: take encoding parameter in global state , constA isoLatin1 -- default : no encoding ] >. (head . filter (not . null)) -- make the filter deterministic: take 1. entry from list of guesses decodeDocument :: IOStateArrow s XmlTree XmlTree decodeDocument = choiceA [ ( isRoot >>> isXmlHtmlDoc ) :-> ( decodeX $< getSysVar theExpat) , ( isRoot >>> isTextDoc ) :-> ( decodeArr $< getTextEncoding ) , this :-> this ] where decodeX :: Bool -> IOStateArrow s XmlTree XmlTree decodeX False = decodeArr $< getEncoding decodeX True = noDecode $< getEncoding -- parse with expat noDecode enc = traceMsg 2 ("no decoding (done by expat): encoding is " ++ show enc) >>> addAttr transferEncoding enc decodeArr :: String -> IOStateArrow s XmlTree XmlTree decodeArr enc = maybe notFound found . getDecodingFct $ enc where found df = traceMsg 2 ("decodeDocument: encoding is " ++ show enc) >>> ( decodeText df $< getSysVar theEncodingErrors ) >>> addAttr transferEncoding enc notFound = issueFatal ("encoding scheme not supported: " ++ show enc) >>> setDocumentStatusFromSystemState "decoding document" {- just for performance test decodeText _ _ = this -} decodeText df withEncErrors = processChildren ( getText -- get the document content -- the following 3 lines -- don't seem to raise the space problem in decodeText -- space is allocated in blobToString and in parsec >>> arr df -- decode the text, result is (string, [errMsg]) >>> ( ( fst ^>> mkText ) -- take decoded string and build text node <+> ( if withEncErrors then ( arrL snd -- take the error messages >>> arr ((enc ++) . (" encoding error" ++)) -- prefix with enc error >>> applyA (arr issueErr) -- build issueErr arrow and apply >>> none -- neccessary for type match with <+> ) else none ) ) ) -- ------------------------------------------------------------ hxt-9.3.1.1/src/Text/XML/HXT/Arrow/XmlState.hs0000644000000000000000000000675412036750467016707 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Text.XML.HXT.Arrow.XmlState Copyright : Copyright (C) 2010 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : stable Portability: portable the interface for the basic state maipulation functions -} -- ------------------------------------------------------------ module Text.XML.HXT.Arrow.XmlState ( -- * Data Types XIOState , XIOSysState , IOStateArrow , IOSArrow , SysConfig , SysConfigList , -- * User State Manipulation getUserState , setUserState , changeUserState , withExtendedUserState , withOtherUserState , withoutUserState , -- * Run IO State arrows runX , -- * Global System State Configuration and Access configSysVars , setSysAttr , unsetSysAttr , getSysAttr , getAllSysAttrs , setSysAttrString , setSysAttrInt , getSysAttrInt , getConfigAttr , -- * Error Handling clearErrStatus , setErrStatus , getErrStatus , setErrMsgStatus , setErrorMsgHandler , errorMsgStderr , errorMsgCollect , errorMsgStderrAndCollect , errorMsgIgnore , getErrorMessages , filterErrorMsg , issueWarn , issueErr , issueFatal , issueExc , setDocumentStatus , setDocumentStatusFromSystemState , documentStatusOk , -- * Tracing setTraceLevel , getTraceLevel , withTraceLevel , setTraceCmd , getTraceCmd , trace , traceMsg , traceValue , traceString , traceSource , traceTree , traceDoc , -- * Document Base setBaseURI , getBaseURI , changeBaseURI , setDefaultBaseURI , getDefaultBaseURI , runInLocalURIContext , -- * URI Manipulation expandURIString , expandURI , mkAbsURI , getFragmentFromURI , getPathFromURI , getPortFromURI , getQueryFromURI , getRegNameFromURI , getSchemeFromURI , getUserInfoFromURI , -- * Mime Type Handling getMimeTypeTable , setMimeTypeTable , setMimeTypeTableFromFile , -- * System Configuration and Options yes , no , withAcceptedMimeTypes , withAddDefaultDTD , withSysAttr , withCanonicalize , withCompression , withCheckNamespaces , withDefaultBaseURI , withStrictDeserialize , withEncodingErrors , withErrors , withFileMimeType , withIgnoreNoneXmlContents , withIndent , withInputEncoding , withInputOption , withInputOptions , withMimeTypeFile , withMimeTypeHandler , withNoEmptyElemFor , withXmlPi , withOutputEncoding , withOutputXML , withOutputHTML , withOutputXHTML , withOutputPLAIN , withParseByMimeType , withParseHTML , withPreserveComment , withProxy , withRedirect , withRemoveWS , withShowHaskell , withShowTree , withStrictInput , withSubstDTDEntities , withSubstHTMLEntities , withTextMode , withTrace , withValidate , withWarnings ) where import Text.XML.HXT.Arrow.XmlState.ErrorHandling import Text.XML.HXT.Arrow.XmlState.MimeTypeTable import Text.XML.HXT.Arrow.XmlState.RunIOStateArrow import Text.XML.HXT.Arrow.XmlState.SystemConfig import Text.XML.HXT.Arrow.XmlState.TraceHandling import Text.XML.HXT.Arrow.XmlState.TypeDefs import Text.XML.HXT.Arrow.XmlState.URIHandling -- ------------------------------------------------------------ hxt-9.3.1.1/src/Text/XML/HXT/Arrow/ReadDocument.hs0000644000000000000000000004675712036750467017527 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Text.XML.HXT.Arrow.ReadDocument Copyright : Copyright (C) 2005 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : stable Portability: portable Compound arrows for reading an XML\/HTML document or an XML\/HTML string -} -- ------------------------------------------------------------ module Text.XML.HXT.Arrow.ReadDocument ( readDocument , readFromDocument , readString , readFromString , hread , xread ) where import Control.Arrow.ListArrows import Data.Maybe ( fromMaybe ) import qualified Data.Map as M import Text.XML.HXT.DOM.Interface import Text.XML.HXT.Arrow.XmlArrow import Text.XML.HXT.Arrow.Edit ( canonicalizeAllNodes , canonicalizeForXPath , rememberDTDAttrl , removeDocWhiteSpace ) import Text.XML.HXT.Arrow.ParserInterface import Text.XML.HXT.Arrow.ProcessDocument ( getDocumentContents , parseXmlDocument , parseXmlDocumentWithExpat , parseHtmlDocument , propagateAndValidateNamespaces , andValidateNamespaces ) import Text.XML.HXT.Arrow.XmlState import Text.XML.HXT.Arrow.XmlState.TypeDefs -- ------------------------------------------------------------ -- {- | the main document input filter this filter can be configured by a list of configuration options, a value of type 'Text.XML.HXT.XmlState.TypeDefs.SysConfig' for all available options see module 'Text.XML.HXT.Arrow.XmlState.SystemConfig' - @withValidate yes\/no@ : switch on\/off DTD validation. Only for XML parsed documents, not for HTML parsing. - @withSubstDTDEntities yes\/no@ : switch on\/off entity substitution for general entities defined in DTD validation. Default is @yes@. Switching this option and the validation off can lead to faster parsing, in that case reading the DTD documents is not longer necessary. Only used with XML parsed documents, not with HTML parsing. - @withSubstHTMLEntities yes\/no@ : switch on\/off entity substitution for general entities defined in HTML validation. Default is @no@. Switching this option on and the validation and substDTDEntities off can lead to faster parsing, in that case reading the DTD documents is not longer necessary, HTML general entities are still substituted. Only used with XML parsed documents, not with HTML parsing. - @withParseHTML yes\/no@ : switch on HTML parsing. - @withParseByMimeType yes\/no@ : select XML\/HTML parser by document mime type. text\/xml and text\/xhtml are parsed as XML, text\/html as HTML. - @withCheckNamespaces yes\/no@ : Switch on\/off namespace propagation and checking - @withInputEncoding \@ : Set default encoding. - @withTagSoup@ : use light weight and lazy parser based on tagsoup lib. This is only available when package hxt-tagsoup is installed and the source contains an @import Text.XML.HXT.TagSoup@. - @withRelaxNG \@ : validate document with Relax NG, the parameter is for the schema URI. This implies using XML parser, no validation against DTD, and canonicalisation. - @withCurl [\...]@ : Use the libCurl binding for HTTP access. This is only available when package hxt-curl is installed and the source contains an @import Text.XML.HXT.Curl@. - @withHTTP [\...]@ : Use the Haskell HTTP package for HTTP access. This is only available when package hxt-http is installed and the source contains an @import Text.XML.HXT.HTTP@. examples: > readDocument [] "test.xml" reads and validates a document \"test.xml\", no namespace propagation, only canonicalization is performed > ... > import Text.XML.HXT.Curl > ... > > readDocument [ withValidate no > , withInputEncoding isoLatin1 > , withParseByMimeType yes > , withCurl [] > ] "http://localhost/test.php" reads document \"test.php\", parses it as HTML or XML depending on the mimetype given from the server, but without validation, default encoding 'isoLatin1'. HTTP access is done via libCurl. > readDocument [ withParseHTML yes > , withInputEncoding isoLatin1 > ] "" reads a HTML document from standard input, no validation is done when parsing HTML, default encoding is 'isoLatin1', > readDocument [ withInputEncoding isoLatin1 > , withValidate no > , withMimeTypeFile "/etc/mime.types" > , withStrictInput yes > ] "test.svg" reads an SVG document from \"test.svg\", sets the mime type by looking in the system mimetype config file, default encoding is 'isoLatin1', > ... > import Text.XML.HXT.Curl > import Text.XML.HXT.TagSoup > ... > > readDocument [ withParseHTML yes > , withTagSoup > , withProxy "www-cache:3128" > , withCurl [] > , withWarnings no > ] "http://www.haskell.org/" reads Haskell homepage with HTML parser, ignoring any warnings (at the time of writing, there were some HTML errors), with http access via libCurl interface and proxy \"www-cache\" at port 3128, parsing is done with tagsoup HTML parser. This requires packages \"hxt-curl\" and \"hxt-tagsoup\" to be installed > readDocument [ withValidate yes > , withCheckNamespaces yes > , withRemoveWS yes > , withTrace 2 > , withHTTP [] > ] "http://www.w3c.org/" read w3c home page (xhtml), validate and check namespaces, remove whitespace between tags, trace activities with level 2. HTTP access is done with Haskell HTTP package > readDocument [ withValidate no > , withSubstDTDEntities no > ... > ] "http://www.w3c.org/" read w3c home page (xhtml), but without accessing the DTD given in that document. Only the predefined XML general entity refs are substituted. > readDocument [ withValidate no > , withSubstDTDEntities no > , withSubstHTMLEntities yes > ... > ] "http://www.w3c.org/" same as above, but with substituion of all general entity refs defined in XHTML. for minimal complete examples see 'Text.XML.HXT.Arrow.WriteDocument.writeDocument' and 'runX', the main starting point for running an XML arrow. -} readDocument :: SysConfigList -> String -> IOStateArrow s b XmlTree readDocument config src = localSysEnv $ readDocument' config src readDocument' :: SysConfigList -> String -> IOStateArrow s b XmlTree readDocument' config src = configSysVars config >>> readD $< getSysVar theWithCache where readD True = constA undefined -- just for generalizing the signature to: IOStateArrow s b XmlTree >>> -- instead of IOStateArrow s XmlTree XmlTree (withoutUserState $< (getSysVar theCacheRead >>^ ($ src))) readD False = readDocument'' src readDocument'' :: String -> IOStateArrow s b XmlTree readDocument'' src = getDocumentContents src >>> ( processDoc $<< ( getMimeType &&& getSysVar (theParseByMimeType .&&&. theParseHTML .&&&. theAcceptedMimeTypes .&&&. theRelaxValidate .&&&. theXmlSchemaValidate ) ) ) >>> traceMsg 1 ("readDocument: " ++ show src ++ " processed") >>> traceSource >>> traceTree where processNoneEmptyDoc p = ifA (fromLA hasEmptyBody) (replaceChildren none) p where hasEmptyBody = hasAttrValue transferStatus (/= "200") -- test on empty response body for not o.k. responses `guards` -- e.g. 3xx status values ( neg getChildren <+> ( getChildren >>> isWhiteSpace ) ) getMimeType = getAttrValue transferMimeType >>^ stringToLower applyMimeTypeHandler mt = withoutUserState (applyMTH $< getSysVar theMimeTypeHandlers) where applyMTH mtTable = fromMaybe none $ fmap (\ f -> processNoneEmptyDoc (traceMimeStart >>> f >>> traceMimeEnd) ) $ M.lookup mt mtTable traceMimeStart = traceMsg 2 $ "readDocument: calling user defined document parser" traceMimeEnd = traceMsg 2 $ "readDocument: user defined document parser finished" processDoc mimeType options = traceMsg 1 (unwords [ "readDocument:", show src , "(mime type:", show mimeType, ") will be processed" ] ) >>> ( applyMimeTypeHandler mimeType -- try user defined document handlers `orElse` processDoc' mimeType options ) processDoc' mimeType ( parseByMimeType , ( parseHtml , ( acceptedMimeTypes , ( validateWithRelax , validateWithXmlSchema )))) = ( if isAcceptedMimeType acceptedMimeTypes mimeType then ( processNoneEmptyDoc ( ( parse $< getSysVar (theValidate .&&&. theSubstDTDEntities .&&&. theSubstHTMLEntities .&&&. theIgnoreNoneXmlContents .&&&. theTagSoup .&&&. theExpat ) ) >>> ( if isXmlOrHtml then ( ( checknamespaces $< getSysVar (theCheckNamespaces .&&&. theTagSoup ) ) >>> rememberDTDAttrl >>> ( canonicalize $< getSysVar (thePreserveComment .&&&. theCanonicalize .&&&. theTagSoup ) ) >>> ( whitespace $< getSysVar (theRemoveWS .&&&. theTagSoup ) ) >>> relaxOrXmlSchema ) else this ) ) ) else ( traceMsg 1 (unwords [ "readDocument:", show src , "mime type:", show mimeType, "not accepted"]) >>> replaceChildren none -- remove contents of not accepted mimetype ) ) where isAcceptedMimeType :: [String] -> String -> Bool isAcceptedMimeType mts mt | null mts || null mt = True | otherwise = foldr (matchMt mt') False $ mts' where mt' = parseMt mt mts' = map parseMt $ mts parseMt = break (== '/') >>> second (drop 1) matchMt (ma,mi) (mas,mis) r = ( (ma == mas || mas == "*") && (mi == mis || mis == "*") ) || r parse ( validate , ( substDTD , ( substHTML , ( removeNoneXml , ( withTagSoup' , withExpat' ))))) | not isXmlOrHtml = if removeNoneXml then replaceChildren none -- don't parse, if mime type is not XML nor HTML else this -- but remove contents when option is set | isHtml || withTagSoup' = configSysVar (setS theLowerCaseNames isHtml) >>> parseHtmlDocument -- parse as HTML or with tagsoup XML | isXml = if withExpat' then parseXmlDocumentWithExpat else parseXmlDocument validate substDTD substHTML validateWithRelax -- parse as XML | otherwise = this -- suppress warning checknamespaces (withNamespaces, withTagSoup') | withNamespaces && withTagSoup' = andValidateNamespaces -- propagation is done in tagsoup | withNamespaces || validateWithRelax || validateWithXmlSchema = propagateAndValidateNamespaces -- RelaxNG and XML Schema require correct namespaces | otherwise = this canonicalize (preserveCmt, (canonicalize', withTagSoup')) | withTagSoup' = this -- tagsoup already removes redundant stuff | validateWithRelax || validateWithXmlSchema = canonicalizeAllNodes -- no comments in schema validation | canonicalize' && preserveCmt = canonicalizeForXPath | canonicalize' = canonicalizeAllNodes | otherwise = this relaxOrXmlSchema | validateWithXmlSchema = withoutUserState $< getSysVar theXmlSchemaValidator | validateWithRelax = withoutUserState $< getSysVar theRelaxValidator | otherwise = this whitespace (removeWS, withTagSoup') | ( removeWS || validateWithXmlSchema -- XML Schema does not like WS ) && not withTagSoup' = removeDocWhiteSpace -- tagsoup already removes whitespace | otherwise = this isHtml = ( not parseByMimeType && parseHtml ) -- force HTML || ( parseByMimeType && isHtmlMimeType mimeType ) isXml = ( not parseByMimeType && not parseHtml ) || ( parseByMimeType && ( isXmlMimeType mimeType || null mimeType ) -- mime type is XML or not known ) isXmlOrHtml = isHtml || isXml -- ------------------------------------------------------------ -- | -- the arrow version of 'readDocument', the arrow input is the source URI readFromDocument :: SysConfigList -> IOStateArrow s String XmlTree readFromDocument config = applyA ( arr $ readDocument config ) -- ------------------------------------------------------------ -- | -- read a document that is stored in a normal Haskell String -- -- the same function as readDocument, but the parameter forms the input. -- All options available for 'readDocument' are applicable for readString, -- except input encoding options. -- -- Encoding: No decoding is done, the String argument is taken as Unicode string -- All decoding must be done before calling readString, even if the -- XML document contains an encoding spec. readString :: SysConfigList -> String -> IOStateArrow s b XmlTree readString config content = readDocument config (stringProtocol ++ content) -- ------------------------------------------------------------ -- | -- the arrow version of 'readString', the arrow input is the source URI readFromString :: SysConfigList -> IOStateArrow s String XmlTree readFromString config = applyA ( arr $ readString config ) -- ------------------------------------------------------------ -- | -- parse a string as HTML content, substitute all HTML entity refs and canonicalize tree. -- (substitute char refs, ...). Errors are ignored. -- -- This arrow delegates all work to the parseHtmlContent parser in module HtmlParser. -- -- This is a simpler version of 'readFromString' without any options, -- but it does not run in the IO monad. hread :: ArrowXml a => a String XmlTree hread = fromLA $ parseHtmlContent -- substHtmlEntityRefs is done in parser >>> -- as well as subst HTML char refs editNTreeA [isError :-> none] -- ignores all errors {- no longer neccesary, text nodes are merged in parser >>> canonicalizeContents -- -} -- ------------------------------------------------------------ -- | -- parse a string as XML content, substitute all predefined XML entity refs and canonicalize tree -- This xread arrow delegates all work to the xread parser function in module XmlParsec xread :: ArrowXml a => a String XmlTree xread = parseXmlContent {- -- the old version, where the parser does not subst char refs and cdata xread = root [] [parseXmlContent] -- substXmlEntityRefs is done in parser >>> canonicalizeContents >>> getChildren -- -} -- ------------------------------------------------------------ hxt-9.3.1.1/src/Text/XML/HXT/Arrow/Edit.hs0000644000000000000000000006122512036750467016025 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Text.XML.HXT.Arrow.Edit Copyright : Copyright (C) 2011 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : stable Portability: portable common edit arrows -} -- ------------------------------------------------------------ module Text.XML.HXT.Arrow.Edit ( canonicalizeAllNodes , canonicalizeForXPath , canonicalizeContents , collapseAllXText , collapseXText , xshowEscapeXml , escapeXmlRefs , escapeHtmlRefs , haskellRepOfXmlDoc , treeRepOfXmlDoc , addHeadlineToXmlDoc , indentDoc , numberLinesInXmlDoc , preventEmptyElements , removeComment , removeAllComment , removeWhiteSpace , removeAllWhiteSpace , removeDocWhiteSpace , transfCdata , transfAllCdata , transfCharRef , transfAllCharRef , substAllXHTMLEntityRefs , substXHTMLEntityRef , rememberDTDAttrl , addDefaultDTDecl , hasXmlPi , addXmlPi , addXmlPiEncoding , addDoctypeDecl , addXHtmlDoctypeStrict , addXHtmlDoctypeTransitional , addXHtmlDoctypeFrameset ) where import Control.Arrow import Control.Arrow.ArrowList import Control.Arrow.ArrowIf import Control.Arrow.ArrowTree import Control.Arrow.ListArrow import Control.Arrow.NTreeEdit import Data.Char.Properties.XMLCharProps ( isXmlSpaceChar ) import Text.XML.HXT.Arrow.XmlArrow import Text.XML.HXT.DOM.Interface import qualified Text.XML.HXT.DOM.XmlNode as XN import qualified Text.XML.HXT.DOM.ShowXml as XS import Text.XML.HXT.DOM.FormatXmlTree ( formatXmlTree ) import Text.XML.HXT.Parser.HtmlParsec ( emptyHtmlTags ) import Text.XML.HXT.Parser.XmlEntities ( xmlEntities ) import Text.XML.HXT.Parser.XhtmlEntities ( xhtmlEntities ) import Data.List ( isPrefixOf ) import qualified Data.Map as M import Data.Maybe -- ------------------------------------------------------------ -- | -- Applies some "Canonical XML" rules to a document tree. -- -- The rules differ slightly for canonical XML and XPath in handling of comments -- -- Note: This is not the whole canonicalization as it is specified by the W3C -- Recommendation. Adding attribute defaults or sorting attributes in lexicographic -- order is done by the @transform@ function of module @Text.XML.HXT.Validator.Validation@. -- Replacing entities or line feed normalization is done by the parser. -- -- -- Not implemented yet: -- -- - Whitespace within start and end tags is normalized -- -- - Special characters in attribute values and character content are replaced by character references -- -- see 'canonicalizeAllNodes' and 'canonicalizeForXPath' canonicalizeTree' :: LA XmlTree XmlTree -> LA XmlTree XmlTree canonicalizeTree' toBeRemoved = processChildren ( (none `when` (isText <+> isXmlPi)) -- remove XML PI and all text around XML root element >>> (deep isPi `when` isDTD) -- remove DTD parts, except PIs whithin DTD ) >>> canonicalizeNodes toBeRemoved canonicalizeNodes :: LA XmlTree XmlTree -> LA XmlTree XmlTree canonicalizeNodes toBeRemoved = editNTreeA $ [ toBeRemoved :-> none , ( isElem >>> getAttrl >>> getChildren >>> isCharRef ) -- canonicalize attribute list :-> ( processAttrl ( processChildren transfCharRef >>> collapseXText' -- combine text in attribute values ) >>> ( collapseXText' -- and combine text in content `when` (getChildren >>. has2XText) ) ) , ( isElem >>> (getChildren >>. has2XText) ) :-> collapseXText' -- combine text in content , isCharRef :-> ( getCharRef >>> arr (\ i -> [toEnum i]) >>> mkText ) , isCdata :-> ( getCdata >>> mkText ) ] -- | -- Applies some "Canonical XML" rules to a document tree. -- -- The rule differ slightly for canonical XML and XPath in handling of comments -- -- Note: This is not the whole canonicalization as it is specified by the W3C -- Recommendation. Adding attribute defaults or sorting attributes in lexicographic -- order is done by the @transform@ function of module @Text.XML.HXT.Validator.Validation@. -- Replacing entities or line feed normalization is done by the parser. -- -- Rules: remove DTD parts, processing instructions, comments and substitute char refs in attribute -- values and text -- -- Not implemented yet: -- -- - Whitespace within start and end tags is normalized -- -- - Special characters in attribute values and character content are replaced by character references canonicalizeAllNodes :: ArrowList a => a XmlTree XmlTree canonicalizeAllNodes = fromLA $ canonicalizeTree' isCmt -- remove comment {-# INLINE canonicalizeAllNodes #-} -- | -- Canonicalize a tree for XPath -- Like 'canonicalizeAllNodes' but comment nodes are not removed -- -- see 'canonicalizeAllNodes' canonicalizeForXPath :: ArrowList a => a XmlTree XmlTree canonicalizeForXPath = fromLA $ canonicalizeTree' none -- comment remains there {-# INLINE canonicalizeForXPath #-} -- | -- Canonicalize the contents of a document -- -- substitutes all char refs in text and attribute values, -- removes CDATA section and combines all sequences of resulting text -- nodes into a single text node -- -- see 'canonicalizeAllNodes' canonicalizeContents :: ArrowList a => a XmlTree XmlTree canonicalizeContents = fromLA $ canonicalizeNodes none {-# INLINE canonicalizeContents #-} -- ------------------------------------------------------------ has2XText :: XmlTrees -> XmlTrees has2XText ts0@(t1 : ts1@(t2 : ts2)) | XN.isText t1 = if XN.isText t2 then ts0 else has2XText ts2 | otherwise = has2XText ts1 has2XText _ = [] collapseXText' :: LA XmlTree XmlTree collapseXText' = replaceChildren ( listA getChildren >>> arrL (foldr mergeText' []) ) where mergeText' :: XmlTree -> XmlTrees -> XmlTrees mergeText' t1 (t2 : ts2) | XN.isText t1 && XN.isText t2 = let s1 = fromJust . XN.getText $ t1 s2 = fromJust . XN.getText $ t2 t = XN.mkText (s1 ++ s2) in t : ts2 mergeText' t1 ts = t1 : ts -- | -- Collects sequences of text nodes in the list of children of a node into one single text node. -- This is useful, e.g. after char and entity reference substitution collapseXText :: ArrowList a => a XmlTree XmlTree collapseXText = fromLA collapseXText' -- | -- Applies collapseXText recursively. -- -- -- see also : 'collapseXText' collapseAllXText :: ArrowList a => a XmlTree XmlTree collapseAllXText = fromLA $ processBottomUp collapseXText' -- ------------------------------------------------------------ -- | apply an arrow to the input and convert the resulting XML trees into an XML escaped string -- -- This is a save variant for converting a tree into an XML string representation -- that is parsable with 'Text.XML.HXT.Arrow.ReadDocument'. -- It is implemented with 'Text.XML.HXT.Arrow.XmlArrow.xshow', -- but xshow does no XML escaping. The XML escaping is done with -- 'Text.XML.HXT.Arrow.Edit.escapeXmlDoc' before xshow is applied. -- -- So the following law holds -- -- > xshowEscapeXml f >>> xread == f xshowEscapeXml :: ArrowXml a => a n XmlTree -> a n String xshowEscapeXml f = f >. (uncurry XS.xshow'' escapeXmlRefs) -- ------------------------------------------------------------ -- | -- escape XmlText, -- transform all special XML chars into char- or entity- refs type EntityRefTable = M.Map Int String xmlEntityRefTable , xhtmlEntityRefTable :: EntityRefTable xmlEntityRefTable = buildEntityRefTable $ xmlEntities xhtmlEntityRefTable = buildEntityRefTable $ xhtmlEntities buildEntityRefTable :: [(String, Int)] -> EntityRefTable buildEntityRefTable = M.fromList . map (\ (x,y) -> (y,x) ) type EntitySubstTable = M.Map String String xhtmlEntitySubstTable :: EntitySubstTable xhtmlEntitySubstTable = M.fromList . map (second $ (:[]) . toEnum) $ xhtmlEntities -- ------------------------------------------------------------ substXHTMLEntityRef :: LA XmlTree XmlTree substXHTMLEntityRef = ( getEntityRef >>> arrL subst >>> mkText ) `orElse` this where subst name = maybe [] (:[]) $ M.lookup name xhtmlEntitySubstTable substAllXHTMLEntityRefs :: ArrowXml a => a XmlTree XmlTree substAllXHTMLEntityRefs = fromLA $ processBottomUp substXHTMLEntityRef -- ------------------------------------------------------------ escapeXmlRefs :: (Char -> String -> String, Char -> String -> String) escapeXmlRefs = (cquote, aquote) where cquote c | c `elem` "<&" = ('&' :) . ((lookupRef c xmlEntityRefTable) ++) . (';' :) | otherwise = (c :) aquote c | c `elem` "<>\"\'&\n\r\t" = ('&' :) . ((lookupRef c xmlEntityRefTable) ++) . (';' :) | otherwise = (c :) escapeHtmlRefs :: (Char -> String -> String, Char -> String -> String) escapeHtmlRefs = (cquote, aquote) where cquote c | isHtmlTextEsc c = ('&' :) . ((lookupRef c xhtmlEntityRefTable) ++) . (';' :) | otherwise = (c :) aquote c | isHtmlAttrEsc c = ('&' :) . ((lookupRef c xhtmlEntityRefTable) ++) . (';' :) | otherwise = (c :) isHtmlTextEsc c = c >= toEnum(128) || ( c `elem` "<&" ) isHtmlAttrEsc c = c >= toEnum(128) || ( c `elem` "<>\"\'&\n\r\t" ) lookupRef :: Char -> EntityRefTable -> String lookupRef c = fromMaybe ('#' : show (fromEnum c)) . M.lookup (fromEnum c) {-# INLINE lookupRef #-} -- ------------------------------------------------------------ preventEmptyElements :: ArrowList a => [String] -> Bool -> a XmlTree XmlTree preventEmptyElements ns isHtml = fromLA $ editNTreeA [ ( isElem >>> isNoneEmpty >>> neg getChildren ) :-> replaceChildren (txt "") ] where isNoneEmpty | not (null ns) = hasNameWith (localPart >>> (`elem` ns)) | isHtml = hasNameWith (localPart >>> (`notElem` emptyHtmlTags)) | otherwise = this -- ------------------------------------------------------------ -- | -- convert a document into a Haskell representation (with show). -- -- Useful for debugging and trace output. -- see also : 'treeRepOfXmlDoc', 'numberLinesInXmlDoc' haskellRepOfXmlDoc :: ArrowList a => a XmlTree XmlTree haskellRepOfXmlDoc = fromLA $ root [getAttrl] [show ^>> mkText] -- | -- convert a document into a text and add line numbers to the text representation. -- -- Result is a root node with a single text node as child. -- Useful for debugging and trace output. -- see also : 'haskellRepOfXmlDoc', 'treeRepOfXmlDoc' numberLinesInXmlDoc :: ArrowList a => a XmlTree XmlTree numberLinesInXmlDoc = fromLA $ processChildren (changeText numberLines) where numberLines :: String -> String numberLines str = concat $ zipWith (\ n l -> lineNr n ++ l ++ "\n") [1..] (lines str) where lineNr :: Int -> String lineNr n = (reverse (take 6 (reverse (show n) ++ replicate 6 ' '))) ++ " " -- | -- convert a document into a text representation in tree form. -- -- Useful for debugging and trace output. -- see also : 'haskellRepOfXmlDoc', 'numberLinesInXmlDoc' treeRepOfXmlDoc :: ArrowList a => a XmlTree XmlTree treeRepOfXmlDoc = fromLA $ root [getAttrl] [formatXmlTree ^>> mkText] addHeadlineToXmlDoc :: ArrowXml a => a XmlTree XmlTree addHeadlineToXmlDoc = fromLA $ ( addTitle $< (getAttrValue a_source >>^ formatTitle) ) where addTitle str = replaceChildren ( txt str <+> getChildren <+> txt "\n" ) formatTitle str = "\n" ++ headline ++ "\n" ++ underline ++ "\n\n" where headline = "content of: " ++ str underline = map (const '=') headline -- ------------------------------------------------------------ -- | -- remove a Comment node removeComment :: ArrowXml a => a XmlTree XmlTree removeComment = none `when` isCmt -- | -- remove all comments in a tree recursively removeAllComment :: ArrowXml a => a XmlTree XmlTree removeAllComment = fromLA $ editNTreeA [isCmt :-> none] -- ------------------------------------------------------------ -- | -- simple filter for removing whitespace. -- -- no check on sigificant whitespace, e.g. in HTML \-elements, is done. -- -- -- see also : 'removeAllWhiteSpace', 'removeDocWhiteSpace' removeWhiteSpace :: ArrowXml a => a XmlTree XmlTree removeWhiteSpace = fromLA $ none `when` isWhiteSpace -- | -- simple recursive filter for removing all whitespace. -- -- removes all text nodes in a tree that consist only of whitespace. -- -- -- see also : 'removeWhiteSpace', 'removeDocWhiteSpace' removeAllWhiteSpace :: ArrowXml a => a XmlTree XmlTree removeAllWhiteSpace = fromLA $ editNTreeA [isWhiteSpace :-> none] -- fromLA $ processBottomUp removeWhiteSpace' -- less efficient -- ------------------------------------------------------------ -- | -- filter for removing all not significant whitespace. -- -- the tree traversed for removing whitespace between elements, -- that was inserted for indentation and readability. -- whitespace is only removed at places, where it's not significat -- preserving whitespace may be controlled in a document tree -- by a tag attribute @xml:space@ -- -- allowed values for this attribute are @default | preserve@ -- -- input is root node of the document to be cleaned up, -- output the semantically equivalent simplified tree -- -- -- see also : 'indentDoc', 'removeAllWhiteSpace' removeDocWhiteSpace :: ArrowXml a => a XmlTree XmlTree removeDocWhiteSpace = fromLA $ removeRootWhiteSpace removeRootWhiteSpace :: LA XmlTree XmlTree removeRootWhiteSpace = processChildren processRootElement `when` isRoot where processRootElement :: LA XmlTree XmlTree processRootElement = removeWhiteSpace >>> processChild where processChild = choiceA [ isDTD :-> removeAllWhiteSpace -- whitespace in DTD is redundant , this :-> replaceChildren ( getChildren >>. indentTrees insertNothing False 1 ) ] -- ------------------------------------------------------------ -- | -- filter for indenting a document tree for pretty printing. -- -- the tree is traversed for inserting whitespace for tag indentation. -- -- whitespace is only inserted or changed at places, where it isn't significant, -- is's not inserted between tags and text containing non whitespace chars. -- -- whitespace is only inserted or changed at places, where it's not significant. -- preserving whitespace may be controlled in a document tree -- by a tag attribute @xml:space@ -- -- allowed values for this attribute are @default | preserve@. -- -- input is a complete document tree or a document fragment -- result is the semantically equivalent formatted tree. -- -- -- see also : 'removeDocWhiteSpace' indentDoc :: ArrowXml a => a XmlTree XmlTree indentDoc = fromLA $ ( ( isRoot `guards` indentRoot ) `orElse` (root [] [this] >>> indentRoot >>> getChildren) ) -- ------------------------------------------------------------ indentRoot :: LA XmlTree XmlTree indentRoot = processChildren indentRootChildren where indentRootChildren = removeText >>> indentChild >>> insertNL where removeText = none `when` isText insertNL = this <+> txt "\n" indentChild = ( replaceChildren ( getChildren >>. indentTrees (insertIndentation 2) False 1 ) `whenNot` isDTD ) -- ------------------------------------------------------------ -- -- copied from EditFilter and rewritten for arrows -- to remove dependency to the filter module indentTrees :: (Int -> LA XmlTree XmlTree) -> Bool -> Int -> XmlTrees -> XmlTrees indentTrees _ _ _ [] = [] indentTrees indentFilter preserveSpace level ts = runLAs lsf ls ++ indentRest rs where runLAs f l = runLA (constL l >>> f) undefined (ls, rs) = break XN.isElem ts isSignificant :: Bool isSignificant = preserveSpace || (not . null . runLAs isSignificantPart) ls isSignificantPart :: LA XmlTree XmlTree isSignificantPart = catA [ isText `guards` neg isWhiteSpace , isCdata , isCharRef , isEntityRef ] lsf :: LA XmlTree XmlTree lsf | isSignificant = this | otherwise = (none `when` isWhiteSpace) >>> (indentFilter level <+> this) indentRest :: XmlTrees -> XmlTrees indentRest [] | isSignificant = [] | otherwise = runLA (indentFilter (level - 1)) undefined indentRest (t':ts') = runLA ( ( indentElem >>> lsf ) `when` isElem ) t' ++ ( if null ts' then indentRest else indentTrees indentFilter preserveSpace level ) ts' where indentElem = replaceChildren ( getChildren >>. indentChildren ) xmlSpaceAttrValue :: String xmlSpaceAttrValue = concat . runLA (getAttrValue "xml:space") $ t' preserveSpace' :: Bool preserveSpace' = ( fromMaybe preserveSpace . lookup xmlSpaceAttrValue ) [ ("preserve", True) , ("default", False) ] indentChildren :: XmlTrees -> XmlTrees indentChildren cs' | all (maybe False (all isXmlSpaceChar) . XN.getText) cs' = [] | otherwise = indentTrees indentFilter preserveSpace' (level + 1) cs' -- filter for indenting elements insertIndentation :: Int -> Int -> LA a XmlTree insertIndentation indentWidth level = txt ('\n' : replicate (level * indentWidth) ' ') -- filter for removing all whitespace insertNothing :: Int -> LA a XmlTree insertNothing _ = none -- ------------------------------------------------------------ -- | -- converts a CDATA section into normal text nodes transfCdata :: ArrowXml a => a XmlTree XmlTree transfCdata = fromLA $ (getCdata >>> mkText) `when` isCdata -- | -- converts CDATA sections in whole document tree into normal text nodes transfAllCdata :: ArrowXml a => a XmlTree XmlTree transfAllCdata = fromLA $ editNTreeA [isCdata :-> (getCdata >>> mkText)] -- | -- converts a character reference to normal text transfCharRef :: ArrowXml a => a XmlTree XmlTree transfCharRef = fromLA $ ( getCharRef >>> arr (\ i -> [toEnum i]) >>> mkText ) `when` isCharRef -- | -- recursively converts all character references to normal text transfAllCharRef :: ArrowXml a => a XmlTree XmlTree transfAllCharRef = fromLA $ editNTreeA [isCharRef :-> (getCharRef >>> arr (\ i -> [toEnum i]) >>> mkText)] -- ------------------------------------------------------------ rememberDTDAttrl :: ArrowList a => a XmlTree XmlTree rememberDTDAttrl = fromLA $ ( ( addDTDAttrl $< ( getChildren >>> isDTDDoctype >>> getDTDAttrl ) ) `orElse` this ) where addDTDAttrl al = seqA . map (uncurry addAttr) . map (first (dtdPrefix ++)) $ al addDefaultDTDecl :: ArrowList a => a XmlTree XmlTree addDefaultDTDecl = fromLA $ ( addDTD $< listA (getAttrl >>> (getName &&& xshow getChildren) >>> hasDtdPrefix) ) where hasDtdPrefix = isA (fst >>> (dtdPrefix `isPrefixOf`)) >>> arr (first (drop (length dtdPrefix))) addDTD [] = this addDTD al = replaceChildren ( mkDTDDoctype al none <+> txt "\n" <+> ( getChildren >>> (none `when` isDTDDoctype) ) -- remove old DTD decl ) -- ------------------------------------------------------------ hasXmlPi :: ArrowXml a => a XmlTree XmlTree hasXmlPi = fromLA ( getChildren >>> isPi >>> hasName t_xml ) -- | add an \ processing instruction -- if it's not already there addXmlPi :: ArrowXml a => a XmlTree XmlTree addXmlPi = fromLA ( insertChildrenAt 0 ( ( mkPi (mkName t_xml) none >>> addAttr a_version "1.0" ) <+> txt "\n" ) `whenNot` hasXmlPi ) -- | add an encoding spec to the \ processing instruction addXmlPiEncoding :: ArrowXml a => String -> a XmlTree XmlTree addXmlPiEncoding enc = fromLA $ processChildren ( addAttr a_encoding enc `when` ( isPi >>> hasName t_xml ) ) -- | add an XHTML strict doctype declaration to a document addXHtmlDoctypeStrict , addXHtmlDoctypeTransitional , addXHtmlDoctypeFrameset :: ArrowXml a => a XmlTree XmlTree -- | add an XHTML strict doctype declaration to a document addXHtmlDoctypeStrict = addDoctypeDecl "html" "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd" -- | add an XHTML transitional doctype declaration to a document addXHtmlDoctypeTransitional = addDoctypeDecl "html" "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd" -- | add an XHTML frameset doctype declaration to a document addXHtmlDoctypeFrameset = addDoctypeDecl "html" "-//W3C//DTD XHTML 1.0 Frameset//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd" -- | add a doctype declaration to a document -- -- The arguments are the root element name, the PUBLIC id and the SYSTEM id addDoctypeDecl :: ArrowXml a => String -> String -> String -> a XmlTree XmlTree addDoctypeDecl rootElem public system = fromLA $ replaceChildren ( mkDTDDoctype ( ( if null public then id else ( (k_public, public) : ) ) . ( if null system then id else ( (k_system, system) : ) ) $ [ (a_name, rootElem) ] ) none <+> txt "\n" <+> getChildren ) -- ------------------------------------------------------------ hxt-9.3.1.1/src/Text/XML/HXT/Arrow/WriteDocument.hs0000644000000000000000000002273212036750467017731 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Text.XML.HXT.Arrow.WriteDocument Copyright : Copyright (C) 2005-9 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : experimental Portability: portable Compound arrow for writing XML documents -} -- ------------------------------------------------------------ module Text.XML.HXT.Arrow.WriteDocument ( writeDocument , writeDocument' , writeDocumentToString , prepareContents ) where import Control.Arrow -- arrow classes import Control.Arrow.ArrowList import Control.Arrow.ArrowIf import Control.Arrow.ArrowTree import Text.XML.HXT.DOM.Interface import Text.XML.HXT.Arrow.XmlArrow import Text.XML.HXT.Arrow.XmlState import Text.XML.HXT.Arrow.XmlState.TypeDefs import Text.XML.HXT.Arrow.XmlState.RunIOStateArrow ( initialSysState ) import Text.XML.HXT.Arrow.Edit ( haskellRepOfXmlDoc , indentDoc , addDefaultDTDecl , preventEmptyElements , removeDocWhiteSpace , treeRepOfXmlDoc ) import Text.XML.HXT.Arrow.DocumentOutput ( putXmlDocument , encodeDocument , encodeDocument' ) -- ------------------------------------------------------------ -- {- | the main filter for writing documents this filter can be configured by an option list like 'Text.XML.HXT.Arrow.ReadDocument.readDocument' usage: @ writeDocument optionList destination @ if @ destination @ is the empty string or \"-\", stdout is used as output device for available options see 'Text.XML.HXT.Arrow.XmlState.SystemConfig' - @withOutputXML@ : (default) issue XML: quote special XML chars \>,\<,\",\',& where neccessary add XML processing instruction and encode document with respect to output encoding, - @withOutputHTML@ : issue HTML: translate all special XML chars and all HTML chars with a corresponding entity reference into entity references. Do not generate empty elements, e.g. @@. The short form introduces trouble in various browsers. - @withOutputXHTML@ : same as @withOutputHTML@, but all none ASCII chars are substituted by char references. - @withOutputPLAIN@ : Do not substitute any chars. This is useful when generating something else than XML/HTML, e.g. Haskell source code. - @withXmlPi yes/no@ : Add a @@ processing instruction to the beginning of the document. Default is yes. - @withAddDefaultDTD@ : if the document to be written was build by reading another document containing a Document Type Declaration, this DTD is inserted into the output document (default: no insert) - @withShowTree yes/no@ : show DOM tree representation of document (for debugging) - @withShowHaskell yes/no@ : show Haskell representaion of document (for debugging) a minimal main program for copying a document has the following structure: > module Main > where > > import Text.XML.HXT.Core > > main :: IO () > main > = do > runX ( readDocument [] "hello.xml" > >>> > writeDocument [] "bye.xml" > ) > return () an example for copying a document from the web to standard output with global trace level 1, input trace level 2, output encoding isoLatin1, and evaluation of error code is: > module Main > where > > import Text.XML.HXT.Core > import Text.XML.HXT.Curl > -- or > -- import Text.XML.HXT.HTTP > import System.Exit > > main :: IO () > main > = do > [rc] <- runX > ( configSysVars [ withTrace 1 -- set the defaults for all read-, > , withCurl [] -- write- and other operations > -- or withHTTP [] > ] > >>> > readDocument [ withTrace 2 -- use these additional > , withParseHTML yes -- options only for this read > ] > "http://www.haskell.org/" > >>> > writeDocument [ withOutputEncoding isoLatin1 > ] > "" -- output to stdout > >>> > getErrStatus > ) > exitWith ( if rc >= c_err > then ExitFailure 1 > else ExitSuccess > ) -} writeDocument :: SysConfigList -> String -> IOStateArrow s XmlTree XmlTree writeDocument config dst = localSysEnv $ configSysVars config >>> perform ( (flip writeDocument') dst $< getSysVar theTextMode ) writeDocument' :: Bool -> String -> IOStateArrow s XmlTree XmlTree writeDocument' textMode dst = ( traceMsg 1 ("writeDocument: destination is " ++ show dst) >>> ( (flip prepareContents) encodeDocument $< getSysVar idS ) >>> traceDoc "document after encoding" >>> putXmlDocument textMode dst >>> traceMsg 1 "writeDocument: finished" ) `when` documentStatusOk -- ------------------------------------------------------------ -- | -- Convert a document into a string. Formating is done the same way -- and with the same options as in 'writeDocument'. Default output encoding is -- no encoding, that means the result is a normal unicode encode haskell string. -- The default may be overwritten with the 'Text.XML.HXT.Arrow.XmlState.SystemConfig.withOutputEncoding' option. -- The XML PI can be suppressed by the 'Text.XML.HXT.XmlKeywords.a_no_xml_pi' option. -- -- This arrow fails, when the encoding scheme is not supported. -- The arrow is pure, it does not run in the IO monad. -- The XML PI is suppressed, if not explicitly turned on with an -- option @ (a_no_xml_pi, v_0) @ writeDocumentToString :: ArrowXml a => SysConfigList -> a XmlTree String writeDocumentToString config = prepareContents ( foldr (>>>) id (withOutputEncoding unicodeString : withXmlPi no : config ) $ initialSysState ) encodeDocument' >>> xshow getChildren -- ------------------------------------------------------------ -- | -- indent and format output prepareContents :: ArrowXml a => XIOSysState -> (Bool -> Bool -> String -> a XmlTree XmlTree) -> a XmlTree XmlTree prepareContents config encodeDoc = indent >>> addDtd >>> format where indent' = getS theIndent config removeWS' = getS theRemoveWS config showTree' = getS theShowTree config showHaskell' = getS theShowHaskell config outHtml' = getS theOutputFmt config == HTMLoutput outXhtml' = getS theOutputFmt config == XHTMLoutput outXml' = getS theOutputFmt config == XMLoutput noPi' = not $ getS theXmlPi config noEEsFor' = getS theNoEmptyElemFor config addDDTD' = getS theAddDefaultDTD config outEnc' = getS theOutputEncoding config addDtd | addDDTD' = addDefaultDTDecl | otherwise = this indent | indent' = indentDoc -- document indentation | removeWS' = removeDocWhiteSpace -- remove all whitespace between tags | otherwise = this format | showTree' = treeRepOfXmlDoc | showHaskell' = haskellRepOfXmlDoc | outHtml' = preventEmptyElements noEEsFor' True >>> encodeDoc -- convert doc into text with respect to output encoding with ASCII as default False noPi' ( if null outEnc' then usAscii else outEnc' ) | outXhtml' = preventEmptyElements noEEsFor' True >>> encodeDoc -- convert doc into text with respect to output encoding True noPi' outEnc' | outXml' = ( if null noEEsFor' then this else preventEmptyElements noEEsFor' False ) >>> encodeDoc -- convert doc into text with respect to output encoding True noPi' outEnc' | otherwise = this -- ------------------------------------------------------------ hxt-9.3.1.1/src/Text/XML/HXT/Arrow/ParserInterface.hs0000644000000000000000000000514712036750467020216 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Text.XML.HXT.Arrow.ParserInterface Copyright : Copyright (C) 2010 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : stable Portability: portable interface to the HXT XML and DTD parsers -} -- ------------------------------------------------------------ module Text.XML.HXT.Arrow.ParserInterface ( module Text.XML.HXT.Arrow.ParserInterface ) where import Control.Arrow.ArrowList import Text.XML.HXT.DOM.Interface import Text.XML.HXT.Arrow.XmlArrow import qualified Text.XML.HXT.Parser.HtmlParsec as HP import qualified Text.XML.HXT.Parser.XmlParsec as XP import qualified Text.XML.HXT.Parser.XmlDTDParser as DP -- ------------------------------------------------------------ parseXmlDoc :: ArrowXml a => a (String, String) XmlTree parseXmlDoc = arr2L XP.parseXmlDocument parseXmlDTDPart :: ArrowXml a => a (String, XmlTree) XmlTree parseXmlDTDPart = arr2L XP.parseXmlDTDPart parseXmlContent :: ArrowXml a => a String XmlTree parseXmlContent = arrL XP.xread parseXmlEntityEncodingSpec , parseXmlDocEncodingSpec , removeEncodingSpec :: ArrowXml a => a XmlTree XmlTree parseXmlDocEncodingSpec = arrL XP.parseXmlDocEncodingSpec parseXmlEntityEncodingSpec = arrL XP.parseXmlEntityEncodingSpec removeEncodingSpec = arrL XP.removeEncodingSpec parseXmlDTDdeclPart :: ArrowXml a => a XmlTree XmlTree parseXmlDTDdeclPart = arrL DP.parseXmlDTDdeclPart parseXmlDTDdecl :: ArrowXml a => a XmlTree XmlTree parseXmlDTDdecl = arrL DP.parseXmlDTDdecl parseXmlDTDEntityValue :: ArrowXml a => a XmlTree XmlTree parseXmlDTDEntityValue = arrL DP.parseXmlDTDEntityValue parseXmlEntityValueAsContent :: ArrowXml a => String -> a XmlTree XmlTree parseXmlEntityValueAsContent = arrL . XP.parseXmlEntityValueAsContent parseXmlEntityValueAsAttrValue :: ArrowXml a => String -> a XmlTree XmlTree parseXmlEntityValueAsAttrValue = arrL . XP.parseXmlEntityValueAsAttrValue -- ------------------------------------------------------------ parseHtmlDoc :: ArrowList a => a (String, String) XmlTree parseHtmlDoc = arr2L HP.parseHtmlDocument parseHtmlContent :: ArrowList a => a String XmlTree parseHtmlContent = arrL HP.parseHtmlContent -- ------------------------------------------------------------ hxt-9.3.1.1/src/Text/XML/HXT/Arrow/DocumentOutput.hs0000644000000000000000000002374112036750467020140 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Text.XML.HXT.Arrow.DocumentOutput Copyright : Copyright (C) 2005-9 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : experimental Portability: portable State arrows for document output -} -- ------------------------------------------------------------ module Text.XML.HXT.Arrow.DocumentOutput ( putXmlDocument , putXmlTree , putXmlSource , encodeDocument , encodeDocument' ) where import Control.Arrow -- arrow classes import Control.Arrow.ArrowList import Control.Arrow.ArrowIf import Control.Arrow.ArrowTree import Control.Arrow.ArrowIO import Control.Arrow.ListArrow import Control.Arrow.ArrowExc import qualified Data.ByteString.Lazy as BS import Data.Maybe import Data.String.Unicode ( getOutputEncodingFct' ) import Text.XML.HXT.DOM.Interface import qualified Text.XML.HXT.DOM.ShowXml as XS import Text.XML.HXT.Arrow.XmlArrow import Text.XML.HXT.Arrow.Edit ( addHeadlineToXmlDoc , addXmlPi , addXmlPiEncoding , indentDoc , numberLinesInXmlDoc , treeRepOfXmlDoc , escapeHtmlRefs , escapeXmlRefs ) import Text.XML.HXT.Arrow.XmlState import Text.XML.HXT.Arrow.XmlState.TypeDefs import System.IO ( Handle , IOMode(..) , openFile , openBinaryFile , hSetBinaryMode , hPutStrLn , hClose , stdout ) -- ------------------------------------------------------------ -- -- | Write the contents of a document tree into an output stream (file or stdout). -- -- If textMode is set, writing is done with Haskell string output, else (default) -- writing is done with lazy ByteString output putXmlDocument :: Bool -> String -> IOStateArrow s XmlTree XmlTree putXmlDocument textMode dst = perform putDoc where putDoc = ( if textMode then ( xshow getChildren >>> tryA (arrIO (\ s -> hPutDocument (\h -> hPutStrLn h s))) ) else ( xshowBlob getChildren >>> tryA (arrIO (\ s -> hPutDocument (\h -> do BS.hPutStr h s BS.hPutStr h (stringToBlob "\n") ) ) ) ) ) >>> ( ( traceMsg 1 ("io error, document not written to " ++ outFile) >>> arr show >>> mkError c_fatal >>> filterErrorMsg ) ||| ( traceMsg 2 ("document written to " ++ outFile ++ ", textMode = " ++ show textMode) >>> none ) ) where isStdout = null dst || dst == "-" outFile = if isStdout then "stdout" else show dst hPutDocument :: (Handle -> IO ()) -> IO () hPutDocument action | isStdout = do hSetBinaryMode stdout (not textMode) action stdout hSetBinaryMode stdout False | otherwise = do handle <- ( if textMode then openFile else openBinaryFile ) dst WriteMode action handle hClose handle -- | -- write the tree representation of a document to a file putXmlTree :: String -> IOStateArrow s XmlTree XmlTree putXmlTree dst = perform ( treeRepOfXmlDoc >>> addHeadlineToXmlDoc >>> putXmlDocument True dst ) -- | -- write a document with indentaion and line numers putXmlSource :: String -> IOStateArrow s XmlTree XmlTree putXmlSource dst = perform ( (this ) `whenNot` isRoot >>> indentDoc >>> numberLinesInXmlDoc >>> addHeadlineToXmlDoc >>> putXmlDocument True dst ) -- ------------------------------------------------------------ getEncodingParam :: IOStateArrow s XmlTree String getEncodingParam = catA [ getSysVar theOutputEncoding -- 4. guess: take output encoding parameter from global state , getSysVar theInputEncoding -- 5. guess: take encoding parameter from global state , constA utf8 -- default : utf8 ] >. (head . filter (not . null)) getOutputEncoding :: String -> IOStateArrow s XmlTree String getOutputEncoding defaultEnc = getEC $< getEncodingParam where getEC enc' = fromLA $ getOutputEncoding' defaultEnc enc' encodeDocument :: Bool -> Bool -> String -> IOStateArrow s XmlTree XmlTree encodeDocument quoteXml supressXmlPi defaultEnc = encode $< getOutputEncoding defaultEnc where encode enc = traceMsg 2 ("encodeDocument: encoding is " ++ show enc) >>> ( encodeDocument' quoteXml supressXmlPi enc `orElse` ( issueFatal ("encoding scheme not supported: " ++ show enc) >>> setDocumentStatusFromSystemState "encoding document" ) ) -- ------------------------------------------------------------ isBinaryDoc :: LA XmlTree XmlTree isBinaryDoc = ( ( getAttrValue transferMimeType >>^ stringToLower ) >>> isA (\ t -> not (null t || isTextMimeType t || isXmlMimeType t)) ) `guards` this getOutputEncoding' :: String -> String -> LA XmlTree String getOutputEncoding' defaultEnc defaultEnc2 = catA [ isBinaryDoc >>> -- 0. guess: binary data found: no encoding at all constA isoLatin1 -- the content should usually be a blob -- this handling is like the decoding in DocumentInput, -- there nothing is decoded for non text or non xml contents , getChildren -- 1. guess: evaluate >>> ( ( isPi >>> hasName t_xml ) `guards` getAttrValue a_encoding ) , constA defaultEnc -- 2. guess: explicit parameter, may be "" , getAttrValue a_output_encoding -- 3. guess: take output encoding parameter in root node , constA defaultEnc2 -- default : UNICODE or utf8 ] >. (head . filter (not . null)) -- make the filter deterministic: take 1. entry from list of guesses encodeDocument' :: ArrowXml a => Bool -> Bool -> String -> a XmlTree XmlTree encodeDocument' quoteXml supressXmlPi defaultEnc = fromLA (encode $< getOutputEncoding' defaultEnc utf8) where encode :: String -> LA XmlTree XmlTree encode encodingScheme | encodingScheme == unicodeString = replaceChildren ( (getChildren >. XS.xshow'' cQuot aQuot) >>> mkText ) | isNothing encodeFct = none | otherwise = ( if supressXmlPi then processChildren (none `when` isXmlPi) else ( addXmlPi >>> addXmlPiEncoding encodingScheme ) ) >>> ( isLatin1Blob `orElse` encodeDoc (fromJust encodeFct) ) >>> addAttr a_output_encoding encodingScheme where (cQuot, aQuot) | quoteXml = escapeXmlRefs | otherwise = escapeHtmlRefs encodeFct = getOutputEncodingFct' encodingScheme encodeDoc ef = replaceChildren ( xshowBlobWithEnc cQuot aQuot ef getChildren >>> mkBlob ) xshowBlobWithEnc cenc aenc enc f = f >. XS.xshow' cenc aenc enc -- if encoding scheme is isolatin1 and the contents is a single blob (bytestring) -- the encoding is the identity. -- This optimization enables processing (copying) of none XML contents -- without any conversions from and to strings isLatin1Blob | encodingScheme /= isoLatin1 = none | otherwise = childIsSingleBlob `guards` this where childIsSingleBlob = listA getChildren >>> isA (length >>> (== 1)) >>> unlistA >>> isBlob -- ------------------------------------------------------------ hxt-9.3.1.1/src/Text/XML/HXT/Arrow/GeneralEntitySubstitution.hs0000644000000000000000000003154712036750467022353 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Text.XML.HXT.Arrow.GeneralEntitySubstitution Copyright : Copyright (C) 2005 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : experimental Portability: portable general entity substitution -} -- ------------------------------------------------------------ module Text.XML.HXT.Arrow.GeneralEntitySubstitution ( processGeneralEntities ) where import Control.Arrow -- arrow classes import Control.Arrow.ArrowList import Control.Arrow.ArrowIf import Control.Arrow.ArrowTree import Text.XML.HXT.DOM.Interface import Text.XML.HXT.Arrow.XmlArrow import Text.XML.HXT.Arrow.XmlState import Text.XML.HXT.Arrow.ParserInterface ( parseXmlEntityValueAsAttrValue , parseXmlEntityValueAsContent ) import Text.XML.HXT.Arrow.Edit ( transfCharRef ) import Text.XML.HXT.Arrow.DocumentInput ( getXmlEntityContents ) import qualified Data.Map as M ( Map , empty , lookup , insert ) -- ------------------------------------------------------------ data GEContext = ReferenceInContent | ReferenceInAttributeValue | ReferenceInEntityValue -- or OccursInAttributeValue -- not used during substitution but during validation -- or ReferenceInDTD -- not used: syntax check detects errors type GESubstArrow = GEContext -> RecList -> GEArrow XmlTree XmlTree type GEArrow b c = IOStateArrow GEEnv b c type RecList = [String] -- ------------------------------------------------------------ newtype GEEnv = GEEnv (M.Map String GESubstArrow) emptyGeEnv :: GEEnv emptyGeEnv = GEEnv M.empty lookupGeEnv :: String -> GEEnv -> Maybe GESubstArrow lookupGeEnv k (GEEnv env) = M.lookup k env addGeEntry :: String -> GESubstArrow -> GEEnv -> GEEnv addGeEntry k a (GEEnv env) = GEEnv $ M.insert k a env -- ------------------------------------------------------------ -- | -- substitution of general entities -- -- input: a complete document tree including root node processGeneralEntities :: IOStateArrow s XmlTree XmlTree processGeneralEntities = ( traceMsg 1 "processGeneralEntities: collect and substitute general entities" >>> withOtherUserState emptyGeEnv (processChildren (processGeneralEntity ReferenceInContent [])) >>> setDocumentStatusFromSystemState "in general entity processing" >>> traceTree >>> traceSource ) `when` documentStatusOk processGeneralEntity :: GESubstArrow processGeneralEntity context recl = choiceA [ isElem :-> ( processAttrl (processChildren substEntitiesInAttrValue) >>> processChildren (processGeneralEntity context recl) ) , isEntityRef :-> substEntityRef , isDTDDoctype :-> processChildren (processGeneralEntity context recl) , isDTDEntity :-> addEntityDecl , isDTDAttlist :-> substEntitiesInAttrDefaultValue , this :-> this ] where addEntityDecl :: GEArrow XmlTree XmlTree addEntityDecl = perform ( choiceA [ isIntern :-> addInternalEntity -- don't change sequence of cases , isExtern :-> addExternalEntity , isUnparsed :-> addUnparsedEntity ] ) where isIntern = none `when` hasDTDAttr k_system isExtern = none `when` hasDTDAttr k_ndata isUnparsed = this addInternalEntity :: GEArrow XmlTree b addInternalEntity = insertInternal $<< ( ( getDTDAttrValue a_name >>> traceValue 2 (("processGeneralEntity: general entity definition for " ++) . show) ) &&& xshow (getChildren >>> isText) ) where insertInternal entity contents = insertEntity (substInternal contents) entity >>> none addExternalEntity :: GEArrow XmlTree b addExternalEntity = insertExternal $<< ( ( getDTDAttrValue a_name >>> traceValue 2 (("processGeneralEntity: external entity definition for " ++) . show) ) &&& getDTDAttrValue a_url -- the absolute URL, not the relative in attr: k_system ) where insertExternal entity uri = insertEntity (substExternalParsed1Time uri) entity >>> none addUnparsedEntity :: GEArrow XmlTree b addUnparsedEntity = getDTDAttrValue a_name >>> traceValue 2 (("processGeneralEntity: unparsed entity definition for " ++) . show) >>> applyA (arr (insertEntity substUnparsed)) >>> none insertEntity :: (String -> GESubstArrow) -> String -> GEArrow b b insertEntity fct entity = ( getUserState >>> applyA (arr checkDefined) ) `guards` addEntity fct entity where checkDefined geEnv = maybe ok alreadyDefined . lookupGeEnv entity $ geEnv where ok = this alreadyDefined _ = issueWarn ("entity " ++ show entity ++ " already defined, repeated definition ignored") >>> none addEntity :: (String -> GESubstArrow) -> String -> GEArrow b b addEntity fct entity = changeUserState ins where ins _ geEnv = addGeEntry entity (fct entity) geEnv substEntitiesInAttrDefaultValue :: GEArrow XmlTree XmlTree substEntitiesInAttrDefaultValue = applyA ( xshow ( getDTDAttrValue a_default -- parse the default value >>> -- substitute entities mkText -- and convert value into a string >>> parseXmlEntityValueAsAttrValue "default value of attribute" >>> filterErrorMsg >>> substEntitiesInAttrValue ) >>> arr (setDTDAttrValue a_default) ) `when` hasDTDAttr a_default substEntitiesInAttrValue :: GEArrow XmlTree XmlTree substEntitiesInAttrValue = ( processGeneralEntity ReferenceInAttributeValue recl `when` isEntityRef ) >>> changeText normalizeWhiteSpace >>> transfCharRef where normalizeWhiteSpace = map ( \c -> if c `elem` "\n\t\r" then ' ' else c ) substEntityRef :: GEArrow XmlTree XmlTree substEntityRef = applyA ( ( ( getEntityRef -- get the entity name and the env >>> -- and compute the arrow to be applied traceValue 2 (("processGeneralEntity: entity reference for entity " ++) . show) >>> traceMsg 3 ("recursion list = " ++ show recl) ) &&& getUserState ) >>> arr2 substA ) where substA :: String -> GEEnv -> GEArrow XmlTree XmlTree substA entity geEnv = maybe entityNotFound entityFound . lookupGeEnv entity $ geEnv where errMsg msg = issueErr msg entityNotFound = errMsg ("general entity reference \"&" ++ entity ++ ";\" not processed, no definition found, (forward reference?)") entityFound fct | entity `elem` recl = errMsg ("general entity reference \"&" ++ entity ++ ";\" not processed, cyclic definition") | otherwise = fct context recl substExternalParsed1Time :: String -> String -> GESubstArrow substExternalParsed1Time uri entity cx rl = perform ( traceMsg 2 ("substExternalParsed1Time: read and parse external parsed entity " ++ show entity) >>> runInLocalURIContext ( root [sattr a_source uri] [] -- uri must be an absolute uri >>> -- abs uri is computed during parameter entity handling getXmlEntityContents >>> processExternalEntityContents ) >>> applyA ( arr $ \ s -> addEntity (substExternalParsed s) entity ) ) >>> processGeneralEntity cx rl where processExternalEntityContents :: IOStateArrow s XmlTree String processExternalEntityContents = ( ( ( documentStatusOk -- reading entity succeeded >>> -- with content stored in a text node (getChildren >>> isText) ) `guards` this ) `orElse` issueErr ("illegal value for external parsed entity " ++ show entity) ) >>> xshow (getChildren >>> isText) substExternalParsed :: String -> String -> GESubstArrow substExternalParsed s entity ReferenceInContent rl = includedIfValidating s rl entity substExternalParsed _ entity ReferenceInAttributeValue _ = forbidden entity "external parsed general" "in attribute value" substExternalParsed _ _ ReferenceInEntityValue _ = bypassed substInternal :: String -> String -> GESubstArrow substInternal s entity ReferenceInContent rl = included s rl entity substInternal s entity ReferenceInAttributeValue rl = includedInLiteral s rl entity substInternal _ _ ReferenceInEntityValue _ = bypassed substUnparsed :: String -> GESubstArrow substUnparsed entity ReferenceInContent _ = forbidden entity "unparsed" "content" substUnparsed entity ReferenceInAttributeValue _ = forbidden entity "unparsed" "attribute value" substUnparsed entity ReferenceInEntityValue _ = forbidden entity "unparsed" "entity value" -- XML 1.0 chapter 4.4.2 included :: String -> RecList -> String -> GEArrow XmlTree XmlTree included s rl entity = traceMsg 3 ("substituting general entity " ++ show entity ++ " with value " ++ show s) >>> txt s >>> parseXmlEntityValueAsContent ("substituting general entity " ++ show entity ++ " in contents") >>> filterErrorMsg >>> processGeneralEntity context (entity : rl) -- XML 1.0 chapter 4.4.3 includedIfValidating :: String -> RecList -> String -> GEArrow XmlTree XmlTree includedIfValidating = included -- XML 1.0 chapter 4.4.4 forbidden :: String -> String -> String -> GEArrow XmlTree XmlTree forbidden entity msg cx = issueErr ("reference of " ++ msg ++ show entity ++ " forbidden in " ++ cx) -- XML 1.0 chapter 4.4.5 includedInLiteral :: String -> RecList -> String -> GEArrow XmlTree XmlTree includedInLiteral s rl entity = txt s >>> parseXmlEntityValueAsAttrValue ("substituting general entity " ++ show entity ++ " in attribute value") >>> filterErrorMsg >>> processGeneralEntity context (entity : rl) -- XML 1.0 chapter 4.4.7 bypassed :: GEArrow XmlTree XmlTree bypassed = this -- ------------------------------------------------------------ hxt-9.3.1.1/src/Text/XML/HXT/Arrow/XmlOptions.hs0000644000000000000000000003303512036750467017252 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Text.XML.HXT.Arrow.XmlOptions Copyright : Copyright (C) 2010 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : stable Portability: portable system configuration and common options options -} -- ------------------------------------------------------------ module Text.XML.HXT.Arrow.XmlOptions where import Text.XML.HXT.DOM.Interface import Text.XML.HXT.Arrow.XmlState.TypeDefs import Text.XML.HXT.Arrow.XmlState.SystemConfig import Data.Maybe import System.Console.GetOpt -- ------------------------------------------------------------ -- -- | -- commonly useful options for XML input -- -- can be used for option definition with haskell getopt -- -- defines options: 'a_trace', 'a_proxy', 'a_use_curl', 'a_do_not_use_curl', 'a_options_curl', 'a_encoding', -- 'a_issue_errors', 'a_do_not_issue_errors', 'a_parse_html', 'a_parse_by_mimetype', 'a_issue_warnings', 'a_do_not_issue_warnings', -- 'a_parse_xml', 'a_validate', 'a_do_not_validate', 'a_canonicalize', 'a_do_not_canonicalize', --- 'a_preserve_comment', 'a_do_not_preserve_comment', 'a_check_namespaces', 'a_do_not_check_namespaces', -- 'a_remove_whitespace', 'a_do_not_remove_whitespace' inputOptions :: [OptDescr SysConfig] inputOptions = [ Option "t" [a_trace] (OptArg trc "LEVEL") "trace level (0-4), default 1" , Option "p" [a_proxy] (ReqArg withProxy "PROXY") "proxy for http access (e.g. \"www-cache:3128\")" , Option "" [a_redirect] (NoArg (withRedirect True)) "automatically follow redirected URIs" , Option "" [a_no_redirect] (NoArg (withRedirect False)) "switch off following redirected URIs" , Option "" [a_default_baseuri] (ReqArg withDefaultBaseURI "URI") "default base URI, default: \"file:////\"" , Option "e" [a_encoding] (ReqArg withInputEncoding "CHARSET") ( "default document encoding (" ++ utf8 ++ ", " ++ isoLatin1 ++ ", " ++ usAscii ++ ", ...)" ) , Option "" [a_mime_types] (ReqArg withMimeTypeFile "FILE") "set mime type configuration file, e.g. \"/etc/mime.types\"" , Option "" [a_issue_errors] (NoArg (withErrors True)) "issue all error messages on stderr (default)" , Option "" [a_do_not_issue_errors] (NoArg (withErrors False)) "ignore all error messages" , Option "" [a_ignore_encoding_errors] (NoArg (withEncodingErrors False)) "ignore encoding errors" , Option "" [a_ignore_none_xml_contents] (NoArg (withIgnoreNoneXmlContents True)) "discards all contents of none XML/HTML documents, only the meta info remains in the doc tree" , Option "" [a_accept_mimetypes] (ReqArg withMT "MIMETYPES") "only accept documents matching the given comma separated list of mimetype specs" , Option "H" [a_parse_html] (NoArg (withParseHTML True)) "parse input as HTML, try to interprete everything as HTML, no validation" , Option "M" [a_parse_by_mimetype] (NoArg (withParseByMimeType True)) "parse dependent on mime type: text/html as HTML, text/xml and text/xhtml and others as XML, else no parse" , Option "" [a_parse_xml] (NoArg (withParseHTML False)) "parse input as XML, (default)" , Option "" [a_strict_input] (NoArg (withStrictInput True)) "read input files strictly, this ensures closing the files correctly even if not read completely" , Option "" [a_issue_warnings] (NoArg (withWarnings True)) "issue warnings, when parsing HTML (default)" , Option "Q" [a_do_not_issue_warnings] (NoArg (withWarnings False)) "ignore warnings, when parsing HTML" , Option "" [a_validate] (NoArg (withValidate True)) "document validation when parsing XML (default)" , Option "w" [a_do_not_validate] (NoArg (withValidate False)) "only wellformed check, no validation" , Option "" [a_subst_dtd_entities] (NoArg (withSubstDTDEntities True)) "entities defined in DTD are substituted when parsing XML (default)" , Option "" [a_do_not_subst_dtd_entities] (NoArg (withSubstDTDEntities False)) "entities defined in DTD are NOT substituted when parsing XML" , Option "" [a_subst_html_entities] (NoArg (withSubstHTMLEntities True)) "entities defined in XHTML are substituted when parsing XML, only in effect when prev. option is switched off" , Option "" [a_do_not_subst_html_entities] (NoArg (withSubstHTMLEntities False)) "only entities predefined in XML are substituted when parsing XML (default)" , Option "" [a_canonicalize] (NoArg (withCanonicalize True)) "canonicalize document, remove DTD, comment, transform CDATA, CharRef's, ... (default)" , Option "c" [a_do_not_canonicalize] (NoArg (withCanonicalize False)) "do not canonicalize document, don't remove DTD, comment, don't transform CDATA, CharRef's, ..." , Option "C" [a_preserve_comment] (NoArg (withPreserveComment True)) "don't remove comments during canonicalisation" , Option "" [a_do_not_preserve_comment] (NoArg (withPreserveComment False)) "remove comments during canonicalisation (default)" , Option "n" [a_check_namespaces] (NoArg (withCheckNamespaces True)) "tag tree with namespace information and check namespaces" , Option "" [a_do_not_check_namespaces] (NoArg (withCheckNamespaces False)) "ignore namespaces (default)" , Option "r" [a_remove_whitespace] (NoArg (withRemoveWS True)) "remove redundant whitespace, simplifies tree and processing" , Option "" [a_do_not_remove_whitespace] (NoArg (withRemoveWS False)) "don't remove redundant whitespace (default)" ] where withMT = withAcceptedMimeTypes . words . map (\ x -> if x == ',' then ' ' else x) trc = withTrace . max 0 . min 9 . (read :: String -> Int) . ('0':) . filter (`elem` "0123456789") . fromMaybe v_1 -- | -- commonly useful options for XML output -- -- defines options: 'a_indent', 'a_output_encoding', 'a_output_html' and others outputOptions :: [OptDescr SysConfig] outputOptions = [ Option "f" [a_output_file] (ReqArg (withSysAttr a_output_file) "FILE") "output file for resulting document (default: stdout)" , Option "i" [a_indent] (NoArg (withIndent True)) "indent XML output for readability" , Option "o" [a_output_encoding] (ReqArg withOutputEncoding "CHARSET") ( "encoding of output (" ++ utf8 ++ ", " ++ isoLatin1 ++ ", " ++ usAscii ++ ")" ) , Option "" [a_output_xml] (NoArg withOutputXML ) "output of none ASCII chars as HTMl entity references" , Option "" [a_output_html] (NoArg withOutputHTML ) "output of none ASCII chars as HTMl entity references" , Option "" [a_output_xhtml] (NoArg withOutputXHTML ) "output of HTML elements with empty content (script, ...) done in format instead of " , Option "" [a_output_plain] (NoArg withOutputPLAIN ) "output of HTML elements with empty content (script, ...) done in format instead of " , Option "" [a_no_xml_pi] (NoArg (withXmlPi False)) ("output without processing instruction, useful in combination with --" ++ show a_output_html) , Option "" [a_no_empty_elem_for] (ReqArg (withNoEmptyElemFor . words') "NAMES") "output of empty elements done in format only for given list of element names" , Option "" [a_add_default_dtd] (NoArg (withAddDefaultDTD True)) "add the document type declaration given in the input document" , Option "" [a_text_mode] (NoArg (withTextMode True)) "output in text mode" ] where words' = words . map (\ c -> if c == ',' then ' ' else c) -- | -- commonly useful options -- -- defines options: 'a_verbose', 'a_help' generalOptions :: [OptDescr SysConfig] generalOptions = [ Option "v" [a_verbose] (NoArg (withSysAttr a_verbose v_1)) "verbose output" , Option "h?" [a_help] (NoArg (withSysAttr a_help v_1)) "this message" ] -- | -- defines 'a_version' option versionOptions :: [OptDescr SysConfig] versionOptions = [ Option "V" [a_version] (NoArg (withSysAttr a_version v_1)) "show program version" ] -- | -- debug output options showOptions :: [OptDescr SysConfig] showOptions = [ Option "" [a_show_tree] (NoArg (withShowTree True)) "output tree representation instead of document source" , Option "" [a_show_haskell] (NoArg (withShowHaskell True)) "output internal Haskell representation instead of document source" ] -- ------------------------------------------------------------ a_accept_mimetypes, a_add_default_dtd, a_canonicalize, a_check_namespaces, a_collect_errors, a_default_baseuri, a_do_not_canonicalize, a_do_not_check_namespaces, a_do_not_issue_errors, a_do_not_issue_warnings, a_do_not_preserve_comment, a_do_not_remove_whitespace, a_do_not_subst_dtd_entities, a_do_not_subst_html_entities, a_do_not_validate, a_error, a_error_log, a_help, a_if_modified_since, a_if_unmodified_since, a_ignore_encoding_errors, a_ignore_none_xml_contents, a_indent, a_issue_errors, a_issue_warnings, a_mime_types, a_no_empty_elements, a_no_empty_elem_for, a_no_redirect, a_no_xml_pi, a_output_file, a_output_xml, a_output_html, a_output_xhtml, a_output_plain, a_parse_by_mimetype, a_parse_html, a_parse_xml, a_preserve_comment, a_proxy, a_redirect, a_remove_whitespace, a_show_haskell, a_show_tree, a_strict_input, a_subst_dtd_entities, a_subst_html_entities, a_text_mode, a_trace, a_validate, a_verbose :: String a_accept_mimetypes = "accept-mimetypes" a_add_default_dtd = "add-default-dtd" a_canonicalize = "canonicalize" a_check_namespaces = "check-namespaces" a_collect_errors = "collect-errors" a_default_baseuri = "default-base-URI" a_do_not_canonicalize = "do-not-canonicalize" a_do_not_check_namespaces = "do-not-check-namespaces" a_do_not_issue_errors = "do-not-issue-errors" a_do_not_issue_warnings = "do-not-issue-warnings" a_do_not_preserve_comment = "do-not-preserve-comment" a_do_not_remove_whitespace = "do-not-remove-whitespace" a_do_not_subst_dtd_entities = "do-not-subst-dtd-entities" a_do_not_subst_html_entities = "do-not-subst-html-entities" a_do_not_validate = "do-not-validate" a_error = "error" a_error_log = "errorLog" a_help = "help" a_if_modified_since = "if-modified-since" a_if_unmodified_since = "if-unmodified-since" a_ignore_encoding_errors = "ignore-encoding-errors" a_ignore_none_xml_contents = "ignore-none-xml-contents" a_indent = "indent" a_issue_warnings = "issue-warnings" a_issue_errors = "issue-errors" a_mime_types = "mimetypes" a_no_empty_elements = "no-empty-elements" a_no_empty_elem_for = "no-empty-elem-for" a_no_redirect = "no-redirect" a_no_xml_pi = "no-xml-pi" a_output_file = "output-file" a_output_html = "output-html" a_output_xhtml = "output-xhtml" a_output_xml = "output-xml" a_output_plain = "output-plain" a_parse_by_mimetype = "parse-by-mimetype" a_parse_html = "parse-html" a_parse_xml = "parse-xml" a_preserve_comment = "preserve-comment" a_proxy = "proxy" a_redirect = "redirect" a_remove_whitespace = "remove-whitespace" a_show_haskell = "show-haskell" a_show_tree = "show-tree" a_strict_input = "strict-input" a_subst_dtd_entities = "subst-dtd-entities" a_subst_html_entities = "subst-html-entities" a_text_mode = "text-mode" a_trace = "trace" a_validate = "validate" a_verbose = "verbose" -- ------------------------------------------------------------ -- | -- select options from a predefined list of option desciptions selectOptions :: [String] -> [OptDescr a] -> [OptDescr a] selectOptions ol os = concat . map (\ on -> filter (\ (Option _ ons _ _) -> on `elem` ons) os) $ ol removeOptions :: [String] -> [OptDescr a] -> [OptDescr a] removeOptions ol os = filter (\ (Option _ ons _ _) -> not . any (`elem` ol) $ ons ) os -- ------------------------------------------------------------ hxt-9.3.1.1/src/Text/XML/HXT/Arrow/Binary.hs0000644000000000000000000000616612036750467016367 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Text.XML.HXT.Arrow.Binary Copyright : Copyright (C) 2008 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : experimental Portability: portable De-/Serialisation arrows for XmlTrees and other arbitrary values with a Binary instance -} -- ------------------------------------------------------------ module Text.XML.HXT.Arrow.Binary ( readBinaryValue , writeBinaryValue ) where import Control.Arrow () import Control.Arrow.ArrowExc import Control.Arrow.ArrowList import Control.Arrow.ArrowIO import Data.Binary import qualified Data.ByteString.Lazy as B import System.IO ( openBinaryFile , hClose , IOMode(..) ) import Text.XML.HXT.Arrow.XmlState.ErrorHandling import Text.XML.HXT.Arrow.XmlState.TypeDefs -- ------------------------------------------------------------ readBinaryValue :: (Binary a) => String -> IOStateArrow s b a readBinaryValue file = (uncurry $ decodeBinaryValue file) $< getSysVar ( theStrictDeserialize .&&&. theBinaryDeCompression ) -- | Read a serialied value from a file, optionally decompress it and decode the value -- In case of an error, the error message is issued and the arrow fails decodeBinaryValue :: (Binary a) => String -> Bool -> DeCompressionFct -> IOStateArrow s b a decodeBinaryValue file strict decompress = arrIO0 dec `catchA` issueExc "readBinaryValue" where dec = ( if strict then readItAll else B.readFile file ) >>= return . decode . decompress readItAll = do h <- openBinaryFile file ReadMode c <- B.hGetContents h B.length c `seq` do hClose h return c -- hack: force reading whole file and close it immediately -- | Serialize a value, optionally compress it, and write it to a file. -- In case of an error, the error message is issued and the arrow fails writeBinaryValue :: (Binary a) => String -> IOStateArrow s a () writeBinaryValue file = flip encodeBinaryValue file $< getSysVar theBinaryCompression encodeBinaryValue :: (Binary a) => CompressionFct -> String -> IOStateArrow s a () encodeBinaryValue compress file = arrIO enc `catchA` issueExc "writeBinaryXmlTree" where enc = B.writeFile file . compress . encode -- ------------------------------------------------------------ hxt-9.3.1.1/src/Text/XML/HXT/Arrow/XmlArrow.hs0000644000000000000000000006745012036750467016721 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Text.XML.HXT.Arrow.XmlArrow Copyright : Copyright (C) 2011 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : experimental Portability: portable Basic arrows for processing XML documents All arrows use IO and a global state for options, errorhandling, ... -} -- ------------------------------------------------------------ module Text.XML.HXT.Arrow.XmlArrow ( module Text.XML.HXT.Arrow.XmlArrow ) where import Control.Arrow -- classes import Control.Arrow.ArrowList import Control.Arrow.ArrowIf import Control.Arrow.ArrowTree import Control.Arrow.ListArrow -- arrow types import Control.Arrow.StateListArrow import Control.Arrow.IOListArrow import Control.Arrow.IOStateListArrow import Data.Char.Properties.XMLCharProps ( isXmlSpaceChar ) import Data.Maybe import Text.XML.HXT.DOM.Interface import qualified Text.XML.HXT.DOM.XmlNode as XN import qualified Text.XML.HXT.DOM.ShowXml as XS -- ------------------------------------------------------------ {- | Arrows for processing 'Text.XML.HXT.DOM.TypeDefs.XmlTree's These arrows can be grouped into predicates, selectors, constructors, and transformers. All predicates (tests) act like 'Control.Arrow.ArrowIf.none' for failure and 'Control.Arrow.ArrowIf.this' for success. A logical and can be formed by @ a1 >>> a2 @, a locical or by @ a1 \<+\> a2 @. Selector arrows will fail, when applied to wrong input, e.g. selecting the text of a node with 'getText' will fail when applied to a none text node. Edit arrows will remain the input unchanged, when applied to wrong argument, e.g. editing the content of a text node with 'changeText' applied to an element node will return the unchanged element node. -} infixl 7 += class (Arrow a, ArrowList a, ArrowTree a) => ArrowXml a where -- discriminating predicates -- | test for text nodes isText :: a XmlTree XmlTree isText = isA XN.isText {-# INLINE isText #-} isBlob :: a XmlTree XmlTree isBlob = isA XN.isBlob {-# INLINE isBlob #-} -- | test for char reference, used during parsing isCharRef :: a XmlTree XmlTree isCharRef = isA XN.isCharRef {-# INLINE isCharRef #-} -- | test for entity reference, used during parsing isEntityRef :: a XmlTree XmlTree isEntityRef = isA XN.isEntityRef {-# INLINE isEntityRef #-} -- | test for comment isCmt :: a XmlTree XmlTree isCmt = isA XN.isCmt {-# INLINE isCmt #-} -- | test for CDATA section, used during parsing isCdata :: a XmlTree XmlTree isCdata = isA XN.isCdata {-# INLINE isCdata #-} -- | test for processing instruction isPi :: a XmlTree XmlTree isPi = isA XN.isPi {-# INLINE isPi #-} -- | test for processing instruction \ isXmlPi :: a XmlTree XmlTree isXmlPi = isPi >>> hasName "xml" -- | test for element isElem :: a XmlTree XmlTree isElem = isA XN.isElem {-# INLINE isElem #-} -- | test for DTD part, used during parsing isDTD :: a XmlTree XmlTree isDTD = isA XN.isDTD {-# INLINE isDTD #-} -- | test for attribute tree isAttr :: a XmlTree XmlTree isAttr = isA XN.isAttr {-# INLINE isAttr #-} -- | test for error message isError :: a XmlTree XmlTree isError = isA XN.isError {-# INLINE isError #-} -- | test for root node (element with name \"\/\") isRoot :: a XmlTree XmlTree isRoot = isA XN.isRoot {-# INLINE isRoot #-} -- | test for text nodes with text, for which a predicate holds -- -- example: @hasText (all (\`elem\` \" \\t\\n\"))@ check for text nodes with only whitespace content hasText :: (String -> Bool) -> a XmlTree XmlTree hasText p = (isText >>> getText >>> isA p) `guards` this -- | test for text nodes with only white space -- -- implemented with 'hasTest' isWhiteSpace :: a XmlTree XmlTree isWhiteSpace = hasText (all isXmlSpaceChar) {-# INLINE isWhiteSpace #-} -- | -- test whether a node (element, attribute, pi) has a name with a special property hasNameWith :: (QName -> Bool) -> a XmlTree XmlTree hasNameWith p = (getQName >>> isA p) `guards` this {-# INLINE hasNameWith #-} -- | -- test whether a node (element, attribute, pi) has a specific qualified name -- useful only after namespace propagation hasQName :: QName -> a XmlTree XmlTree hasQName n = (getQName >>> isA (== n)) `guards` this {-# INLINE hasQName #-} -- | -- test whether a node has a specific name (prefix:localPart ore localPart), -- generally useful, even without namespace handling hasName :: String -> a XmlTree XmlTree hasName n = (getName >>> isA (== n)) `guards` this {-# INLINE hasName #-} -- | -- test whether a node has a specific name as local part, -- useful only after namespace propagation hasLocalPart :: String -> a XmlTree XmlTree hasLocalPart n = (getLocalPart >>> isA (== n)) `guards` this {-# INLINE hasLocalPart #-} -- | -- test whether a node has a specific name prefix, -- useful only after namespace propagation hasNamePrefix :: String -> a XmlTree XmlTree hasNamePrefix n = (getNamePrefix >>> isA (== n)) `guards` this {-# INLINE hasNamePrefix #-} -- | -- test whether a node has a specific namespace URI -- useful only after namespace propagation hasNamespaceUri :: String -> a XmlTree XmlTree hasNamespaceUri n = (getNamespaceUri >>> isA (== n)) `guards` this {-# INLINE hasNamespaceUri #-} -- | -- test whether an element node has an attribute node with a specific name hasAttr :: String -> a XmlTree XmlTree hasAttr n = (getAttrl >>> hasName n) `guards` this {-# INLINE hasAttr #-} -- | -- test whether an element node has an attribute node with a specific qualified name hasQAttr :: QName -> a XmlTree XmlTree hasQAttr n = (getAttrl >>> hasQName n) `guards` this {-# INLINE hasQAttr #-} -- | -- test whether an element node has an attribute with a specific value hasAttrValue :: String -> (String -> Bool) -> a XmlTree XmlTree hasAttrValue n p = (getAttrl >>> hasName n >>> xshow getChildren >>> isA p) `guards` this -- | -- test whether an element node has an attribute with a qualified name and a specific value hasQAttrValue :: QName -> (String -> Bool) -> a XmlTree XmlTree hasQAttrValue n p = (getAttrl >>> hasQName n >>> xshow getChildren >>> isA p) `guards` this -- constructor arrows ------------------------------------------------------------ -- | text node construction arrow mkText :: a String XmlTree mkText = arr XN.mkText {-# INLINE mkText #-} -- | blob node construction arrow mkBlob :: a Blob XmlTree mkBlob = arr XN.mkBlob {-# INLINE mkBlob #-} -- | char reference construction arrow, useful for document output mkCharRef :: a Int XmlTree mkCharRef = arr XN.mkCharRef {-# INLINE mkCharRef #-} -- | entity reference construction arrow, useful for document output mkEntityRef :: a String XmlTree mkEntityRef = arr XN.mkEntityRef {-# INLINE mkEntityRef #-} -- | comment node construction, useful for document output mkCmt :: a String XmlTree mkCmt = arr XN.mkCmt {-# INLINE mkCmt #-} -- | CDATA construction, useful for document output mkCdata :: a String XmlTree mkCdata = arr XN.mkCdata {-# INLINE mkCdata #-} -- | error node construction, useful only internally mkError :: Int -> a String XmlTree mkError level = arr (XN.mkError level) -- | element construction: -- | the attributes and the content of the element are computed by applying arrows -- to the input mkElement :: QName -> a n XmlTree -> a n XmlTree -> a n XmlTree mkElement n af cf = (listA af &&& listA cf) >>> arr2 (\ al cl -> XN.mkElement n al cl) -- | attribute node construction: -- | the attribute value is computed by applying an arrow to the input mkAttr :: QName -> a n XmlTree -> a n XmlTree mkAttr qn f = listA f >>> arr (XN.mkAttr qn) -- | processing instruction construction: -- | the content of the processing instruction is computed by applying an arrow to the input mkPi :: QName -> a n XmlTree -> a n XmlTree mkPi qn f = listA f >>> arr (XN.mkPi qn) -- convenient arrows for constructors -------------------------------------------------- -- | convenient arrow for element construction, more comfortable variant of 'mkElement' -- -- example for simplifying 'mkElement' : -- -- > mkElement qn (a1 <+> ... <+> ai) (c1 <+> ... <+> cj) -- -- equals -- -- > mkqelem qn [a1,...,ai] [c1,...,cj] mkqelem :: QName -> [a n XmlTree] -> [a n XmlTree] -> a n XmlTree mkqelem n afs cfs = mkElement n (catA afs) (catA cfs) {-# INLINE mkqelem #-} -- | convenient arrow for element construction with strings instead of qualified names as element names, see also 'mkElement' and 'mkelem' mkelem :: String -> [a n XmlTree] -> [a n XmlTree] -> a n XmlTree mkelem n afs cfs = mkElement (mkName n) (catA afs) (catA cfs) {-# INLINE mkelem #-} -- | convenient arrow for element constrution with attributes but without content, simple variant of 'mkelem' and 'mkElement' aelem :: String -> [a n XmlTree] -> a n XmlTree aelem n afs = catA afs >. \ al -> XN.mkElement (mkName n) al [] {-# INLINE aelem #-} -- | convenient arrow for simple element constrution without attributes, simple variant of 'mkelem' and 'mkElement' selem :: String -> [a n XmlTree] -> a n XmlTree selem n cfs = catA cfs >. XN.mkElement (mkName n) [] {-# INLINE selem #-} -- | convenient arrow for constrution of empty elements without attributes, simple variant of 'mkelem' and 'mkElement' eelem :: String -> a n XmlTree eelem n = constA (XN.mkElement (mkName n) [] []) {-# INLINE eelem #-} -- | construction of an element node with name \"\/\" for document roots root :: [a n XmlTree] -> [a n XmlTree] -> a n XmlTree root = mkelem t_root {-# INLINE root #-} -- | alias for 'mkAttr' qattr :: QName -> a n XmlTree -> a n XmlTree qattr = mkAttr {-# INLINE qattr #-} -- | convenient arrow for attribute constrution, simple variant of 'mkAttr' attr :: String -> a n XmlTree -> a n XmlTree attr = mkAttr . mkName {-# INLINE attr #-} -- constant arrows (ignoring the input) for tree construction ------------------------------ -- | constant arrow for text nodes txt :: String -> a n XmlTree txt = constA . XN.mkText {-# INLINE txt #-} -- | constant arrow for blob nodes blb :: Blob -> a n XmlTree blb = constA . XN.mkBlob {-# INLINE blb #-} -- | constant arrow for char reference nodes charRef :: Int -> a n XmlTree charRef = constA . XN.mkCharRef {-# INLINE charRef #-} -- | constant arrow for entity reference nodes entityRef :: String -> a n XmlTree entityRef = constA . XN.mkEntityRef {-# INLINE entityRef #-} -- | constant arrow for comment cmt :: String -> a n XmlTree cmt = constA . XN.mkCmt {-# INLINE cmt #-} -- | constant arrow for warning warn :: String -> a n XmlTree warn = constA . (XN.mkError c_warn) {-# INLINE warn #-} -- | constant arrow for errors err :: String -> a n XmlTree err = constA . (XN.mkError c_err) {-# INLINE err #-} -- | constant arrow for fatal errors fatal :: String -> a n XmlTree fatal = constA . (XN.mkError c_fatal) {-# INLINE fatal #-} -- | constant arrow for simple processing instructions, see 'mkPi' spi :: String -> String -> a n XmlTree spi piName piCont = constA (XN.mkPi (mkName piName) [XN.mkAttr (mkName a_value) [XN.mkText piCont]]) {-# INLINE spi #-} -- | constant arrow for attribute nodes, attribute name is a qualified name and value is a text, -- | see also 'mkAttr', 'qattr', 'attr' sqattr :: QName -> String -> a n XmlTree sqattr an av = constA (XN.mkAttr an [XN.mkText av]) {-# INLINE sqattr #-} -- | constant arrow for attribute nodes, attribute name and value are -- | given by parameters, see 'mkAttr' sattr :: String -> String -> a n XmlTree sattr an av = constA (XN.mkAttr (mkName an) [XN.mkText av]) {-# INLINE sattr #-} -- selector arrows -------------------------------------------------- -- | select the text of a text node getText :: a XmlTree String getText = arrL (maybeToList . XN.getText) {-# INLINE getText #-} -- | select the value of a char reference getCharRef :: a XmlTree Int getCharRef = arrL (maybeToList . XN.getCharRef) {-# INLINE getCharRef #-} -- | select the name of a entity reference node getEntityRef :: a XmlTree String getEntityRef = arrL (maybeToList . XN.getEntityRef) {-# INLINE getEntityRef #-} -- | select the comment of a comment node getCmt :: a XmlTree String getCmt = arrL (maybeToList . XN.getCmt) {-# INLINE getCmt #-} -- | select the content of a CDATA node getCdata :: a XmlTree String getCdata = arrL (maybeToList . XN.getCdata) {-# INLINE getCdata #-} -- | select the name of a processing instruction getPiName :: a XmlTree QName getPiName = arrL (maybeToList . XN.getPiName) {-# INLINE getPiName #-} -- | select the content of a processing instruction getPiContent :: a XmlTree XmlTree getPiContent = arrL (fromMaybe [] . XN.getPiContent) {-# INLINE getPiContent #-} -- | select the name of an element node getElemName :: a XmlTree QName getElemName = arrL (maybeToList . XN.getElemName) {-# INLINE getElemName #-} -- | select the attribute list of an element node getAttrl :: a XmlTree XmlTree getAttrl = arrL (fromMaybe [] . XN.getAttrl) {-# INLINE getAttrl #-} -- | select the DTD type of a DTD node getDTDPart :: a XmlTree DTDElem getDTDPart = arrL (maybeToList . XN.getDTDPart) {-# INLINE getDTDPart #-} -- | select the DTD attributes of a DTD node getDTDAttrl :: a XmlTree Attributes getDTDAttrl = arrL (maybeToList . XN.getDTDAttrl) {-# INLINE getDTDAttrl #-} -- | select the name of an attribute getAttrName :: a XmlTree QName getAttrName = arrL (maybeToList . XN.getAttrName) {-# INLINE getAttrName #-} -- | select the error level (c_warn, c_err, c_fatal) from an error node getErrorLevel :: a XmlTree Int getErrorLevel = arrL (maybeToList . XN.getErrorLevel) {-# INLINE getErrorLevel #-} -- | select the error message from an error node getErrorMsg :: a XmlTree String getErrorMsg = arrL (maybeToList . XN.getErrorMsg) {-# INLINE getErrorMsg #-} -- | select the qualified name from an element, attribute or pi getQName :: a XmlTree QName getQName = arrL (maybeToList . XN.getName) {-# INLINE getQName #-} -- | select the prefix:localPart or localPart from an element, attribute or pi getName :: a XmlTree String getName = arrL (maybeToList . XN.getQualifiedName) {-# INLINE getName #-} -- | select the univeral name ({namespace URI} ++ localPart) getUniversalName :: a XmlTree String getUniversalName = arrL (maybeToList . XN.getUniversalName) {-# INLINE getUniversalName #-} -- | select the univeral name (namespace URI ++ localPart) getUniversalUri :: a XmlTree String getUniversalUri = arrL (maybeToList . XN.getUniversalUri) {-# INLINE getUniversalUri #-} -- | select the local part getLocalPart :: a XmlTree String getLocalPart = arrL (maybeToList . XN.getLocalPart) {-# INLINE getLocalPart #-} -- | select the name prefix getNamePrefix :: a XmlTree String getNamePrefix = arrL (maybeToList . XN.getNamePrefix) {-# INLINE getNamePrefix #-} -- | select the namespace URI getNamespaceUri :: a XmlTree String getNamespaceUri = arrL (maybeToList . XN.getNamespaceUri) {-# INLINE getNamespaceUri #-} -- | select the value of an attribute of an element node, -- always succeeds with empty string as default value \"\" getAttrValue :: String -> a XmlTree String getAttrValue n = xshow (getAttrl >>> hasName n >>> getChildren) -- | like 'getAttrValue', but fails if the attribute does not exist getAttrValue0 :: String -> a XmlTree String getAttrValue0 n = getAttrl >>> hasName n >>> xshow getChildren -- | like 'getAttrValue', but select the value of an attribute given by a qualified name, -- always succeeds with empty string as default value \"\" getQAttrValue :: QName -> a XmlTree String getQAttrValue n = xshow (getAttrl >>> hasQName n >>> getChildren) -- | like 'getQAttrValue', but fails if attribute does not exist getQAttrValue0 :: QName -> a XmlTree String getQAttrValue0 n = getAttrl >>> hasQName n >>> xshow getChildren -- edit arrows -------------------------------------------------- -- | edit the string of a text node changeText :: (String -> String) -> a XmlTree XmlTree changeText cf = arr (XN.changeText cf) `when` isText -- | edit the blob of a blob node changeBlob :: (Blob -> Blob) -> a XmlTree XmlTree changeBlob cf = arr (XN.changeBlob cf) `when` isBlob -- | edit the comment string of a comment node changeCmt :: (String -> String) -> a XmlTree XmlTree changeCmt cf = arr (XN.changeCmt cf) `when` isCmt -- | edit an element-, attribute- or pi- name changeQName :: (QName -> QName) -> a XmlTree XmlTree changeQName cf = arr (XN.changeName cf) `when` getQName -- | edit an element name changeElemName :: (QName -> QName) -> a XmlTree XmlTree changeElemName cf = arr (XN.changeElemName cf) `when` isElem -- | edit an attribute name changeAttrName :: (QName -> QName) -> a XmlTree XmlTree changeAttrName cf = arr (XN.changeAttrName cf) `when` isAttr -- | edit a pi name changePiName :: (QName -> QName) -> a XmlTree XmlTree changePiName cf = arr (XN.changePiName cf) `when` isPi -- | edit an attribute value changeAttrValue :: (String -> String) -> a XmlTree XmlTree changeAttrValue cf = replaceChildren ( xshow getChildren >>> arr cf >>> mkText ) `when` isAttr -- | edit an attribute list of an element node changeAttrl :: (XmlTrees -> XmlTrees -> XmlTrees) -> a XmlTree XmlTree -> a XmlTree XmlTree changeAttrl cf f = ( ( listA f &&& this ) >>> arr2 changeAL ) `when` ( isElem <+> isPi ) where changeAL as x = XN.changeAttrl (\ xs -> cf xs as) x -- | replace an element, attribute or pi name setQName :: QName -> a XmlTree XmlTree setQName n = changeQName (const n) {-# INLINE setQName #-} -- | replace an element name setElemName :: QName -> a XmlTree XmlTree setElemName n = changeElemName (const n) {-# INLINE setElemName #-} -- | replace an attribute name setAttrName :: QName -> a XmlTree XmlTree setAttrName n = changeAttrName (const n) {-# INLINE setAttrName #-} -- | replace an element name setPiName :: QName -> a XmlTree XmlTree setPiName n = changePiName (const n) {-# INLINE setPiName #-} -- | replace an atribute list of an element node setAttrl :: a XmlTree XmlTree -> a XmlTree XmlTree setAttrl = changeAttrl (const id) -- (\ x y -> y) {-# INLINE setAttrl #-} -- | add a list of attributes to an element addAttrl :: a XmlTree XmlTree -> a XmlTree XmlTree addAttrl = changeAttrl (XN.mergeAttrl) {-# INLINE addAttrl #-} -- | add (or replace) an attribute addAttr :: String -> String -> a XmlTree XmlTree addAttr an av = addAttrl (sattr an av) {-# INLINE addAttr #-} -- | remove an attribute removeAttr :: String -> a XmlTree XmlTree removeAttr an = processAttrl (none `when` hasName an) -- | remove an attribute with a qualified name removeQAttr :: QName -> a XmlTree XmlTree removeQAttr an = processAttrl (none `when` hasQName an) -- | process the attributes of an element node with an arrow processAttrl :: a XmlTree XmlTree -> a XmlTree XmlTree processAttrl f = setAttrl (getAttrl >>> f) -- | process a whole tree inclusive attribute list of element nodes -- see also: 'Control.Arrow.ArrowTree.processTopDown' processTopDownWithAttrl :: a XmlTree XmlTree -> a XmlTree XmlTree processTopDownWithAttrl f = processTopDown ( f >>> ( processAttrl (processTopDown f) `when` isElem)) -- | convenient op for adding attributes or children to a node -- -- usage: @ tf += cf @ -- -- the @tf@ arrow computes an element node, and all trees computed by @cf@ are -- added to this node, if a tree is an attribute, it is inserted in the attribute list -- else it is appended to the content list. -- -- attention: do not build long content list this way because '+=' is implemented by ++ -- -- examples: -- -- > eelem "a" -- > += sattr "href" "page.html" -- > += sattr "name" "here" -- > += txt "look here" -- -- is the same as -- -- > mkelem [ sattr "href" "page.html" -- > , sattr "name" "here" -- > ] -- > [ txt "look here" ] -- -- and results in the XML fragment: \look here\<\/a\> -- -- advantage of the '+=' operator is, that attributes and content can be added -- any time step by step. -- if @tf@ computes a whole list of trees, e.g. a list of \"td\" or \"tr\" elements, -- the attributes or content is added to all trees. useful for adding \"class\" or \"style\" attributes -- to table elements. (+=) :: a b XmlTree -> a b XmlTree -> a b XmlTree tf += cf = (tf &&& listA cf) >>> arr2 addChildren where addChildren :: XmlTree -> XmlTrees -> XmlTree addChildren t cs = foldl addChild t cs addChild :: XmlTree -> XmlTree -> XmlTree addChild t c | not (XN.isElem t) = t | XN.isAttr c = XN.changeAttrl (XN.addAttr c) t | otherwise = XN.changeChildren (++ [c]) t -- | apply an arrow to the input and convert the resulting XML trees into a string representation xshow :: a n XmlTree -> a n String xshow f = f >. XS.xshow {-# INLINE xshow #-} -- | apply an arrow to the input and convert the resulting XML trees into a string representation xshowBlob :: a n XmlTree -> a n Blob xshowBlob f = f >. XS.xshowBlob {-# INLINE xshowBlob #-} {- | Document Type Definition arrows These are separated, because they are not needed for document processing, only when processing the DTD, e.g. for generating access funtions for the toolbox from a DTD (se example DTDtoHaskell in the examples directory) -} class (ArrowXml a) => ArrowDTD a where isDTDDoctype :: a XmlTree XmlTree isDTDDoctype = isA (maybe False (== DOCTYPE ) . XN.getDTDPart) isDTDElement :: a XmlTree XmlTree isDTDElement = isA (maybe False (== ELEMENT ) . XN.getDTDPart) isDTDContent :: a XmlTree XmlTree isDTDContent = isA (maybe False (== CONTENT ) . XN.getDTDPart) isDTDAttlist :: a XmlTree XmlTree isDTDAttlist = isA (maybe False (== ATTLIST ) . XN.getDTDPart) isDTDEntity :: a XmlTree XmlTree isDTDEntity = isA (maybe False (== ENTITY ) . XN.getDTDPart) isDTDPEntity :: a XmlTree XmlTree isDTDPEntity = isA (maybe False (== PENTITY ) . XN.getDTDPart) isDTDNotation :: a XmlTree XmlTree isDTDNotation = isA (maybe False (== NOTATION) . XN.getDTDPart) isDTDCondSect :: a XmlTree XmlTree isDTDCondSect = isA (maybe False (== CONDSECT) . XN.getDTDPart) isDTDName :: a XmlTree XmlTree isDTDName = isA (maybe False (== NAME ) . XN.getDTDPart) isDTDPERef :: a XmlTree XmlTree isDTDPERef = isA (maybe False (== PEREF ) . XN.getDTDPart) hasDTDAttr :: String -> a XmlTree XmlTree hasDTDAttr n = isA (isJust . lookup n . fromMaybe [] . XN.getDTDAttrl) getDTDAttrValue :: String -> a XmlTree String getDTDAttrValue n = arrL (maybeToList . lookup n . fromMaybe [] . XN.getDTDAttrl) setDTDAttrValue :: String -> String -> a XmlTree XmlTree setDTDAttrValue n v = arr (XN.changeDTDAttrl (addEntry n v)) `when` isDTD mkDTDElem :: DTDElem -> Attributes -> a n XmlTree -> a n XmlTree mkDTDElem e al cf = listA cf >>> arr (XN.mkDTDElem e al) mkDTDDoctype :: Attributes -> a n XmlTree -> a n XmlTree mkDTDDoctype = mkDTDElem DOCTYPE mkDTDElement :: Attributes -> a n XmlTree mkDTDElement al = mkDTDElem ELEMENT al none mkDTDEntity :: Attributes -> a n XmlTree mkDTDEntity al = mkDTDElem ENTITY al none mkDTDPEntity :: Attributes -> a n XmlTree mkDTDPEntity al = mkDTDElem PENTITY al none instance ArrowXml LA instance ArrowXml (SLA s) instance ArrowXml IOLA instance ArrowXml (IOSLA s) instance ArrowDTD LA instance ArrowDTD (SLA s) instance ArrowDTD IOLA instance ArrowDTD (IOSLA s) -- ------------------------------------------------------------ hxt-9.3.1.1/src/Text/XML/HXT/Arrow/Pickle.hs0000644000000000000000000002166112036750467016347 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Text.XML.HXT.Arrow.Pickle Copyright : Copyright (C) 2005 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : experimental Portability: portable Pickler functions for converting between user defined data types and XmlTree data. Usefull for persistent storage and retreival of arbitray data as XML documents This module is an adaptation of the pickler combinators developed by Andrew Kennedy ( http:\/\/research.microsoft.com\/~akenn\/fun\/picklercombinators.pdf ) The difference to Kennedys approach is that the target is not a list of Chars but a list of XmlTrees. The basic picklers will convert data into XML text nodes. New are the picklers for creating elements and attributes. One extension was neccessary: The unpickling may fail. Therefore the unpickler has a Maybe result type. Failure is used to unpickle optional elements (Maybe data) and lists of arbitray length There is an example program demonstrating the use of the picklers for a none trivial data structure. (see \"examples\/arrows\/pickle\" directory) -} -- ------------------------------------------------------------ module Text.XML.HXT.Arrow.Pickle ( xpickleDocument -- from this module Text.XML.HXT.Arrow.Pickle , xunpickleDocument , xpickleWriteDTD , xpickleDTD , checkPickler , xpickleVal , xunpickleVal , thePicklerDTD , a_addDTD -- from Text.XML.HXT.Arrow.Pickle.Xml , pickleDoc , unpickleDoc , unpickleDoc' , showPickled , PU(..) , XmlPickler(..) , xp4Tuple , xp5Tuple , xp6Tuple , xp7Tuple , xp8Tuple , xp9Tuple , xp10Tuple , xp11Tuple , xp12Tuple , xp13Tuple , xp14Tuple , xp15Tuple , xp16Tuple , xp17Tuple , xp18Tuple , xp19Tuple , xp20Tuple , xp21Tuple , xp22Tuple , xp23Tuple , xp24Tuple , xpAddFixedAttr , xpAddNSDecl , xpAlt , xpAttr , xpAttrFixed , xpAttrImplied , xpAttrNS , xpCheckEmpty , xpCheckEmptyAttributes , xpCheckEmptyContents , xpTextAttr , xpChoice , xpDefault , xpElem , xpElemNS , xpElemWithAttrValue , xpFilterAttr , xpFilterCont , xpInt , xpLift , xpLiftEither , xpLiftMaybe , xpList , xpList1 , xpMap , xpOption , xpPair , xpPrim , xpSeq , xpSeq' , xpText , xpText0 , xpTextDT , xpText0DT , xpTree , xpTrees , xpTriple , xpUnit , xpWrap , xpWrapEither , xpWrapMaybe , xpXmlText , xpZero -- from Text.XML.HXT.Arrow.Pickle.Schema , Schema , Schemas , DataTypeDescr ) where import Control.Arrow.ListArrows import Text.XML.HXT.DOM.Interface import Text.XML.HXT.Arrow.ReadDocument import Text.XML.HXT.Arrow.WriteDocument import Text.XML.HXT.Arrow.XmlArrow import Text.XML.HXT.Arrow.XmlState import Text.XML.HXT.Arrow.XmlState.TypeDefs import Text.XML.HXT.Arrow.Pickle.Xml import Text.XML.HXT.Arrow.Pickle.Schema import Text.XML.HXT.Arrow.Pickle.DTD -- ------------------------------------------------------------ -- the arrow interface for pickling and unpickling -- | store an arbitray value in a persistent XML document -- -- The pickler converts a value into an XML tree, this is written out with -- 'Text.XML.HXT.Arrow.writeDocument'. The option list is passed to 'Text.XML.HXT.Arrow.writeDocument' -- -- An option evaluated by this arrow is 'a_addDTD'. -- If 'a_addDTD' is set ('v_1'), the pickler DTD is added as an inline DTD into the document. xpickleDocument :: PU a -> SysConfigList -> String -> IOStateArrow s a XmlTree xpickleDocument xp config dest = localSysEnv $ configSysVars config >>> xpickleVal xp >>> traceMsg 1 "xpickleVal applied" >>> ifA ( getSysAttr a_addDTD >>> isA (== v_1) ) ( replaceChildren ( (constA undefined >>> xpickleDTD xp >>> getChildren) <+> getChildren ) ) this >>> writeDocument [] dest -- | Option for generating and adding DTD when document is pickled a_addDTD :: String a_addDTD = "addDTD" -- | read an arbitray value from an XML document -- -- The document is read with 'Text.XML.HXT.Arrow.readDocument'. Options are passed -- to 'Text.XML.HXT.Arrow.readDocument'. The conversion from XmlTree is done with the -- pickler. -- -- @ xpickleDocument xp al dest >>> xunpickleDocument xp al' dest @ is the identity arrow -- when applied with the appropriate options. When during pickling indentation is switched on, -- the whitespace must be removed during unpickling. xunpickleDocument :: PU a -> SysConfigList -> String -> IOStateArrow s b a xunpickleDocument xp conf src = readDocument conf src >>> traceMsg 1 ("xunpickleVal for " ++ show src ++ " started") >>> xunpickleVal xp >>> traceMsg 1 ("xunpickleVal for " ++ show src ++ " finished") -- | Write out the DTD generated out of a pickler. Calls 'xpicklerDTD' xpickleWriteDTD :: PU b -> SysConfigList -> String -> IOStateArrow s b XmlTree xpickleWriteDTD xp config dest = xpickleDTD xp >>> writeDocument config dest -- | The arrow for generating the DTD out of a pickler -- -- A DTD is generated from a pickler and check for consistency. -- Errors concerning the DTD are issued. xpickleDTD :: PU b -> IOStateArrow s b XmlTree xpickleDTD xp = root [] [ constL (thePicklerDTD xp) >>> filterErrorMsg ] -- | An arrow for checking picklers -- -- A value is transformed into an XML document by a given pickler, -- the associated DTD is extracted from the pickler and checked, -- the document including the DTD is tranlated into a string, -- this string is read and validated against the included DTD, -- and unpickled. -- The last step is the equality with the input. -- -- If the check succeeds, the arrow works like this, else it fails. checkPickler :: Eq a => PU a -> IOStateArrow s a a checkPickler xp = ( ( ( ( xpickleVal xp >>> replaceChildren ( (constA undefined >>> xpickleDTD xp >>> getChildren) <+> getChildren ) >>> writeDocumentToString [] >>> readFromString [withValidate True] >>> xunpickleVal xp ) &&& this ) >>> isA (uncurry (==)) ) `guards` this ) `orElse` issueErr "pickle/unpickle combinators failed" -- | The arrow version of the pickler function xpickleVal :: ArrowXml a => PU b -> a b XmlTree xpickleVal xp = arr (pickleDoc xp) -- | The arrow version of the unpickler function {- old version, runs outside IO xunpickleVal :: ArrowXml a => PU b -> a XmlTree b xunpickleVal xp = ( processChildren (none `whenNot` isElem) -- remove all stuff surrounding the root element `when` isRoot ) >>> arrL (maybeToList . unpickleDoc xp) -- -} xunpickleVal :: PU b -> IOStateArrow s XmlTree b xunpickleVal xp = ( processChildren (none `whenNot` isElem) -- remove all stuff surrounding the root element `when` isRoot ) >>> arr (unpickleDoc' xp) >>> ( ( (issueFatal $< arr ("document unpickling failed\n" ++)) >>> none ) ||| this ) -- | Compute the associated DTD of a pickler thePicklerDTD :: PU b -> XmlTrees thePicklerDTD = dtdDescrToXml . dtdDescr . theSchema -- ------------------------------------------------------------ hxt-9.3.1.1/src/Text/XML/HXT/Arrow/Namespace.hs0000644000000000000000000004052512036750467017034 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Text.XML.HXT.Arrow.Namespace Copyright : Copyright (C) 2005-2008 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : experimental Portability: portable namespace specific arrows -} -- ------------------------------------------------------------ module Text.XML.HXT.Arrow.Namespace ( attachNsEnv , cleanupNamespaces , collectNamespaceDecl , collectPrefixUriPairs , isNamespaceDeclAttr , getNamespaceDecl , processWithNsEnv , processWithNsEnvWithoutAttrl , propagateNamespaces , uniqueNamespaces , uniqueNamespacesFromDeclAndQNames , validateNamespaces ) where import Control.Arrow -- arrow classes import Control.Arrow.ArrowList import Control.Arrow.ArrowIf import Control.Arrow.ArrowTree import Control.Arrow.ListArrow import Text.XML.HXT.DOM.Interface import Text.XML.HXT.Arrow.XmlArrow import Data.Maybe ( isNothing , fromJust ) import Data.List ( nub ) -- ------------------------------------------------------------ -- | test whether an attribute node contains an XML Namespace declaration isNamespaceDeclAttr :: ArrowXml a => a XmlTree XmlTree isNamespaceDeclAttr = fromLA $ (getAttrName >>> isA isNameSpaceName) `guards` this {-# INLINE isNamespaceDeclAttr #-} -- | get the namespace prefix and the namespace URI out of -- an attribute tree with a namespace declaration (see 'isNamespaceDeclAttr') -- for all other nodes this arrow fails getNamespaceDecl :: ArrowXml a => a XmlTree (String, String) getNamespaceDecl = fromLA $ isNamespaceDeclAttr >>> ( ( getAttrName >>> arr getNsPrefix ) &&& xshow getChildren ) where getNsPrefix = drop 6 . qualifiedName -- drop "xmlns:" -- ------------------------------------------------------------ -- | collect all namespace declarations contained in a document -- -- apply 'getNamespaceDecl' to a whole XmlTree collectNamespaceDecl :: LA XmlTree (String, String) collectNamespaceDecl = multi getAttrl >>> getNamespaceDecl -- | collect all (namePrefix, namespaceUri) pairs from a tree -- -- all qualified names are inspected, whether a namespace uri is defined, -- for these uris the prefix and uri is returned. This arrow is useful for -- namespace cleanup, e.g. for documents generated with XSLT. It can be used -- together with 'collectNamespaceDecl' to 'cleanupNamespaces' collectPrefixUriPairs :: LA XmlTree (String, String) collectPrefixUriPairs = multi (isElem <+> getAttrl <+> isPi) >>> getQName >>> arrL getPrefixUri where getPrefixUri :: QName -> [(String, String)] getPrefixUri n | null uri = [] | px == a_xmlns || px == a_xml = [] -- these ones are reserved an predefined | otherwise = [(namePrefix n, uri)] where uri = namespaceUri n px = namePrefix n -- ------------------------------------------------------------ -- | generate unique namespaces and add all namespace declarations to all top nodes containing a namespace declaration -- Usually the top node containing namespace declarations is the root node, but this isn't mandatory. -- -- Calls 'cleanupNamespaces' with 'collectNamespaceDecl' uniqueNamespaces :: ArrowXml a => a XmlTree XmlTree uniqueNamespaces = fromLA $ cleanupNamespaces' collectNamespaceDecl -- | generate unique namespaces and add all namespace declarations for all prefix-uri pairs in all qualified names -- -- useful for cleanup of namespaces in generated documents. -- Calls 'cleanupNamespaces' with @ collectNamespaceDecl \<+> collectPrefixUriPairs @ uniqueNamespacesFromDeclAndQNames :: ArrowXml a => a XmlTree XmlTree uniqueNamespacesFromDeclAndQNames = fromLA $ cleanupNamespaces' ( collectNamespaceDecl <+> collectPrefixUriPairs ) cleanupNamespaces' :: LA XmlTree (String, String) -> LA XmlTree XmlTree cleanupNamespaces' collectNamespaces = processTopDownUntil ( hasNamespaceDecl `guards` cleanupNamespaces collectNamespaces ) where hasNamespaceDecl = isElem >>> getAttrl >>> isNamespaceDeclAttr -- | does the real work for namespace cleanup. -- -- The parameter is used for collecting namespace uris and prefixes from the input tree cleanupNamespaces :: LA XmlTree (String, String) -> LA XmlTree XmlTree cleanupNamespaces collectNamespaces = renameNamespaces $< (listA collectNamespaces >>^ (toNsEnv >>> nub)) where renameNamespaces :: NsEnv -> LA XmlTree XmlTree renameNamespaces env = processBottomUp ( processAttrl ( ( none `when` isNamespaceDeclAttr ) -- remove all namespace declarations >>> changeQName renamePrefix -- update namespace prefix of attribute names, if namespace uri is set ) >>> changeQName renamePrefix -- update namespace prefix of element names ) >>> attachEnv env1 -- add all namespaces as attributes to the root node attribute list where renamePrefix :: QName -> QName renamePrefix n | isNullXName uri = n | isNothing newPx = n | otherwise = setNamePrefix' (fromJust newPx) n where uri = namespaceUri' n newPx = lookup uri revEnv1 revEnv1 = map (\ (x, y) -> (y, x)) env1 env1 :: NsEnv env1 = newEnv [] uris uris :: [XName] uris = nub . map snd $ env genPrefixes :: [XName] genPrefixes = map (newXName . ("ns" ++) . show) [(0::Int)..] newEnv :: NsEnv -> [XName] -> NsEnv newEnv env' [] = env' newEnv env' (uri:rest) = newEnv env'' rest where env'' = (prefix, uri) : env' prefix = head (filter notAlreadyUsed $ preferedPrefixes ++ genPrefixes) preferedPrefixes = map fst . filter ((==uri).snd) $ env notAlreadyUsed s = isNothing . lookup s $ env' -- ------------------------------------------------------------ -- | auxiliary arrow for processing with a namespace environment -- -- process a document tree with an arrow, containing always the -- valid namespace environment as extra parameter. -- The namespace environment is implemented as a 'Data.AssocList.AssocList'. -- Processing of attributes can be controlled by a boolean parameter processWithNsEnv1 :: ArrowXml a => Bool -> (NsEnv -> a XmlTree XmlTree) -> NsEnv -> a XmlTree XmlTree processWithNsEnv1 withAttr f env = ifA isElem -- the test is just an optimization ( processWithExtendedEnv $< arr (extendEnv env) ) -- only element nodes contain namespace declarations ( processWithExtendedEnv env ) where processWithExtendedEnv env' = f env' -- apply the env filter >>> ( ( if withAttr then processAttrl (f env') -- apply the env to all attributes else this ) >>> processChildren (processWithNsEnv f env') -- apply the env recursively to all children ) `when` isElem -- attrl and children only need processing for elem nodes extendEnv :: NsEnv -> XmlTree -> NsEnv extendEnv env' t' = addEntries (toNsEnv newDecls) env' where newDecls = runLA ( getAttrl >>> getNamespaceDecl ) t' -- ------------------------------------------------------------ -- | process a document tree with an arrow, containing always the -- valid namespace environment as extra parameter. -- -- The namespace environment is implemented as a 'Data.AssocList.AssocList' processWithNsEnv :: ArrowXml a => (NsEnv -> a XmlTree XmlTree) -> NsEnv -> a XmlTree XmlTree processWithNsEnv = processWithNsEnv1 True -- | process all element nodes of a document tree with an arrow, containing always the -- valid namespace environment as extra parameter. Attribute lists are not processed. -- -- See also: 'processWithNsEnv' processWithNsEnvWithoutAttrl :: ArrowXml a => (NsEnv -> a XmlTree XmlTree) -> NsEnv -> a XmlTree XmlTree processWithNsEnvWithoutAttrl = processWithNsEnv1 False -- ----------------------------------------------------------------------------- -- | attach all valid namespace declarations to the attribute list of element nodes. -- -- This arrow is useful for document processing, that requires access to all namespace -- declarations at any element node, but which cannot be done with a simple 'processWithNsEnv'. attachNsEnv :: ArrowXml a => NsEnv -> a XmlTree XmlTree attachNsEnv initialEnv = fromLA $ processWithNsEnvWithoutAttrl attachEnv initialEnv where attachEnv :: NsEnv -> LA XmlTree XmlTree attachEnv env = ( processAttrl (none `when` isNamespaceDeclAttr) >>> addAttrl (catA nsAttrl) ) `when` isElem where nsAttrl :: [LA XmlTree XmlTree] nsAttrl = map nsDeclToAttr env nsDeclToAttr :: (XName, XName) -> LA XmlTree XmlTree nsDeclToAttr (n, uri) = mkAttr qn (txt (unXN uri)) where qn :: QName qn | isNullXName n = newQName xmlnsXName nullXName xmlnsNamespaceXName | otherwise = newQName n xmlnsXName xmlnsNamespaceXName -- ----------------------------------------------------------------------------- -- | -- propagate all namespace declarations \"xmlns:ns=...\" to all element and attribute nodes of a document. -- -- This arrow does not check for illegal use of namespaces. -- The real work is done by 'propagateNamespaceEnv'. -- -- The arrow may be applied repeatedly if neccessary. propagateNamespaces :: ArrowXml a => a XmlTree XmlTree propagateNamespaces = fromLA $ propagateNamespaceEnv [ (xmlXName, xmlNamespaceXName) , (xmlnsXName, xmlnsNamespaceXName) ] -- | -- attaches the namespace info given by the namespace table -- to a tag node and its attributes and children. propagateNamespaceEnv :: NsEnv -> LA XmlTree XmlTree propagateNamespaceEnv = processWithNsEnv addNamespaceUri where addNamespaceUri :: NsEnv -> LA XmlTree XmlTree addNamespaceUri env' = choiceA [ isElem :-> changeElemName (setNamespace env') , isAttr :-> attachNamespaceUriToAttr env' , isPi :-> changePiName (setNamespace env') , this :-> this ] attachNamespaceUriToAttr :: NsEnv -> LA XmlTree XmlTree attachNamespaceUriToAttr attrEnv = ( ( getQName >>> isA (not . null . namePrefix) ) `guards` changeAttrName (setNamespace attrEnv) ) `orElse` ( changeAttrName (const xmlnsQN) `when` hasName a_xmlns ) -- ----------------------------------------------------------------------------- -- | -- validate the namespace constraints in a whole tree. -- -- Result is the list of errors concerning namespaces. -- Predicates 'isWellformedQName', 'isWellformedQualifiedName', 'isDeclaredNamespace' -- and 'isWellformedNSDecl' are applied to the appropriate elements and attributes. validateNamespaces :: ArrowXml a => a XmlTree XmlTree validateNamespaces = fromLA validateNamespaces1 validateNamespaces1 :: LA XmlTree XmlTree validateNamespaces1 = choiceA [ isRoot :-> ( getChildren >>> validateNamespaces1 ) -- root is correct by definition , this :-> multi validate1Namespaces ] -- | -- a single node for namespace constrains. validate1Namespaces :: LA XmlTree XmlTree validate1Namespaces = choiceA [ isElem :-> catA [ ( getQName >>> isA ( not . isWellformedQName ) ) `guards` nsError (\ n -> "element name " ++ show n ++ " is not a wellformed qualified name" ) , ( getQName >>> isA ( not . isDeclaredNamespace ) ) `guards` nsError (\ n -> "namespace for prefix in element name " ++ show n ++ " is undefined" ) , doubleOcc $< ( (getAttrl >>> getUniversalName) >>. doubles ) , getAttrl >>> validate1Namespaces ] , isAttr :-> catA [ ( getQName >>> isA ( not . isWellformedQName ) ) `guards` nsError (\ n -> "attribute name " ++ show n ++ " is not a wellformed qualified name" ) , ( getQName >>> isA ( not . isDeclaredNamespace ) ) `guards` nsError (\ n -> "namespace for prefix in attribute name " ++ show n ++ " is undefined" ) , ( hasNamePrefix a_xmlns >>> xshow getChildren >>> isA null ) `guards` nsError (\ n -> "namespace value of namespace declaration for " ++ show n ++ " has no value" ) , ( getQName >>> isA (not . isWellformedNSDecl ) ) `guards` nsError (\ n -> "illegal namespace declaration for name " ++ show n ++ " starting with reserved prefix " ++ show "xml" ) ] , isDTD :-> catA [ isDTDDoctype <+> isDTDAttlist <+> isDTDElement <+> isDTDName >>> getDTDAttrValue a_name >>> ( isA (not . isWellformedQualifiedName) `guards` nsErr (\ n -> "a DTD part contains a not wellformed qualified Name: " ++ show n) ) , isDTDAttlist >>> getDTDAttrValue a_value >>> ( isA (not . isWellformedQualifiedName) `guards` nsErr (\ n -> "an ATTLIST declaration contains as attribute name a not wellformed qualified Name: " ++ show n) ) , isDTDEntity <+> isDTDPEntity <+> isDTDNotation >>> getDTDAttrValue a_name >>> ( isA (not . isNCName) `guards` nsErr (\ n -> "an entity or notation declaration contains a not wellformed NCName: " ++ show n) ) ] , isPi :-> catA [ getName >>> ( isA (not . isNCName) `guards` nsErr (\ n -> "a PI contains a not wellformed NCName: " ++ show n) ) ] ] where nsError :: (QName -> String) -> LA XmlTree XmlTree nsError msg = getQName >>> nsErr msg nsErr :: (a -> String) -> LA a XmlTree nsErr msg = arr msg >>> mkError c_err doubleOcc :: String -> LA XmlTree XmlTree doubleOcc an = nsError (\ n -> "multiple occurences of universal name for attributes of tag " ++ show n ++ " : " ++ show an ) -- ------------------------------------------------------------ hxt-9.3.1.1/src/Text/XML/HXT/Arrow/DTDProcessing.hs0000644000000000000000000004451412036750467017612 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Text.XML.HXT.Arrow.DTDProcessing Copyright : Copyright (C) 2005 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : experimental Portability: portable DTD processing function for including external parts of a DTD parameter entity substitution and general entity substitution Implemtation completely done with arrows -} -- ------------------------------------------------------------ module Text.XML.HXT.Arrow.DTDProcessing ( processDTD ) where import Control.Arrow -- arrow classes import Control.Arrow.ArrowList import Control.Arrow.ArrowIf import Control.Arrow.ArrowTree import Text.XML.HXT.DOM.Interface import qualified Text.XML.HXT.DOM.XmlNode as XN import Text.XML.HXT.Arrow.XmlArrow import Text.XML.HXT.Arrow.XmlState import Text.XML.HXT.Arrow.ParserInterface ( parseXmlDTDdecl , parseXmlDTDdeclPart , parseXmlDTDEntityValue , parseXmlDTDPart ) import Text.XML.HXT.Arrow.Edit ( transfCharRef ) import Text.XML.HXT.Arrow.DocumentInput ( getXmlEntityContents ) import Data.Maybe import qualified Data.Map as M ( Map , empty , lookup , insert ) -- ------------------------------------------------------------ -- data DTDPart = Internal | External deriving (Eq) type RecList = [String] type DTDStateArrow b c = IOStateArrow PEEnv b c -- ------------------------------------------------------------ newtype PEEnv = PEEnv (M.Map String XmlTree) emptyPeEnv :: PEEnv emptyPeEnv = PEEnv M.empty lookupPeEnv :: String -> PEEnv -> Maybe XmlTree lookupPeEnv k (PEEnv env) = M.lookup k env addPeEntry :: String -> XmlTree -> PEEnv -> PEEnv addPeEntry k a (PEEnv env) = PEEnv $ M.insert k a env getPeValue :: DTDStateArrow String XmlTree getPeValue = (this &&& getUserState) >>> arrL (\ (n, env) -> maybeToList . lookupPeEnv n $ env) addPe :: String -> DTDStateArrow XmlTree XmlTree addPe n = traceMsg 2 ("substParamEntity: add entity " ++ show n ++ " to env") >>> changeUserState ins where ins t peEnv = addPeEntry n t peEnv -- ------------------------------------------------------------ -- | -- a filter for DTD processing -- -- inclusion of external parts of DTD, -- parameter entity substitution -- conditional section evaluation -- -- input tree must represent a complete document including root node processDTD :: IOStateArrow s XmlTree XmlTree processDTD = runInLocalURIContext ( processRoot >>> traceTree >>> traceSource ) `when` ( isRoot >>> getChildren ) where processRoot :: IOStateArrow s XmlTree XmlTree processRoot = ( traceMsg 1 ("processDTD: process parameter entities") >>> setSysAttrString a_standalone "" >>> processChildren substParamEntities >>> setDocumentStatusFromSystemState "in XML DTD processing" >>> traceMsg 1 ("processDTD: parameter entities processed") ) `when` documentStatusOk substParamEntities :: IOStateArrow s XmlTree XmlTree substParamEntities = withOtherUserState emptyPeEnv processParamEntities `when` isDTDDoctype where processParamEntities :: DTDStateArrow XmlTree XmlTree processParamEntities = mergeEntities $<<< ( listA processPredef &&& listA processInt &&& listA (runInLocalURIContext processExt) ) where mergeEntities dtdPre dtdInt dtdExt = replaceChildren (arrL $ const $ foldl1 mergeDTDs [dtdPre, dtdInt, dtdExt]) processPredef = predefDTDPart >>> substParamEntity Internal [] processInt = getChildren >>> substParamEntity Internal [] processExt = externalDTDPart >>> substParamEntity External [] mergeDTDs :: XmlTrees -> XmlTrees -> XmlTrees mergeDTDs dtdInt dtdExt = dtdInt ++ (filter (filterDTDNodes dtdInt) dtdExt) filterDTDNodes :: XmlTrees -> XmlTree -> Bool filterDTDNodes dtdPart t = not (any (filterDTDNode t) dtdPart) filterDTDNode :: XmlTree -> XmlTree -> Bool filterDTDNode t1 t2 = fromMaybe False $ do dp1 <- XN.getDTDPart t1 dp2 <- XN.getDTDPart t2 al1 <- XN.getDTDAttrl t1 al2 <- XN.getDTDAttrl t2 return ( dp1 == dp2 && ( dp1 `elem` [ELEMENT, NOTATION, ENTITY, ATTLIST] ) && ( lookup a_name al1 == lookup a_name al2 ) && ( dp1 /= ATTLIST || lookup a_value al1 == lookup a_value al2 ) ) substParamEntity :: DTDPart -> RecList -> DTDStateArrow XmlTree XmlTree substParamEntity loc recList = choiceA [ isDTDEntity :-> ( traceDTD "ENTITY declaration before DTD declaration parsing" >>> processChildren (substPeRefsInDTDdecl recList) >>> parseXmlDTDdecl >>> substPeRefsInEntityValue >>> traceDTD "ENTITY declaration after PE substitution" >>> processEntityDecl >>> traceDTD "ENTITY declaration after DTD declaration parsing" ) , ( isDTDElement <+> isDTDAttlist <+> isDTDNotation ) :-> ( traceDTD "DTD declaration before PE substitution" >>> processChildren (substPeRefsInDTDdecl recList) >>> parseXmlDTDdecl >>> traceDTD "DTD declaration after DTD declaration parsing" ) , isDTDPERef :-> substPeRefsInDTDpart recList , isDTDCondSect :-> ( if loc == Internal then issueErr "conditional sections in internal part of the DTD is not allowed" else evalCondSect $< getDTDAttrValue a_value ) , isCmt :-> none , this :-> this ] where processEntityDecl :: DTDStateArrow XmlTree XmlTree processEntityDecl = choiceA [ isDTDEntity :-> ( ifA (hasDTDAttr k_system) processExternalEntity processInternalEntity ) , isDTDPEntity :-> ( processParamEntity $< getDTDAttrValue a_name ) , this :-> none ] where processExternalEntity :: DTDStateArrow XmlTree XmlTree -- processing external entities is delayed until first usage processExternalEntity -- only the current base uri must be remembered = setDTDAttrValue a_url $< ( getDTDAttrValue k_system >>> mkAbsURI ) processInternalEntity :: DTDStateArrow XmlTree XmlTree processInternalEntity = this -- everything is already done in substPeRefsInEntityValue processParamEntity :: String -> DTDStateArrow XmlTree XmlTree processParamEntity peName = ifA (constA peName >>> getPeValue) ( issueWarn ("parameter entity " ++ show peName ++ " already defined") >>> none -- second def must be ignored ) ( ( ifA ( hasDTDAttr k_system ) -- is external param entity ? ( setDTDAttrValue a_url $< -- store absolut url ( getDTDAttrValue k_system >>> mkAbsURI ) ) -- this is too early, pe may be not referenced and file may be not there -- ( runInLocalURIContext getExternalParamEntityValue ) ( this ) -- everything is already done in substPeRefsInEntityValue ) >>> addPe peName ) substPERef :: String -> DTDStateArrow XmlTree XmlTree substPERef pn = choiceA [ isUndefinedRef :-> issueErr ("parameter entity " ++ show pn ++ " not found (forward reference?)") , isInternalRef :-> issueErr ("a parameter entity reference of " ++ show pn ++ " occurs in the internal subset of the DTD") , isUnreadExternalRef :-> ( perform ( peVal -- load the external pe value >>> -- update the pe env getExternalParamEntityValue pn -- and try again >>> addPe pn ) >>> substPERef pn ) , this :-> substPE ] `when` isDTDPERef where peVal = constA pn >>> getPeValue isUnreadExternalRef = ( peVal >>> getDTDAttrValue a_url >>> isA (not . null) ) `guards` this isInternalRef = none -- isA (const (loc == Internal)) -- TODO: check this restriction, it seams rather meaningless isUndefinedRef = neg peVal substPE = replaceChildren (peVal >>> getChildren) -- store PE value in children component substPeRefsInEntityValue :: DTDStateArrow XmlTree XmlTree substPeRefsInEntityValue = ( ( replaceChildren ( xshow ( getChildren -- substitute char entites >>> -- and parameter references transfCharRef -- combine all pieces to a single string >>> -- as the new entity value substPeRefsInValue [] ) >>> mkText ) ) `whenNot` hasDTDAttr k_system -- only apply for internal entities ) `when` ( isDTDEntity <+> isDTDPEntity ) -- only apply for entity declarations substPeRefsInDTDpart :: RecList -> DTDStateArrow XmlTree XmlTree substPeRefsInDTDpart rl = recursionCheck "DTD part" rl subst where subst :: RecList -> String -> DTDStateArrow XmlTree XmlTree subst recl pn = substPERef pn >>> traceDTD "substPeRefsInDTDdecl: before parseXmlDTDPart" >>> ( runInPeContext ( getChildren >>> ( (constA ("parameter entity: " ++ pn)) &&& this ) >>> parseXmlDTDPart >>> traceDTD "substPeRefsInDTDpart: after parseXmlDTDPart" >>> substParamEntity loc (pn : recl) ) `when` isDTDPERef ) substPeRefsInDTDdecl :: RecList -> DTDStateArrow XmlTree XmlTree substPeRefsInDTDdecl rl = recursionCheck "DTD declaration" rl subst where subst :: RecList -> String -> DTDStateArrow XmlTree XmlTree subst recl pn = substPERef pn >>> traceDTD "substPeRefsInDTDdecl: before parseXmlDTDdeclPart" >>> ( runInPeContext ( parseXmlDTDdeclPart >>> traceDTD "substPeRefsInDTDdecl: after parseXmlDTDdeclPart" >>> processChildren ( substPeRefsInDTDdecl (pn : recl) ) ) `when` isDTDPERef ) substPeRefsInValue :: RecList -> DTDStateArrow XmlTree XmlTree substPeRefsInValue rl = recursionCheck "entity value" rl subst where subst :: RecList -> String -> DTDStateArrow XmlTree XmlTree subst recl pn = substPERef pn >>> parseXmlDTDEntityValue >>> -- transfCharRef this must be done somewhere else -- >>> substPeRefsInValue (pn : recl) substPeRefsInCondSect :: RecList -> DTDStateArrow XmlTree XmlTree substPeRefsInCondSect rl = recursionCheck "conditional section" rl subst where subst :: RecList -> String -> DTDStateArrow XmlTree XmlTree subst recl pn = substPERef pn >>> traceDTD "substPeRefsInCondSect: parseXmlDTDdeclPart" >>> runInPeContext ( parseXmlDTDdeclPart >>> traceDTD "substPeRefsInCondSect: after parseXmlDTDdeclPart" >>> processChildren ( substPeRefsInCondSect (pn : recl) ) ) recursionCheck :: String -> RecList -> (RecList -> String -> DTDStateArrow XmlTree XmlTree) -> DTDStateArrow XmlTree XmlTree recursionCheck wher rl subst = ( recusiveSubst $< getDTDAttrValue a_peref ) `when` isDTDPERef where recusiveSubst name | name `elem` rl = issueErr ("recursive call of parameter entity " ++ show name ++ " in " ++ wher) | otherwise = subst rl name runInPeContext :: DTDStateArrow XmlTree XmlTree -> DTDStateArrow XmlTree XmlTree runInPeContext f = ( runWithNewBase $< getDTDAttrValue a_url ) `orElse` f where runWithNewBase base = runInLocalURIContext ( perform (constA base >>> setBaseURI) >>> f ) evalCondSect :: String -> DTDStateArrow XmlTree XmlTree evalCondSect content = traceDTD "evalCondSect: process conditional section" >>> processChildren (substPeRefsInCondSect []) >>> parseXmlDTDdecl >>> ( hasText (== k_include) `guards` ( ( constA "conditional section" &&& txt content ) >>> parseXmlDTDPart >>> traceMsg 2 "evalCond: include DTD part" >>> substParamEntity External recList ) ) predefDTDPart :: DTDStateArrow XmlTree XmlTree predefDTDPart = ( constA "predefined entities" &&& ( constA predefinedEntities >>> mkText) ) >>> parseXmlDTDPart where predefinedEntities :: String predefinedEntities = concat [ "" , "" , "" , "" , "" ] externalDTDPart :: DTDStateArrow XmlTree XmlTree externalDTDPart = isDTDDoctype `guards` ( hasDTDAttr k_system `guards` ( getExternalDTDPart $< getDTDAttrValue k_system ) ) getExternalDTDPart :: String -> DTDStateArrow XmlTree XmlTree getExternalDTDPart src = root [sattr a_source src] [] >>> getXmlEntityContents >>> replaceChildren ( ( constA src &&& getChildren ) >>> parseXmlDTDPart ) >>> traceDoc "processExternalDTD: parsing DTD part done" >>> getChildren getExternalParamEntityValue :: String -> DTDStateArrow XmlTree XmlTree getExternalParamEntityValue pn = isDTDPEntity `guards` ( setEntityValue $< ( listA ( getEntityValue $< getDTDAttrValue a_url ) ) ) where getEntityValue :: String -> DTDStateArrow XmlTree XmlTree getEntityValue url = root [sattr a_source url] [] >>> runInLocalURIContext getXmlEntityContents >>> traceMsg 2 ("getExternalParamEntityValue: contents read for " ++ show pn ++ " from " ++ show url) >>> getChildren setEntityValue :: XmlTrees -> DTDStateArrow XmlTree XmlTree setEntityValue res | null res = issueErr ("illegal external parameter entity value for entity %" ++ pn ++";") | otherwise = replaceChildren (constL res) >>> setDTDAttrValue a_url "" -- mark entity as read traceDTD :: String -> DTDStateArrow XmlTree XmlTree traceDTD msg = traceMsg 3 msg >>> traceTree -- ------------------------------------------------------------ hxt-9.3.1.1/src/Text/XML/HXT/Arrow/ProcessDocument.hs0000644000000000000000000002756712036750467020270 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Text.XML.HXT.Arrow.ProcessDocument Copyright : Copyright (C) 2011 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : experimental Portability: portable Compound arrows for reading, parsing, validating and writing XML documents All arrows use IO and a global state for options, errorhandling, ... -} -- ------------------------------------------------------------ module Text.XML.HXT.Arrow.ProcessDocument ( parseXmlDocument , parseXmlDocumentWithExpat , parseHtmlDocument , validateDocument , propagateAndValidateNamespaces , andValidateNamespaces , getDocumentContents ) where import Control.Arrow -- arrow classes import Control.Arrow.ArrowList import Control.Arrow.ArrowIf import Control.Arrow.ArrowTree import Control.Arrow.ListArrow ( fromLA ) import Control.Arrow.NTreeEdit import Text.XML.HXT.DOM.Interface import Text.XML.HXT.Arrow.XmlArrow import Text.XML.HXT.Arrow.XmlState import Text.XML.HXT.Arrow.XmlState.TypeDefs import Text.XML.HXT.Arrow.ParserInterface ( parseXmlDoc , parseHtmlDoc ) import Text.XML.HXT.Arrow.Edit ( transfAllCharRef , substAllXHTMLEntityRefs ) import Text.XML.HXT.Arrow.GeneralEntitySubstitution ( processGeneralEntities ) import Text.XML.HXT.Arrow.DTDProcessing ( processDTD ) import Text.XML.HXT.Arrow.DocumentInput ( getXmlContents ) import Text.XML.HXT.Arrow.Namespace ( propagateNamespaces , validateNamespaces ) import Text.XML.HXT.DTDValidation.Validation ( validate , getDTDSubset , generalEntitiesDefined , transform ) -- ------------------------------------------------------------ {- | XML parser Input tree must be a root tree with a text tree as child containing the document to be parsed. The parser generates from the input string a tree of a wellformed XML document, processes the DTD (parameter substitution, conditional DTD parts, ...) and substitutes all general entity references. Next step is character reference substitution. Last step is the document validation. Validation can be controlled by an extra parameter. Example: > parseXmlDocument True -- parse and validate document > > parseXmlDocument False -- only parse document, don't validate This parser is useful for applications processing correct XML documents. -} parseXmlDocument :: Bool -> Bool -> Bool -> Bool -> IOStateArrow s XmlTree XmlTree parseXmlDocument validateD substDTD substHTML validateRX = ( replaceChildren ( ( getAttrValue a_source &&& xshow getChildren ) >>> parseXmlDoc >>> filterErrorMsg ) >>> setDocumentStatusFromSystemState "parse XML document" >>> ( ifA (fromLA getDTDSubset) ( processDTDandEntities >>> ( if validate' -- validation only possible if there is a DTD then validateDocument else this ) ) ( if validate' -- validation only consists of checking -- for undefined entity refs -- predefined XML entity refs are substituted -- in the XML parser into char refs -- so there is no need for an entity substitution then traceMsg 2 "checkUndefinedEntityRefs: looking for undefined entity refs" >>> perform checkUndefinedEntityRefs >>> traceMsg 2 "checkUndefinedEntityRefs: looking for undefined entity refs done" >>> setDocumentStatusFromSystemState "decoding document" else this ) ) ) `when` documentStatusOk where validate' = validateD && not validateRX processDTDandEntities = ( if validateD || substDTD then processDTD else this ) >>> ( if substDTD then ( processGeneralEntities -- DTD contains general entity definitions `when` fromLA generalEntitiesDefined ) else if substHTML then substAllXHTMLEntityRefs else this ) >>> transfAllCharRef checkUndefinedEntityRefs :: IOStateArrow s XmlTree XmlTree checkUndefinedEntityRefs = deep isEntityRef >>> getEntityRef >>> arr (\ en -> "general entity reference \"&" ++ en ++ ";\" is undefined") >>> mkError c_err >>> filterErrorMsg -- ------------------------------------------------------------ parseXmlDocumentWithExpat :: IOStateArrow s XmlTree XmlTree parseXmlDocumentWithExpat = ( withoutUserState $< getSysVar theExpatParser ) `when` documentStatusOk -- ------------------------------------------------------------ {- | HTML parser Input tree must be a root tree with a text tree as child containing the document to be parsed. The parser tries to parse everything as HTML, if the HTML document is not wellformed XML or if errors occur, warnings are generated. The warnings can be issued, or suppressed. Example: @ parseHtmlDocument True @ : parse document and issue warnings This parser is useful for applications like web crawlers, where the pages may contain arbitray errors, but the application is only interested in parts of the document, e.g. the plain text. -} parseHtmlDocument :: IOStateArrow s XmlTree XmlTree parseHtmlDocument = ( perform ( getAttrValue a_source >>> traceValue 1 (("parseHtmlDoc: parse HTML document " ++) . show) ) >>> ( parseHtml $< getSysVar (theTagSoup .&&&. theExpat) ) >>> ( removeWarnings $< getSysVar (theWarnings .&&&. theTagSoup) ) >>> setDocumentStatusFromSystemState "parse HTML document" >>> traceTree >>> traceSource >>> perform ( getAttrValue a_source >>> traceValue 1 (\ src -> "parse HTML document " ++ show src ++ " finished") ) ) `when` documentStatusOk where parseHtml (withTagSoup', withExpat') | withExpat' = withoutUserState $< getSysVar theExpatParser | withTagSoup' = withoutUserState $< getSysVar theTagSoupParser | otherwise = traceMsg 1 ("parse document with parsec HTML parser") >>> replaceChildren ( ( getAttrValue a_source -- get source name &&& xshow getChildren ) -- get string to be parsed >>> parseHtmlDoc -- run parser, entity substituion is done in parser ) removeWarnings (warnings, withTagSoup') | warnings = processTopDownWithAttrl -- remove warnings inserted by parser and entity subst filterErrorMsg | withTagSoup' = this -- warnings are not generated in tagsoup | otherwise = fromLA $ editNTreeA [isError :-> none] -- remove all warnings from document -- ------------------------------------------------------------ {- | Document validation Input must be a complete document tree. The document is validated with respect to the DTD spec. Only useful for XML documents containing a DTD. If the document is valid, it is transformed with respect to the DTD, normalization of attribute values, adding default values, sorting attributes by name,... If no error was found, result is the normalized tree, else the error status is set in the list of attributes of the root node \"\/\" and the document content is removed from the tree. -} validateDocument :: IOStateArrow s XmlTree XmlTree validateDocument = ( traceMsg 1 "validating document" >>> perform ( validateDoc >>> filterErrorMsg ) >>> setDocumentStatusFromSystemState "document validation" >>> traceMsg 1 "document validated, transforming doc with respect to DTD" >>> transformDoc >>> traceMsg 1 "document transformed" >>> traceSource >>> traceTree ) `when` documentStatusOk -- ------------------------------------------------------------ {- | Namespace propagation Input must be a complete document tree. The namespace declarations are evaluated and all element and attribute names are processed by splitting the name into prefix, local part and namespace URI. Naames are checked with respect to the XML namespace definition If no error was found, result is the unchanged input tree, else the error status is set in the list of attributes of the root node \"\/\" and the document content is removed from the tree. -} propagateAndValidateNamespaces :: IOStateArrow s XmlTree XmlTree propagateAndValidateNamespaces = ( traceMsg 1 "propagating namespaces" >>> propagateNamespaces >>> traceDoc "propagating namespaces done" >>> andValidateNamespaces ) `when` documentStatusOk andValidateNamespaces :: IOStateArrow s XmlTree XmlTree andValidateNamespaces = ( traceMsg 1 "validating namespaces" >>> ( setDocumentStatusFromSystemState "namespace propagation" `when` ( validateNamespaces >>> perform filterErrorMsg ) ) >>> traceMsg 1 "namespace validation finished" ) `when` documentStatusOk -- ------------------------------------------------------------ {- | creates a new document root, adds all options as attributes to the document root and calls 'getXmlContents'. If the document name is the empty string, the document will be read from standard input. For supported protocols see 'Text.XML.HXT.Arrow.DocumentInput.getXmlContents' -} getDocumentContents :: String -> IOStateArrow s b XmlTree getDocumentContents src = root [] [] >>> addAttr a_source src >>> traceMsg 1 ("readDocument: start processing document " ++ show src) >>> getXmlContents -- ------------------------------------------------------------ validateDoc :: ArrowList a => a XmlTree XmlTree validateDoc = fromLA ( validate `when` getDTDSubset -- validate only when DTD decl is present ) transformDoc :: ArrowList a => a XmlTree XmlTree transformDoc = fromLA transform -- ------------------------------------------------------------ hxt-9.3.1.1/src/Text/XML/HXT/Arrow/XmlState/0000755000000000000000000000000012036750467016337 5ustar0000000000000000hxt-9.3.1.1/src/Text/XML/HXT/Arrow/XmlState/SystemConfig.hs0000644000000000000000000002510412036750467021307 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Text.XML.HXT.Arrow.XmlState.SystemConfig Copyright : Copyright (C) 2010 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : stable Portability: portable system configuration and common options options -} -- ------------------------------------------------------------ module Text.XML.HXT.Arrow.XmlState.SystemConfig where import Control.Arrow import Data.Map ( insert ) import Text.XML.HXT.DOM.Interface import Text.XML.HXT.Arrow.XmlState.ErrorHandling import Text.XML.HXT.Arrow.XmlState.TypeDefs -- ------------------------------ -- config options -- | @withTace level@ : system option, set the trace level, (0..4) withTrace :: Int -> SysConfig withTrace = setS theTraceLevel -- | @withSysAttr key value@ : store an arbitarty key value pair in system state withSysAttr :: String -> String -> SysConfig withSysAttr n v = chgS theAttrList (addEntry n v) -- | Specify the set of accepted mime types. -- -- All contents of documents for which the mime type is not found in this list -- are discarded. withAcceptedMimeTypes :: [String] -> SysConfig withAcceptedMimeTypes = setS theAcceptedMimeTypes -- | Specify a content handler for documents of a given mime type withMimeTypeHandler :: String -> IOSArrow XmlTree XmlTree -> SysConfig withMimeTypeHandler mt pa = chgS theMimeTypeHandlers $ insert mt pa -- | @withMimeTypeFile filename@ : input option, -- set the mime type table for @file:@ documents by given file. -- The format of this config file must be in the syntax of a debian linux \"mime.types\" config file withMimeTypeFile :: String -> SysConfig withMimeTypeFile = setS theMimeTypeFile -- | Force a given mime type for all file contents. -- -- The mime type for file access will then not be computed by looking into a mime.types file withFileMimeType :: String -> SysConfig withFileMimeType = setS theFileMimeType -- | @withWarnings yes/no@ : system option, issue warnings during reading, HTML parsing and processing, -- default is 'yes' withWarnings :: Bool -> SysConfig withWarnings = setS theWarnings -- | @withErrors yes/no@ : system option for suppressing error messages, default is 'no' withErrors :: Bool -> SysConfig withErrors b = setS theErrorMsgHandler h where h | b = errorOutputToStderr | otherwise = const $ return () -- | @withRemoveWS yes/no@ : read and write option, remove all whitespace, used for document indentation, default is 'no' withRemoveWS :: Bool -> SysConfig withRemoveWS = setS theRemoveWS -- | @withPreserveComment yes/no@ : read option, preserve comments during canonicalization, default is 'no' withPreserveComment :: Bool -> SysConfig withPreserveComment = setS thePreserveComment -- | @withParseByMimeType yes/no@ : read option, select the parser by the mime type of the document -- (pulled out of the HTTP header). -- -- When the mime type is set to \"text\/html\" -- the configured HTML parser is taken, when it\'s set to -- \"text\/xml\" or \"text\/xhtml\" the configured XML parser is taken. -- If the mime type is something else, no further processing is performed, -- the contents is given back to the application in form of a single text node. -- If the default document encoding is set to isoLatin1, this even enables processing -- of arbitray binary data. withParseByMimeType :: Bool -> SysConfig withParseByMimeType = setS theParseByMimeType -- | @withParseHTML yes/no@: read option, use HTML parser, default is 'no' (use XML parser) withParseHTML :: Bool -> SysConfig withParseHTML = setS theParseHTML -- | @withValidate yes/no@: read option, validate document against DTD, default is 'yes' withValidate :: Bool -> SysConfig withValidate = setS theValidate -- | @withSubstDTDEntities yes/no@: read option, substitute general entities defined in DTD, default is 'yes'. -- switching this option and the validate option off can lead to faster parsing, because then -- there is no need to access the DTD withSubstDTDEntities :: Bool -> SysConfig withSubstDTDEntities = setS theSubstDTDEntities -- | @withSubstHTMLEntities yes/no@: read option, substitute general entities defined in HTML DTD, default is 'no'. -- switching this option on and the substDTDEntities and validate options off can lead to faster parsing -- because there is no need to access a DTD, but still the HTML general entities are substituted withSubstHTMLEntities :: Bool -> SysConfig withSubstHTMLEntities = setS theSubstHTMLEntities -- | @withCheckNamespaces yes/no@: read option, check namespaces, default is 'no' withCheckNamespaces :: Bool -> SysConfig withCheckNamespaces = setS theCheckNamespaces -- | @withCanonicalize yes/no@ : read option, canonicalize document, default is 'yes' withCanonicalize :: Bool -> SysConfig withCanonicalize = setS theCanonicalize -- | @withIgnoreNoneXmlContents yes\/no@ : input option, ignore document contents of none XML\/HTML documents. -- -- This option can be useful for implementing crawler like applications, e.g. an URL checker. -- In those cases net traffic can be reduced. withIgnoreNoneXmlContents :: Bool -> SysConfig withIgnoreNoneXmlContents = setS theIgnoreNoneXmlContents -- ------------------------------------------------------------ -- | @withStrictInput yes/no@ : input option, input of file and HTTP contents is read eagerly, default is 'no' withStrictInput :: Bool -> SysConfig withStrictInput = setS theStrictInput -- | @withEncodingErrors yes/no@ : input option, ignore all encoding errors, default is 'no' withEncodingErrors :: Bool -> SysConfig withEncodingErrors = setS theEncodingErrors -- | @withInputEncoding encodingName@ : input option -- -- Set default document encoding ('utf8', 'isoLatin1', 'usAscii', 'iso8859_2', ... , 'iso8859_16', ...). -- Only XML, HTML and text documents are decoded, -- default decoding for XML\/HTML is utf8, for text iso latin1 (no decoding). withInputEncoding :: String -> SysConfig withInputEncoding = setS theInputEncoding -- | @withDefaultBaseURI URI@ , input option, set the default base URI -- -- This option can be useful when parsing documents from stdin or contained in a string, and interpreting -- relative URIs within the document withDefaultBaseURI :: String -> SysConfig withDefaultBaseURI = setS theDefaultBaseURI withInputOption :: String -> String -> SysConfig withInputOption n v = chgS theInputOptions (addEntry n v) withInputOptions :: Attributes -> SysConfig withInputOptions = foldr (>>>) id . map (uncurry withInputOption) -- | @withRedirect yes/no@ : input option, automatically follow redirected URIs, default is 'yes' withRedirect :: Bool -> SysConfig withRedirect = setS theRedirect -- | @withProxy \"host:port\"@ : input option, configure a proxy for HTTP access, e.g. www-cache:3128 withProxy :: String -> SysConfig withProxy = setS theProxy -- ------------------------------------------------------------ -- | @withIndent yes/no@ : output option, indent document before output, default is 'no' withIndent :: Bool -> SysConfig withIndent = setS theIndent -- | @withOutputEncoding encoding@ , output option, -- default is the default input encoding or utf8, if input encoding is not set withOutputEncoding :: String -> SysConfig withOutputEncoding = setS theOutputEncoding -- | @withOutputXML@ : output option, default writing -- -- Default is writing XML: quote special XML chars \>,\<,\",\',& where neccessary, -- add XML processing instruction -- and encode document with respect to 'withOutputEncoding' withOutputXML :: SysConfig withOutputXML = setS theOutputFmt XMLoutput -- | Write XHTML: quote all special XML chars, use HTML entity refs or char refs for none ASCII chars withOutputHTML :: SysConfig withOutputHTML = setS theOutputFmt HTMLoutput -- | Write XML: quote only special XML chars, don't substitute chars by HTML entities, -- and don\'t generate empty elements for HTML elements, -- which may contain any contents, e.g. @@ instead of @