hxt-9.3.1.15/0000755000000000000000000000000012465166667010764 5ustar0000000000000000hxt-9.3.1.15/Setup.lhs0000644000000000000000000000015712465166667012577 0ustar0000000000000000#!/usr/bin/runhaskell > module Main where > import Distribution.Simple > main :: IO () > main = defaultMain hxt-9.3.1.15/hxt.cabal0000644000000000000000000002240112465166667012552 0ustar0000000000000000-- arch-tag: Haskell XML Toolbox main description file Name: hxt Version: 9.3.1.15 Synopsis: A collection of tools for processing XML with Haskell. Description: The Haskell XML Toolbox bases on the ideas of HaXml and HXML, but introduces a more general approach for processing XML with Haskell. The Haskell XML Toolbox uses a generic data model for representing XML documents, including the DTD subset and the document subset, in Haskell. It contains a validating XML parser, a HTML parser, namespace support, an XPath expression evaluator, an XSLT library, a RelaxNG schema validator and funtions for serialization and deserialization of user defined data. The library makes extensive use of the arrow approach for processing XML. Since version 9 the toolbox is partitioned into various (sub-)packages. This package contains the core functionality, hxt-curl, hxt-tagsoup, hxt-relaxng, hxt-xpath, hxt-xslt, hxt-regex-xmlschema contain the extensions. hxt-unicode contains encoding and decoding functions, hxt-charproperties char properties for unicode and XML. . Changes from 9.3.1.14: For ghc-7.10 network-uri is automatically selected . Changes from 9.3.1.13: ghc-7.10 compatibility . Changes from 9.3.1.12: Bug when unpickling an empty attribute value removed . Changes from 9.3.1.11: Bug fix in haddock comments . Changes from 9.3.1.10: Bug in DTD validation, space and time leak in delta removed . Changes from 9.3.1.9: lower bound of mtl dependency lowered to 2.0.1 . Changes from 9.3.1.8: Bug in hread removed . Changes from 9.3.1.7: Foldable and Traversable instances for NTree added Control.Except used instead of deprecated Control.Error . Changes from 9.3.1.6: canonicalize added in hread and hreadDoc . Changes from 9.3.1.4: conditionally (no default) dependency from networt changed to network-uri with flag "network-uri" . Changes from 9.3.1.3: warnings from ghc-7.8.1 removed . Changes from 9.3.1.2: https as protocol added . Changes from 9.3.1.1: new parser xreadDoc . Changes from 9.3.1.0: in readString all input decoding switched off . Changes from 9.3.0.1: lower bound for network set to be >= 2.4 . Changes from 9.3.0: upper bound for network set to be < 2.4 (URI signatures changed in 2.4) . Changes from 9.2.2: XMLSchema validation integrated . Changes from 9.2.1: user defined mime type handlers added . Changes from 9.2.0: New warnings from ghc-7.4 removed License: MIT License-file: LICENSE Author: Uwe Schmidt, Martin Schmidt, Torben Kuseler Maintainer: Uwe Schmidt Stability: Stable Category: XML Homepage: https://github.com/UweSchmidt/hxt Copyright: Copyright (c) 2005-2015 Uwe Schmidt Build-type: Simple Cabal-version: >=1.8 extra-source-files: examples/arrows/absurls/AbsURIs.hs examples/arrows/absurls/lousy.html examples/arrows/absurls/Makefile examples/arrows/absurls/ProcessDocument.hs examples/arrows/AGentleIntroductionToHXT/.ghci examples/arrows/AGentleIntroductionToHXT/Makefile examples/arrows/AGentleIntroductionToHXT/PicklerExample/Baseball.hs examples/arrows/AGentleIntroductionToHXT/PicklerExample/Makefile examples/arrows/AGentleIntroductionToHXT/PicklerExample/new-simple2.xml examples/arrows/AGentleIntroductionToHXT/PicklerExample/simple2.xml examples/arrows/AGentleIntroductionToHXT/SimpleExamples.hs examples/arrows/dtd2hxt/DTDtoHXT.hs examples/arrows/dtd2hxt/.ghci examples/arrows/dtd2hxt/Makefile examples/arrows/HelloWorld/bye.xml examples/arrows/HelloWorld/HelloWorld.hs examples/arrows/HelloWorld/hello.xml examples/arrows/HelloWorld/Makefile examples/arrows/HelloWorld/Mini.hs examples/arrows/hparser/emptyElements.html examples/arrows/hparser/example1.xml examples/arrows/hparser/example1CRLF.xml examples/arrows/hparser/HXmlParser.hs examples/arrows/hparser/invalid1.xml examples/arrows/hparser/invalid2.rng examples/arrows/hparser/invalid3.rng examples/arrows/hparser/invalid.xml examples/arrows/hparser/lousy.html examples/arrows/hparser/Makefile examples/arrows/hparser/namespace0.xml examples/arrows/hparser/namespace1.xml examples/arrows/hparser/valid1.rng examples/arrows/hparser/valid1.xml examples/arrows/performance/GenDoc.hs examples/arrows/performance/Makefile examples/arrows/pickle/Makefile examples/arrows/pickle/PickleTest.hs examples/xhtml/tmp.xml examples/xhtml/xhtml1-frameset.dtd examples/xhtml/xhtml1-strict.dtd examples/xhtml/xhtml1-transitional.dtd examples/xhtml/xhtml-lat1.ent examples/xhtml/xhtml-special.ent examples/xhtml/xhtml-symbol.ent examples/xhtml/xhtml.xml flag network-uri description: Get Network.URI from the network-uri package, with ghc < 7.10 default is False, with ghc >= 7.10 default is True default: False library exposed-modules: Control.Arrow.ArrowExc, Control.Arrow.ArrowIO, Control.Arrow.ArrowIf, Control.Arrow.ArrowList, Control.Arrow.ArrowNF, Control.Arrow.ArrowNavigatableTree, Control.Arrow.ArrowState, Control.Arrow.ArrowTree, Control.Arrow.IOListArrow, Control.Arrow.IOStateListArrow, Control.Arrow.ListArrow, Control.Arrow.ListArrows, Control.Arrow.NTreeEdit, Control.Arrow.StateListArrow, Control.FlatSeq, Data.AssocList, Data.Atom, Data.Function.Selector, Data.Tree.Class, Data.Tree.NTree.TypeDefs, Data.Tree.NTree.Edit, Data.Tree.NTree.Zipper.TypeDefs, Data.Tree.NavigatableTree.Class, Data.Tree.NavigatableTree.XPathAxis, Text.XML.HXT.Arrow.Binary, Text.XML.HXT.Arrow.DTDProcessing, Text.XML.HXT.Arrow.DocumentInput, Text.XML.HXT.Arrow.DocumentOutput, Text.XML.HXT.Arrow.Edit, Text.XML.HXT.Arrow.GeneralEntitySubstitution, Text.XML.HXT.Arrow.Namespace, Text.XML.HXT.Arrow.ParserInterface, Text.XML.HXT.Arrow.Pickle, Text.XML.HXT.Arrow.Pickle.DTD, Text.XML.HXT.Arrow.Pickle.Schema, Text.XML.HXT.Arrow.Pickle.Xml, Text.XML.HXT.Arrow.ProcessDocument, Text.XML.HXT.Arrow.ReadDocument, Text.XML.HXT.Arrow.WriteDocument, Text.XML.HXT.Arrow.XmlArrow, Text.XML.HXT.Arrow.XmlOptions, Text.XML.HXT.Arrow.XmlRegex, Text.XML.HXT.Arrow.XmlState, Text.XML.HXT.Arrow.XmlState.ErrorHandling, Text.XML.HXT.Arrow.XmlState.MimeTypeTable, Text.XML.HXT.Arrow.XmlState.RunIOStateArrow, Text.XML.HXT.Arrow.XmlState.TraceHandling, Text.XML.HXT.Arrow.XmlState.TypeDefs, Text.XML.HXT.Arrow.XmlState.URIHandling, Text.XML.HXT.Arrow.XmlState.SystemConfig, Text.XML.HXT.Core, Text.XML.HXT.DOM.FormatXmlTree, Text.XML.HXT.DOM.Interface, Text.XML.HXT.DOM.MimeTypeDefaults, Text.XML.HXT.DOM.MimeTypes, Text.XML.HXT.DOM.QualifiedName, Text.XML.HXT.DOM.ShowXml, Text.XML.HXT.DOM.TypeDefs, Text.XML.HXT.DOM.Util, Text.XML.HXT.DOM.XmlKeywords, Text.XML.HXT.DOM.XmlNode, Text.XML.HXT.DTDValidation.AttributeValueValidation, Text.XML.HXT.DTDValidation.DTDValidation, Text.XML.HXT.DTDValidation.DocTransformation, Text.XML.HXT.DTDValidation.DocValidation, Text.XML.HXT.DTDValidation.IdValidation, Text.XML.HXT.DTDValidation.RE, Text.XML.HXT.DTDValidation.TypeDefs, Text.XML.HXT.DTDValidation.Validation, Text.XML.HXT.DTDValidation.XmlRE, Text.XML.HXT.IO.GetFILE, Text.XML.HXT.Parser.HtmlParsec, Text.XML.HXT.Parser.ProtocolHandlerUtil, Text.XML.HXT.Parser.XhtmlEntities, Text.XML.HXT.Parser.XmlCharParser, Text.XML.HXT.Parser.XmlDTDParser, Text.XML.HXT.Parser.XmlDTDTokenParser, Text.XML.HXT.Parser.XmlEntities, Text.XML.HXT.Parser.XmlParsec, Text.XML.HXT.Parser.XmlTokenParser, Text.XML.HXT.XMLSchema.DataTypeLibW3CNames, Text.XML.HXT.Version hs-source-dirs: src ghc-options: -Wall ghc-prof-options: -caf-all extensions: MultiParamTypeClasses DeriveDataTypeable FunctionalDependencies FlexibleInstances CPP build-depends: base >= 4 && < 5, containers >= 0.2 && < 1, directory >= 1 && < 2, filepath >= 1 && < 2, parsec >= 2.1 && < 4, HUnit >= 1.2 && < 2, mtl >= 2.0.1 && < 3, deepseq >= 1.1 && < 2, bytestring >= 0.9 && < 1, binary >= 0.5 && < 1, hxt-charproperties >= 9.1 && < 10, hxt-unicode >= 9.0.1 && < 10, hxt-regex-xmlschema >= 9.2 && < 10 if flag(network-uri) build-depends: network-uri >= 2.6 else if impl(ghc >= 7.10) build-depends: network-uri >= 2.6 else build-depends: network >= 2.4 && < 2.6 Source-Repository head Type: git Location: git://github.com/UweSchmidt/hxt.git hxt-9.3.1.15/LICENSE0000644000000000000000000000212012465166667011764 0ustar0000000000000000The MIT License Copyright (c) 2005 Uwe Schmidt, Martin Schmidt, Torben Kuseler Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. hxt-9.3.1.15/examples/0000755000000000000000000000000012465166667012602 5ustar0000000000000000hxt-9.3.1.15/examples/arrows/0000755000000000000000000000000012465166667014117 5ustar0000000000000000hxt-9.3.1.15/examples/arrows/HelloWorld/0000755000000000000000000000000012465166667016172 5ustar0000000000000000hxt-9.3.1.15/examples/arrows/HelloWorld/hello.xml0000644000000000000000000000006612465166667020021 0ustar0000000000000000 Hello World! hxt-9.3.1.15/examples/arrows/HelloWorld/HelloWorld.hs0000644000000000000000000000057712465166667020612 0ustar0000000000000000module Main where import Text.XML.HXT.Core import System.Exit main :: IO() main = do [rc] <- runX ( readDocument [ withTrace 1 , withValidate no ] "hello.xml" >>> writeDocument [ withOutputEncoding utf8 ] "-" >>> getErrStatus ) exitWith ( if rc >= c_err then ExitFailure 1 else ExitSuccess ) hxt-9.3.1.15/examples/arrows/HelloWorld/Makefile0000644000000000000000000000124012465166667017627 0ustar0000000000000000# $Id: Makefile,v 1.3 2005/04/14 12:52:50 hxml Exp $ # # hello world application of Haskell XML Toolbox HXT_HOME = ../../.. PKGFLAGS = GHCFLAGS = -Wall -O2 GHC = ghc $(GHCFLAGS) $(PKGFLAGS) DIST = $(HXT_HOME)/dist/examples/arrows DIST_DIR = $(DIST)/HelloWorld all : mini hello force : $(MAKE) distclean all test : ./mini ./hello dist : [ -d $(DIST_DIR) ] || mkdir -p $(DIST_DIR) cp Mini.hs HelloWorld.hs hello.xml Makefile $(DIST_DIR) clean : rm -f *.o *.hi distclean : $(MAKE) clean rm -f mini hello .PHONY : all test dist clean distclean force hello : HelloWorld.hs $(GHC) --make -o $@ $< mini : Mini.hs $(GHC) --make -o $@ $< hxt-9.3.1.15/examples/arrows/HelloWorld/Mini.hs0000644000000000000000000000037012465166667017422 0ustar0000000000000000module Main where import Text.XML.HXT.Core main :: IO() main = runX ( configSysVars [ withTrace 1 ] >>> readDocument [ withValidate no ] "hello.xml" >>> writeDocument [ ] "bye.xml" ) >> return () hxt-9.3.1.15/examples/arrows/HelloWorld/bye.xml0000644000000000000000000000010712465166667017471 0ustar0000000000000000 Hello World! hxt-9.3.1.15/examples/arrows/absurls/0000755000000000000000000000000012465166667015572 5ustar0000000000000000hxt-9.3.1.15/examples/arrows/absurls/lousy.html0000644000000000000000000000120612465166667017632 0ustar0000000000000000 A HTML Documents with some Errors

HTML

html

some text and a &xxx;

more text

123

Uwe Schmidt
Last modified: Mon May 12 13:11:29 CEST 2003 hxt-9.3.1.15/examples/arrows/absurls/AbsURIs.hs0000644000000000000000000000725112465166667017403 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : AbsURIs Copyright : Copyright (C) 2005 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt Maintainer : uwe@fh-wedel.de Stability : experimental Portability: portable AbsURIs - Conversion references into absolute URIs in HTML pages The commandline interface -} -- ------------------------------------------------------------ module Main where import Text.XML.HXT.Core -- import all stuff for parsing, validating, and transforming XML import System.IO -- import the IO and commandline option stuff import System.Environment import System.Console.GetOpt import System.Exit import ProcessDocument -- ------------------------------------------------------------ -- | -- the main program of the Haskell XML Validating Parser main :: IO () main = do argv <- getArgs -- get the commandline arguments (al, src) <- cmdlineOpts argv -- and evaluate them, return a key-value list [rc] <- runX (parser al src) -- run the parser arrow exitProg (rc >= c_err) -- set return code and terminate -- ------------------------------------------------------------ exitProg :: Bool -> IO a exitProg True = exitWith (ExitFailure 1) exitProg False = exitWith ExitSuccess -- ------------------------------------------------------------ -- | -- the /real/ main program -- -- get wellformed document, validates document, propagates and check namespaces -- and controls output parser :: SysConfigList -> String -> IOSArrow b Int parser config src = configSysVars config -- set all global config options >>> readDocument [withParseHTML yes] src -- use HTML parser >>> traceMsg 1 "start processing" >>> processDocument >>> traceMsg 1 "processing finished" >>> traceSource >>> traceTree >>> ( writeDocument [] $< getSysAttr "output-file" ) >>> getErrStatus -- ------------------------------------------------------------ -- -- the options definition part -- see doc for System.Console.GetOpt progName :: String progName = "AbsURIs" options :: [OptDescr SysConfig] options = generalOptions ++ inputOptions ++ [ Option "f" ["output-file"] (ReqArg (withSysAttr "output-file") "FILE") "output file for resulting document (default: stdout)" ] ++ outputOptions ++ showOptions usage :: [String] -> IO a usage errl | null errl = do hPutStrLn stdout use exitProg False | otherwise = do hPutStrLn stderr (concat errl ++ "\n" ++ use) exitProg True where header = progName ++ " - Convert all references in an HTML document into absolute URIs\n\n" ++ "Usage: " ++ progName ++ " [OPTION...] [URI or FILE]" use = usageInfo header options cmdlineOpts :: [String] -> IO (SysConfigList, String) cmdlineOpts argv = case (getOpt Permute options argv) of (scfg,n,[]) -> do sa <- src n help (getConfigAttr a_help scfg) sa return (scfg, sa) (_,_,errs) -> usage errs where src [] = return [] src [uri] = return uri src _ = usage ["only one input uri or file allowed\n"] help "1" _ = usage [] help _ [] = usage ["no input uri or file given\n"] help _ _ = return () -- ------------------------------------------------------------ hxt-9.3.1.15/examples/arrows/absurls/Makefile0000644000000000000000000000170012465166667017230 0ustar0000000000000000# $Id: Makefile,v 1.1 2005/05/12 16:41:38 hxml Exp $ HXT_HOME = ../../.. PKGFLAGS = GHCFLAGS = -Wall -O2 GHC = ghc $(GHCFLAGS) $(PKGFLAGS) DIST = $(HXT_HOME)/dist/examples/arrows DIST_DIR = $(DIST)/absurls prog = ./AbsURIs all : $(prog) AbsURIs : AbsURIs.hs ProcessDocument.hs $(GHC) --make -o $@ $< force : $(GHC) --make -o $(prog) $(prog).hs test : $(prog) @echo "===> run a few simple test cases" $(MAKE) test0 EX = ./lousy.html test0 : @echo "===> the source of a lousy html document" ; echo ; sleep 2 cat $(EX) @sleep 2 ; echo ; echo "===> all refs (href, src attributes) are transformed into absolute URIs with respect to the base element" ; echo ; sleep 2 $(prog) --trace=0 --encoding=ISO-8859-1 --output-encoding=ISO-8859-1 --indent --do-not-issue-warnings $(EX) @echo dist : [ -d $(DIST_DIR) ] || mkdir -p $(DIST_DIR) cp $(EX) Makefile $(prog).hs ProcessDocument.hs $(DIST_DIR) clean : rm -f $(prog) *.o *.hi hxt-9.3.1.15/examples/arrows/absurls/ProcessDocument.hs0000644000000000000000000000441612465166667021250 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : ProcessDocument Copyright : Copyright (C) 2005 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt Maintainer : uwe@fh-wedel.de Stability : experimental Portability: portable AbsURIs - Conversion references into absolute URIs in HTML pages The REAL processing functions -} -- ------------------------------------------------------------ module ProcessDocument ( processDocument ) where import Text.XML.HXT.Core -- import all stuff for parsing, validating, and transforming XML import Data.Maybe -- ------------------------------------------------------------ -- simple example of a processing arrow processDocument :: IOSArrow XmlTree XmlTree processDocument = processChildren (mkAbs `when` isElem) where mkAbs = mkAbsURIs $< compBase compBase :: IOSArrow XmlTree String compBase = single searchBaseElem -- search in element (only for wrong input: make the arrow deterministic) `orElse` getBaseURI -- use document base where searchBaseElem = hasName "html" >>> getChildren >>> hasName "head" >>> getChildren >>> hasName "base" >>> getAttrValue "href" >>> mkAbsURI mkAbsURIs :: String -> IOSArrow XmlTree XmlTree mkAbsURIs base = processTopDown editURIs -- edit all refs in documnt where -- build the edit filter from the list of element-attribute names editURIs = seqA . map (uncurry mkAbs) $ hrefAttrs -- HTML elements and attributes, that contain references (possibly not yet complete) hrefAttrs = [ ("a", "href" ) , ("img", "src" ) , ("frame", "src" ) , ("iframe", "src" ) , ("link", "href" ) , ("script", "src" ) ] -- change the reference in attribute attrName of element elemName mkAbs elemName attrName = processAttrl ( changeAttrValue (mkAbsURIString base) `when` hasName attrName ) `when` hasName elemName -- | compute an absolute URI, if not possible leave URI unchanged mkAbsURIString :: String -> String -> String mkAbsURIString base uri = fromMaybe uri . expandURIString uri $ base -- ------------------------------------------------------------ hxt-9.3.1.15/examples/arrows/performance/0000755000000000000000000000000012465166667016420 5ustar0000000000000000hxt-9.3.1.15/examples/arrows/performance/GenDoc.hs0000644000000000000000000003130212465166667020112 0ustar0000000000000000{-# LANGUAGE BangPatterns#-} {-# OPTIONS_GHC -fno-warn-incomplete-patterns #-} -- ---------------------------------------- module Main where import Text.XML.HXT.Core hiding (trace) -- import Text.XML.HXT.TagSoup -- import Text.XML.HXT.Expat import Data.Char (isDigit) import Data.List (foldl') import Data.String.Unicode ( unicodeToXmlEntity ) import Control.Monad.State.Strict hiding (when) import Control.DeepSeq import Control.FlatSeq import Data.Maybe import System.IO hiding (utf8) -- import the IO and commandline option stuff import System.Environment import qualified Text.XML.HXT.DOM.XmlNode as XN import qualified Data.Tree.Class as T import Data.Tree.NTree.TypeDefs -- as T import Debug.Trace -- ------------------------------------------------------------ main :: IO () main = do p <- getProgName (is : _) <- getArgs let i = (read is)::Int main' p i where main' p' = fromMaybe main0 . lookup (pn p') $ mpt mpt = [ ("GenDoc", main1) , ("ReadDoc", main2) , ("PruneRight", main3 False) , ("PruneLeft", main3 True) , ("MemTest", main4 True) , ("MemTest1", main4 False) ] -- ---------------------------------------- -- generate a document containing a binary tree of 2^i leafs (= 2^(i-1) XML elements) main1 :: Int -> IO () main1 i = runX (genDoc i (fn i)) >> return () -- ---------------------------------------- -- read a document containing a binary tree of 2^i leafs main2 :: Int -> IO () main2 i = do [x] <- runX (setTraceLevel 2 >>> readDoc (fn i) >>> {- traceMsg 1 "start rnfA" >>> rnfA this >>> -} traceMsg 1 "start unpickle" >>> unpickleTree >>> traceMsg 1 "start fold" >>> arr (foldT1 max) ) putStrLn ( "maximum value in tree is " ++ show x ++ ", expected value was " ++ show ((2::Int)^i) ) -- ---------------------------------------- -- test on lazyness, is the whole tree read or only the first child of every child node? main3 :: Bool -> Int -> IO () main3 l i = do [t] <- runX ( readDoc (fn i) >>> fromLA (xshow ( getChildren >>> if l then pruneForkLeft else pruneForkRight ) ) ) putStrLn ("pruned binary tree is : " ++ show t) -- ---------------------------------------- main4 :: Bool -> Int -> IO () main4 wnf i = do [x] <- runX ( setTraceLevel 1 >>> traceMsg 1 ("generate tree of depth " ++ show i) >>> ( if wnf then fromLA (genTree'' 0 i) else fromLA (rnfA $ genTree 0 i) ) >>> perform ( traceMsg 1 ("deep hasAttrValue") >>> deep (hasName "leaf" >>> hasAttrValue "value" (== "1")) >>> getAttrValue "value" >>> arrIO putStrLn ) >>> perform ( traceMsg 1 ("deep hasAttrValue") >>> deep (hasName "leaf" >>> hasAttrValue "value" (== "1")) >>> getAttrValue "value" >>> arrIO putStrLn ) {- perform (traceMsg 1 ("write doc") >>> putDoc "./tmp.xml" ) >>> traceMsg 1 ("compute maximum and minimum") >>> fromLA ( foldBTree maximum &&& foldBTree minimum ) -- 2 traversals: complete tree in mem >>> arr2 (\ ma mi -> "maximum value = " ++ show ma ++ ", minimum = " ++ show mi ) >>> traceValue 1 id -} >>> traceMsg 1 "done" >>> constA "0" ) putStrLn x foldBTree :: ([Int] -> Int) -> LA XmlTree Int foldBTree f = choiceA [ hasName "leaf" :-> ( getAttrValue "value" >>^ read ) , hasName "fork" :-> ( (getChildren >>> foldBTree f) >. f ) , this :-> none ] -- ---------------------------------------- -- just to check how much memory is used for the tree main0 :: Int -> IO () main0 i = do let t = mkBTree i let m = show . foldT1 max $ t putStrLn ("maximum value in tree is " ++ m ++ ", minimum value is " ++ show (foldT1 min t)) return () -- ---------------------------------------- pn :: String -> String pn = reverse . takeWhile (/= '/') . reverse fn :: Int -> String fn = ("tree-" ++) . (++ ".xml") . reverse . take 4 . reverse . ((replicate 4 '0') ++ ) . show -- ---------------------------------------- genTree :: Int -> Int -> LA XmlTree XmlTree genTree !n !d | d == 0 = aelem "leaf" [sattr "value" (show (n + 1))] | otherwise = selem "fork" [ genTree (2*n) (d-1) , genTree (2*n+1) (d-1) ] -- ---------------------------------------- genTree' :: Int -> Int -> LA XmlTree XmlTree genTree' !n !d | d == 0 = aelem' "leaf" [sattr' "value" (show (n + 1))] | otherwise = selem' "fork" [ genTree' (2*n) (d-1) , genTree' (2*n+1) (d-1) ] -- ---------------------------------------- genTree'' :: Int -> Int -> LA XmlTree XmlTree genTree'' !n !d | d == 0 = rwnfA $ -- trace ("+leaf " ++ show (n+1)) $ aelem "leaf" $ -- trace ("++leaf " ++ show (n+1)) $ [rwnf2A $ -- trace ("+valu " ++ show (n+1)) $ sattr "value" $ show $ -- trace ("++valu " ++ show (n+1)) $ (n + 1) ] | otherwise = rwnfA $ -- trace ("+fork " ++ show (n+1)) $ selem "fork" $ -- trace ("++fork " ++ show (n+1)) $ [ genTree'' (2*n) (d-1) , genTree'' (2*n+1) (d-1) ] -- ---------------------------------------- -- (hopefully) strict constructors mkText' s = rwnf s `seq` T.mkLeaf tn where tn = XN.mkText s mkAttr' n al = n `seq` rwnf al `seq` T.mkTree an al where an = XN.mkAttrNode n mkElem' n al cl = n `seq` en `seq` rwnf al `seq` rwnf cl `seq` T.mkTree en cl where en = XN.mkElementNode n al -- ---------------------------------------- -- strict arrows mkElement' :: String -> LA n XmlTree -> LA n XmlTree -> LA n XmlTree mkElement' n af cf = (listA af &&& listA cf) >>> arr2 (mkElem' (mkName n)) selem' :: String -> [LA n XmlTree] -> LA n XmlTree selem' n cfs = mkElement' n none (catA cfs) aelem' :: String -> [LA n XmlTree] -> LA n XmlTree aelem' n afs = mkElement' n (catA afs) none sattr' :: String -> String -> LA n XmlTree sattr' an av = constA (mkAttr' (mkName an) [mkText' av]) -- ---------------------------------------- genDoc :: Int -> String -> IOSArrow b XmlTree genDoc d out = constA (let t = mkBTree d in rnf t `seq` t) >>> xpickleVal xpickle >>> {- strictA >>> perform (writeBinaryValue (out ++ ".bin")) >>> readBinaryValue (out ++ ".bin") >>> strictA >>> -} putDoc out -- ---------------------------------------- readDoc :: String -> IOSArrow b XmlTree readDoc src = readDocument [ withParseHTML no , withTrace 2 , withValidate no , withInputEncoding isoLatin1 , withWarnings yes , withStrictInput no , withCanonicalize yes , withRemoveWS no -- , withExpat yes -- , withTagSoup ] src {- >>> perform ( writeDocument [ withShowTree yes , withOutputHTML ] "" ) >>> perform ( writeDocument [ withShowTree no , withOutputHTML ] "" ) -} -- ---------------------------------------- unpickleTree :: ArrowXml a => a XmlTree BTree unpickleTree = xunpickleVal xpickle -- ---------------------------------------- pruneForkRight :: LA XmlTree XmlTree pruneForkRight = ( replaceChildren ( ( getChildren >>. take 1 ) >>> pruneForkRight ) ) `when` (hasName "fork") pruneForkLeft :: LA XmlTree XmlTree pruneForkLeft = ( replaceChildren ( ( getChildren >>. drop 1 ) >>> pruneForkLeft ) ) `when` (hasName "fork") -- ---------------------------------------- type Counter a = State Int a incr :: Counter Int incr = do modify (+1) get -- ---------------------------------------- data BTree = Leaf Int | Fork BTree BTree deriving (Show) instance NFData BTree where rnf (Leaf i) = rnf i rnf (Fork t1 t2) = rnf t1 `seq` rnf t2 instance XmlPickler BTree where xpickle = xpAlt tag ps where tag (Leaf _ ) = 0 tag (Fork _ _ ) = 1 ps = [ xpWrap ( Leaf, \ (Leaf i) -> i) ( xpElem "leaf" $ xpAttr "value" $ xpInt ) -- xpWrap (const 0, const ">&\"äöü") $ xpText ) , xpWrap ( uncurry Fork, \ (Fork l r) -> (l, r)) ( xpElem "fork" $ xpPair xpickle xpickle ) ] -- ---------------------------------------- mkBTree :: Int -> BTree mkBTree depth = evalState (mkT depth) 0 where mkT :: Int -> Counter BTree mkT 0 = do i <- incr return (Leaf i) mkT n = do l <- mkT (n-1) r <- mkT (n-1) return (Fork l r) bTreeToNTree :: BTree -> NTree Int bTreeToNTree (Leaf i) = NTree i [] bTreeToNTree (Fork l r) = NTree j [l',r'] where l' = bTreeToNTree l r' = bTreeToNTree r j = T.getNode l' + T.getNode r' mkNTree = bTreeToNTree . mkBTree -- ---------------------------------------- foldT1 :: (Int -> Int -> Int) -> BTree -> Int foldT1 _ (Leaf v) = v foldT1 op (Fork l r) = foldT1 op l `op` foldT1 op r -- ---------------------------------------- -- output is done with low level ops to write the -- document i a lazy manner -- adding an xml pi and encoding is done "by hand" -- latin1 decoding is the identity, so please generate the -- docs with latin1 encoding. Here ist done even with ASCCI -- every none ASCII char is represented by a char ref (&nnn;) putDoc :: String -> IOStateArrow s XmlTree XmlTree putDoc dst = writeDocument [ withOutputEncoding isoLatin1 , withOutputXML ] dst -- ---------------------------------------- {- putDoc :: String -> IOStateArrow s XmlTree XmlTree putDoc dst = addXmlPi >>> addXmlPiEncoding isoLatin1 >>> xshow getChildren >>> arr unicodeToXmlEntity >>> arrIO (\ s -> hPutDocument (\h -> hPutStrLn h s)) >>> none where isStdout = null dst || dst == "-" hPutDocument :: (Handle -> IO()) -> IO() hPutDocument action | isStdout = action stdout | otherwise = do handle <- openBinaryFile dst WriteMode action handle hClose handle -} -- ---------------------------------------- hxt-9.3.1.15/examples/arrows/performance/Makefile0000644000000000000000000000513512465166667020064 0ustar0000000000000000# $Id: Makefile,v 1.9 2006/11/11 15:36:03 hxml Exp $ HXT_HOME = ../../.. PKGFLAGS = GHCFLAGS = -Wall -O2 GHC = ghc $(GHCFLAGS) $(PKGFLAGS) DIST = $(HXT_HOME)/dist/examples/arrows DIST_DIR = $(DIST)/performance CNT = 3 ropts = +RTS -s -RTS popts = +RTS -P -hy -RTS prog = ./GenDoc prog2 = ./ReadDoc prog3 = ./PruneRight prog4 = ./PruneLeft prog5 = ./MemTest prog6 = ./MemTest1 progs = $(prog) $(prog2) $(prog3) $(prog4) $(prog5) $(prog6) all : $(progs) prof : $(MAKE) all PKGFLAGS="-prof -auto-all -caf-all" $(prog) : $(prog).hs $(GHC) --make -o $@ $< local : $(GHC) --make -o $(prog) -fglasgow-exts -ignore-package hxt -i../../../src $(prog).hs $(prog2) : $(prog) ln -f $(prog) $(prog2) $(prog3) : $(prog) ln -f $(prog) $(prog3) $(prog4) : $(prog) ln -f $(prog) $(prog4) $(prog5) : $(prog) ln -f $(prog) $(prog5) $(prog6) : $(prog) ln -f $(prog) $(prog6) # generate and read documents containing a binary tree # with 2^i leaf nodes containing the numbers 1 to 2^i # for i up to at least 22 (8M XML elements) output works fine # for i up to 19 (1M XML elements) input works without swapping # with i=20 swapping starts, but the program it still terminates # the size of the XML file for i=20 is about 36Mb # these tests have run on a box with 1Gb memory tests = 2 3 10 11 12 ptests = 16 test : $(prog) $(MAKE) genfiles tests="$(tests)" $(MAKE) readfiles tests="$(tests)" $(MAKE) pruneright tests="$(tests)" $(MAKE) pruneleft tests="$(tests)" perftest : $(prog) $(MAKE) test tests="2 3 10 11 12 13 14 15 16 17 18 19 20" pgenfiles : rm -f $(prog).aux $(prog).hp $(prog).ps $(prog).prof $(MAKE) genfiles ropts="$(popts)" tests=$(ptests) hp2ps -c $(prog).hp preadfiles : $(MAKE) readfiles ropts="$(popts)" tests=$(ptests) hp2ps -c $(prog2).hp genfiles : @for i in $(tests) ; \ do \ echo time $(prog) $(ropts) $$i ; \ time $(prog) $(ropts) $$i ; \ ls -l tree-*$$i.xml ; \ echo ; \ done readfiles : @for i in $(tests) ; \ do \ echo time $(prog2) $(ropts) $$i ; \ time $(prog2) $(ropts) $$i ; \ echo ; \ done pruneright : @for i in $(tests) ; \ do \ echo time $(prog3) $(ropts) $$i ; \ time $(prog3) $(ropts) $$i ; \ echo ; \ done pruneleft : @for i in $(tests) ; \ do \ echo time $(prog4) $(ropts) $$i ; \ time $(prog4) $(ropts) $$i ; \ echo ; \ done memtest : @for i in $(tests) ; \ do \ echo time $(prog5) $(ropts) $$i ; \ time $(prog5) $(ropts) $$i ; \ echo ; \ done dist : [ -d $(DIST_DIR) ] || mkdir -p $(DIST_DIR) cp Makefile GenDoc.hs $(DIST_DIR) clean : rm -f $(progs) *.o *.hi *.xml hxt-9.3.1.15/examples/arrows/hparser/0000755000000000000000000000000012465166667015563 5ustar0000000000000000hxt-9.3.1.15/examples/arrows/hparser/example1.xml0000644000000000000000000000071012465166667020017 0ustar0000000000000000 ]> hello world äöüß test hxt-9.3.1.15/examples/arrows/hparser/invalid3.rng0000644000000000000000000000070012465166667020001 0ustar0000000000000000 hxt-9.3.1.15/examples/arrows/hparser/lousy.html0000644000000000000000000000150212465166667017622 0ustar0000000000000000 A HTML Documents with some Errors

HTML

html

some text and a &xxx;

more text ' ' ' &yyy &unknown; A B A Ä

123
xxx <&"' end CDATA]]>
Ä Ö Ü A
Uwe Schmidt
Last modified: Thu Feb 3 13:27:19 CET 2011 ]> <:tag/><:/><:::/> hxt-9.3.1.15/examples/arrows/hparser/example1CRLF.xml0000644000000000000000000000073612465166667020476 0ustar0000000000000000 ]> hello world äöüß test hxt-9.3.1.15/examples/arrows/hparser/HXmlParser.hs0000644000000000000000000001227512465166667020153 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : HXmlParser Copyright : Copyright (C) 2005-2010 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt Maintainer : uwe@fh-wedel.de Stability : experimental Portability: portable HXmlParser - Minimal Validating XML Parser of the Haskell XML Toolbox, no HTTP supported XML well-formed checker and validator. this program may be used as example main program for the arrow API of the Haskell XML Toolbox commandline parameter evaluation and and return code is the most complicated part of this example application -} -- ------------------------------------------------------------ module Main where import Text.XML.HXT.Core -- import all stuff for parsing, validating, and transforming XML import System.IO -- import the IO and commandline option stuff import System.Environment import System.Console.GetOpt import System.Exit -- ------------------------------------------------------------ -- | -- the main program of the Haskell XML Validating Parser main :: IO () main = do argv <- getArgs -- get the commandline arguments (al, src) <- cmdlineOpts argv -- and evaluate them, return a key-value list [rc] <- runX (parser al src) -- run the parser arrow exitProg (rc >= c_err) -- set return code and terminate -- ------------------------------------------------------------ exitProg :: Bool -> IO a exitProg True = exitWith (ExitFailure 1) exitProg False = exitWith ExitSuccess -- ------------------------------------------------------------ -- | -- the /real/ main program -- -- get wellformed document, validates document, propagates and check namespaces -- and controls output parser :: SysConfigList -> String -> IOSArrow b Int parser config src = configSysVars config -- set all global config options, the output file and the >>> -- other user options are stored as key-value pairs in the stystem state readDocument [] src -- no more special read options needed >>> ( ( traceMsg 1 "start processing document" >>> ( processDocument $< getSysAttr "action" ) -- ask for the action stored in the key-value list of user defined values >>> traceMsg 1 "document processing finished" ) `when` documentStatusOk ) >>> traceSource >>> traceTree >>> ( (writeDocument [] $< getSysAttr "output-file") -- ask for the output file stored in the system configuration `whenNot` ( getSysAttr "no-output" >>> isA (== "1") ) -- ask for the no-output attr value in the system key-value list ) >>> getErrStatus -- simple example of a processing arrow, selected by a command line option processDocument :: String -> IOSArrow XmlTree XmlTree processDocument "only-text" = traceMsg 1 "selecting plain text" >>> processChildren (deep isText) processDocument "indent" = traceMsg 1 "indent document" >>> indentDoc processDocument _action = traceMsg 1 "default action: do nothing" >>> this -- ------------------------------------------------------------ -- -- the options definition part -- see doc for System.Console.GetOpt progName :: String progName = "HXmlParser" options :: [OptDescr SysConfig] options = generalOptions ++ inputOptions ++ outputOptions ++ showOptions ++ [ Option "q" ["no-output"] (NoArg $ withSysAttr "no-output" "1") "no output of resulting document" , Option "x" ["action"] (ReqArg (withSysAttr "action") "ACTION") "actions are: only-text, indent, no-op" ] -- the last 2 option values will be stored by withAttr in the system key-value list -- and can be read by getSysAttr key usage :: [String] -> IO a usage errl | null errl = do hPutStrLn stdout use exitProg False | otherwise = do hPutStrLn stderr (concat errl ++ "\n" ++ use) exitProg True where header = "HXmlParser - Validating XML Parser of the Haskell XML Toolbox with Arrow Interface\n" ++ "XML well-formed checker, DTD validator, HTML parser.\n\n" ++ "Usage: " ++ progName ++ " [OPTION...] [URI or FILE]" use = usageInfo header options cmdlineOpts :: [String] -> IO (SysConfigList, String) cmdlineOpts argv = case (getOpt Permute options argv) of (scfg,n,[]) -> do sa <- src n help (getConfigAttr a_help scfg) sa return (scfg, sa) (_,_,errs) -> usage errs where src [] = return [] src [uri] = return uri src _ = usage ["only one input uri or file allowed\n"] help "1" _ = usage [] help _ [] = usage ["no input uri or file given\n"] help _ _ = return () -- ------------------------------------------------------------ hxt-9.3.1.15/examples/arrows/hparser/emptyElements.html0000644000000000000000000000047712465166667021314 0ustar0000000000000000 <meta name="description" value="a lousy HTML doc with empty elements"> <script href="javascript.js"/> </head> <body> <h1/> <h2/> <p/> <div/> <br/> <img src="pic.jpg"/> </body> </html> �������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������hxt-9.3.1.15/examples/arrows/hparser/invalid.xml����������������������������������������������������0000644�0000000�0000000�00000000347�12465166667�017737� 0����������������������������������������������������������������������������������������������������ustar�00����������������������������������������������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������<?xml version="1.0" encoding="UTF-8" standalone="yes" ?> <!DOCTYPE a [ <!ATTLIST a att1 CDATA #IMPLIED> <!ELEMENT a (z, c?)> <!ELEMENT b EMPTY> <!ELEMENT c (#PCDATA)> ]> <a att2="test"> <y/> <c>hello world</c> </a> �����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������hxt-9.3.1.15/examples/arrows/hparser/Makefile�������������������������������������������������������0000644�0000000�0000000�00000011221�12465166667�017220� 0����������������������������������������������������������������������������������������������������ustar�00����������������������������������������������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������# $Id: Makefile,v 1.9 2006/11/11 15:36:03 hxml Exp $ HXT_HOME = ../../.. PKGFLAGS = GHCFLAGS = -Wall -O2 GHC = ghc $(GHCFLAGS) $(PKGFLAGS) DIST = $(HXT_HOME)/dist/examples/arrows DIST_DIR = $(DIST)/hparser prog = ./HXmlParser all : $(prog) prof : ghc --make -o $(prog) -Wall -prof -auto-all -O -fglasgow-exts -ignore-package hxt -ignore-package HTTP -i../../../src $(prog).hs local : ghc --make -o $(prog) $(GHCFLAGS) -fglasgow-exts -ignore-package hxt -i../../../src $(prog).hs $(prog) : HXmlParser.hs $(GHC) --make -o $@ $< force : $(GHC) --make -o $(prog) $(prog).hs test : $(prog) @echo "===> run a few simple test cases with the validating parser" @echo "===> first see all command line options" $(prog) --help @echo $(MAKE) test0 test1 test2 test3 test4 EX1 = ./example1.xml EX1a = ./example1CRLF.xml EXi = ./invalid.xml EX2 = ../../xhtml/xhtml.xml EX3 = ./namespace0.xml EX3a = ./namespace1.xml EX4 = ./lousy.html EX4a = ./emptyElements.html EX = $(wildcard example*.xml) $(wildcard lousy*.html) $(wildcard empty*.html) $(wildcard *valid*.xml *valid*.rng) $(wildcard namespace*.xml) test0 : @echo "===> a 1. simple valid document" $(prog) $(EX1) @echo @echo "===> the dom tree of the same document (without any redundant whitespace)" $(prog) --show-tree --remove-whitespace $(EX1) @echo @echo "===> the next test case contains validation erors, it must fail" $(prog) --verbose $(EXi) || true @echo @echo "===> same source, but only wellformed check" $(prog) --do-not-validate $(EXi) || true @echo @echo "===> only validation, no output of an XHTML source" $(prog) --verbose --no-output $(EX2) @echo test1 : @echo "===> the source of a very simple valid document" ; echo ; sleep 2 cat $(EX1) @sleep 2 ; echo ; echo "===> parser will emit UTF-8" ; echo ; sleep 2 $(prog) --output-encoding=UTF-8 $(EX1) @echo @sleep 2 ; echo ; echo "===> once again with ISO-8859-1 (latin1) output" ; echo ; sleep 2 $(prog) --output-encoding=ISO-8859-1 $(EX1) @echo @sleep 2 ; echo ; echo "===> once again with US-ASCII output" ; echo ; sleep 2 $(prog) --output-encoding=US-ASCII $(EX1) @echo @sleep 2 ; echo ; echo "===> once again with hdom tree output" ; echo ; sleep 2 $(prog) --show-tree --output-encoding=ISO-8859-1 $(EX1) @echo @sleep 2 ; echo ; echo "===> once again, but without any markup" ; echo ; sleep 2 $(prog) --action=only-text --output-encoding=ISO-8859-1 $(EX1) @echo @sleep 2 ; echo ; echo "===> same source, but with CRLF, parser will emit UTF-8" ; echo ; sleep 2 $(prog) --output-encoding=UTF-8 $(EX1a) @echo test2 : @echo "===> the source of a xhtml document" ; echo ; sleep 2 cat $(EX2) @echo "that document has" `cat $(EX2) | wc -l` "lines" @sleep 2 ; echo ; echo "===> parser will validate this document and try to indent the output" ; echo ; sleep 2 $(prog) --indent $(EX2) @sleep 2 ; echo ; echo "===> once again, but remove all markup" ; echo ; sleep 2 $(prog) --action=only-text --remove-whitespace $(EX2) @sleep 2 ; echo ; echo "===> once again with hdom tree output" ; echo ; sleep 2 $(prog) --show-tree --remove-whitespace $(EX2) test3 : @echo "===> namespace processing examples" ; echo ; sleep 2 @echo "===> namespace propagation test" ; echo ; sleep 2 $(prog) --verbose --check-namespaces --indent --output-encoding=UTF-8 $(EX3) @echo @echo ; sleep 2 ; echo "===> namespace propagation test: tree output with attached namespaces" ; echo ; sleep 2 $(prog) --verbose --check-namespaces --remove-whitespace --show-tree --output-encoding=ISO-8859-1 $(EX3) @echo @echo ; sleep 2 ; echo "===> namespace validation test: this test produces namespace errors" ; echo ; sleep 2 $(prog) --verbose --do-not-validate --check-namespaces --indent --output-encoding=ISO-8859-1 $(EX3a) || true @echo test4 : @echo "===> HTML parsing examples" ; echo ; sleep 2 @echo "===> the source of a lousy html document" ; echo ; sleep 2 cat $(EX4) @sleep 2 ; echo ; echo "===> parser accepts this document and tries to build a document tree" ; echo ; sleep 2 $(prog) --indent --preserve-comment --parse-html $(EX4) @echo "===> the source of another lousy html document containing empty elements" ; echo ; sleep 2 cat $(EX4a) @sleep 2 ; echo ; echo "===> parser accepts this document and tries to format this as a HTML document without any dangarous empty elements" ; echo ; sleep 2 $(prog) --indent --preserve-comment --parse-html --output-xhtml $(EX4a) @echo dist : [ -d $(DIST_DIR) ] || mkdir -p $(DIST_DIR) cp $(EX) Makefile $(prog).hs $(DIST_DIR) clean : rm -f $(prog) *.o *.hi .PHONY : all test test0 test1 test2 test3 test4 dist clean prof local force �������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������hxt-9.3.1.15/examples/arrows/hparser/valid1.rng�����������������������������������������������������0000644�0000000�0000000�00000000215�12465166667�017451� 0����������������������������������������������������������������������������������������������������ustar�00����������������������������������������������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������<grammar xmlns="http://relaxng.org/ns/structure/1.0"> <start> <element name="foo"> <empty/> </element> </start> </grammar> �����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������hxt-9.3.1.15/examples/arrows/hparser/invalid1.xml���������������������������������������������������0000644�0000000�0000000�00000000006�12465166667�020010� 0����������������������������������������������������������������������������������������������������ustar�00����������������������������������������������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������<bar/>��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������hxt-9.3.1.15/examples/arrows/hparser/namespace0.xml�������������������������������������������������0000644�0000000�0000000�00000001465�12465166667�020327� 0����������������������������������������������������������������������������������������������������ustar�00����������������������������������������������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������<?xml version="1.0" encoding="ISO-8859-1" standalone="yes" ?> <!DOCTYPE a [ <!ATTLIST a x:att1 NMTOKENS #IMPLIED> <!ATTLIST a x:att2 NMTOKENS #IMPLIED> <!ATTLIST a y:att3 CDATA #FIXED "a fixed value"> <!ATTLIST a att4 CDATA "< default >"> <!ATTLIST a att5 CDATA #IMPLIED> <!ATTLIST a xmlns CDATA #IMPLIED> <!ATTLIST a xmlns:x CDATA #IMPLIED> <!ATTLIST a xmlns:y CDATA #IMPLIED> <!ELEMENT a (x:b, cü?)> <!ELEMENT x:b EMPTY> <!ELEMENT cü (#PCDATA)> ]> <!-- some namespace declarations for testing namespace propagation --> <a xmlns="default namespace" xmlns:x="namespace for x" xmlns:y="namespace for y" x:att2="xxx yyy" y:att3="a fixed value" x:att1=" test äöüß test " att5="< >" > <x:b/> <cü>hello world äöüß test</cü> </a> �����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������hxt-9.3.1.15/examples/arrows/hparser/valid1.xml�����������������������������������������������������0000644�0000000�0000000�00000000007�12465166667�017462� 0����������������������������������������������������������������������������������������������������ustar�00����������������������������������������������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������<foo/> �������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������hxt-9.3.1.15/examples/arrows/hparser/invalid2.rng���������������������������������������������������0000644�0000000�0000000�00000000501�12465166667�017777� 0����������������������������������������������������������������������������������������������������ustar�00����������������������������������������������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������<grammar xmlns:html="http://www.w3.org/TR/REC-html40" xmlns="http://relaxng.org/ns/structure/1.0" > <start> <element name="foo"> <zeroOrMore> <group> <attribute name="bar"/> <attribute name="baz"/> </group> </zeroOrMore> </element> </start> </grammar>�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������hxt-9.3.1.15/examples/arrows/pickle/����������������������������������������������������������������0000755�0000000�0000000�00000000000�12465166667�015366� 5����������������������������������������������������������������������������������������������������ustar�00����������������������������������������������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������hxt-9.3.1.15/examples/arrows/pickle/Makefile��������������������������������������������������������0000644�0000000�0000000�00000001633�12465166667�017031� 0����������������������������������������������������������������������������������������������������ustar�00����������������������������������������������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������# $Id: Makefile,v 1.3 2005/04/14 12:52:50 hxml Exp $ # # hello world application of Haskell XML Toolbox HXT_HOME = ../../.. PKGFLAGS = GHCFLAGS = -Wall -O2 GHC = ghc $(GHCFLAGS) $(PKGFLAGS) DIST = $(HXT_HOME)/dist/examples/arrows DIST_DIR = $(DIST)/pickle PROG = ./pickleTest ./pickleTestWithNamespaces all : $(PROG) force : $(MAKE) distclean all test : ./pickleTest @sleep 1 @echo "the program p2 as XML document" @sleep 3 cat pickle.xml ./pickleTestWithNamespaces @sleep 1 @echo "the program p2 as XML document" @sleep 3 cat pickle.xml dist : [ -d $(DIST_DIR) ] || mkdir -p $(DIST_DIR) cp PickleTest.hs Makefile $(DIST_DIR) clean : rm -f *.o *.hi pickle.xml $(PROG) distclean : $(MAKE) clean .PHONY : all test dist clean distclean force pickleTest : PickleTest.hs $(GHC) --make -o $@ $< pickleTestWithNamespaces : PickleTestWithNamespaces.hs $(GHC) --make -o $@ $< �����������������������������������������������������������������������������������������������������hxt-9.3.1.15/examples/arrows/pickle/PickleTest.hs���������������������������������������������������0000644�0000000�0000000�00000020330�12465166667�017767� 0����������������������������������������������������������������������������������������������������ustar�00����������������������������������������������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������module Main where import Data.Maybe import System.Exit import Test.HUnit import Text.XML.HXT.Core -- ------------------------------------------------------------ -- -- a somewhat complex data structure -- for representing programs of a simple -- imperative language type Program = Stmt type StmtList = [Stmt] data Stmt = Assign Ident Expr | Stmts StmtList | If Expr Stmt (Maybe Stmt) | While Expr Stmt deriving (Eq, Show) type Ident = String data Expr = IntConst Int | BoolConst Bool | Var Ident | UnExpr UnOp Expr | BinExpr Op Expr Expr deriving (Eq, Show) data Op = Add | Sub | Mul | Div | Mod | Eq | Neq deriving (Eq, Ord, Enum, Show) data UnOp = UPlus | UMinus | Neg deriving (Eq, Ord, Read, Show) -- ------------------------------------------------------------ -- -- the pickler definition for the data types -- the main pickler xpProgram :: PU Program xpProgram = xpElem "program" $ xpAddFixedAttr "xmlns" "program42" $ xpickle xpMissingRootElement :: PU Program xpMissingRootElement = xpickle instance XmlPickler UnOp where xpickle = xpPrim instance XmlPickler Op where xpickle = xpWrap (toEnum, fromEnum) xpPrim instance XmlPickler Expr where xpickle = xpAlt tag ps where tag (IntConst _ ) = 0 tag (BoolConst _ ) = 1 tag (Var _ ) = 2 tag (UnExpr _ _ ) = 3 tag (BinExpr _ _ _ ) = 4 ps = [ xpWrap ( IntConst , \ (IntConst i ) -> i ) $ ( xpElem "int" $ xpAttr "value" $ xpickle ) , xpWrap ( BoolConst , \ (BoolConst b) -> b ) $ ( xpElem "bool" $ xpAttr "value" $ xpWrap (toEnum, fromEnum) xpickle ) , xpWrap ( Var , \ (Var n) -> n ) $ ( xpElem "var" $ xpAttr "name" $ xpText ) , xpWrap ( uncurry UnExpr , \ (UnExpr op e) -> (op, e) ) $ ( xpElem "unex" $ xpPair (xpAttr "op" xpickle) xpickle ) , xpWrap ( uncurry3 $ BinExpr , \ (BinExpr op e1 e2) -> (op, e1, e2) ) $ ( xpElem "binex" $ xpTriple (xpAttr "op" xpickle) xpickle xpickle ) ] instance XmlPickler Stmt where xpickle = xpAlt tag ps where tag ( Assign _ _ ) = 0 tag ( Stmts _ ) = 1 tag ( If _ _ _ ) = 2 tag ( While _ _ ) = 3 ps = [ xpWrap ( uncurry Assign , \ (Assign n v) -> (n, v) ) $ ( xpElem "assign" $ xpPair (xpAttr "name" xpText) xpickle ) , xpWrap ( Stmts , \ (Stmts sl) -> sl ) $ ( xpElem "block" $ xpList xpickle ) , xpWrap ( uncurry3 If , \ (If c t e) -> (c, t, e) ) $ ( xpElem "if" $ xpTriple xpickle xpickle xpickle ) , xpWrap ( uncurry While , \ (While c b) -> (c, b) ) $ ( xpElem "while" $ xpPair xpickle xpickle ) ] -- ------------------------------------------------------------ -- -- example programs progs :: [Program] progs = [p0, p1, p2] p0, p1, p2 :: Program p0 = Stmts [] -- the empty program p1 = Stmts [ Assign i ( UnExpr UMinus ( IntConst (-22) ) ) , Assign j ( IntConst 20 ) , While ( BinExpr Neq ( Var i ) ( IntConst 0 ) ) ( Stmts [ Assign i ( BinExpr Sub ( Var i ) ( IntConst 1 ) ) , Assign j ( BinExpr Add ( Var j ) ( IntConst 1 ) ) , If ( IntConst 0 ) (Stmts []) Nothing ] ) ] where i = "i" j = "j" p2 = Stmts [ Assign x (IntConst 6) , Assign y (IntConst 7) , Assign p (IntConst 0) , While ( BinExpr Neq (Var x) (IntConst 0) ) ( If ( BinExpr Neq ( BinExpr Mod (Var x) (IntConst 2) ) (IntConst 0) ) ( Stmts [ Assign x ( BinExpr Sub (Var x) (IntConst 1) ) , Assign p ( BinExpr Add (Var p) (Var y) ) ] ) ( Just ( Stmts [ Assign x ( BinExpr Div (Var x) (IntConst 2) ) , Assign y ( BinExpr Mul (Var y) (IntConst 2) ) ] ) ) ) ] where x = "x" y = "y" p = "p" -- ------------------------------------------------------------ -- | -- the complete set of test cases pickleUnpickleTests :: Test pickleUnpickleTests = TestLabel "pickle/unpickle tests with example programs" $ TestList $ map mkTests progs where mkTests p = TestList $ [ TestCase $ assertEqual "pickleDoc/unpickleDoc without XML serialisation: " [p] res1 , TestCase $ assertEqual "pickleDoc/unpickleDoc with xshow/xread: " [p] res2 , TestCase $ do res <- res4 assertEqual "pickle/unpickle with readFromString: " [p] res , TestCase $ res5 >>= assertEqual "pickle/unpickle with writeDocument/readDocument: " [p] , TestCase $ res6 >>= assertEqual "pickle/unpickle with xpickleDocument/xunpickleDocument: " [p] , TestCase $ res7 >>= assertEqual "pickle/unpickle with DTD validation xpickleDocument/xunpickleDocument: " [p] ] where res1 :: [Program] res1 = maybeToList . unpickleDoc xpProgram . pickleDoc xpProgram $ p res2 :: [Program] res2 = runLA ( xshow ( arr (pickleDoc xpProgram) >>> getChildren ) >>> root [] [xread] >>> arrL (maybeToList . unpickleDoc xpProgram) ) p res4 :: IO [Program] res4 = runX ( constA p >>> arr (pickleDoc xpProgram) -- Program => XmlTree >>> writeDocumentToString [] -- XmlTree => String >>> readFromString [ withValidate no ] -- String => XmlTree >>> arrL (maybeToList . unpickleDoc xpProgram) -- XmlTree => Program ) res5 :: IO [Program] -- the most important case -- for persistent data storage -- and message passing res5 = runX ( constA p -- take the Program value >>> arr (pickleDoc xpProgram) -- Program => XmlTree >>> writeDocument [ withIndent yes -- XmlTree => formated external XML document ] "pickle.xml" >>> readDocument [ withRemoveWS yes -- formated external XML document => XmlTree , withValidate no ] "pickle.xml" >>> arrL (maybeToList . unpickleDoc xpProgram) -- XmlTree => Program ) res6 :: IO [Program] -- the most important case -- for persistent data storage -- and message passing -- same as res5, but the convenient way res6 = runX ( constA p -- take the Program value >>> xpickleDocument xpProgram [ withIndent yes -- Program => formated external XML document ] "pickle.xml" >>> xunpickleDocument xpProgram [ withRemoveWS yes -- formated external XML document => Program , withValidate no ] "pickle.xml" ) res7 :: IO [Program] -- the most important case -- for persistent data storage -- and message passing -- same as res5, but the convenient way res7 = runX ( constA p -- take the Program value >>> xpickleDocument xpProgram [ withIndent yes -- Program => formated external XML document , withSysAttr a_addDTD v_1 -- with inline DTD ] "pickle.xml" >>> xunpickleDocument xpProgram [ withRemoveWS yes -- formated external XML document => Program , withValidate yes ] "pickle.xml" ) allTests :: Test allTests = TestList [ pickleUnpickleTests -- , pickleXshowTests ] main :: IO () main = do c <- runTestTT allTests putStrLn $ show c let errs = errors c fails = failures c exitWith (codeGet errs fails) codeGet :: Int -> Int -> ExitCode codeGet errs fails | fails > 0 = ExitFailure 2 | errs > 0 = ExitFailure 1 | otherwise = ExitSuccess -- ---------------------------------------------------------- ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������hxt-9.3.1.15/examples/arrows/AGentleIntroductionToHXT/����������������������������������������������0000755�0000000�0000000�00000000000�12465166667�020727� 5����������������������������������������������������������������������������������������������������ustar�00����������������������������������������������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������hxt-9.3.1.15/examples/arrows/AGentleIntroductionToHXT/.ghci�����������������������������������������0000644�0000000�0000000�00000000103�12465166667�021634� 0����������������������������������������������������������������������������������������������������ustar�00����������������������������������������������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������:set -i../../../src :set -Wall -fglasgow-exts :load SimpleExamples �������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������hxt-9.3.1.15/examples/arrows/AGentleIntroductionToHXT/Makefile��������������������������������������0000644�0000000�0000000�00000002422�12465166667�022367� 0����������������������������������������������������������������������������������������������������ustar�00����������������������������������������������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������# $Id: Makefile,v 1.2 2006/11/17 17:16:24 hxml Exp $ # # hello world application of Haskell XML Toolbox EXAMPLES = PicklerExample HXT_HOME = ../../.. PKGFLAGS = GHCFLAGS = -W -O2 GHC = ghc $(GHCFLAGS) $(PKGFLAGS) DIST = $(HXT_HOME)/dist/examples/arrows DIST_DIR = $(DIST)/AGentleIntroductionToHXT src = SimpleExamples.hs prog = ./SimpleExamples tests = \ selectAllText \ selectAllTextAndAltValues \ selectAllTextAndRealAltValues \ addRefIcon \ helloWorld \ helloWorld2 \ imageTable \ imageTable0 \ imageTable1 \ imageTable2 \ imageTable3 \ toAbsHRefs \ toAbsRefs \ toAbsRefs1 all : $(MAKE) $(prog) $(foreach i,$(EXAMPLES),$(MAKE) -C $i PKGFLAGS="$(PKGFLAGS)" $@ ;) force : $(MAKE) distclean all test : $(prog) $(foreach op,$(tests),echo $(prog) $(op) "http://www.haskell.org/" "-" ; $(prog) $(op) "http://www.haskell.org/" "-" ;) $(foreach i,$(EXAMPLES),$(MAKE) -C $i $@ ;) dist : [ -d $(DIST_DIR) ] || mkdir -p $(DIST_DIR) $(foreach i,$(EXAMPLES),$(MAKE) -C $i dist DIST=../$(DIST_DIR) ;) cp $(src) Makefile $(DIST_DIR) clean : $(foreach i,$(EXAMPLES),$(MAKE) -C $i $@ ;) rm -f *.o *.hi distclean : $(MAKE) clean rm -f mini hello .PHONY : all test dist clean distclean force $(prog) : $(src) $(GHC) --make -o $@ $< ����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������hxt-9.3.1.15/examples/arrows/AGentleIntroductionToHXT/SimpleExamples.hs�����������������������������0000644�0000000�0000000�00000024353�12465166667�024222� 0����������������������������������������������������������������������������������������������������ustar�00����������������������������������������������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������{- | The examples from the HXT tutorial at haskell.org "http://www.haskell.org/haskellwiki/HXT" -} module Main where import Text.XML.HXT.Core -- basic HXT stuff import Text.XML.HXT.XPath -- additional XPath functions import Text.XML.HXT.Curl -- Curl HTTP handler import Data.List -- auxiliary functions import Data.Maybe import System.Environment import System.Console.GetOpt() import System.Exit -- | call this program with 3 arguments, -- the function name, see list of examples, -- the input URL or file -- and the output file, - for stdout -- -- example: SimpleExamples selectAllText http://www.haskell.org/ - main :: IO () main = do argv <- getArgs (al, fct, src, dst) <- cmdlineOpts argv [rc] <- runX (application al fct src dst) if rc >= c_err then exitWith (ExitFailure 1) else exitWith ExitSuccess application :: SysConfigList -> String -> String -> String -> IOSArrow b Int application config fct src dst = configSysVars config -- set all global config options >>> readDocument [] src >>> processChildren (processRootElement fct `when` isElem) >>> writeDocument [ withIndent yes, withOutputEncoding isoLatin1 ] dst >>> getErrStatus -- | the dummy for the boring stuff of option evaluation, -- usually done with 'System.Console.GetOpt' cmdlineOpts :: [String] -> IO (SysConfigList, String, String, String) cmdlineOpts argv = return ( [ withValidate no , withParseHTML yes , withCurl [] ] , argv!!0 , argv!!1 , argv!!2 ) -- | the processing examples examples :: [ (String, IOSArrow XmlTree XmlTree) ] examples = [ ( "selectAllText", selectAllText ) , ( "selectAllTextAndAltValues", selectAllTextAndAltValues ) , ( "selectAllTextAndRealAltValues", selectAllTextAndRealAltValues ) , ( "addRefIcon", addRefIcon ) , ( "helloWorld", helloWorld ) , ( "helloWorld2", helloWorld2 ) , ( "imageTable", imageTable ) , ( "imageTable0", imageTable0 ) , ( "imageTable1", imageTable1 ) , ( "imageTable2", imageTable2 ) , ( "imageTable3", imageTable3 ) , ( "toAbsHRefs", toAbsHRefs ) , ( "toAbsRefs", toAbsRefs ) , ( "toAbsRefs1", toAbsRefs1 ) ] processRootElement :: String -> IOSArrow XmlTree XmlTree processRootElement fct = fromMaybe this . lookup fct $ examples -- | selection arrows selectAllText :: ArrowXml a => a XmlTree XmlTree selectAllText = selem "the-plain-text" [ deep isText ] -- create a root element, neccessary for wellformed XML output selectAllTextAndAltValues :: ArrowXml a => a XmlTree XmlTree selectAllTextAndAltValues = selem "the-plain-text" [ deep ( isText <+> ( isElem >>> hasName "img" >>> getAttrValue "alt" >>> mkText ) ) ] selectAllTextAndRealAltValues :: ArrowXml a => a XmlTree XmlTree selectAllTextAndRealAltValues = selem "the-plain-text" [ deep ( isText <+> ( isElem >>> hasName "img" >>> getAttrValue "alt" >>> isA significant >>> arr addBrackets >>> mkText ) ) ] where significant :: String -> Bool significant = not . all (`elem` " \n\r\t") addBrackets :: String -> String addBrackets s = " [[ " ++ s ++ " ]] " -- | transformation arrows addRefIcon :: ArrowXml a => a XmlTree XmlTree addRefIcon = processTopDown ( addImg `when` isExternalRef ) where isExternalRef = isElem >>> hasName "a" >>> hasAttr "href" >>> getAttrValue "href" >>> isA isExtRef where isExtRef = isPrefixOf "http:" addImg = replaceChildren ( getChildren <+> imgElement ) imgElement = mkelem "img" [ sattr "src" "/icons/ref.png" , sattr "alt" "external ref" ] [] -- | construction examples helloWorld :: ArrowXml a => a XmlTree XmlTree helloWorld = mkelem "html" [] [ mkelem "head" [] [ mkelem "title" [] [ txt "Hello World" ] ] , mkelem "body" [ sattr "class" "haskell" ] [ mkelem "h1" [] [ txt "Hello World" ] ] ] helloWorld2 :: ArrowXml a => a XmlTree XmlTree helloWorld2 = selem "html" [ selem "head" [ selem "title" [ txt "Hello World" ] ] , mkelem "body" [ sattr "class" "haskell" ] [ selem "h1" [ txt "Hello World" ] ] ] imageTable :: ArrowXml a => a XmlTree XmlTree imageTable = selem "html" [ selem "head" [ selem "title" [ txt "Images in Page" ] ] , selem "body" [ selem "h1" [ txt "Images in Page" ] , selem "table" [ collectImages >>> genTableRows ] ] ] where genTableRows = selem "tr" [ selem "td" [ getAttrValue "src" >>> mkText ] ] imageTable0 :: ArrowXml a => a XmlTree XmlTree imageTable0 = selem "html" [ pageHeader , selem "body" [ selem "h1" [ txt "Images in Page" ] , selem "table" [ collectImages >>> genTableRows ] ] ] where pageHeader = constA "<head><title>Images in Page" >>> xread genTableRows = selem "tr" [ selem "td" [ getAttrValue "src" >>> mkText ] ] imageTable1 :: ArrowXml a => a XmlTree XmlTree imageTable1 = selem "html" [ selem "head" [ selem "title" [ txt "Images in Page" ] ] , selem "body" [ selem "h1" [ txt "Images in Page" ] , selem "table" [ collectImages >>> genTableRows ] ] ] imageTable2 :: IOStateArrow s XmlTree XmlTree imageTable2 = selem "html" [ selem "head" [ selem "title" [ txt "Images in Page" ] ] , selem "body" [ selem "h1" [ txt "Images in Page" ] , selem "table" [ collectImages >>> mkAbsImageRef >>> genTableRows ] ] ] imageTable3 :: IOStateArrow s XmlTree XmlTree imageTable3 = insertTreeTemplate pageTemplate -- the page template [ hasText (=="ImageList") :-> images] -- fill hole "ImageList" with image descriptions where images = collectImages >>> mkAbsImageRef >>> genTableRows pageTemplate = constA "Images in Page

Images in Page

ImageList
" >>> xread collectImages :: ArrowXml a => a XmlTree XmlTree collectImages = deep ( isElem >>> hasName "img" ) genTableRows :: ArrowXml a => a XmlTree XmlTree genTableRows = selem "tr" [ selem "td" -- (1) [ this -- (1.1) ] , selem "td" -- (2) [ getAttrValue "src" >>> mkText >>> mkelem "a" -- (2.1) [ attr "href" this ] [ this ] ] , selem "td" -- (3) [ ( getAttrValue "width" &&& -- (3.1) getAttrValue "height" ) >>> arr2 geometry -- (3.2) >>> mkText ] , selem "td" -- (4) [ getAttrValue "alt" >>> mkText ] ] where geometry :: String -> String -> String geometry "" "" = "" geometry w h = w ++ "x" ++ h mkAbsImageRef :: IOStateArrow s XmlTree XmlTree mkAbsImageRef = processAttrl (mkAbsRef `when` hasName "src") where mkAbsRef = replaceChildren ( xshow getChildren >>> ( mkAbsURI `orElse` this ) >>> mkText ) toAbsHRefs :: IOStateArrow s XmlTree XmlTree toAbsHRefs = ( mkAbsHRefs $< computeBaseRef ) >>> removeBaseElement removeBaseElement :: ArrowXml a => a XmlTree XmlTree removeBaseElement = processChildren ( processChildren ( none `when` ( isElem >>> hasName "base" ) ) `when` ( isElem >>> hasName "head" ) ) mkAbsHRefs :: ArrowXml a => String -> a XmlTree XmlTree mkAbsHRefs base = processTopDown editHRef where editHRef = processAttrl ( changeAttrValue (absHRef base) `when` hasName "href" ) `when` ( isElem >>> hasName "a" ) where absHRef :: String -> String -> String absHRef base url = fromMaybe url . expandURIString url $ base toAbsRefs :: IOStateArrow s XmlTree XmlTree toAbsRefs = ( mkAbsRefs $< computeBaseRef ) >>> removeBaseElement mkAbsRefs0 :: ArrowXml a => String -> a XmlTree XmlTree mkAbsRefs0 base = processTopDown ( editRef "a" "href" >>> editRef "img" "src" >>> editRef "link" "href" >>> editRef "script" "src" ) where editRef en an = processAttrl ( changeAttrValue (absHRef base) `when` hasName an ) `when` ( isElem >>> hasName en ) where absHRef :: String -> String -> String absHRef base url = fromMaybe url . expandURIString url $ base mkAbsRefs :: ArrowXml a => String -> a XmlTree XmlTree mkAbsRefs base = processTopDown editRefs where editRefs = seqA . map (uncurry editRef) $ [ ("a", "href") , ("img", "src") , ("link", "href") , ("script", "src") -- and more ] editRef en an = processAttrl ( changeAttrValue (absHRef base) `when` hasName an ) `when` ( isElem >>> hasName en ) where absHRef :: String -> String -> String absHRef base url = fromMaybe url . expandURIString url $ base computeBaseRef :: IOStateArrow s XmlTree String computeBaseRef = ( ( ( isElem >>> hasName "html" >>> getChildren >>> isElem >>> hasName "head" >>> getChildren >>> isElem >>> hasName "base" >>> getAttrValue "href" ) &&& getBaseURI ) >>> expandURI ) `orElse` getBaseURI getDescendends :: ArrowXml a => [String] -> a XmlTree XmlTree getDescendends = foldl1 (\ x y -> x >>> getChildren >>> y) . map (\ n -> isElem >>> hasName n) computeBaseRef1 :: IOStateArrow s XmlTree String computeBaseRef1 = ( ( ( getDescendends ["html","head","base"] >>> getAttrValue "href" ) &&& getBaseURI ) >>> expandURI ) `orElse` getBaseURI computeBaseRef2 :: IOStateArrow s XmlTree String computeBaseRef2 = ( ( xshow (getXPathTrees "/html/head/base@href") &&& getBaseURI ) >>> expandURI ) `orElse` getBaseURI toAbsRefs1 :: IOStateArrow s XmlTree XmlTree toAbsRefs1 = ( mkAbsRefs $< computeBaseRef1 ) >>> removeBaseElement hxt-9.3.1.15/examples/arrows/AGentleIntroductionToHXT/PicklerExample/0000755000000000000000000000000012465166667023634 5ustar0000000000000000hxt-9.3.1.15/examples/arrows/AGentleIntroductionToHXT/PicklerExample/simple2.xml0000644000000000000000000001135512465166667025736 0ustar0000000000000000 hxt-9.3.1.15/examples/arrows/AGentleIntroductionToHXT/PicklerExample/new-simple2.xml0000644000000000000000000000517612465166667026531 0ustar0000000000000000 hxt-9.3.1.15/examples/arrows/AGentleIntroductionToHXT/PicklerExample/Makefile0000644000000000000000000000127412465166667025300 0ustar0000000000000000# $Id: Makefile,v 1.2 2006/11/17 17:16:24 hxml Exp $ # # hello world application of Haskell XML Toolbox HXT_HOME = ../../../.. PKGFLAGS = GHCFLAGS = -W -O2 GHC = ghc $(GHCFLAGS) $(PKGFLAGS) DIST = $(HXT_HOME)/dist/examples/arrows/AGentleIntroductionToHXT DIST_DIR = $(DIST)/PicklerExample src = Baseball.hs prog = ./Baseball all : $(MAKE) $(prog) force : $(MAKE) distclean all test : $(prog) $(prog) dist : [ -d $(DIST_DIR) ] || mkdir -p $(DIST_DIR) cp $(src) simple2.xml new-simple2.xml Makefile $(DIST_DIR) clean : rm -f *.o *.hi distclean : $(MAKE) clean rm -f $(prog) .PHONY : all test dist clean distclean force $(prog) : $(src) $(GHC) --make -o $@ $< hxt-9.3.1.15/examples/arrows/AGentleIntroductionToHXT/PicklerExample/Baseball.hs0000644000000000000000000002561612465166667025707 0ustar0000000000000000{- | Example for usage of pickler functions to de-/serialise from/to XML Example data is taken from haskell wiki http://www.haskell.org/haskellwiki/HXT/Practical/Simple2 -} module Main where import Data.Map (Map, fromList, toList) import Text.XML.HXT.Core -- Example data taken from: -- http://www.ibiblio.org/xml/books/bible/examples/05/5-1.xml -- ------------------------------------------------------------ -- the data modell data Season = Season { sYear :: Int , sLeagues :: Leagues } deriving (Show, Eq) type Leagues = Map String Divisions type Divisions = Map String [Team] data Team = Team { teamName :: String , city :: String , players :: [Player] } deriving (Show, Eq) data Player = Player { firstName :: String , lastName :: String , position :: String , atBats :: Maybe Int , hits :: Maybe Int , era :: Maybe Float } deriving (Show, Eq) -- ------------------------------------------------------------ -- the pickler instance declarations -- in this case just for uniform naming instance XmlPickler Season where xpickle = xpSeason instance XmlPickler Team where xpickle = xpTeam instance XmlPickler Player where xpickle = xpPlayer -- ------------------------------------------------------------ -- for every data type there is a pickler -- the XML root element xpSeason :: PU Season xpSeason = xpElem "SEASON" $ xpWrap ( uncurry Season , \ s -> (sYear s, sLeagues s)) $ xpPair (xpAttr "YEAR" xpickle) xpLeagues xpLeagues :: PU Leagues xpLeagues = xpWrap ( fromList , toList ) $ xpList $ xpElem "LEAGUE" $ xpPair (xpAttr "NAME" xpText) xpDivisions xpDivisions :: PU Divisions xpDivisions = xpWrap ( fromList , toList ) $ xpList $ xpElem "DIVISION" $ xpPair (xpAttr "NAME" xpText) xpickle xpTeam :: PU Team xpTeam = xpElem "TEAM" $ xpWrap ( uncurry3 Team , \ t -> (teamName t, city t, players t) ) $ xpTriple (xpAttr "NAME" xpText) (xpAttr "CITY" xpText) (xpList xpickle) xpPlayer :: PU Player xpPlayer = xpElem "PLAYER" $ xpWrap ( \ ((f,l,p,a,h,e)) -> Player f l p a h e , \ t -> (firstName t, lastName t , position t, atBats t , hits t, era t ) ) $ xp6Tuple (xpAttr "GIVEN_NAME" xpText) (xpAttr "SURNAME" xpText) (xpAttr "POSITION" xpText) (xpOption (xpAttr "AT_BATS" xpickle)) (xpOption (xpAttr "HITS" xpickle)) (xpOption (xpAttr "ERA" xpPrim )) -- ------------------------------------------------------------ -- a simple pickle/unpickle application main :: IO () main = do runX ( xunpickleDocument xpSeason [ withValidate no , withTrace 1 , withRemoveWS yes , withPreserveComment no ] "simple2.xml" >>> processSeason >>> xpickleDocument xpSeason [ withIndent yes ] "new-simple2.xml" ) return () -- the dummy for processing the unpickled data processSeason :: IOSArrow Season Season processSeason = arrIO ( \ x -> do {print x ; return x}) -- ------------------------------------------------------------ -- the internal data of "simple2.xml" season1998 :: Season season1998 = Season { sYear = 1998 , sLeagues = fromList [ ( "American League" , fromList [ ( "Central" , [ Team { teamName = "White Sox" , city = "Chicago" , players = []} , Team { teamName = "Royals" , city = "Kansas City" , players = []} , Team { teamName = "Tigers" , city = "Detroit" , players = []} , Team { teamName = "Indians" , city = "Cleveland" , players = []} , Team { teamName = "Twins" , city = "Minnesota" , players = []} ]) , ( "East" , [ Team { teamName = "Orioles" , city = "Baltimore" , players = []} , Team { teamName = "Red Sox" , city = "Boston" , players = []} , Team { teamName = "Yankees" , city = "New York" , players = []} , Team { teamName = "Devil Rays" , city = "Tampa Bay" , players = []} , Team { teamName = "Blue Jays" , city = "Toronto" , players = []} ]) , ( "West" , [ Team { teamName = "Angels" , city = "Anaheim" , players = []} , Team { teamName = "Athletics" , city = "Oakland" , players = []} , Team { teamName = "Mariners" , city = "Seattle" , players = []} , Team { teamName = "Rangers" , city = "Texas" , players = []} ]) ]) , ( "National League" , fromList [ ( "Central" , [ Team { teamName = "Cubs" , city = "Chicago" , players = []} , Team { teamName = "Reds" , city = "Cincinnati" , players = []} , Team { teamName = "Astros" , city = "Houston" , players = []} , Team { teamName = "Brewers" , city = "Milwaukee" , players = []} , Team { teamName = "Pirates" , city = "Pittsburgh" , players = []} , Team { teamName = "Cardinals" , city = "St. Louis" , players = []} ]) , ( "East" , [ Team { teamName = "Braves" , city = "Atlanta" , players = [ Player { firstName = "Marty" , lastName = "Malloy" , position = "Second Base" , atBats = Just 28 , hits = Just 5 , era = Nothing} , Player { firstName = "Ozzie" , lastName = "Guillen" , position = "Shortstop" , atBats = Just 264 , hits = Just 73 , era = Nothing} , Player { firstName = "Danny" , lastName = "Bautista" , position = "Outfield" , atBats = Just 144 , hits = Just 36 , era = Nothing} , Player { firstName = "Gerald" , lastName = "Williams" , position = "Outfield" , atBats = Just 266 , hits = Just 81 , era = Nothing} , Player { firstName = "Tom" , lastName = "Glavine" , position = "Starting Pitcher" , atBats = Nothing , hits = Nothing , era = Just 2.47} , Player { firstName = "Javier" , lastName = "Lopez" , position = "Catcher" , atBats = Just 489 , hits = Just 139 , era = Nothing} , Player { firstName = "Ryan" , lastName = "Klesko" , position = "Outfield" , atBats = Just 427 , hits = Just 117 , era = Nothing} , Player { firstName = "Andres" , lastName = "Galarraga" , position = "First Base" , atBats = Just 555 , hits = Just 169 , era = Nothing} , Player { firstName = "Wes" , lastName = "Helms" , position = "Third Base" , atBats = Just 13 , hits = Just 4 , era = Nothing} ]} , Team { teamName = "Marlins" , city = "Florida" , players = []} , Team { teamName = "Expos" , city = "Montreal" , players = []} , Team { teamName = "Mets" , city = "New York" , players = []} , Team { teamName = "Phillies" , city = "Philadelphia" , players = []} ]) , ( "West" , [ Team { teamName = "Diamondbacks" , city = "Arizona" , players = []} , Team { teamName = "Rockies" , city = "Colorado" , players = []} , Team { teamName = "Dodgers" , city = "Los Angeles" , players = []} , Team { teamName = "Padres" , city = "San Diego" , players = []} , Team { teamName = "Giants" , city = "San Francisco" , players = []} ]) ]) ] } -- ------------------------------------------------------------ hxt-9.3.1.15/examples/arrows/dtd2hxt/0000755000000000000000000000000012465166667015500 5ustar0000000000000000hxt-9.3.1.15/examples/arrows/dtd2hxt/.ghci0000644000000000000000000000013212465166667016407 0ustar0000000000000000:set -package-conf ../../lib/hxt/package.conf :set -package hxt :set -Wall :load DTDtoHXT hxt-9.3.1.15/examples/arrows/dtd2hxt/Makefile0000644000000000000000000000237012465166667017142 0ustar0000000000000000# $Id: Makefile,v 1.4 2005/05/15 17:01:04 hxml Exp $ HXT_HOME = ../../.. PKGFLAGS = GHCFLAGS = -Wall -O2 GHC = ghc $(GHCFLAGS) $(PKGFLAGS) prog = ./DTDtoHXT all : $(prog) $(prog) : $(prog).hs $(GHC) --make -o $@ $< force : $(GHC) --make -o $(prog) $(prog).hs test : @echo "===> run a few generation examples" $(MAKE) XHTML.o XHTML2.o @echo "===> the generated modules" ls -l XHTML*.hs XHTML*.o EX1 = ../../xhtml/xhtml.xml EX2 = ../../photoalbum/photos.xml XHTML.o : $(prog) $(EX1) @echo "===> generate a module for XHTML access function from the XHTML DTD with naming convention is, get, ..." $(prog) --output-file XHTML.hs --uppercase-initials $(EX1) $(GHC) -c XHTML.hs XHTML2.o : $(prog) $(EX1) @echo "===> generate a module for XHTML access function from the XHTML DTD with naming convention is_, get_, ..." $(prog) --output-file XHTML2.hs --prefix-underline $(EX1) $(GHC) -c XHTML2.hs Photo.hs : $(prog) $(EX2) $(prog) --output-file $@ --uppercase-initials $(EX2) DIST = $(HXT_HOME)/dist/examples/arrows DIST_DIR = $(DIST)/dtd2hxt DIST_FILES = $(prog).hs Makefile dist : [ -d $(DIST_DIR) ] || mkdir -p $(DIST_DIR) cp $(DIST_FILES) $(DIST_DIR) clean : rm -f $(prog) XHTML*.hs *.o *.hi hxt-9.3.1.15/examples/arrows/dtd2hxt/DTDtoHXT.hs0000644000000000000000000002621212465166667017401 0ustar0000000000000000-- | -- DTDtoHXT - A program for generating access functions for the Haskell XML Toolbox -- from a DTD (Arrow Version) -- -- Author : Uwe Schmidt -- -- this program may be used as example main program for the -- Haskell XML Toolbox module Main where import Text.XML.HXT.Core -- import all stuff for parsing, validating, and transforming XML import Text.XML.HXT.Curl import System.IO -- import the IO and commandline option stuff import System.Environment import System.Console.GetOpt import System.Exit import Data.Char import Data.List -- ------------------------------------------------------------ -- | -- the main program main :: IO () main = do argv <- getArgs -- get the commandline arguments (al, src) <- cmdlineOpts argv -- and evaluate them, return a key-value list [rc] <- runX (dtd2hxt al src) exitProg (rc >= c_err) -- ------------------------------------------------------------ exitProg :: Bool -> IO a exitProg True = exitWith (ExitFailure 1) exitProg False = exitWith ExitSuccess -- ------------------------------------------------------------ -- -- options uppercaseInitials, namespaceAware, prefixUnderline :: String uppercaseInitials = "uppercase-initials" namespaceAware = "namespace-aware" prefixUnderline = "prefix-underline" -- name prefixes tagPrefix, attPrefix, nsPrefix, isPrefix, mkPrefix, hasPrefix, getPrefix , mkAttPrefix, mkSAttPrefix , nsDefault :: String tagPrefix = "tag" attPrefix = "attr" nsPrefix = "ns" isPrefix = "is" mkPrefix = "e" hasPrefix = "has" getPrefix = "get" mkAttPrefix = "a" mkSAttPrefix = "sa" nsDefault = "default" -- ------------------------------------------------------------ -- | -- the /real/ main program -- -- get wellformed document, validates document, but not canonicalize -- (this would remove the DTD), -- and controls output dtd2hxt :: SysConfigList -> String -> IOSArrow b Int dtd2hxt config src = configSysVars config -- set all global config options >>> readDocument [withCanonicalize no ,withCurl [] ] src >>> traceMsg 1 "start processing DTD" >>> processChildren (isDTD `guards` genHXT) >>> traceMsg 1 "processing finished" >>> traceSource >>> traceTree >>> ( writeDocument [withOutputPLAIN] $< getSysAttr "output-file" ) >>> getErrStatus where genHXT = catA $ map (>>> mkText) $ [ getModuleName -- the module header >>> arr genModHead , constA $ comm "namespace declarations" , getNSAttr -- namespace constants >>> -- declared as "xmlns" or "xmlns:" attribute with #FIXED values arr2 genNSCode , constA $ comm "element arrows" , getElems >>. sort -- element processing >>> arr genElemCode , getAttrs >>. ( sort . nub ) -- attribute processing >>> arr genAttrCode , getModuleName -- module footer >>> arr genModFoot ] -- auxiliary arrows -------------------------------------------------- getModuleName :: (ArrowXml a, ArrowDTD a) => a XmlTree String getModuleName = isDTDDoctype >>> getDTDAttrValue a_name >>> arr moduleName -- filter namespace attributes ---------------------------------------- getNSAttr :: (ArrowXml a, ArrowDTD a) => a XmlTree (String, String) getNSAttr = deep isDTDAttlist >>> ( ( getDTDAttrValue a_value >>> isA (\ s -> s == "xmlns" || "xmlns:" `isPrefixOf` s) ) `guards` ( ( getDTDAttrValue a_kind >>> isA (== k_fixed) ) `guards` ( ( getDTDAttrValue a_value >>> arr (drop 6) ) -- remove "xmlns:" prefix &&& getDTDAttrValue a_default ) ) ) getElems :: (ArrowXml a, ArrowDTD a) => a XmlTree String getElems = deep isDTDElement >>> getDTDAttrValue a_name getAttrs :: (ArrowXml a, ArrowDTD a) => a XmlTree String getAttrs = deep isDTDAttlist >>> getDTDAttrValue a_value -- code generation ------------------------------------------------------------ genModHead :: String -> String genModHead rootElem = code [ sepl , "--" , "-- don't edit this module" , "-- generated with " ++ progName , "-- simple access function for Haskell XML Toolbox" , "-- generated from DTD of document: " ++ show src , "" , "module " ++ rootElem ++ " ( module " ++ rootElem ++ " )" , "where" , "" , "import Text.XML.HXT.Core (XmlTree, ArrowXml, (>>>))" , "import qualified Text.XML.HXT.Core as X (attr, eelem, getAttrValue, hasAttr, hasName, isElem, sattr)" ] genNSCode :: String -> String -> String genNSCode prefix ns = code [ ns' ++ "\t:: String" , ns' ++ "\t= " ++ show ns ] where ns' = nsPrefix ++ nn (if null prefix then nsDefault else prefix) genElemCode :: String -> String genElemCode n = code [ comm ("arrows for element " ++ show n) , tagN ++ "\t:: String" , tagN ++ "\t= " ++ show n , "" , isN ++ "\t:: ArrowXml a => a XmlTree XmlTree" , isN ++ "\t= X.isElem >>> X.hasName " ++ tagN , "" , mkN ++ "\t:: ArrowXml a => a n XmlTree" , mkN ++ "\t= X.eelem " ++ tagN ] where tagN = tagPrefix ++ nn n isN = isPrefix ++ nn n mkN = mkPrefix ++ nn n genAttrCode :: String -> String genAttrCode n = code [ comm ("arrows for attribute " ++ show n) , attN ++ "\t:: String" , attN ++ "\t= " ++ show n , "" , hasN ++ "\t:: ArrowXml a => a XmlTree XmlTree" , hasN ++ "\t= X.hasAttr " ++ attN , "" , getN ++ "\t:: ArrowXml a => a XmlTree String" , getN ++ "\t= X.getAttrValue " ++ attN , "" , mkN ++ "\t:: ArrowXml a => a n XmlTree -> a n XmlTree" , mkN ++ "\t= X.attr " ++ attN , "" , mksN ++ "\t:: ArrowXml a => String -> a n XmlTree" , mksN ++ "\t= X.sattr " ++ attN ] where attN = attPrefix ++ nn n hasN = hasPrefix ++ nn n getN = getPrefix ++ nn n ++ nn "value" mkN = mkAttPrefix ++ nn n mksN = mkSAttPrefix ++ nn n genModFoot :: String -> String genModFoot rootElem = comm ( "end of module " ++ rootElem) -- string manipulation -------------------------------------------------- code :: [String] -> String code = concatMap (++ "\n") comm :: String -> String comm cm = code [ "", sepl, "--", "-- " ++ cm, ""] sepl :: String sepl = "-- ----------------------------------------" moduleName :: String -> String moduleName rootElem = modname . (\ x -> if null x then rootElem else x) . getConfigAttr "output_file" $ config modname = (\ x -> toUpper (head x) : tail x) . reverse . (\ n -> if '.' `elem` n -- remove extension then drop 1 . dropWhile (/= '.') $ n else n ) . takeWhile (/= '/') -- remove dir path . reverse nn :: String -> String nn = trInitial . concatMap nc -- normalize names nc :: Char -> String nc c | c `elem` ['a'..'z'] ++ ['A'..'Z'] ++ ['0'..'9'] ++ "_" = [c] | c == ':' || c == '-' = "_" | otherwise = ("_" ++) . show . fromEnum $ c trInitial :: String -> String trInitial str | null str = str | underLn = '_' : str | upperCs = toUpper (head str) : tail str | otherwise = str upperCs, underLn {-, nsAware -} :: Bool upperCs = (== "1") . getConfigAttr uppercaseInitials $ config underLn = (== "1") . getConfigAttr prefixUnderline $ config _nsAware = (== "1") . getConfigAttr namespaceAware $ config -- ------------------------------------------------------------ -- -- the boring option definition and evaluation part -- -- see doc for System.Console.GetOpt progName :: String progName = "DTDtoHXT" options :: [OptDescr SysConfig] options = selectOptions [ a_help ] generalOptions ++ selectOptions [ a_trace , a_proxy , a_encoding , a_validate , a_check_namespaces ] inputOptions ++ selectOptions [ "output-file" ] outputOptions ++ [ Option "u" [prefixUnderline] (NoArg $ withSysAttr prefixUnderline "1") "separate tag and attribute names with a '_'" , Option "U" [uppercaseInitials] (NoArg $ withSysAttr uppercaseInitials "1") "transform the first char of tag and attribute names to uppercase" , Option "N" [namespaceAware] (NoArg $ withSysAttr namespaceAware "1") "filter are namespace aware, if namespace attributes occur in the DTD" ] ++ showOptions usage :: [String] -> IO a usage errl | null errl = do hPutStrLn stdout use exitProg False | otherwise = do hPutStrLn stderr (concat errl ++ "\n" ++ use) exitProg True where header = "DTDtoHXml - Generation of access function for the Haskell XML Toolbox from a DTD\n" ++ "Usage: " ++ progName ++ " [OPTION...] [URI or FILE]" use = usageInfo header options cmdlineOpts :: [String] -> IO (SysConfigList, String) cmdlineOpts argv = case (getOpt Permute options argv) of (ol,n,[] ) -> do sa <- src n help (getConfigAttr a_help ol) return (ol, sa) (_,_,errs) -> usage errs where src [uri] = return uri src [] = usage ["input file/uri missing"] src _ = usage ["only one input url or file allowed\n"] help "1" = usage [] help _ = return () -- ------------------------------------------------------------ hxt-9.3.1.15/examples/xhtml/0000755000000000000000000000000012465166667013736 5ustar0000000000000000hxt-9.3.1.15/examples/xhtml/xhtml1-strict.dtd0000644000000000000000000006270612465166667017171 0ustar0000000000000000 %HTMLlat1; %HTMLsymbol; %HTMLspecial; hxt-9.3.1.15/examples/xhtml/xhtml-symbol.ent0000644000000000000000000003345712465166667017121 0ustar0000000000000000 hxt-9.3.1.15/examples/xhtml/xhtml1-transitional.dtd0000644000000000000000000007676512465166667020402 0ustar0000000000000000 %HTMLlat1; %HTMLsymbol; %HTMLspecial; hxt-9.3.1.15/examples/xhtml/xhtml-special.ent0000644000000000000000000001006012465166667017215 0ustar0000000000000000 hxt-9.3.1.15/examples/xhtml/xhtml.xml0000644000000000000000000015503112465166667015621 0ustar0000000000000000 XHTML 1.0: The Extensible HyperText Markup Language

W3C

XHTML 1.0: The Extensible HyperText Markup Language

A Reformulation of HTML 4 in XML 1.0

W3C Recommendation 26 January 2000

This version:
http://www.w3.org/TR/2000/REC-xhtml1-20000126
(Postscript version, PDF version, ZIP archive, or Gzip'd TAR archive)
Latest version:
http://www.w3.org/TR/xhtml1
Previous version:
http://www.w3.org/TR/1999/PR-xhtml1-19991210
Authors:
See acknowledgements.

Abstract

This specification defines XHTML 1.0, a reformulation of HTML 4 as an XML 1.0 application, and three DTDs corresponding to the ones defined by HTML 4. The semantics of the elements and their attributes are defined in the W3C Recommendation for HTML 4. These semantics provide the foundation for future extensibility of XHTML. Compatibility with existing HTML user agents is possible by following a small set of guidelines.

Status of this document

This section describes the status of this document at the time of its publication. Other documents may supersede this document. The latest status of this document series is maintained at the W3C.

This document has been reviewed by W3C Members and other interested parties and has been endorsed by the Director as a W3C Recommendation. It is a stable document and may be used as reference material or cited as a normative reference from another document. W3C's role in making the Recommendation is to draw attention to the specification and to promote its widespread deployment. This enhances the functionality and interoperability of the Web.

This document has been produced as part of the W3C HTML Activity. The goals of the HTML Working Group (members only) are discussed in the HTML Working Group charter (members only).

A list of current W3C Recommendations and other technical documents can be found at http://www.w3.org/TR.

Public discussion on HTML features takes place on the mailing list www-html@w3.org (archive).

Please report errors in this document to www-html-editor@w3.org.

The list of known errors in this specification is available at http://www.w3.org/2000/01/REC-xhtml1-20000126-errata.

Contents

1. What is XHTML?

XHTML is a family of current and future document types and modules that reproduce, subset, and extend HTML 4 [HTML]. XHTML family document types are XML based, and ultimately are designed to work in conjunction with XML-based user agents. The details of this family and its evolution are discussed in more detail in the section on Future Directions.

XHTML 1.0 (this specification) is the first document type in the XHTML family. It is a reformulation of the three HTML 4 document types as applications of XML 1.0 [XML]. It is intended to be used as a language for content that is both XML-conforming and, if some simple guidelines are followed, operates in HTML 4 conforming user agents. Developers who migrate their content to XHTML 1.0 will realize the following benefits:

  • XHTML documents are XML conforming. As such, they are readily viewed, edited, and validated with standard XML tools.
  • XHTML documents can be written to to operate as well or better than they did before in existing HTML 4-conforming user agents as well as in new, XHTML 1.0 conforming user agents.
  • XHTML documents can utilize applications (e.g. scripts and applets) that rely upon either the HTML Document Object Model or the XML Document Object Model [DOM].
  • As the XHTML family evolves, documents conforming to XHTML 1.0 will be more likely to interoperate within and among various XHTML environments.

The XHTML family is the next step in the evolution of the Internet. By migrating to XHTML today, content developers can enter the XML world with all of its attendant benefits, while still remaining confident in their content's backward and future compatibility.

1.1 What is HTML 4?

HTML 4 [HTML] is an SGML (Standard Generalized Markup Language) application conforming to International Standard ISO 8879, and is widely regarded as the standard publishing language of the World Wide Web.

SGML is a language for describing markup languages, particularly those used in electronic document exchange, document management, and document publishing. HTML is an example of a language defined in SGML.

SGML has been around since the middle 1980's and has remained quite stable. Much of this stability stems from the fact that the language is both feature-rich and flexible. This flexibility, however, comes at a price, and that price is a level of complexity that has inhibited its adoption in a diversity of environments, including the World Wide Web.

HTML, as originally conceived, was to be a language for the exchange of scientific and other technical documents, suitable for use by non-document specialists. HTML addressed the problem of SGML complexity by specifying a small set of structural and semantic tags suitable for authoring relatively simple documents. In addition to simplifying the document structure, HTML added support for hypertext. Multimedia capabilities were added later.

In a remarkably short space of time, HTML became wildly popular and rapidly outgrew its original purpose. Since HTML's inception, there has been rapid invention of new elements for use within HTML (as a standard) and for adapting HTML to vertical, highly specialized, markets. This plethora of new elements has led to compatibility problems for documents across different platforms.

As the heterogeneity of both software and platforms rapidly proliferate, it is clear that the suitability of 'classic' HTML 4 for use on these platforms is somewhat limited.

1.2 What is XML?

XML is the shorthand for Extensible Markup Language, and is an acronym of Extensible Markup Language [XML].

XML was conceived as a means of regaining the power and flexibility of SGML without most of its complexity. Although a restricted form of SGML, XML nonetheless preserves most of SGML's power and richness, and yet still retains all of SGML's commonly used features.

While retaining these beneficial features, XML removes many of the more complex features of SGML that make the authoring and design of suitable software both difficult and costly.

1.3 Why the need for XHTML?

The benefits of migrating to XHTML 1.0 are described above. Some of the benefits of migrating to XHTML in general are:

  • Document developers and user agent designers are constantly discovering new ways to express their ideas through new markup. In XML, it is relatively easy to introduce new elements or additional element attributes. The XHTML family is designed to accommodate these extensions through XHTML modules and techniques for developing new XHTML-conforming modules (described in the forthcoming XHTML Modularization specification). These modules will permit the combination of existing and new feature sets when developing content and when designing new user agents.
  • Alternate ways of accessing the Internet are constantly being introduced. Some estimates indicate that by the year 2002, 75% of Internet document viewing will be carried out on these alternate platforms. The XHTML family is designed with general user agent interoperability in mind. Through a new user agent and document profiling mechanism, servers, proxies, and user agents will be able to perform best effort content transformation. Ultimately, it will be possible to develop XHTML-conforming content that is usable by any XHTML-conforming user agent.

2. Definitions

2.1 Terminology

The following terms are used in this specification. These terms extend the definitions in [RFC2119] in ways based upon similar definitions in ISO/IEC 9945-1:1990 [POSIX.1]:

Implementation-defined
A value or behavior is implementation-defined when it is left to the implementation to define [and document] the corresponding requirements for correct document construction.
May
With respect to implementations, the word "may" is to be interpreted as an optional feature that is not required in this specification but can be provided. With respect to Document Conformance, the word "may" means that the optional feature must not be used. The term "optional" has the same definition as "may".
Must
In this specification, the word "must" is to be interpreted as a mandatory requirement on the implementation or on Strictly Conforming XHTML Documents, depending upon the context. The term "shall" has the same definition as "must".
Reserved
A value or behavior is unspecified, but it is not allowed to be used by Conforming Documents nor to be supported by a Conforming User Agents.
Should
With respect to implementations, the word "should" is to be interpreted as an implementation recommendation, but not a requirement. With respect to documents, the word "should" is to be interpreted as recommended programming practice for documents and a requirement for Strictly Conforming XHTML Documents.
Supported
Certain facilities in this specification are optional. If a facility is supported, it behaves as specified by this specification.
Unspecified
When a value or behavior is unspecified, the specification defines no portability requirements for a facility on an implementation even when faced with a document that uses the facility. A document that requires specific behavior in such an instance, rather than tolerating any behavior when using that facility, is not a Strictly Conforming XHTML Document.

2.2 General Terms

Attribute
An attribute is a parameter to an element declared in the DTD. An attribute's type and value range, including a possible default value, are defined in the DTD.
DTD
A DTD, or document type definition, is a collection of XML declarations that, as a collection, defines the legal structure, elements, and attributes that are available for use in a document that complies to the DTD.
Document
A document is a stream of data that, after being combined with any other streams it references, is structured such that it holds information contained within elements that are organized as defined in the associated DTD. See Document Conformance for more information.
Element
An element is a document structuring unit declared in the DTD. The element's content model is defined in the DTD, and additional semantics may be defined in the prose description of the element.
Facilities
Functionality includes elements, attributes, and the semantics associated with those elements and attributes. An implementation supporting that functionality is said to provide the necessary facilities.
Implementation
An implementation is a system that provides collection of facilities and services that supports this specification. See User Agent Conformance for more information.
Parsing
Parsing is the act whereby a document is scanned, and the information contained within the document is filtered into the context of the elements in which the information is structured.
Rendering
Rendering is the act whereby the information in a document is presented. This presentation is done in the form most appropriate to the environment (e.g. aurally, visually, in print).
User Agent
A user agent is an implementation that retrieves and processes XHTML documents. See User Agent Conformance for more information.
Validation
Validation is a process whereby documents are verified against the associated DTD, ensuring that the structure, use of elements, and use of attributes are consistent with the definitions in the DTD.
Well-formed
A document is well-formed when it is structured according to the rules defined in Section 2.1 of the XML 1.0 Recommendation [XML]. Basically, this definition states that elements, delimited by their start and end tags, are nested properly within one another.

3. Normative Definition of XHTML 1.0

3.1 Document Conformance

This version of XHTML provides a definition of strictly conforming XHTML documents, which are restricted to tags and attributes from the XHTML namespace. See Section 3.1.2 for information on using XHTML with other namespaces, for instance, to include metadata expressed in RDF within XHTML documents.

3.1.1 Strictly Conforming Documents

A Strictly Conforming XHTML Document is a document that requires only the facilities described as mandatory in this specification. Such a document must meet all of the following criteria:

  1. It must validate against one of the three DTDs found in Appendix A.

  2. The root element of the document must be <html>.

  3. The root element of the document must designate the XHTML namespace using the xmlns attribute [XMLNAMES]. The namespace for XHTML is defined to be http://www.w3.org/1999/xhtml.

  4. There must be a DOCTYPE declaration in the document prior to the root element. The public identifier included in the DOCTYPE declaration must reference one of the three DTDs found in Appendix A using the respective Formal Public Identifier. The system identifier may be changed to reflect local system conventions.

    <!DOCTYPE html
         PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
         "DTD/xhtml1-strict.dtd">
    
    <!DOCTYPE html
         PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
         "DTD/xhtml1-transitional.dtd">
    
    <!DOCTYPE html
         PUBLIC "-//W3C//DTD XHTML 1.0 Frameset//EN"
         "DTD/xhtml1-frameset.dtd">
    

Here is an example of a minimal XHTML document.

<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE html
     PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
    "DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
  <head>
    <title>Virtual Library</title>
  </head>
  <body>
    <p>Moved to <a href="http://vlib.org/">vlib.org</a>.</p>
  </body>
</html>

Note that in this example, the XML declaration is included. An XML declaration like the one above is not required in all XML documents. XHTML document authors are strongly encouraged to use XML declarations in all their documents. Such a declaration is required when the character encoding of the document is other than the default UTF-8 or UTF-16.

3.1.2 Using XHTML with other namespaces

The XHTML namespace may be used with other XML namespaces as per [XMLNAMES], although such documents are not strictly conforming XHTML 1.0 documents as defined above. Future work by W3C will address ways to specify conformance for documents involving multiple namespaces.

The following example shows the way in which XHTML 1.0 could be used in conjunction with the MathML Recommendation:

<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
  <head>
    <title>A Math Example</title>
  </head>
  <body>
    <p>The following is MathML markup:</p>
    <math xmlns="http://www.w3.org/1998/Math/MathML">
      <apply> <log/>
        <logbase>
          <cn> 3 </cn>
        </logbase>
        <ci> x </ci>
      </apply>
    </math>
  </body>
</html>

The following example shows the way in which XHTML 1.0 markup could be incorporated into another XML namespace:

<?xml version="1.0" encoding="UTF-8"?>
<!-- initially, the default namespace is "books" -->
<book xmlns='urn:loc.gov:books'
    xmlns:isbn='urn:ISBN:0-395-36341-6' xml:lang="en" lang="en">
  <title>Cheaper by the Dozen</title>
  <isbn:number>1568491379</isbn:number>
  <notes>
    <!-- make HTML the default namespace for a hypertext commentary -->
    <p xmlns='http://www.w3.org/1999/xhtml'>
        This is also available <a href="http://www.w3.org/">online</a>.
    </p>
  </notes>
</book>

3.2 User Agent Conformance

A conforming user agent must meet all of the following criteria:

  1. In order to be consistent with the XML 1.0 Recommendation [XML], the user agent must parse and evaluate an XHTML document for well-formedness. If the user agent claims to be a validating user agent, it must also validate documents against their referenced DTDs according to [XML].
  2. When the user agent claims to support facilities defined within this specification or required by this specification through normative reference, it must do so in ways consistent with the facilities' definition.
  3. When a user agent processes an XHTML document as generic XML, it shall only recognize attributes of type ID (e.g. the id attribute on most XHTML elements) as fragment identifiers.
  4. If a user agent encounters an element it does not recognize, it must render the element's content.
  5. If a user agent encounters an attribute it does not recognize, it must ignore the entire attribute specification (i.e., the attribute and its value).
  6. If a user agent encounters an attribute value it doesn't recognize, it must use the default attribute value.
  7. If it encounters an entity reference (other than one of the predefined entities) for which the User Agent has processed no declaration (which could happen if the declaration is in the external subset which the User Agent hasn't read), the entity reference should be rendered as the characters (starting with the ampersand and ending with the semi-colon) that make up the entity reference.
  8. When rendering content, User Agents that encounter characters or character entity references that are recognized but not renderable should display the document in such a way that it is obvious to the user that normal rendering has not taken place.
  9. The following characters are defined in [XML] as whitespace characters:
    • Space (&#x0020;)
    • Tab (&#x0009;)
    • Carriage return (&#x000D;)
    • Line feed (&#x000A;)

    The XML processor normalizes different system's line end codes into one single line-feed character, that is passed up to the application. The XHTML user agent in addition, must treat the following characters as whitespace:

    • Form feed (&#x000C;)
    • Zero-width space (&#x200B;)

    In elements where the 'xml:space' attribute is set to 'preserve', the user agent must leave all whitespace characters intact (with the exception of leading and trailing whitespace characters, which should be removed). Otherwise, whitespace is handled according to the following rules:

    • All whitespace surrounding block elements should be removed.
    • Comments are removed entirely and do not affect whitespace handling. One whitespace character on either side of a comment is treated as two white space characters.
    • Leading and trailing whitespace inside a block element must be removed.
    • Line feed characters within a block element must be converted into a space (except when the 'xml:space' attribute is set to 'preserve').
    • A sequence of white space characters must be reduced to a single space character (except when the 'xml:space' attribute is set to 'preserve').
    • With regard to rendition, the User Agent should render the content in a manner appropriate to the language in which the content is written. In languages whose primary script is Latinate, the ASCII space character is typically used to encode both grammatical word boundaries and typographic whitespace; in languages whose script is related to Nagari (e.g., Sanskrit, Thai, etc.), grammatical boundaries may be encoded using the ZW 'space' character, but will not typically be represented by typographic whitespace in rendered output; languages using Arabiform scripts may encode typographic whitespace using a space character, but may also use the ZW space character to delimit 'internal' grammatical boundaries (what look like words in Arabic to an English eye frequently encode several words, e.g. 'kitAbuhum' = 'kitAbu-hum' = 'book them' == their book); and languages in the Chinese script tradition typically neither encode such delimiters nor use typographic whitespace in this way.

    Whitespace in attribute values is processed according to [XML].

4. Differences with HTML 4

Due to the fact that XHTML is an XML application, certain practices that were perfectly legal in SGML-based HTML 4 [HTML] must be changed.

4.1 Documents must be well-formed

Well-formedness is a new concept introduced by [XML]. Essentially this means that all elements must either have closing tags or be written in a special form (as described below), and that all the elements must nest.

Although overlapping is illegal in SGML, it was widely tolerated in existing browsers.

CORRECT: nested elements.

<p>here is an emphasized <em>paragraph</em>.</p>

INCORRECT: overlapping elements

<p>here is an emphasized <em>paragraph.</p></em>

4.2 Element and attribute names must be in lower case

XHTML documents must use lower case for all HTML element and attribute names. This difference is necessary because XML is case-sensitive e.g. <li> and <LI> are different tags.

4.3 For non-empty elements, end tags are required

In SGML-based HTML 4 certain elements were permitted to omit the end tag; with the elements that followed implying closure. This omission is not permitted in XML-based XHTML. All elements other than those declared in the DTD as EMPTY must have an end tag.

CORRECT: terminated elements

<p>here is a paragraph.</p><p>here is another paragraph.</p>

INCORRECT: unterminated elements

<p>here is a paragraph.<p>here is another paragraph.

4.4 Attribute values must always be quoted

All attribute values must be quoted, even those which appear to be numeric.

CORRECT: quoted attribute values

<table rows="3">

INCORRECT: unquoted attribute values

<table rows=3>

4.5 Attribute Minimization

XML does not support attribute minimization. Attribute-value pairs must be written in full. Attribute names such as compact and checked cannot occur in elements without their value being specified.

CORRECT: unminimized attributes

<dl compact="compact">

INCORRECT: minimized attributes

<dl compact>

4.6 Empty Elements

Empty elements must either have an end tag or the start tag must end with />. For instance, <br/> or <hr></hr>. See HTML Compatibility Guidelines for information on ways to ensure this is backward compatible with HTML 4 user agents.

CORRECT: terminated empty tags

<br/><hr/>

INCORRECT: unterminated empty tags

<br><hr>

4.7 Whitespace handling in attribute values

In attribute values, user agents will strip leading and trailing whitespace from attribute values and map sequences of one or more whitespace characters (including line breaks) to a single inter-word space (an ASCII space character for western scripts). See Section 3.3.3 of [XML].

4.8 Script and Style elements

In XHTML, the script and style elements are declared as having #PCDATA content. As a result, < and & will be treated as the start of markup, and entities such as &lt; and &amp; will be recognized as entity references by the XML processor to < and & respectively. Wrapping the content of the script or style element within a CDATA marked section avoids the expansion of these entities.

<script>
 <![CDATA[
 ... unescaped script content ...
 ]]>
 </script>

CDATA sections are recognized by the XML processor and appear as nodes in the Document Object Model, see Section 1.3 of the DOM Level 1 Recommendation [DOM].

An alternative is to use external script and style documents.

4.9 SGML exclusions

SGML gives the writer of a DTD the ability to exclude specific elements from being contained within an element. Such prohibitions (called "exclusions") are not possible in XML.

For example, the HTML 4 Strict DTD forbids the nesting of an 'a' element within another 'a' element to any descendant depth. It is not possible to spell out such prohibitions in XML. Even though these prohibitions cannot be defined in the DTD, certain elements should not be nested. A summary of such elements and the elements that should not be nested in them is found in the normative Appendix B.

4.10 The elements with 'id' and 'name' attributes

HTML 4 defined the name attribute for the elements a, applet, form, frame, iframe, img, and map. HTML 4 also introduced the id attribute. Both of these attributes are designed to be used as fragment identifiers.

In XML, fragment identifiers are of type ID, and there can only be a single attribute of type ID per element. Therefore, in XHTML 1.0 the id attribute is defined to be of type ID. In order to ensure that XHTML 1.0 documents are well-structured XML documents, XHTML 1.0 documents MUST use the id attribute when defining fragment identifiers, even on elements that historically have also had a name attribute. See the HTML Compatibility Guidelines for information on ensuring such anchors are backwards compatible when serving XHTML documents as media type text/html.

Note that in XHTML 1.0, the name attribute of these elements is formally deprecated, and will be removed in a subsequent version of XHTML.

5. Compatibility Issues

Although there is no requirement for XHTML 1.0 documents to be compatible with existing user agents, in practice this is easy to accomplish. Guidelines for creating compatible documents can be found in Appendix C.

5.1 Internet Media Type

As of the publication of this recommendation, the general recommended MIME labeling for XML-based applications has yet to be resolved.

However, XHTML Documents which follow the guidelines set forth in Appendix C, "HTML Compatibility Guidelines" may be labeled with the Internet Media Type "text/html", as they are compatible with most HTML browsers. This document makes no recommendation about MIME labeling of other XHTML documents.

6. Future Directions

XHTML 1.0 provides the basis for a family of document types that will extend and subset XHTML, in order to support a wide range of new devices and applications, by defining modules and specifying a mechanism for combining these modules. This mechanism will enable the extension and sub-setting of XHTML 1.0 in a uniform way through the definition of new modules.

6.1 Modularizing HTML

As the use of XHTML moves from the traditional desktop user agents to other platforms, it is clear that not all of the XHTML elements will be required on all platforms. For example a hand held device or a cell-phone may only support a subset of XHTML elements.

The process of modularization breaks XHTML up into a series of smaller element sets. These elements can then be recombined to meet the needs of different communities.

These modules will be defined in a later W3C document.

6.2 Subsets and Extensibility

Modularization brings with it several advantages:

  • It provides a formal mechanism for sub-setting XHTML.

  • It provides a formal mechanism for extending XHTML.

  • It simplifies the transformation between document types.

  • It promotes the reuse of modules in new document types.

6.3 Document Profiles

A document profile specifies the syntax and semantics of a set of documents. Conformance to a document profile provides a basis for interoperability guarantees. The document profile specifies the facilities required to process documents of that type, e.g. which image formats can be used, levels of scripting, style sheet support, and so on.

For product designers this enables various groups to define their own standard profile.

For authors this will obviate the need to write several different versions of documents for different clients.

For special groups such as chemists, medical doctors, or mathematicians this allows a special profile to be built using standard HTML elements plus a group of elements geared to the specialist's needs.

Appendix A. DTDs

This appendix is normative.

These DTDs and entity sets form a normative part of this specification. The complete set of DTD files together with an XML declaration and SGML Open Catalog is included in the zip file for this specification.

A.1 Document Type Definitions

These DTDs approximate the HTML 4 DTDs. It is likely that when the DTDs are modularized, a method of DTD construction will be employed that corresponds more closely to HTML 4.

A.2 Entity Sets

The XHTML entity sets are the same as for HTML 4, but have been modified to be valid XML 1.0 entity declarations. Note the entity for the Euro currency sign (&euro; or &#8364; or &#x20AC;) is defined as part of the special characters.

Appendix B. Element Prohibitions

This appendix is normative.

The following elements have prohibitions on which elements they can contain (see Section 4.9). This prohibition applies to all depths of nesting, i.e. it contains all the descendant elements.

a
cannot contain other a elements.
pre
cannot contain the img, object, big, small, sub, or sup elements.
button
cannot contain the input, select, textarea, label, button, form, fieldset, iframe or isindex elements.
label
cannot contain other label elements.
form
cannot contain other form elements.

Appendix C. HTML Compatibility Guidelines

This appendix is informative.

This appendix summarizes design guidelines for authors who wish their XHTML documents to render on existing HTML user agents.

C.1 Processing Instructions

Be aware that processing instructions are rendered on some user agents. However, also note that when the XML declaration is not included in a document, the document can only use the default character encodings UTF-8 or UTF-16.

C.2 Empty Elements

Include a space before the trailing / and > of empty elements, e.g. <br />, <hr /> and <img src="karen.jpg" alt="Karen" />. Also, use the minimized tag syntax for empty elements, e.g. <br />, as the alternative syntax <br></br> allowed by XML gives uncertain results in many existing user agents.

C.3 Element Minimization and Empty Element Content

Given an empty instance of an element whose content model is not EMPTY (for example, an empty title or paragraph) do not use the minimized form (e.g. use <p> </p> and not <p />).

C.4 Embedded Style Sheets and Scripts

Use external style sheets if your style sheet uses < or & or ]]> or --. Use external scripts if your script uses < or & or ]]> or --. Note that XML parsers are permitted to silently remove the contents of comments. Therefore, the historical practice of "hiding" scripts and style sheets within comments to make the documents backward compatible is likely to not work as expected in XML-based implementations.

C.5 Line Breaks within Attribute Values

Avoid line breaks and multiple whitespace characters within attribute values. These are handled inconsistently by user agents.

C.6 Isindex

Don't include more than one isindex element in the document head. The isindex element is deprecated in favor of the input element.

C.7 The lang and xml:lang Attributes

Use both the lang and xml:lang attributes when specifying the language of an element. The value of the xml:lang attribute takes precedence.

C.8 Fragment Identifiers

In XML, URIs [RFC2396] that end with fragment identifiers of the form "#foo" do not refer to elements with an attribute name="foo"; rather, they refer to elements with an attribute defined to be of type ID, e.g., the id attribute in HTML 4. Many existing HTML clients don't support the use of ID-type attributes in this way, so identical values may be supplied for both of these attributes to ensure maximum forward and backward compatibility (e.g., <a id="foo" name="foo">...</a>).

Further, since the set of legal values for attributes of type ID is much smaller than for those of type CDATA, the type of the name attribute has been changed to NMTOKEN. This attribute is constrained such that it can only have the same values as type ID, or as the Name production in XML 1.0 Section 2.5, production 5. Unfortunately, this constraint cannot be expressed in the XHTML 1.0 DTDs. Because of this change, care must be taken when converting existing HTML documents. The values of these attributes must be unique within the document, valid, and any references to these fragment identifiers (both internal and external) must be updated should the values be changed during conversion.

Finally, note that XHTML 1.0 has deprecated the name attribute of the a, applet, form, frame, iframe, img, and map elements, and it will be removed from XHTML in subsequent versions.

C.9 Character Encoding

To specify a character encoding in the document, use both the encoding attribute specification on the xml declaration (e.g. <?xml version="1.0" encoding="EUC-JP"?>) and a meta http-equiv statement (e.g. <meta http-equiv="Content-type" content='text/html; charset="EUC-JP"' />). The value of the encoding attribute of the xml processing instruction takes precedence.

C.10 Boolean Attributes

Some HTML user agents are unable to interpret boolean attributes when these appear in their full (non-minimized) form, as required by XML 1.0. Note this problem doesn't affect user agents compliant with HTML 4. The following attributes are involved: compact, nowrap, ismap, declare, noshade, checked, disabled, readonly, multiple, selected, noresize, defer.

C.11 Document Object Model and XHTML

The Document Object Model level 1 Recommendation [DOM] defines document object model interfaces for XML and HTML 4. The HTML 4 document object model specifies that HTML element and attribute names are returned in upper-case. The XML document object model specifies that element and attribute names are returned in the case they are specified. In XHTML 1.0, elements and attributes are specified in lower-case. This apparent difference can be addressed in two ways:

  1. Applications that access XHTML documents served as Internet media type text/html via the DOM can use the HTML DOM, and can rely upon element and attribute names being returned in upper-case from those interfaces.
  2. Applications that access XHTML documents served as Internet media types text/xml or application/xml can also use the XML DOM. Elements and attributes will be returned in lower-case. Also, some XHTML elements may or may not appear in the object tree because they are optional in the content model (e.g. the tbody element within table). This occurs because in HTML 4 some elements were permitted to be minimized such that their start and end tags are both omitted (an SGML feature). This is not possible in XML. Rather than require document authors to insert extraneous elements, XHTML has made the elements optional. Applications need to adapt to this accordingly.

C.12 Using Ampersands in Attribute Values

When an attribute value contains an ampersand, it must be expressed as a character entity reference (e.g. "&amp;"). For example, when the href attribute of the a element refers to a CGI script that takes parameters, it must be expressed as http://my.site.dom/cgi-bin/myscript.pl?class=guest&amp;name=user rather than as http://my.site.dom/cgi-bin/myscript.pl?class=guest&name=user.

C.13 Cascading Style Sheets (CSS) and XHTML

The Cascading Style Sheets level 2 Recommendation [CSS2] defines style properties which are applied to the parse tree of the HTML or XML document. Differences in parsing will produce different visual or aural results, depending on the selectors used. The following hints will reduce this effect for documents which are served without modification as both media types:

  1. CSS style sheets for XHTML should use lower case element and attribute names.
  2. In tables, the tbody element will be inferred by the parser of an HTML user agent, but not by the parser of an XML user agent. Therefore you should always explicitly add a tbody element if it is referred to in a CSS selector.
  3. Within the XHTML name space, user agents are expected to recognize the "id" attribute as an attribute of type ID. Therefore, style sheets should be able to continue using the shorthand "#" selector syntax even if the user agent does not read the DTD.
  4. Within the XHTML name space, user agents are expected to recognize the "class" attribute. Therefore, style sheets should be able to continue using the shorthand "." selector syntax.
  5. CSS defines different conformance rules for HTML and XML documents; be aware that the HTML rules apply to XHTML documents delivered as HTML and the XML rules apply to XHTML documents delivered as XML.

Appendix D. Acknowledgements

This appendix is informative.

This specification was written with the participation of the members of the W3C HTML working group:

Steven Pemberton, CWI (HTML Working Group Chair)
Murray Altheim, Sun Microsystems
Daniel Austin, AskJeeves (CNET: The Computer Network through July 1999)
Frank Boumphrey, HTML Writers Guild
John Burger, Mitre
Andrew W. Donoho, IBM
Sam Dooley, IBM
Klaus Hofrichter, GMD
Philipp Hoschka, W3C
Masayasu Ishikawa, W3C
Warner ten Kate, Philips Electronics
Peter King, Phone.com
Paula Klante, JetForm
Shin'ichi Matsui, Panasonic (W3C visiting engineer through September 1999)
Shane McCarron, Applied Testing and Technology (The Open Group through August 1999)
Ann Navarro, HTML Writers Guild
Zach Nies, Quark
Dave Raggett, W3C/HP (W3C lead for HTML)
Patrick Schmitz, Microsoft
Sebastian Schnitzenbaumer, Stack Overflow
Peter Stark, Phone.com
Chris Wilson, Microsoft
Ted Wugofski, Gateway 2000
Dan Zigmond, WebTV Networks

Appendix E. References

This appendix is informative.

[CSS2]
"Cascading Style Sheets, level 2 (CSS2) Specification", B. Bos, H. W. Lie, C. Lilley, I. Jacobs, 12 May 1998.
Latest version available at: http://www.w3.org/TR/REC-CSS2
[DOM]
"Document Object Model (DOM) Level 1 Specification", Lauren Wood et al., 1 October 1998.
Latest version available at: http://www.w3.org/TR/REC-DOM-Level-1
[HTML]
"HTML 4.01 Specification", D. Raggett, A. Le Hors, I. Jacobs, 24 December 1999.
Latest version available at: http://www.w3.org/TR/html401
[POSIX.1]
"ISO/IEC 9945-1:1990 Information Technology - Portable Operating System Interface (POSIX) - Part 1: System Application Program Interface (API) [C Language]", Institute of Electrical and Electronics Engineers, Inc, 1990.
[RFC2046]
"RFC2046: Multipurpose Internet Mail Extensions (MIME) Part Two: Media Types", N. Freed and N. Borenstein, November 1996.
Available at http://www.ietf.org/rfc/rfc2046.txt. Note that this RFC obsoletes RFC1521, RFC1522, and RFC1590.
[RFC2119]
"RFC2119: Key words for use in RFCs to Indicate Requirement Levels", S. Bradner, March 1997.
Available at: http://www.ietf.org/rfc/rfc2119.txt
[RFC2376]
"RFC2376: XML Media Types", E. Whitehead, M. Murata, July 1998.
Available at: http://www.ietf.org/rfc/rfc2376.txt
[RFC2396]
"RFC2396: Uniform Resource Identifiers (URI): Generic Syntax", T. Berners-Lee, R. Fielding, L. Masinter, August 1998.
This document updates RFC1738 and RFC1808.
Available at: http://www.ietf.org/rfc/rfc2396.txt
[XML]
"Extensible Markup Language (XML) 1.0 Specification", T. Bray, J. Paoli, C. M. Sperberg-McQueen, 10 February 1998.
Latest version available at: http://www.w3.org/TR/REC-xml
[XMLNAMES]
"Namespaces in XML", T. Bray, D. Hollander, A. Layman, 14 January 1999.
XML namespaces provide a simple method for qualifying names used in XML documents by associating them with namespaces identified by URI.
Latest version available at: http://www.w3.org/TR/REC-xml-names

Level Triple-A conformance icon, W3C-WAI Web Content Accessibility Guidelines 1.0

hxt-9.3.1.15/examples/xhtml/xhtml1-frameset.dtd0000644000000000000000000010033512465166667017456 0ustar0000000000000000 %HTMLlat1; %HTMLsymbol; %HTMLspecial; hxt-9.3.1.15/examples/xhtml/tmp.xml0000644000000000000000000000024112465166667015255 0ustar0000000000000000 Ä hxt-9.3.1.15/examples/xhtml/xhtml-lat1.ent0000644000000000000000000002701512465166667016446 0ustar0000000000000000 hxt-9.3.1.15/src/0000755000000000000000000000000012465166667011553 5ustar0000000000000000hxt-9.3.1.15/src/Text/0000755000000000000000000000000012465166667012477 5ustar0000000000000000hxt-9.3.1.15/src/Text/XML/0000755000000000000000000000000012465166667013137 5ustar0000000000000000hxt-9.3.1.15/src/Text/XML/HXT/0000755000000000000000000000000012465166667013602 5ustar0000000000000000hxt-9.3.1.15/src/Text/XML/HXT/Core.hs0000644000000000000000000000422612465166667015032 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Text.XML.HXT.Core Copyright : Copyright (C) 2006-2010 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : stable Portability: portable The HXT arrow interface The application programming interface to the arrow modules of the Haskell XML Toolbox. This module exports all important arrows for input, output, parsing, validating and transforming XML. It also exports all basic datatypes and functions of the toolbox. -} -- ------------------------------------------------------------ module Text.XML.HXT.Core ( module Control.Arrow.ListArrows , module Text.XML.HXT.DOM.Interface , module Text.XML.HXT.Arrow.XmlArrow , module Text.XML.HXT.Arrow.XmlState , module Text.XML.HXT.Arrow.DocumentInput , module Text.XML.HXT.Arrow.DocumentOutput , module Text.XML.HXT.Arrow.Edit , module Text.XML.HXT.Arrow.GeneralEntitySubstitution , module Text.XML.HXT.Arrow.Namespace , module Text.XML.HXT.Arrow.Pickle , module Text.XML.HXT.Arrow.ProcessDocument , module Text.XML.HXT.Arrow.ReadDocument , module Text.XML.HXT.Arrow.WriteDocument , module Text.XML.HXT.Arrow.Binary , module Text.XML.HXT.Arrow.XmlOptions , module Text.XML.HXT.Version ) where import Control.Arrow.ListArrows -- arrow classes import Data.Atom () -- import this explicitly import Text.XML.HXT.DOM.Interface import Text.XML.HXT.Arrow.DocumentInput import Text.XML.HXT.Arrow.DocumentOutput import Text.XML.HXT.Arrow.Edit import Text.XML.HXT.Arrow.GeneralEntitySubstitution import Text.XML.HXT.Arrow.Namespace import Text.XML.HXT.Arrow.Pickle import Text.XML.HXT.Arrow.ProcessDocument import Text.XML.HXT.Arrow.ReadDocument import Text.XML.HXT.Arrow.WriteDocument import Text.XML.HXT.Arrow.XmlArrow import Text.XML.HXT.Arrow.XmlOptions import Text.XML.HXT.Arrow.XmlState import Text.XML.HXT.Arrow.XmlRegex () -- import this explicitly import Text.XML.HXT.Arrow.Binary import Text.XML.HXT.Version -- ------------------------------------------------------------ hxt-9.3.1.15/src/Text/XML/HXT/Version.hs0000644000000000000000000000011612465166667015561 0ustar0000000000000000module Text.XML.HXT.Version where hxt_version :: String hxt_version = "9.1.0" hxt-9.3.1.15/src/Text/XML/HXT/XMLSchema/0000755000000000000000000000000012465166667015363 5ustar0000000000000000hxt-9.3.1.15/src/Text/XML/HXT/XMLSchema/DataTypeLibW3CNames.hs0000644000000000000000000000730112465166667021363 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Text.XML.HXT.XMLSchema.DataTypeLibW3C Copyright : Copyright (C) 2005-2010 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : experimental Portability: portable Version : $Id$ Datatype library for the W3C XML schema datatypes -} -- ------------------------------------------------------------ module Text.XML.HXT.XMLSchema.DataTypeLibW3CNames where -- ------------------------------------------------------------ -- | Namespace of the W3C XML schema datatype library w3cNS :: String w3cNS = "http://www.w3.org/2001/XMLSchema-datatypes" xsd_string , xsd_normalizedString , xsd_token , xsd_language , xsd_NMTOKEN , xsd_NMTOKENS , xsd_Name , xsd_NCName , xsd_ID , xsd_IDREF , xsd_IDREFS , xsd_ENTITY , xsd_ENTITIES , xsd_anyURI , xsd_QName , xsd_NOTATION , xsd_hexBinary , xsd_base64Binary , xsd_decimal , xsd_integer , xsd_nonPositiveInteger , xsd_negativeInteger , xsd_nonNegativeInteger , xsd_positiveInteger , xsd_long , xsd_int , xsd_short , xsd_byte , xsd_unsignedLong , xsd_unsignedInt , xsd_unsignedShort , xsd_unsignedByte , xsd_boolean , xsd_float , xsd_double , xsd_time , xsd_duration , xsd_date , xsd_dateTime , xsd_gDay , xsd_gMonth , xsd_gMonthDay , xsd_gYear , xsd_gYearMonth :: String xsd_string = "string" xsd_normalizedString = "normalizedString" xsd_token = "token" xsd_language = "language" xsd_NMTOKEN = "NMTOKEN" xsd_NMTOKENS = "NMTOKENS" xsd_Name = "Name" xsd_NCName = "NCName" xsd_ID = "ID" xsd_IDREF = "IDREF" xsd_IDREFS = "IDREFS" xsd_ENTITY = "ENTITY" xsd_ENTITIES = "ENTITIES" xsd_anyURI = "anyURI" xsd_QName = "QName" xsd_NOTATION = "NOTATION" xsd_hexBinary = "hexBinary" xsd_base64Binary = "base64Binary" xsd_decimal = "decimal" xsd_integer = "integer" xsd_nonPositiveInteger = "nonPositiveInteger" xsd_negativeInteger = "negativeInteger" xsd_nonNegativeInteger = "nonNegativeInteger" xsd_positiveInteger = "positiveInteger" xsd_long = "long" xsd_int = "int" xsd_short = "short" xsd_byte = "byte" xsd_unsignedLong = "unsignedLong" xsd_unsignedInt = "unsignedInt" xsd_unsignedShort = "unsignedShort" xsd_unsignedByte = "unsignedByte" xsd_boolean = "boolean" xsd_float = "float" xsd_double = "double" xsd_time = "time" xsd_duration = "duration" xsd_date = "date" xsd_dateTime = "dateTime" xsd_gDay = "gDay" xsd_gMonth = "gMonth" xsd_gMonthDay = "gMonthDay" xsd_gYear = "gYear" xsd_gYearMonth = "gYearMonth" xsd_length , xsd_maxLength , xsd_minLength , xsd_maxExclusive , xsd_minExclusive , xsd_maxInclusive , xsd_minInclusive , xsd_totalDigits , xsd_fractionDigits , xsd_pattern , xsd_enumeration , xsd_whiteSpace :: String xsd_length = "length" xsd_maxLength = "maxLength" xsd_minLength = "minLength" xsd_maxExclusive = "maxExclusive" xsd_minExclusive = "minExclusive" xsd_maxInclusive = "maxInclusive" xsd_minInclusive = "minInclusive" xsd_totalDigits = "totalDigits" xsd_fractionDigits = "fractionDigits" xsd_pattern = "pattern" xsd_enumeration = "enumeration" xsd_whiteSpace = "whiteSpace" -- ---------------------------------------- hxt-9.3.1.15/src/Text/XML/HXT/DTDValidation/0000755000000000000000000000000012465166667016230 5ustar0000000000000000hxt-9.3.1.15/src/Text/XML/HXT/DTDValidation/TypeDefs.hs0000644000000000000000000001214712465166667020314 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Text.XML.HXT.DTDValidation.TypeDefs Copyright : Copyright (C) 2008 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : experimental Portability: portable This module provides all datatypes for DTD validation -} -- ------------------------------------------------------------ module Text.XML.HXT.DTDValidation.TypeDefs ( module Text.XML.HXT.DTDValidation.TypeDefs , module Text.XML.HXT.DOM.Interface , module Text.XML.HXT.Arrow.XmlArrow , module Control.Arrow , module Control.Arrow.ArrowList , module Control.Arrow.ArrowIf , module Control.Arrow.ArrowState , module Control.Arrow.ArrowTree , module Control.Arrow.ListArrow , module Control.Arrow.StateListArrow ) where import Control.Arrow -- classes import Control.Arrow.ArrowList import Control.Arrow.ArrowIf import Control.Arrow.ArrowState import Control.Arrow.ArrowTree import Control.Arrow.ListArrow -- arrow types import Control.Arrow.StateListArrow import Text.XML.HXT.Arrow.XmlArrow import Text.XML.HXT.DOM.Interface -- ------------------------------------------------------------ infixr 0 $$ type XmlArrow = LA XmlTree XmlTree type XmlArrowS = LA XmlTree XmlTrees -- ------------------------------------------------------------ dtd_name , dtd_value , dtd_type , dtd_kind , dtd_modifier , dtd_default :: Attributes -> String dtd_name = lookup1 a_name dtd_value = lookup1 a_value dtd_type = lookup1 a_type dtd_kind = lookup1 a_kind dtd_modifier = lookup1 a_modifier dtd_default = lookup1 a_default -- ------------------------------------------------------------ isUnparsedEntity :: ArrowDTD a => a XmlTree XmlTree isUnparsedEntity = filterA $ getDTDAttrl >>> isA (hasEntry k_ndata) hasDTDAttrValue :: ArrowDTD a => String -> (String -> Bool) -> a XmlTree XmlTree hasDTDAttrValue an p = filterA $ getDTDAttrl >>> isA (p . lookup1 an) isRequiredAttrKind :: ArrowDTD a => a XmlTree XmlTree isRequiredAttrKind = hasDTDAttrValue a_kind (== k_required) isDefaultAttrKind :: ArrowDTD a => a XmlTree XmlTree isDefaultAttrKind = hasDTDAttrValue a_kind (== k_default) isFixedAttrKind :: ArrowDTD a => a XmlTree XmlTree isFixedAttrKind = hasDTDAttrValue a_kind (== k_fixed) isMixedContentElement :: ArrowDTD a => a XmlTree XmlTree isMixedContentElement = hasDTDAttrValue a_type (== v_mixed) isEmptyElement :: ArrowDTD a => a XmlTree XmlTree isEmptyElement = hasDTDAttrValue a_type (== k_empty) isEnumAttrType :: ArrowDTD a => a XmlTree XmlTree isEnumAttrType = hasDTDAttrValue a_type (== k_enumeration) isIdAttrType :: ArrowDTD a => a XmlTree XmlTree isIdAttrType = hasDTDAttrValue a_type (== k_id) isIdRefAttrType :: ArrowDTD a => a XmlTree XmlTree isIdRefAttrType = hasDTDAttrValue a_type (`elem` [k_idref, k_idrefs]) isNotationAttrType :: ArrowDTD a => a XmlTree XmlTree isNotationAttrType = hasDTDAttrValue a_type (== k_notation) isAttlistOfElement :: ArrowDTD a => String -> a XmlTree XmlTree isAttlistOfElement el = isDTDAttlist >>> hasDTDAttrValue a_name (== el) valueOfDTD :: String -> XmlTree -> String valueOfDTD n = concat . runLA ( getDTDAttrl >>^ lookup1 n ) valueOf :: String -> XmlTree -> String valueOf n = concat . runLA ( getAttrValue n ) getDTDAttributes :: XmlTree -> Attributes getDTDAttributes = concat . runLA getDTDAttrl isDTDDoctypeNode :: XmlTree -> Bool isDTDDoctypeNode = not . null . runLA isDTDDoctype isDTDElementNode :: XmlTree -> Bool isDTDElementNode = not . null . runLA isDTDElement isDTDAttlistNode :: XmlTree -> Bool isDTDAttlistNode = not . null . runLA isDTDAttlist isDTDContentNode :: XmlTree -> Bool isDTDContentNode = not . null . runLA isDTDContent isDTDNameNode :: XmlTree -> Bool isDTDNameNode = not . null . runLA isDTDName isElemNode :: XmlTree -> Bool isElemNode = not . null . runLA isElem nameOfAttr :: XmlTree -> String nameOfAttr = concat . runLA (getAttrName >>^ qualifiedName) nameOfElem :: XmlTree -> String nameOfElem = concat . runLA (getElemName >>^ qualifiedName) -- | -- infix operator for applying an arrow to a list of trees -- -- * 1.parameter f : the arrow -- -- - 2.parameter ts : the list of trees -- -- - returns : list of results ($$) :: XmlArrow -> XmlTrees -> XmlTrees f $$ l = runLA (unlistA >>> f) l -- | create an error message msgToErr :: (String -> String) -> LA String XmlTree msgToErr f = mkErr $< this where mkErr "" = none mkErr s = err (f s) -- ------------------------------------------------------------ hxt-9.3.1.15/src/Text/XML/HXT/DTDValidation/IdValidation.hs0000644000000000000000000002170212465166667021135 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Text.XML.HXT.DTDValidation.IdValidation Copyright : Copyright (C) 2008 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : experimental Portability: portable This module provides functions for checking special ID/IDREF/IDREFS constraints. Checking special ID\/IDREF\/IDREFS constraints means: - checking that all ID values are unique. - checking that all IDREF\/IDREFS values match the value of some ID attribute ID-Validation should be started before or after validating the document. First all nodes with ID attributes are collected from the document, then it is validated that values of ID attributes do not occure more than once. During a second iteration over the document it is validated that there exists an ID attribute value for IDREF\/IDREFS attribute values. -} -- ------------------------------------------------------------ module Text.XML.HXT.DTDValidation.IdValidation ( validateIds ) where import Data.Maybe import Text.XML.HXT.DTDValidation.TypeDefs import Text.XML.HXT.DTDValidation.AttributeValueValidation -- ------------------------------------------------------------ -- | -- Lookup-table which maps element names to their validation functions. The -- validation functions are XmlFilters. type IdEnvTable = [IdEnv] type IdEnv = (ElemName, IdFct) type ElemName = String type IdFct = XmlArrow -- ------------------------------------------------------------ -- | -- Perform the validation of the ID/IDREF/IDREFS constraints. -- -- * 1.parameter dtdPart : the DTD subset (Node @DOCTYPE@) of the XmlTree -- -- - 2.parameter doc : the document subset of the XmlTree -- -- - returns : a list of errors validateIds :: XmlTree -> XmlArrow validateIds dtdPart = validateIds' $< listA (traverseTree idEnv) where idAttrTypes = runLA (getChildren >>> isIdAttrType) dtdPart elements = runLA (getChildren >>> isDTDElement) dtdPart atts = runLA (getChildren >>> isDTDAttlist) dtdPart idEnv = buildIdCollectorFcts idAttrTypes validateIds' :: XmlTrees -> XmlArrow validateIds' idNodeList = ( constA idNodeList >>> checkForUniqueIds idAttrTypes ) <+> checkIdReferences idRefEnv where idRefEnv = buildIdrefValidationFcts idAttrTypes elements atts idNodeList -- | -- Traverse the XmlTree in preorder. -- -- * 1.parameter idEnv : lookup-table which maps element names to their validation functions -- -- - returns : list of errors traverseTree :: IdEnvTable -> XmlArrow traverseTree idEnv = multi (isElem `guards` (idFct $< getName)) where idFct :: String -> XmlArrow idFct name = fromMaybe none . lookup name $ idEnv -- | -- Returns the value of an element's ID attribute. The attribute name has to be -- retrieved first from the DTD. -- -- * 1.parameter dtdPart : list of ID attribute definitions from the DTD -- -- - 2.parameter n : element which ID attribute value should be returned -- -- - returns : normalized value of the ID attribute getIdValue :: XmlTrees -> XmlTree -> String getIdValue dns = concat . runLA (single getIdValue') where getIdValue' :: LA XmlTree String getIdValue' = isElem `guards` catA (map getIdVal dns) where getIdVal dn | isDTDAttlistNode dn = hasName elemName `guards` ( getAttrValue0 attrName >>> arr (normalizeAttributeValue (Just dn)) ) | otherwise = none where al = getDTDAttributes dn elemName = dtd_name al attrName = dtd_value al -- ------------------------------------------------------------ -- | -- Build collector functions which return XTag nodes with ID attributes from -- a document. -- -- * 1.parameter dtdPart : the children of the @DOCTYPE@ node -- -- - returns : lookup-table which maps element names to their collector function buildIdCollectorFcts :: XmlTrees -> IdEnvTable buildIdCollectorFcts idAttrTypes = concatMap buildIdCollectorFct idAttrTypes where buildIdCollectorFct :: XmlTree -> [IdEnv] buildIdCollectorFct dn | isDTDAttlistNode dn = [(elemName, hasAttr attrName)] | otherwise = [] where al = getDTDAttributes dn elemName = dtd_name al attrName = dtd_value al -- | -- Build validation functions for checking if IDREF\/IDREFS values match a value -- of some ID attributes. -- -- * 1.parameter dtdPart : the children of the @DOCTYPE@ node -- -- - 2.parameter idNodeList : list of all XTag nodes with ID attributes -- -- - returns : lookup-table which maps element names to their validation function buildIdrefValidationFcts :: XmlTrees -> XmlTrees -> XmlTrees -> XmlTrees -> IdEnvTable buildIdrefValidationFcts idAttrTypes elements atts idNodeList = concatMap buildElemValidationFct elements where idValueList = map (getIdValue idAttrTypes) idNodeList buildElemValidationFct :: XmlTree -> [IdEnv] buildElemValidationFct dn | isDTDElementNode dn = [(elemName, buildIdrefValidationFct idRefAttrTypes)] | otherwise = [] where al = getDTDAttributes dn elemName = dtd_name al idRefAttrTypes = (isAttlistOfElement elemName >>> isIdRefAttrType) $$ atts buildIdrefValidationFct :: XmlTrees -> XmlArrow buildIdrefValidationFct = catA . map buildIdref buildIdref :: XmlTree -> XmlArrow buildIdref dn | isDTDAttlistNode dn = isElem >>> (checkIdref $< getName) | otherwise = none where al = getDTDAttributes dn attrName = dtd_value al attrType = dtd_type al checkIdref :: String -> XmlArrow checkIdref name = hasAttr attrName `guards` ( checkIdVal $< getAttrValue attrName ) where checkIdVal :: String -> XmlArrow checkIdVal av | attrType == k_idref = checkValueDeclared attrValue | null valueList = err ( "Attribute " ++ show attrName ++ " of Element " ++ show name ++ " must have at least one name." ) | otherwise = catA . map checkValueDeclared $ valueList where valueList = words attrValue attrValue = normalizeAttributeValue (Just dn) av checkValueDeclared :: String -> XmlArrow checkValueDeclared attrValue = if attrValue `elem` idValueList then none else err ( "An Element with identifier " ++ show attrValue ++ " must appear in the document." ) -- ------------------------------------------------------------ -- | -- Validate that all ID values are unique within a document. -- Validity constraint: ID (3.3.1 \/p. 25 in Spec) -- -- * 1.parameter idNodeList : list of all XTag nodes with ID attributes -- -- - 2.parameter dtdPart : the children of the @DOCTYPE@ node -- -- - returns : a list of errors checkForUniqueIds :: XmlTrees -> LA XmlTrees XmlTree checkForUniqueIds idAttrTypes -- idNodeList = fromSLA [] ( unlistA >>> isElem >>> (checkForUniqueId $<< getName &&& this) ) where checkForUniqueId :: String -> XmlTree -> SLA [String] XmlTree XmlTree checkForUniqueId name x = ifA ( getState >>> isA (attrValue `elem`) ) (err ( "Attribute value " ++ show attrValue ++ " of type ID for element " ++ show name ++ " must be unique within the document." )) (nextState (attrValue:) >>> none) where attrValue = getIdValue (isAttlistOfElement name $$ idAttrTypes) x -- | -- Validate that all IDREF\/IDREFS values match the value of some ID attribute. -- Validity constraint: IDREF (3.3.1 \/ p.26 in Spec) -- -- * 1.parameter idRefEnv : lookup-table which maps element names to their validation function -- -- - 2.parameter doc : the document to validate -- -- - returns : a list of errors checkIdReferences :: IdEnvTable -> LA XmlTree XmlTree checkIdReferences idRefEnv = traverseTree idRefEnv -- ------------------------------------------------------------ hxt-9.3.1.15/src/Text/XML/HXT/DTDValidation/AttributeValueValidation.hs0000644000000000000000000002711312465166667023543 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Text.XML.HXT.DTDValidation.TypeDefs Copyright : Copyright (C) 2008 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : experimental Portability: portable This module provides functions for validating attributes. The main functions are: - Check if the attribute value meets the lexical constraints of its type - Normalization of an attribute value -} -- ------------------------------------------------------------ -- Special namings in source code: -- -- - nd - XDTD node -- -- - n - XTag node -- module Text.XML.HXT.DTDValidation.AttributeValueValidation ( checkAttributeValue , normalizeAttributeValue ) where import Text.XML.HXT.Parser.XmlParsec ( parseNMToken , parseName ) import Text.XML.HXT.DTDValidation.TypeDefs -- ------------------------------------------------------------ -- | -- Checks if the attribute value meets the lexical constraints of its type. -- -- * 1.parameter dtdPart : the children of the @DOCTYPE@ node -- -- - 2.parameter attrDecl : the declaration of the attribute from the DTD -- -- - returns : a function which takes an element (XTag or XDTD ATTLIST), -- checks if the attribute value meets the lexical constraints -- of its type and returns a list of errors checkAttributeValue :: XmlTrees -> XmlTree -> XmlArrow checkAttributeValue dtdPart attrDecl | isDTDAttlistNode attrDecl = choiceA [ isElem :-> ( checkAttrVal $< getAttrValue attrName ) , isDTDAttlist :-> ( checkAttrVal $< (getDTDAttrl >>^ dtd_default) ) , this :-> none ] | otherwise = none where al = getDTDAttributes attrDecl attrName = dtd_value al attrType = dtd_type al checkAttrVal attrValue = checkValue attrType dtdPart normalizedVal attrDecl where normalizedVal = normalizeAttributeValue (Just attrDecl) attrValue -- | -- Dispatches the attibute check by the attribute type. -- -- * 1.parameter typ : the attribute type -- -- - 2.parameter dtdPart : the children of the @DOCTYPE@ node -- -- - 3.parameter attrValue : the normalized attribute value to be checked -- -- - 4.parameter attrDecl : the declaration of the attribute from the DTD -- -- - returns : a functions which takes an element (XTag or XDTD ATTLIST), -- checks if the attribute value meets the lexical constraints -- of its type and returns a list of errors checkValue :: String -> XmlTrees -> String -> XmlTree -> XmlArrow checkValue typ dtdPart attrValue attrDecl | typ == k_cdata = none | typ == k_enumeration = checkValueEnumeration attrDecl attrValue | typ == k_entity = checkValueEntity dtdPart attrDecl attrValue | typ == k_entities = checkValueEntities dtdPart attrDecl attrValue | typ == k_id = checkValueId attrDecl attrValue | typ == k_idref = checkValueIdref attrDecl attrValue | typ == k_idrefs = checkValueIdrefs attrDecl attrValue | typ == k_nmtoken = checkValueNmtoken attrDecl attrValue | typ == k_nmtokens = checkValueNmtokens attrDecl attrValue | typ == k_notation = checkValueEnumeration attrDecl attrValue | otherwise = error ("Attribute type " ++ show typ ++ " unknown.") -- | -- Checks the value of Enumeration attribute types. (3.3.1 \/ p.27 in Spec) -- -- * 1.parameter attrDecl : the declaration of the attribute from the DTD -- -- - 2.parameter attrValue : the normalized attribute value to be checked checkValueEnumeration :: XmlTree -> String -> XmlArrow checkValueEnumeration attrDecl attrValue | isDTDAttlistNode attrDecl && attrValue `notElem` enumVals = err ( "Attribute " ++ show (dtd_value al) ++ " for element " ++ show (dtd_name al) ++ " must have a value from list "++ show enumVals {- ++ " but has value " ++ show attrValue-} ++ ".") | otherwise = none where al = getDTDAttributes attrDecl enumVals :: [String] enumVals = map (dtd_name . getDTDAttributes) $ (runLA getChildren attrDecl) -- | -- Checks the value of ENTITY attribute types. (3.3.1 \/ p.26 in Spec) -- -- * 1.parameter dtdPart : the children of the @DOCTYPE@ node, to get the -- unparsed entity declarations -- -- - 2.parameter attrDecl : the declaration of the attribute from the DTD -- -- - 3.parameter attrValue : the normalized attribute value to be checked checkValueEntity :: XmlTrees -> XmlTree -> String -> XmlArrow checkValueEntity dtdPart attrDecl attrValue | isDTDAttlistNode attrDecl && attrValue `notElem` upEntities = err ( "Entity " ++ show attrValue ++ " of attribute " ++ show (dtd_value al) ++ " for element " ++ show (dtd_name al) ++ " is not unparsed. " ++ "The following unparsed entities exist: " ++ show upEntities ++ ".") | otherwise = none where al = getDTDAttributes attrDecl upEntities :: [String] upEntities = map (dtd_name . getDTDAttributes) (isUnparsedEntity $$ dtdPart) -- | -- Checks the value of ENTITIES attribute types. (3.3.1 \/ p.26 in Spec) -- -- * 1.parameter dtdPart : the children of the @DOCTYPE@ node, to get the -- unparsed entity declarations -- -- - 2.parameter attrDecl : the declaration of the attribute from the DTD -- -- - 3.parameter attrValue : the normalized attribute value to be checked checkValueEntities ::XmlTrees -> XmlTree -> String -> XmlArrow checkValueEntities dtdPart attrDecl attrValue | isDTDAttlistNode attrDecl = if null valueList then err ("Attribute " ++ show (dtd_value al) ++ " of element " ++ show (dtd_name al) ++ " must be one or more names.") else catA . map (checkValueEntity dtdPart attrDecl) $ valueList | otherwise = none where al = getDTDAttributes attrDecl valueList = words attrValue -- | -- Checks the value of NMTOKEN attribute types. (3.3.1 \/ p.26 in Spec) -- -- * 1.parameter attrDecl : the declaration of the attribute from the DTD -- -- - 2.parameter attrValue : the normalized attribute value to be checked checkValueNmtoken :: XmlTree -> String -> XmlArrow checkValueNmtoken attrDecl attrValue | isDTDAttlistNode attrDecl = constA attrValue >>> checkNmtoken | otherwise = none where al = getDTDAttributes attrDecl checkNmtoken = mkText >>> arrL (parseNMToken "") >>> isError >>> getErrorMsg >>> arr (\ s -> ( "Attribute value " ++ show attrValue ++ " of attribute " ++ show (dtd_value al) ++ " for element " ++ show (dtd_name al) ++ " must be a name token, "++ (lines s) !! 1 ++".") ) >>> mkError c_err -- | -- Checks the value of NMTOKENS attribute types. (3.3.1 \/ p.26 in Spec) -- -- * 1.parameter attrDecl : the declaration of the attribute from the DTD -- -- - 2.parameter attrValue : the normalized attribute value to be checked checkValueNmtokens :: XmlTree -> String -> XmlArrow checkValueNmtokens attrDecl attrValue | isDTDAttlistNode attrDecl = if null valueList then err ( "Attribute "++ show (dtd_value al) ++" of element " ++ show (dtd_name al) ++ " must be one or more name tokens.") else catA . map (checkValueNmtoken attrDecl) $ valueList | otherwise = none where al = getDTDAttributes attrDecl valueList = words attrValue -- | -- Checks the value of ID attribute types. (3.3.1 \/ p.25 in Spec) -- -- * 1.parameter attrDecl : the declaration of the attribute from the DTD -- -- - 2.parameter attrValue : the normalized attribute value to be checked checkValueId :: XmlTree -> String -> XmlArrow checkValueId attrDecl attrValue = checkForName "Attribute value" attrDecl attrValue -- | -- Checks the value of IDREF attribute types. (3.3.1 \/ p.26 in Spec) -- -- * 1.parameter attrDecl : the declaration of the attribute from the DTD -- -- - 2.parameter attrValue : the normalized attribute value to be checked checkValueIdref :: XmlTree -> String -> XmlArrow checkValueIdref attrDecl attrValue = checkForName "Attribute value" attrDecl attrValue -- | -- Checks the value of IDREFS attribute types. (3.3.1 \/ p.26 in Spec) -- -- * 1.parameter attrDecl : the declaration of the attribute from the DTD -- -- - 2.parameter attrValue : the normalized attribute value to be checked checkValueIdrefs :: XmlTree -> String -> XmlArrow checkValueIdrefs attrDecl attrValue = catA . map (checkValueIdref attrDecl) . words $ attrValue -- ----------------------------------------------------------------------------- -- General helper functions for checking attribute values -- -- | -- Checks if the value of an attribute is a name. -- -- * 1.parameter msg : error message, should be "Entity" or "Attribute value" -- -- - 2.parameter attrDecl : the declaration of the attribute from the DTD -- -- - 3.parameter attrValue : the normalized attribute value to be checked checkForName :: String -> XmlTree -> String -> XmlArrow checkForName msg attrDecl attrValue | isDTDAttlistNode attrDecl = constA attrValue >>> checkName | otherwise = none where al = getDTDAttributes attrDecl checkName = mkText >>> arrL (parseName "") >>> isError >>> getErrorMsg >>> arr (\s -> ( msg ++ " " ++ show attrValue ++" of attribute " ++ show (dtd_value al) ++ " for element "++ show (dtd_name al) ++" must be a name, " ++ (lines s) !! 1 ++ ".") ) >>> mkError c_err -- ----------------------------------------------------------------------------- -- | -- Normalizes an attribute value with respect to its type. (3.3.3 \/ p.29 in Spec) -- -- * 1.parameter attrDecl : the declaration of the attribute from the DTD. Expected -- is a list. If the list is empty, no declaration exists. -- -- - 2.parameter value : the attribute value to be normalized -- -- - returns : the normalized value -- normalizeAttributeValue :: Maybe XmlTree -> String -> String normalizeAttributeValue (Just attrDecl) value = normalizeAttribute attrType where al = getDTDAttributes attrDecl attrType = dtd_type al normalizeAttribute :: String -> String normalizeAttribute typ | typ == k_cdata = cdataNormalization value | otherwise = otherNormalization value -- Attribute not declared in DTD, normalization as CDATA normalizeAttributeValue Nothing value = cdataNormalization value -- ------------------------------------------------------------ -- Helper functions for normalization -- | -- Normalization of CDATA attribute values. -- is already done when parsing -- during entity substituion for attribute values cdataNormalization :: String -> String cdataNormalization = id -- | Normalization of attribute values other than CDATA. otherNormalization :: String -> String otherNormalization = reduceWSSequences . stringTrim . cdataNormalization -- | Reduce whitespace sequences to a single whitespace. reduceWSSequences :: String -> String reduceWSSequences str = unwords (words str) -- ------------------------------------------------------------ hxt-9.3.1.15/src/Text/XML/HXT/DTDValidation/DocTransformation.hs0000644000000000000000000001503412465166667022223 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Text.XML.HXT.DTDValidation.DocTransformation Copyright : Copyright (C) 2008 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : experimental Portability: portable This module provides functions for transforming XML documents represented as XmlTree with respect to its DTD. Transforming an XML document with respect to its DTD means: - add all attributes with default values - normalize all attribute values - sort all attributes in lexical order Note: Transformation should be started after validation. Before the document is validated, a lookup-table is build on the basis of the DTD which maps element names to their transformation functions. After this initialization phase the whole document is traversed in preorder and every element is transformed by the XmlFilter from the lookup-table. -} -- ------------------------------------------------------------ module Text.XML.HXT.DTDValidation.DocTransformation ( transform ) where import Text.XML.HXT.DTDValidation.TypeDefs import Text.XML.HXT.DTDValidation.AttributeValueValidation import Data.Maybe import Data.List import Data.Ord import qualified Data.Map as M -- ------------------------------------------------------------ -- | -- Lookup-table which maps element names to their transformation functions. The -- transformation functions are XmlArrows. type TransEnvTable = M.Map ElemName TransFct type ElemName = String type TransFct = XmlArrow -- ------------------------------------------------------------ -- | -- filter for transforming the document. -- -- * 1.parameter dtdPart : the DTD subset (Node @DOCTYPE@) of the XmlTree -- -- - 2.parameter doc : the document subset of the XmlTree -- -- - returns : a list of errors transform :: XmlTree -> XmlArrow transform dtdPart = traverseTree transTable where transTable = buildAllTransformationFunctions (runLA getChildren dtdPart) -- | -- Traverse the XmlTree in preorder. -- -- * 1.parameter transEnv : lookup-table which maps element names to their transformation functions -- -- - returns : the whole transformed document traverseTree :: TransEnvTable -> XmlArrow traverseTree transEnv = processTopDown ( (transFct $< getName) `when` isElem ) where transFct :: String -> XmlArrow transFct name = fromMaybe this . M.lookup name $ transEnv -- | -- Build all transformation functions. -- -- * 1.parameter dtdPart : the DTD subset, root node should be of type @DOCTYPE@ -- -- - returns : lookup-table which maps element names to their transformation functions buildAllTransformationFunctions :: XmlTrees -> TransEnvTable buildAllTransformationFunctions dtdNodes = M.fromList $ (t_root, this) : concatMap (buildTransformationFunctions dtdNodes) dtdNodes -- | -- Build transformation functions for an element. -- -- * 1.parameter dtdPart : the children of the @DOCTYPE@ node -- -- * 1.parameter nd : element declaration for which the transformation functions are -- created -- -- - returns : entry for the lookup-table buildTransformationFunctions :: XmlTrees -> XmlTree -> [(ElemName, TransFct)] buildTransformationFunctions dtdPart dn | isDTDElementNode dn = [(name, transFct)] | otherwise = [] where al = getDTDAttributes dn name = dtd_name al transFct = setDefaultAttributeValues dtdPart dn >>> normalizeAttributeValues dtdPart dn >>> lexicographicAttributeOrder -- ------------------------------------------------------------ -- | -- Sort the attributes of an element in lexicographic order. -- -- * returns : a function which takes an element (XTag), sorts its -- attributes in lexicographic order and returns the changed element lexicographicAttributeOrder :: XmlArrow lexicographicAttributeOrder = setAttrl (getAttrl >>. sortAttrl) where sortAttrl :: XmlTrees -> XmlTrees sortAttrl = sortBy (comparing nameOfAttr) -- | -- Normalize attribute values. -- -- * returns : a function which takes an element (XTag), normalizes its -- attribute values and returns the changed element normalizeAttributeValues :: XmlTrees -> XmlTree -> XmlArrow normalizeAttributeValues dtdPart dn | isDTDElementNode dn = processAttrl (normalizeAttr $< getName) | otherwise = this where al = getDTDAttributes dn elemName = dtd_name al declaredAtts = isAttlistOfElement elemName $$ dtdPart normalizeAttr :: String -> XmlArrow normalizeAttr nameOfAtt = normalizeAttrValue ( if null attDescr then Nothing else Just (head attDescr) ) where attDescr = filter ((== nameOfAtt) . valueOfDTD a_value) declaredAtts normalizeAttrValue :: Maybe XmlTree -> XmlArrow normalizeAttrValue descr = replaceChildren ((xshow getChildren >>^ normalizeAttributeValue descr) >>> mkText) -- | -- Set default attribute values if they are not set. -- -- * returns : a function which takes an element (XTag), adds missing attribute -- defaults and returns the changed element setDefaultAttributeValues :: XmlTrees -> XmlTree -> XmlArrow setDefaultAttributeValues dtdPart dn | isDTDElementNode dn = seqA (map setDefault defaultAtts) | otherwise = this where elemName = dtd_name . getDTDAttributes $ dn defaultAtts = ( isAttlistOfElement elemName >>> ( isFixedAttrKind -- select attributes with default values `orElse` isDefaultAttrKind ) ) $$ dtdPart setDefault :: XmlTree -> XmlArrow setDefault attrDescr -- add the default attributes = ( addAttr attName defaultValue -- to tag nodes with missing attributes `whenNot` hasAttr attName ) `when` isElem where al = getDTDAttributes attrDescr attName = dtd_value al defaultValue = dtd_default al -- ------------------------------------------------------------ hxt-9.3.1.15/src/Text/XML/HXT/DTDValidation/DTDValidation.hs0000644000000000000000000004702612465166667021223 0ustar0000000000000000{-# LANGUAGE FlexibleContexts #-} -- ------------------------------------------------------------ {- | Module : Text.XML.HXT.DTDValidation.TypeDefs Copyright : Copyright (C) 2008 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : experimental Portability: portable This module provides functions for validating the DTD of XML documents represented as XmlTree. Unlike other popular XML validation tools the validation process returns a list of errors instead of aborting after the first error was found. Unlike validation of the document, the DTD branch is traversed four times: - Validation of Notations - Validation of Unparsed Entities - Validation of Element declarations - Validation of Attribute declarations -} -- ------------------------------------------------------------ module Text.XML.HXT.DTDValidation.DTDValidation ( removeDoublicateDefs , validateDTD ) where import Text.XML.HXT.DTDValidation.AttributeValueValidation import Text.XML.HXT.DTDValidation.TypeDefs -- | -- Validate a DTD. -- -- - returns : a functions which takes the DTD subset of the XmlTree, checks -- if the DTD is valid and returns a list of errors validateDTD :: XmlArrow validateDTD -- dtdPart = isDTDDoctype `guards` ( listA getChildren >>> ( validateParts $<< (getNotationNames &&& getElemNames) ) ) where validateParts notationNames elemNames = validateNotations <+> validateEntities notationNames <+> validateElements elemNames <+> validateAttributes elemNames notationNames getNotationNames :: LA [XmlTree] [String] getNotationNames = listA $ unlistA >>> isDTDNotation >>> getDTDAttrValue a_name getElemNames :: LA [XmlTree] [String] getElemNames = listA $ unlistA >>> isDTDElement >>> getDTDAttrValue a_name -- ------------------------------------------------------------ checkName :: String -> SLA [String] XmlTree XmlTree -> SLA [String] XmlTree XmlTree checkName name msg = ifA ( getState >>> isA (name `elem`) ) msg (nextState (name:) >>> none) -- ------------------------------------------------------------ -- | -- Validation of Notations, checks if all notation names are unique. -- Validity constraint: Unique Notation Name (4.7 \/ p.44 in Spec) -- -- * 1.parameter dtdPart : the children of the @DOCTYPE@ node -- -- - returns : a list of errors validateNotations :: LA XmlTrees XmlTree validateNotations = fromSLA [] ( unlistA >>> isDTDNotation >>> (checkForUniqueNotation $< getDTDAttrl) ) where checkForUniqueNotation :: Attributes -> SLA [String] XmlTree XmlTree checkForUniqueNotation al = checkName name $ err ( "Notation "++ show name ++ " was already specified." ) where name = dtd_name al -- | -- Validation of Entities. -- -- 1. Issues a warning if entities are declared multiple times. -- -- Optional warning: (4.2 \/ p.35 in Spec) -- -- -- 2. Validates that a notation is declared for an unparsed entity. -- -- Validity constraint: Notation Declared (4.2.2 \/ p.36 in Spec) -- -- * 1.parameter dtdPart : the children of the @DOCTYPE@ node -- -- - 2.parameter notationNames : list of all notation names declared in the DTD -- -- - returns : a list of errors validateEntities :: [String] -> LA XmlTrees XmlTree validateEntities notationNames = ( fromSLA [] ( unlistA >>> isDTDEntity >>> (checkForUniqueEntity $< getDTDAttrl) ) ) <+> ( unlistA >>> isUnparsedEntity >>> (checkNotationDecl $< getDTDAttrl) ) where -- Check if entities are declared multiple times checkForUniqueEntity :: Attributes -> SLA [String] XmlTree XmlTree checkForUniqueEntity al = checkName name $ warn ( "Entity "++ show name ++ " was already specified. " ++ "First declaration will be used." ) where name = dtd_name al -- Find unparsed entities for which no notation is specified checkNotationDecl :: Attributes -> XmlArrow checkNotationDecl al | notationName `elem` notationNames = none | otherwise = err ( "The notation " ++ show notationName ++ " must be declared " ++ "when referenced in the unparsed entity declaration for " ++ show upEntityName ++ "." ) where notationName = lookup1 k_ndata al upEntityName = dtd_name al -- | -- Validation of Element declarations. -- -- 1. Validates that an element is not declared multiple times. -- -- Validity constraint: Unique Element Type Declaration (3.2 \/ p.21 in Spec) -- -- -- 2. Validates that an element name only appears once in a mixed-content declaration. -- -- Validity constraint: No Duplicate Types (3.2 \/ p.21 in Spec) -- -- -- 3. Issues a warning if an element mentioned in a content model is not declared in the -- DTD. -- -- Optional warning: (3.2 \/ p.21 in Spec) -- -- * 1.parameter dtdPart : the children of the @DOCTYPE@ node -- -- - 2.parameter elemNames : list of all element names declared in the DTD -- -- - returns : a list of errors validateElements :: [String] -> LA XmlTrees XmlTree validateElements elemNames -- dtdPart = ( fromSLA [] ( unlistA >>> isDTDElement >>> (checkForUniqueElement $< getDTDAttrl) ) ) <+> ( unlistA >>> isMixedContentElement >>> (checkMixedContent $< getDTDAttrl) ) <+> ( unlistA >>> isDTDElement >>> (checkContentModel elemNames $< getDTDAttrl) ) where -- Validates that an element is not declared multiple times checkForUniqueElement :: Attributes -> SLA [String] XmlTree XmlTree checkForUniqueElement al = checkName name $ err ( "Element type " ++ show name ++ " must not be declared more than once." ) where name = dtd_name al -- Validates that an element name only appears once in a mixed-content declaration checkMixedContent :: Attributes -> XmlArrow checkMixedContent al = fromSLA [] ( getChildren >>> getChildren >>> isDTDName >>> (check $< getDTDAttrl) ) where elemName = dtd_name al check al' = checkName name $ err ( "The element type " ++ show name ++ " was already specified in the mixed-content model of the element declaration " ++ show elemName ++ "." ) where name = dtd_name al' -- Issues a warning if an element mentioned in a content model is not -- declared in the DTD. checkContentModel :: [String] -> Attributes -> XmlArrow checkContentModel names al | cm `elem` [v_children, v_mixed] = getChildren >>> checkContent | otherwise = none where elemName = dtd_name al cm = dtd_type al checkContent :: XmlArrow checkContent = choiceA [ isDTDName :-> ( checkName' $< getDTDAttrl ) , isDTDContent :-> ( getChildren >>> checkContent ) , this :-> none ] where checkName' al' | childElemName `elem` names = none | otherwise = warn ( "The element type "++ show childElemName ++ ", used in content model of element "++ show elemName ++ ", is not declared." ) where childElemName = dtd_name al' -- | -- Validation of Attribute declarations. -- -- (1) Issues a warning if an attribute is declared for an element type not itself -- decared. -- -- Optinal warning: (3.3 \/ p. 24 in Spec) -- -- -- 2. Issues a warning if more than one definition is provided for the same -- attribute of a given element type. Fist declaration is binding, later -- definitions are ignored. -- -- Optional warning: (3.3 \/ p.24 in Spec) -- -- -- 3. Issues a warning if the same Nmtoken occures more than once in enumerated -- attribute types of a single element type. -- -- Optional warning: (3.3.1 \/ p.27 in Spec) -- -- -- 4. Validates that an element type has not more than one ID attribute defined. -- -- Validity constraint: One ID per Element Type (3.3.1 \/ p.26 in Spec) -- -- -- 5. Validates that an element type has not more than one NOTATION attribute defined. -- -- Validity constraint: One Notation per Element Type (3.3.1 \/ p.27 in Spec) -- -- -- 6. Validates that an ID attributes has the type #IMPLIED or #REQUIRED. -- -- Validity constraint: ID Attribute Default (3.3.1 \/ p.26 in Spec) -- -- -- 7. Validates that all referenced notations are declared. -- -- Validity constraint: Notation Attributes (3.3.1 \/ p.27 in Spec) -- -- -- 8. Validates that notations are not declared for EMPTY elements. -- -- Validity constraint: No Notation on Empty Element (3.3.1 \/p.27 in Spec) -- -- -- 9. Validates that the default value matches the lexical constraints of it's type. -- -- Validity constraint: Attribute default legal (3.3.2 \/ p.28 in Spec) -- -- -- * 1.parameter dtdPart : the children of the @DOCTYPE@ node -- -- - 2.parameter elemNames : list of all element names declared in the DTD -- -- - 3.parameter notationNames : list of all notation names declared in the DTD -- -- - returns : a list of errors validateAttributes :: [String] -> [String] -> LA XmlTrees XmlTree validateAttributes elemNames notationNames = -- 1. Find attributes for which no elements are declared ( runCheck this (checkDeclaredElements elemNames) ) <+> -- 2. Find attributes which are declared more than once ( runNameCheck this checkForUniqueAttributeDeclaration ) <+> -- 3. Find enumerated attribute types which nmtokens are declared more than once ( runCheck (isEnumAttrType `orElse` isNotationAttrType) checkEnumeratedTypes ) <+> -- 4. Validate that there exists only one ID attribute for an element ( runNameCheck isIdAttrType checkForUniqueId ) <+> -- 5. Validate that there exists only one NOTATION attribute for an element ( runNameCheck isNotationAttrType checkForUniqueNotation ) <+> -- 6. Validate that ID attributes have the type #IMPLIED or #REQUIRED ( runCheck isIdAttrType checkIdKindConstraint ) <+> -- 7. Validate that all referenced notations are declared ( runCheck isNotationAttrType (checkNotationDeclaration notationNames) ) <+> -- 8. Validate that notations are not declared for EMPTY elements ( checkNoNotationForEmptyElements $< listA ( unlistA >>> isEmptyElement >>> getDTDAttrValue a_name ) ) <+> -- 9. Validate that the default value matches the lexical constraints of it's type ( checkDefaultValueTypes $< this ) where -- ------------------------------------------------------------ -- control structures runCheck select check = unlistA >>> isDTDAttlist >>> select >>> (check $< getDTDAttrl) runNameCheck select check = fromSLA [] $ runCheck select check -------------------------------------------------------------------------- -- 1. Find attributes for which no elements are declared checkDeclaredElements :: [String] -> Attributes -> XmlArrow checkDeclaredElements elemNames' al | en `elem` elemNames' = none | otherwise = warn ( "The element type \""++ en ++ "\" used in dclaration "++ "of attribute \""++ an ++"\" is not declared." ) where en = dtd_name al an = dtd_value al -------------------------------------------------------------------------- -- 2. Find attributes which are declared more than once checkForUniqueAttributeDeclaration :: Attributes -> SLA [String] XmlTree XmlTree checkForUniqueAttributeDeclaration al = checkName name $ warn ( "Attribute \""++ aname ++"\" for element type \""++ ename ++"\" is already declared. First "++ "declaration will be used." ) where ename = dtd_name al aname = dtd_value al name = ename ++ "|" ++ aname -------------------------------------------------------------------------- -- 3. Find enumerated attribute types which nmtokens are declared more than once checkEnumeratedTypes :: Attributes -> XmlArrow checkEnumeratedTypes al = fromSLA [] ( getChildren >>> isDTDName >>> (checkForUniqueType $< getDTDAttrl) ) where checkForUniqueType :: Attributes -> SLA [String] XmlTree XmlTree checkForUniqueType al' = checkName nmtoken $ warn ( "Nmtoken \""++ nmtoken ++"\" should not "++ "occur more than once in attribute \""++ dtd_value al ++ "\" for element \""++ dtd_name al ++ "\"." ) where nmtoken = dtd_name al' -------------------------------------------------------------------------- -- 4. Validate that there exists only one ID attribute for an element checkForUniqueId :: Attributes -> SLA [String] XmlTree XmlTree checkForUniqueId al = checkName ename $ err ( "Element \""++ ename ++ "\" already has attribute of type "++ "ID, another attribute \""++ dtd_value al ++ "\" of type ID is "++ "not permitted." ) where ename = dtd_name al -------------------------------------------------------------------------- -- 5. Validate that there exists only one NOTATION attribute for an element checkForUniqueNotation :: Attributes -> SLA [String] XmlTree XmlTree checkForUniqueNotation al = checkName ename $ err ( "Element \""++ ename ++ "\" already has attribute of type "++ "NOTATION, another attribute \""++ dtd_value al ++ "\" of type NOTATION "++ "is not permitted." ) where ename = dtd_name al -------------------------------------------------------------------------- -- 6. Validate that ID attributes have the type #IMPLIED or #REQUIRED checkIdKindConstraint :: Attributes -> XmlArrow checkIdKindConstraint al | attKind `elem` [k_implied, k_required] = none | otherwise = err ( "ID attribute \""++ dtd_value al ++"\" must have a declared default "++ "of \"#IMPLIED\" or \"REQUIRED\"") where attKind = dtd_kind al -------------------------------------------------------------------------- -- 7. Validate that all referenced notations are declared checkNotationDeclaration :: [String] -> Attributes -> XmlArrow checkNotationDeclaration notations al = getChildren >>> isDTDName >>> (checkNotations $< getDTDAttrl) where checkNotations :: Attributes -> XmlArrow checkNotations al' | notation `elem` notations = none | otherwise = err ( "The notation \""++ notation ++"\" must be declared when "++ "referenced in the notation type list for attribute \""++ dtd_value al ++ "\" of element \""++ dtd_name al ++"\"." ) where notation = dtd_name al' -------------------------------------------------------------------------- -- 8. Validate that notations are not declared for EMPTY elements checkNoNotationForEmptyElements :: [String] -> LA XmlTrees XmlTree checkNoNotationForEmptyElements emptyElems = unlistA >>> isDTDAttlist >>> isNotationAttrType >>> (checkNoNotationForEmptyElement $< getDTDAttrl) where checkNoNotationForEmptyElement :: Attributes -> XmlArrow checkNoNotationForEmptyElement al | ename `elem` emptyElems = err ( "Attribute \""++ dtd_value al ++"\" of type NOTATION must not be "++ "declared on the element \""++ ename ++"\" declared EMPTY." ) | otherwise = none where ename = dtd_name al -------------------------------------------------------------------------- -- 9. Validate that default values meet the lexical constraints of the attribute types checkDefaultValueTypes :: XmlTrees -> LA XmlTrees XmlTree checkDefaultValueTypes dtdPart' = unlistA >>> isDTDAttlist >>> isDefaultAttrKind >>> (checkAttributeValue dtdPart' $< this) -- ------------------------------------------------------------ -- | -- Removes doublicate declarations from the DTD, which first declaration is -- binding. This is the case for ATTLIST and ENTITY declarations. -- -- - returns : A function that replaces the children of DOCTYPE nodes by a list -- where all multiple declarations are removed. removeDoublicateDefs :: XmlArrow removeDoublicateDefs = replaceChildren ( fromSLA [] ( getChildren >>> choiceA [ isDTDAttlist :-> (removeDoubleAttlist $< getDTDAttrl) , isDTDEntity :-> (removeDoubleEntity $< getDTDAttrl) , this :-> this ] ) ) `when` isDTDDoctype where checkName' n' = ifA ( getState >>> isA (n' `elem`) ) none (this >>> perform (nextState (n':))) removeDoubleAttlist :: Attributes -> SLA [String] XmlTree XmlTree removeDoubleAttlist al = checkName' elemAttr where elemAttr = elemName ++ "|" ++ attrName attrName = dtd_value al elemName = dtd_name al removeDoubleEntity :: Attributes -> SLA [String] XmlTree XmlTree removeDoubleEntity al = checkName' (dtd_name al) -- ------------------------------------------------------------ hxt-9.3.1.15/src/Text/XML/HXT/DTDValidation/RE.hs0000644000000000000000000002773012465166667017103 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Text.XML.HXT.DTDValidation.RE Copyright : Copyright (C) 2008 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : experimental Portability: portable A module for regular expression matching based on derivatives of regular expressions. The code was taken from Joe English (). Tested and extended by Martin Schmidt. Further references for the algorithm: Janusz A. Brzozowski. Derivatives of Regular Expressions. Journal of the ACM, Volume 11, Issue 4, 1964. Mark Hopkins. Regular Expression Package. Posted to comp.compilers, 1994. Available per FTP at . -} -- ------------------------------------------------------------ module Text.XML.HXT.DTDValidation.RE ( RE(..) , re_unit , re_zero , re_sym , re_rep , re_plus , re_opt , re_seq , re_alt , re_dot , checkRE , matches , nullable , printRE ) where import Data.List (foldl') -- | -- Data type for regular expressions. data RE a = RE_ZERO String --' L(0) = {} (empty set) | RE_UNIT --' L(1) = { [] } (empty sequence) | RE_SYM a --' L(x) = { [x] } | RE_DOT --' accept any single symbol | RE_REP (RE a) --' L(e*) = { [] } `union` L(e+) | RE_PLUS (RE a) --' L(e+) = { x ++ y | x <- L(e), y <- L(e*) } | RE_OPT (RE a) --' L(e?) = L(e) `union` { [] } | RE_SEQ (RE a) (RE a) --' L(e,f) = { x ++ y | x <- L(e), y <- L(f) } | RE_ALT (RE a) (RE a) --' L(e|f) = L(e) `union` L(f) deriving (Show, Eq, Ord) -- ------------------------------------------------------------ -- Constructor functions to simplify regular expressions when constructing them. -- | -- Constructs a regular expression for an empty set. -- -- * 1.parameter errMsg : error message -- -- - returns : regular expression for an empty set re_zero :: String -> RE a re_zero m = RE_ZERO m -- | -- Constructs a regular expression for an empty sequence. -- -- - returns : regular expression for an empty sequence re_unit :: RE a re_unit = RE_UNIT -- | -- Constructs a regular expression for accepting a symbol -- -- * 1.parameter sym : the symbol to be accepted -- -- - returns : regular expression for accepting a symbol re_sym :: a -> RE a re_sym x = RE_SYM x -- | -- Constructs a regular expression for accepting any singel symbol -- -- - returns : regular expression for accepting any singel symbol re_dot :: RE a re_dot = RE_DOT -- | -- Constructs an optional repetition (*) of a regular expression -- -- * 1.parameter re_a : regular expression to be repeted -- -- - returns : new regular expression re_rep :: RE a -> RE a re_rep RE_UNIT = RE_UNIT re_rep (RE_ZERO _) = RE_UNIT re_rep e@(RE_REP _) = RE_REP (rem_rep e) -- remove nested reps re_rep e@(RE_ALT _ _) = RE_REP (rem_rep e) -- remove nested reps in alternatives re_rep e = RE_REP e -- | -- remove redundant nested *'s in RE -- theoretically this is unneccessary, -- but without this simplification the runtime can increase exponentally -- when computing deltas, e.g. for a** or (a|b*)* which is the same as (a|b)* rem_rep :: RE a -> RE a rem_rep (RE_ALT RE_UNIT e2) = e2 rem_rep (RE_ALT e1 e2) = RE_ALT (rem_rep e1) (rem_rep e2) rem_rep (RE_REP e1) = rem_rep e1 rem_rep e1 = e1 -- | -- Constructs a repetition (+) of a regular expression -- -- * 1.parameter re_a : regular expression to be repeted -- -- - returns : new regular expression re_plus :: RE a -> RE a re_plus RE_UNIT = RE_UNIT re_plus (RE_ZERO m) = RE_ZERO m re_plus e | nullable e = re_rep e -- nullable e => e+ == e* | otherwise = re_seq e (re_rep e) -- | -- Constructs an option (?) of a regular expression -- -- * 1.parameter re_a : regular expression to be optional -- -- - returns : new regular expression re_opt :: (Ord a) => RE a -> RE a re_opt RE_UNIT = RE_UNIT re_opt (RE_ZERO _) = RE_UNIT re_opt e = re_alt RE_UNIT e -- | -- Constructs a sequence (,) of two regular expressions -- -- * 1.parameter re_a : first regular expression in sequence -- -- - 2.parameter re_b : second regular expression in sequence -- -- - returns : new regular expression re_seq :: RE a -> RE a -> RE a re_seq e1@(RE_ZERO _) _ = e1 -- simplification re_seq RE_UNIT e2 = e2 -- simplification re_seq _ e2@(RE_ZERO _) = e2 -- simplification re_seq e1 RE_UNIT = e1 -- simplification re_seq (RE_SEQ e11 e12) e2 = re_seq e11 (re_seq e12 e2) -- right assoc. re_seq e1 e2 = RE_SEQ e1 e2 -- | -- Constructs an alternative (|) of two regular expressions -- -- * 1.parameter re_a : first regular expression of alternative -- -- - 2.parameter re_b : second regular expression of alternative -- -- - returns : new regular expression re_alt :: (Ord a) => RE a -> RE a -> RE a re_alt (RE_ZERO _) e2 = e2 re_alt e1 (RE_ZERO _) = e1 re_alt (RE_ALT e11 e12) e2 = re_alt e11 (re_alt e12 e2) -- is right assoc re_alt e1 e2@(RE_ALT e21 e22) | e1 == e21 = e2 -- duplicates removed, the effective rule | e1 > e21 = re_alt e21 (re_alt e1 e22) -- sort alternatives | otherwise = RE_ALT e1 e2 re_alt e1 e2 | e1 == e2 = e2 -- simplification, the effective rule | e1 > e2 = re_alt e2 e1 -- sort alts for unique repr. | otherwise = RE_ALT e1 e2 -- ------------------------------------------------------------ -- | -- Checks if a regular expression matches the empty sequence. -- -- nullable e == [] `in` L(e) -- -- This check indicates if a regular expression fits to a sentence or not. -- -- * 1.parameter re : regular expression to be checked -- -- - returns : true if regular expression matches the empty sequence, -- otherwise false nullable :: RE a -> Bool nullable (RE_ZERO _) = False nullable RE_UNIT = True nullable (RE_SYM _) = False nullable (RE_REP _) = True nullable (RE_PLUS e) = nullable e nullable (RE_OPT _) = True nullable (RE_SEQ e f) = nullable e && nullable f nullable (RE_ALT e f) = nullable e || nullable f nullable RE_DOT = False -- | -- Derives a regular expression with respect to one symbol. -- -- L(delta e x) = x \ L(e) -- -- * 1.parameter re : regular expression to be derived -- -- - 2.parameter sym : the symbol on which the regular expression is applied -- -- - returns : the derived regular expression delta :: (Ord a, Show a) => RE a -> a -> RE a delta re x = case re of RE_ZERO _ -> re -- re_zero m RE_UNIT -> re_zero ("Symbol " ++ show x ++ " unexpected.") RE_SYM sym | x == sym -> re_unit | otherwise -> re_zero ("Symbol " ++ show sym ++ " expected, but symbol " ++ show x ++ " found.") RE_REP e -> re_seq (delta e x) re -- (re_rep e) RE_PLUS e -> re_seq (delta e x) (re_rep e) RE_OPT e -> delta e x RE_SEQ e f | nullable e -> re_alt (re_seq (delta e x) f) (delta f x) | otherwise -> re_seq (delta e x) f RE_ALT e f -> re_alt (delta e x) (delta f x) RE_DOT -> re_unit -- | -- Derives a regular expression with respect to a sentence. -- -- * 1.parameter re : regular expression -- -- - 2.parameter s : sentence to which the regular expression is applied -- -- - returns : the derived regular expression matches :: (Ord a, Show a) => RE a -> [a] -> RE a matches e = foldl' delta e -- | -- Checks if an input matched a regular expression. The function should be -- called after matches. -- -- Was the sentence used in @matches@ in the language of the regular expression? -- -> matches e s == s `in` L(e)? -- -- * 1.parameter re : the derived regular expression -- -- - returns : empty String if input matched the regular expression, otherwise -- an error message is returned checkRE :: (Eq a, Show a) => RE a -> String checkRE (RE_UNIT) = "" checkRE (RE_ZERO m) = m checkRE re | nullable re = "" | otherwise = "Input must match " ++ printRE re -- ------------------------------------------------------------ -- | -- Constructs a string representation of a regular expression. -- -- * 1.parameter re : a regular expression -- -- - returns : the string representation of the regular expression printRE :: (Eq a, Show a) => RE a -> String printRE re' = "( " ++ printRE1 re' ++ " )" where -- printRE1 :: (Eq a, Show a) => RE a -> String printRE1 re = case re of RE_ZERO m -> "ERROR: " ++ m RE_UNIT -> "" RE_SYM sym -> show sym RE_DOT -> "." RE_REP e | isSingle e -> printRE1 e ++ "*" | otherwise -> "(" ++ printRE1 e ++ ")*" RE_PLUS e | isSingle e -> printRE1 e ++ "+" | otherwise -> "(" ++ printRE1 e ++ ")+" RE_OPT e | isSingle e -> printRE1 e ++ "?" | otherwise -> "(" ++ printRE1 e ++ ")?" RE_SEQ e1 (RE_REP e2) | e1 == e2 -> printRE1 (RE_PLUS e1) RE_SEQ e1 (RE_SEQ (RE_REP e2) e3) | e1 == e2 -> printRE1 (RE_SEQ (RE_PLUS e1) e3) RE_SEQ e f | isAlt e && not (isAlt f) -> "(" ++ printRE1 e ++ ") , " ++ printRE1 f | not (isAlt e) && isAlt f -> printRE1 e ++ " , (" ++ printRE1 f ++ ")" | isAlt e && isAlt f -> "(" ++ printRE1 e ++ ") , (" ++ printRE1 f ++ ")" | otherwise -> printRE1 e ++ " , " ++ printRE1 f RE_ALT RE_UNIT f -> printRE1 (RE_OPT f) RE_ALT e f | isSeq e && not (isSeq f) -> "(" ++ printRE1 e ++ ") | " ++ printRE1 f | not (isSeq e) && isSeq f -> printRE1 e ++ " | (" ++ printRE1 f ++ ")" | isSeq e && isSeq f -> "(" ++ printRE1 e ++ ") | (" ++ printRE1 f ++ ")" | otherwise -> printRE1 e ++ " | " ++ printRE1 f isSingle :: RE a -> Bool isSingle (RE_ZERO _) = True isSingle RE_UNIT = True isSingle (RE_SYM _) = True isSingle _ = False isSeq :: (Eq a) => RE a -> Bool isSeq (RE_SEQ e1 (RE_REP e2)) | e1 == e2 = False -- is transformed back into RE_PLUS isSeq (RE_SEQ _ _) = True isSeq _ = False isAlt :: RE a -> Bool isAlt (RE_ALT RE_UNIT _)= False -- is transformed back into a RE_OPT isAlt (RE_ALT _ _) = True isAlt _ = False hxt-9.3.1.15/src/Text/XML/HXT/DTDValidation/DocValidation.hs0000644000000000000000000004271612465166667021316 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Text.XML.HXT.DTDValidation.TypeDefs Copyright : Copyright (C) 2008 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : experimental Portability: portable This module provides functions for validating XML Documents represented as XmlTree. Unlike other popular XML validation tools the validation process returns a list of errors instead of aborting after the first error was found. Before the document is validated, a lookup-table is build on the basis of the DTD which maps element names to their validation functions. After this initialization phase the whole document is traversed in preorder and every element is validated by the XmlFilter from the lookup-table. -} -- ------------------------------------------------------------ module Text.XML.HXT.DTDValidation.DocValidation ( validateDoc ) where import Text.XML.HXT.DTDValidation.TypeDefs import Text.XML.HXT.DTDValidation.AttributeValueValidation import Text.XML.HXT.DTDValidation.XmlRE -- ------------------------------------------------------------ -- | -- Lookup-table which maps element names to their validation functions. The -- validation functions are XmlArrows. type ValiEnvTable = [ValiEnv] type ValiEnv = (ElemName, ValFct) type ElemName = String type ValFct = XmlArrow -- ------------------------------------------------------------ -- | -- Validate a document. -- -- * 1.parameter dtdPart : the DTD subset (Node @DOCTYPE@) of the XmlTree -- -- - 2.parameter doc : the document subset of the XmlTree -- -- - returns : a list of errors validateDoc :: XmlTree -> XmlArrow validateDoc dtdPart = traverseTree valTable where valTable = buildAllValidationFunctions dtdPart -- | -- Traverse the XmlTree in preorder. -- -- * 1.parameter valiEnv : lookup-table which maps element names to their validation functions -- -- - returns : list of errors traverseTree :: ValiEnvTable -> XmlArrow traverseTree valiEnv = choiceA [ isElem :-> (valFct $< getQName) , this :-> none ] <+> ( getChildren >>> traverseTree valiEnv ) where valFct :: QName -> XmlArrow valFct name = case (lookup (qualifiedName name) valiEnv) of Nothing -> err ("Element " ++ show (qualifiedName name) ++ " not declared in DTD.") Just f -> f -- ------------------------------------------------------------ -- | -- Build all validation functions. -- -- * 1.parameter dtdPart : DTD subset, root node should be of type @DOCTYPE@ -- -- - returns : lookup-table which maps element names to their validation functions buildAllValidationFunctions :: XmlTree -> ValiEnvTable buildAllValidationFunctions dtdPart = concat $ buildValidateRoot dtdPart : -- construct a list of validation filters for all element declarations map (buildValidateFunctions dtdNodes) dtdNodes where dtdNodes = runLA getChildren dtdPart -- | -- Build a validation function for the document root. By root node @\/@ -- is meant, which is the topmost dummy created by the parser. -- -- * 1.parameter dtdPart : DTD subset, root node should be of type @DOCTYPE@ -- -- - returns : entry for the lookup-table buildValidateRoot :: XmlTree -> [ValiEnv] buildValidateRoot dn | isDTDDoctypeNode dn = [(t_root, valFct)] | otherwise = [] where name = dtd_name . getDTDAttributes $ dn valFct :: XmlArrow valFct = isElem `guards` ( checkRegex (re_sym name) >>> msgToErr (("Root Element must be " ++ show name ++ ". ") ++) ) checkRegex :: RE String -> LA XmlTree String checkRegex re = listA getChildren >>> arr (\ cs -> checkRE (matches re cs)) -- | -- Build validation functions for an element. -- -- * 1.parameter dtdPart : the children of the @DOCTYPE@ node -- -- - 2.parameter nd : element declaration for which the validation functions are -- created -- -- - returns : entry for the lookup-table buildValidateFunctions :: XmlTrees -> XmlTree -> [ValiEnv] buildValidateFunctions dtdPart dn | isDTDElementNode dn = [(elemName, valFct)] | otherwise = [] where elemName = dtd_name . getDTDAttributes $ dn valFct :: XmlArrow valFct = buildContentValidation dn <+> buildAttributeValidation dtdPart dn -- ------------------------------------------------------------ -- | -- Build validation functions for the content model of an element. -- Validity constraint: Element Valid (3 \/ p.18 in Spec) -- -- * 1.parameter nd : element declaration for which the content validation functions -- are built -- -- - returns : a function which takes an element (XTag), checks if its -- children match its content model and returns a list of errors buildContentValidation :: XmlTree -> XmlArrow buildContentValidation nd = contentValidation attrType nd where attrType = dtd_type . getDTDAttributes $ nd -- Delegates construction of the validation function on the basis of the -- content model type contentValidation :: String -> XmlTree -> XmlArrow contentValidation typ dn | typ == k_pcdata = contentValidationPcdata | typ == k_empty = contentValidationEmpty | typ == k_any = contentValidationAny | typ == v_children = contentValidationChildren cs | typ == v_mixed = contentValidationMixed cs | otherwise = none where cs = runLA getChildren dn -- Checks #PCDATA content models contentValidationPcdata :: XmlArrow contentValidationPcdata = isElem `guards` (contentVal $< getQName) where contentVal name = checkRegex (re_rep (re_sym k_pcdata)) >>> msgToErr ( ( "The content of element " ++ show (qualifiedName name) ++ " must match (#PCDATA). " ) ++ ) -- Checks EMPTY content models contentValidationEmpty :: XmlArrow contentValidationEmpty = isElem `guards` (contentVal $< getQName) where contentVal name = checkRegex re_unit >>> msgToErr ( ( "The content of element " ++ show (qualifiedName name) ++ " must match EMPTY. " ) ++ ) -- Checks ANY content models contentValidationAny :: XmlArrow contentValidationAny = isElem `guards` (contentVal $< getName) where contentVal name = checkRegex (re_rep (re_dot)) >>> msgToErr ( ( "The content of element " ++ show name ++ " must match ANY. " ) ++ ) -- Checks "children" content models contentValidationChildren :: XmlTrees -> XmlArrow contentValidationChildren cm = isElem `guards` (contentVal $< getName) where contentVal name = checkRegex re >>> msgToErr ( ( "The content of element " ++ show name ++ " must match " ++ printRE re ++ ". " ) ++ ) re = createRE (head cm) -- Checks "mixed content" content models contentValidationMixed :: XmlTrees -> XmlArrow contentValidationMixed cm = isElem `guards` (contentVal $< getName) where contentVal name = checkRegex re >>> msgToErr ( ( "The content of element " ++ show name ++ " must match " ++ printRE re ++ ". " ) ++ ) re = re_rep (re_alt (re_sym k_pcdata) (createRE (head cm))) -- | -- Build a regular expression from the content model. The regular expression -- is provided by the module XmlRE. -- -- * 1.parameter nd : node of the content model. Expected: @CONTENT@ or -- @NAME@ -- -- - returns : regular expression of the content model createRE :: XmlTree -> RE String createRE dn | isDTDContentNode dn = processModifier modifier | isDTDNameNode dn = re_sym name | otherwise = error ("createRE: illegeal parameter:\n" ++ show dn) where al = getDTDAttributes dn name = dtd_name al modifier = dtd_modifier al kind = dtd_kind al cs = runLA getChildren dn processModifier :: String -> RE String processModifier m | m == v_plus = re_plus (processKind kind) | m == v_star = re_rep (processKind kind) | m == v_option = re_opt (processKind kind) | m == v_null = processKind kind | otherwise = error ("Unknown modifier: " ++ show m) processKind :: String -> RE String processKind k | k == v_seq = makeSequence cs | k == v_choice = makeChoice cs | otherwise = error ("Unknown kind: " ++ show k) makeSequence :: XmlTrees -> RE String makeSequence [] = re_unit makeSequence (x:xs) = re_seq (createRE x) (makeSequence xs) makeChoice :: XmlTrees -> RE String makeChoice [] = re_zero "" makeChoice (x:xs) = re_alt (createRE x) (makeChoice xs) -- ------------------------------------------------------------ -- | -- Build validation functions for the attributes of an element. -- -- * 1.parameter dtdPart : the children of the @DOCTYPE@ node -- -- - 2.parameter nd : element declaration for which the attribute validation functions -- are created -- -- - returns : a function which takes an element (XTag), checks if its -- attributes are valid and returns a list of errors buildAttributeValidation :: XmlTrees -> XmlTree -> XmlArrow buildAttributeValidation dtdPart nd = noDoublicateAttributes <+> checkNotDeclardAttributes attrDecls nd <+> checkRequiredAttributes attrDecls nd <+> checkFixedAttributes attrDecls nd <+> checkValuesOfAttributes attrDecls dtdPart nd where attrDecls = isDTDAttlist $$ dtdPart -- | -- Validate that all attributes of an element are unique. -- Well-formdness constraint: Unique AttSpec (3.1 \/ p.19 in Spec) -- -- - returns : a function which takes an element (XTag), checks if its -- attributes are unique and returns a list of errors noDoublicateAttributes :: XmlArrow noDoublicateAttributes = isElem `guards` ( noDoubles' $< getName ) where noDoubles' elemName = listA (getAttrl >>> getName) >>> applyA (arr (catA . map toErr . doubles . reverse)) where toErr n1 = err ( "Attribute " ++ show n1 ++ " was already specified for element " ++ show elemName ++ "." ) -- | -- Validate that all \#REQUIRED attributes are provided. -- Validity constraint: Required Attributes (3.3.2 \/ p.28 in Spec) -- -- * 1.parameter dtdPart : the children of the @DOCTYPE@ node -- -- - 2.parameter nd : element declaration which attributes have to be checked -- -- - returns : a function which takes an element (XTag), checks if all -- required attributes are provided and returns a list of errors checkRequiredAttributes :: XmlTrees -> XmlTree -> XmlArrow checkRequiredAttributes attrDecls dn | isDTDElementNode dn = isElem `guards` ( checkRequired $< getName ) | otherwise = none where elemName = dtd_name . getDTDAttributes $ dn requiredAtts = (isAttlistOfElement elemName >>> isRequiredAttrKind) $$ attrDecls checkRequired :: String -> XmlArrow checkRequired name = catA . map checkReq $ requiredAtts where checkReq :: XmlTree -> XmlArrow checkReq attrDecl = neg (hasAttr attName) `guards` err ( "Attribute " ++ show attName ++ " must be declared for element type " ++ show name ++ "." ) where attName = dtd_value . getDTDAttributes $ attrDecl -- | -- Validate that \#FIXED attributes match the default value. -- Validity constraint: Fixed Attribute Default (3.3.2 \/ p.28 in Spec) -- -- * 1.parameter dtdPart : the children of the @DOCTYPE@ node -- -- - 2.parameter nd : element declaration which attributes have to be checked -- -- - returns : a function which takes an element (XTag), checks if all -- fixed attributes match the default value and returns a list of errors checkFixedAttributes :: XmlTrees -> XmlTree -> XmlArrow checkFixedAttributes attrDecls dn | isDTDElementNode dn = isElem `guards` ( checkFixed $< getName ) | otherwise = none where elemName = dtd_name . getDTDAttributes $ dn fixedAtts = (isAttlistOfElement elemName >>> isFixedAttrKind) $$ attrDecls checkFixed :: String -> XmlArrow checkFixed name = catA . map checkFix $ fixedAtts where checkFix :: XmlTree -> XmlArrow checkFix an | isDTDAttlistNode an = checkFixedVal $< getAttrValue attName | otherwise = none where al' = getDTDAttributes an attName = dtd_value al' defa = dtd_default al' fixedValue = normalizeAttributeValue (Just an) defa checkFixedVal :: String -> XmlArrow checkFixedVal val = ( ( hasAttr attName >>> isA (const (attValue /= fixedValue)) ) `guards` err ( "Attribute " ++ show attName ++ " of element " ++ show name ++ " with value " ++ show attValue ++ " must have a value of " ++ show fixedValue ++ "." ) ) where attValue = normalizeAttributeValue (Just an) val -- | -- Validate that an element has no attributes which are not declared. -- Validity constraint: Attribute Value Type (3.1 \/ p.19 in Spec) -- -- * 1.parameter dtdPart : the children of the @DOCTYPE@ node -- -- - 2.parameter nd : element declaration which attributes have to be checked -- -- - returns : a function which takes an element (XTag), checks if all -- attributes are declared and returns a list of errors checkNotDeclardAttributes :: XmlTrees -> XmlTree -> XmlArrow checkNotDeclardAttributes attrDecls elemDescr = checkNotDeclared where elemName = valueOfDTD a_name elemDescr decls = isAttlistOfElement elemName $$ attrDecls checkNotDeclared :: XmlArrow checkNotDeclared = isElem `guards` ( getAttrl >>> searchForDeclaredAtt elemName decls ) searchForDeclaredAtt :: String -> XmlTrees -> XmlArrow searchForDeclaredAtt name (dn : xs) | isDTDAttlistNode dn = ( getName >>> isA ( (dtd_value . getDTDAttributes $ dn) /= ) ) `guards` searchForDeclaredAtt name xs | otherwise = searchForDeclaredAtt name xs searchForDeclaredAtt name [] = mkErr $< getName where mkErr n = err ( "Attribute " ++ show n ++ " of element " ++ show name ++ " is not declared in DTD." ) -- | -- Validate that the attribute value meets the lexical constraints of its type. -- Validity constaint: Attribute Value Type (3.1 \/ p.19 in Spec) -- -- * 1.parameter dtdPart : the children of the @DOCTYPE@ node -- -- - 2.parameter nd : element declaration which attributes have to be checked -- -- - returns : a function which takes an element (XTag), checks if all -- attributes meet the lexical constraints and returns a list of errors checkValuesOfAttributes :: XmlTrees -> XmlTrees -> XmlTree -> XmlArrow checkValuesOfAttributes attrDecls dtdPart elemDescr = checkValues where elemName = dtd_name . getDTDAttributes $ elemDescr decls = isAttlistOfElement elemName $$ attrDecls checkValues :: XmlArrow checkValues = isElem `guards` ( checkValue $< getAttrl ) checkValue att = catA . map checkVal $ decls where checkVal :: XmlTree -> XmlArrow checkVal attrDecl | isDTDAttlistNode attrDecl && nameOfAttr att == dtd_value al' = checkAttributeValue dtdPart attrDecl | otherwise = none where al' = getDTDAttributes attrDecl -- ------------------------------------------------------------ hxt-9.3.1.15/src/Text/XML/HXT/DTDValidation/Validation.hs0000644000000000000000000001102612465166667020656 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Text.XML.HXT.DTDValidation.Validation Copyright : Copyright (C) 2008 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : experimental Portability: portable This module provides functions for validating XML documents represented as XmlTree. Unlike other popular XML validation tools the validation functions return a list of errors instead of aborting after the first error was found. Note: The validation process has been split into validation and transformation! If @validate@ did not report any errors, @transform@ should be called, to change the document the way a validating parser is expected to do. -} -- ------------------------------------------------------------ module Text.XML.HXT.DTDValidation.Validation ( getDTDSubset , generalEntitiesDefined , validate , validateDTD , validateDoc , removeDoublicateDefs , transform ) where import Text.XML.HXT.DTDValidation.TypeDefs import qualified Text.XML.HXT.DTDValidation.DocTransformation as DocTransformation import qualified Text.XML.HXT.DTDValidation.DocValidation as DocValidation import qualified Text.XML.HXT.DTDValidation.DTDValidation as DTDValidation import qualified Text.XML.HXT.DTDValidation.IdValidation as IdValidation -- | -- Main validation filter. Check if the DTD and the document are valid. -- -- -- - returns : a function which expects a complete document as XmlTree input -- and returns a list of all errors found. validate :: XmlArrow validate = validateDTD <+> validateDoc -- | -- Check if the DTD is valid. -- -- -- - returns : a function which expects an XmlTree from the parser as input -- and returns a list of all errors found in the DTD. validateDTD :: XmlArrow validateDTD = choiceA [ getDTDSubset :-> DTDValidation.validateDTD , this :-> err "Can't validate DTD: There is no DOCTYPE declaration in the document." ] -- | -- Check if the document corresponds to the given DTD. -- -- -- - returns : a function which expects a complete document as XmlTree input -- and returns a list of all errors found in the content part. validateDoc :: XmlArrow validateDoc = validateDoc' $< getDTD where validateDoc' [] = err "Can't validate document: There is no DOCTYPE declaration in the document." validateDoc' (dtdPart:_) = DocValidation.validateDoc dtdPart <+> IdValidation.validateIds dtdPart getDTD :: XmlArrowS getDTD = listA ( getDTDSubset >>> removeDoublicateDefs ) -- | -- filter for transforming a document with respect to the given DTD. -- -- Validating parsers -- are expected to normalize attribute values and add default values. -- This function should be called after a successful validation. -- -- -- - returns : a function which expects a complete XML document tree -- and returns the transformed XmlTree transform :: XmlArrow transform = choiceA [ isRoot :-> (transformDoc $< getDTD) , this :-> fatal "Can't transform document: No document root given" ] where transformDoc [] = this transformDoc dtd = DocTransformation.transform (head dtd) -- | -- Removes doublicate declarations from the DTD which first declaration is -- binding. This is the case for ATTLIST and ENTITY declarations. -- -- -- - returns : A function that replaces the children of DOCTYPE nodes by a list -- where all multiple declarations are removed. removeDoublicateDefs :: XmlArrow removeDoublicateDefs = DTDValidation.removeDoublicateDefs -- -- selects the DTD part of a document -- but only, if there is more than the internal part for the 4 predefined XML entities getDTDSubset :: XmlArrow getDTDSubset = getChildren >>> ( filterA $ isDTDDoctype >>> getDTDAttrl >>> isA (hasEntry a_name) ) generalEntitiesDefined :: XmlArrow generalEntitiesDefined = getDTDSubset >>> deep isDTDEntity -- ------------------------------------------------------------ hxt-9.3.1.15/src/Text/XML/HXT/DTDValidation/XmlRE.hs0000644000000000000000000000714012465166667017555 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Text.XML.HXT.DTDValidation.XmlRE Copyright : Copyright (C) 2008 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : experimental Portability: portable A module for regular expression matching, adapted for XML DTDs. This module is based on the module RE. -} -- ------------------------------------------------------------ module Text.XML.HXT.DTDValidation.XmlRE ( RE , checkRE , matches , printRE , re_unit , re_zero , re_sym , re_rep , re_plus , re_opt , re_seq , re_alt , re_dot ) where -- import Debug.Trace (trace) import Data.List (foldl') import Text.XML.HXT.DTDValidation.RE hiding (matches) import Text.XML.HXT.Arrow.Edit (removeComment, removeWhiteSpace) import qualified Text.XML.HXT.DOM.XmlNode as XN import Text.XML.HXT.DTDValidation.TypeDefs -- | -- Derives a regular expression with respect to a list of elements. -- -- * 1.parameter re : regular expression -- -- - 2.parameter list : list of elements to which the regular expression is applied -- -- - returns : the derived regular expression matches :: RE String -> XmlTrees -> RE String matches re list = foldl' delta re (removeUnimportantStuff $$ list) where removeUnimportantStuff :: XmlArrow removeUnimportantStuff = processBottomUp (removeWhiteSpace >>> removeComment) -- trace of growth of REs -- delta' re el = delta (trace (("RE : " ++) . (++ "\n" ) . show $ re) re) el -- | -- Derives a regular expression with respect to one element. -- -- L(delta e x) = x \ L(e) -- -- * 1.parameter re : regular expression to be derived -- -- - 2.parameter el : the element on which the regular expression is applied -- -- - returns : the derived regular expression delta :: RE String -> XmlTree -> RE String delta re el | not (allowed el) = re | otherwise = case re of RE_ZERO m -> re_zero m RE_UNIT -> re_zero (elemName el ++" unexpected.") RE_SYM sym | sym == k_pcdata -> if ((XN.isText el) || (XN.isCdata el)) then re_unit else re_zero ("Character data expected, but "++ elemName el ++" found.") | expectedNode el sym -> re_unit | otherwise -> re_zero ("Element "++ show sym ++" expected, but "++ elemName el ++" found.") RE_REP e -> re_seq (delta e el) (re_rep e) RE_PLUS e -> re_seq (delta e el) (re_rep e) RE_OPT e -> delta e el RE_SEQ e f | nullable e -> re_alt (re_seq (delta e el) f) (delta f el) | otherwise -> re_seq (delta e el) f RE_ALT e f -> re_alt (delta e el) (delta f el) RE_DOT -> re_unit where expectedNode :: XmlTree -> String -> Bool expectedNode n sym | XN.isElem n = nameOfElem n == sym | otherwise = False elemName :: XmlTree -> String elemName n | XN.isElem n = "element "++ show (nameOfElem n) | otherwise = "character data" allowed :: XmlTree -> Bool allowed n = XN.isElem n || XN.isText n || XN.isCdata n -- ------------------------------------------------------------ hxt-9.3.1.15/src/Text/XML/HXT/DOM/0000755000000000000000000000000012465166667014221 5ustar0000000000000000hxt-9.3.1.15/src/Text/XML/HXT/DOM/XmlKeywords.hs0000644000000000000000000001214712465166667017052 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Text.XML.HXT.DOM.XmlKeywords Copyright : Copyright (C) 2005 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : experimental Portability: portable Constants for XML keywords, for special attribute names and special attribute values -} -- ------------------------------------------------------------ module Text.XML.HXT.DOM.XmlKeywords where -- ------------------------------------------------------------ -- -- string constants for representing DTD keywords and attributes t_xml, -- tag names t_root :: String a_default, -- attribute names a_contentLength, a_column, a_encoding, a_kind, a_line, a_module, a_modifier, a_name, a_output_encoding, a_peref, a_source, a_status, a_standalone, a_type, a_url, a_value, a_version, a_xml, a_xmlns :: String v_0, -- attribute values v_1, v_2, v_yes, v_no, v_any, v_children, v_choice, v_empty, v_mixed, v_seq, v_null, v_option, v_pcdata, v_star, v_plus :: String k_any, -- DTD keywords k_cdata, k_empty, k_entity, k_entities, k_id, k_idref, k_idrefs, k_include, k_ignore, k_nmtoken, k_nmtokens, k_peref, k_public, k_system, k_enumeration, k_fixed, k_implied, k_ndata, k_notation, k_pcdata, k_required, k_default :: String -- ------------------------------------------------------------ t_xml = "xml" t_root = "/" -- name of root node tag a_column = "column" a_contentLength = "Content-Length" a_default = "default" a_encoding = "encoding" a_kind = "kind" a_line = "line" a_module = "module" a_modifier = "modifier" a_name = "name" a_output_encoding = "output-encoding" a_peref = k_peref a_source = "source" a_standalone = "standalone" a_status = "status" a_type = "type" a_url = "url" a_value = "value" a_version = "version" a_xml = "xml" a_xmlns = "xmlns" v_yes = "yes" v_no = "no" v_0 = "0" v_1 = "1" v_2 = "2" v_any = k_any v_children = "children" v_choice = "choice" v_empty = k_empty v_pcdata = k_pcdata v_mixed = "mixed" v_seq = "seq" v_null = "" v_option = "?" v_star = "*" v_plus = "+" k_any = "ANY" k_cdata = "CDATA" k_empty = "EMPTY" k_entity = "ENTITY" k_entities = "ENTITIES" k_id = "ID" k_idref = "IDREF" k_idrefs = "IDREFS" k_include = "INCLUDE" k_ignore = "IGNORE" k_nmtoken = "NMTOKEN" k_nmtokens = "NMTOKENS" k_peref = "PERef" k_public = "PUBLIC" k_system = "SYSTEM" k_enumeration = "#ENUMERATION" k_fixed = "#FIXED" k_implied = "#IMPLIED" k_ndata = "NDATA" k_notation = "NOTATION" k_pcdata = "#PCDATA" k_required = "#REQUIRED" k_default = "#DEFAULT" dtdPrefix :: String dtdPrefix = "doctype-" -- ------------------------------------------------------------ -- -- attribute names for transfer protocol attributes -- used in XmlInput for describing header information -- of http and other requests transferPrefix , transferProtocol , transferMimeType , transferEncoding , transferURI , transferDefaultURI , transferStatus , transferMessage , transferVersion :: String transferPrefix = "transfer-" transferProtocol = transferPrefix ++ "Protocol" transferVersion = transferPrefix ++ "Version" transferMimeType = transferPrefix ++ "MimeType" transferEncoding = transferPrefix ++ "Encoding" transferDefaultURI = transferPrefix ++ "DefaultURI" transferStatus = transferPrefix ++ "Status" transferMessage = transferPrefix ++ "Message" transferURI = transferPrefix ++ "URI" -- ------------------------------------------------------------ -- httpPrefix :: String httpPrefix = "http-" stringProtocol :: String stringProtocol = "string:" -- ------------------------------------------------------------ -- -- known namespaces -- | -- the predefined namespace uri for xml: \"http:\/\/www.w3.org\/XML\/1998\/namespace\" xmlNamespace :: String xmlNamespace = "http://www.w3.org/XML/1998/namespace" -- | -- the predefined namespace uri for xmlns: \"http:\/\/www.w3.org\/2000\/xmlns\/\" xmlnsNamespace :: String xmlnsNamespace = "http://www.w3.org/2000/xmlns/" -- | Relax NG namespace relaxNamespace :: String relaxNamespace = "http://relaxng.org/ns/structure/1.0" -- ------------------------------------------------------------ hxt-9.3.1.15/src/Text/XML/HXT/DOM/FormatXmlTree.hs0000644000000000000000000000300312465166667017302 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Text.XML.HXT.DOM.FormatXmlTree Copyright : Copyright (C) 2008 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : stable Portability: portable Format a xml tree in tree representation -} -- ------------------------------------------------------------ module Text.XML.HXT.DOM.FormatXmlTree ( formatXmlTree , formatXmlContents ) where import Data.Maybe import Text.XML.HXT.DOM.Interface import Text.XML.HXT.DOM.ShowXml import Text.XML.HXT.DOM.XmlNode -- ------------------------------------------------------------ formatXmlContents :: XmlTree -> XmlTrees formatXmlContents t = [mkText (formatXmlTree t)] formatXmlTree :: XmlTree -> String formatXmlTree = formatTree xnode2String xnode2String :: XNode -> String xnode2String n | isElem n = "XTag " ++ showName n ++ showAtts n | isPi n = "XPi " ++ showName n ++ showAtts n | otherwise = show n where showName :: XNode -> String showName = maybe "" show . getName showAtts :: XNode -> String showAtts = concatMap showAl . fromMaybe [] . getAttrl showAl :: XmlTree -> String showAl t -- (NTree (XAttr an) av) | isAttr t = "\n| " ++ (maybe "" show . getName $ t) ++ "=" ++ show (xshow . getChildren $ t) | otherwise = show t -- ------------------------------------------------------------ hxt-9.3.1.15/src/Text/XML/HXT/DOM/TypeDefs.hs0000644000000000000000000002536412465166667016312 0ustar0000000000000000{-# LANGUAGE DeriveDataTypeable #-} -- ------------------------------------------------------------ {- | Module : Text.XML.HXT.DOM.TypeDefs Copyright : Copyright (C) 2008-2010 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : stable Portability: portable The core data types of the HXT DOM. -} -- ------------------------------------------------------------ module Text.XML.HXT.DOM.TypeDefs ( module Data.AssocList , module Text.XML.HXT.DOM.TypeDefs , module Text.XML.HXT.DOM.QualifiedName ) where import Control.DeepSeq import Control.FlatSeq import Data.AssocList import Data.Binary import qualified Data.ByteString.Lazy as BS import qualified Data.ByteString.Lazy.Char8 as CS import Data.Tree.NTree.TypeDefs import Data.Tree.NTree.Zipper.TypeDefs import Data.Typeable import Text.XML.HXT.DOM.QualifiedName -- ----------------------------------------------------------------------------- -- -- Basic types for xml tree and filters -- | Rose tree with XML nodes (XNode) type XmlTree = NTree XNode -- | List of rose trees with XML nodes type XmlTrees = NTrees XNode -- | Navigatable rose tree with XML nodes type XmlNavTree = NTZipper XNode -- | List of navigatable rose trees with XML nodes type XmlNavTrees = [NTZipper XNode] -- ----------------------------------------------------------------------------- -- -- XNode -- | Represents elements data XNode = XText String -- ^ ordinary text (leaf) | XBlob Blob -- ^ text represented more space efficient as bytestring (leaf) | XCharRef Int -- ^ character reference (leaf) | XEntityRef String -- ^ entity reference (leaf) | XCmt String -- ^ comment (leaf) | XCdata String -- ^ CDATA section (leaf) | XPi QName XmlTrees -- ^ Processing Instr with qualified name (leaf) -- with list of attributes. -- If tag name is xml, attributs are \"version\", \"encoding\", \"standalone\", -- else attribute list is empty, content is a text child node | XTag QName XmlTrees -- ^ tag with qualified name and list of attributes (inner node or leaf) | XDTD DTDElem Attributes -- ^ DTD element with assoc list for dtd element features | XAttr QName -- ^ attribute with qualified name, the attribute value is stored in children | XError Int String -- ^ error message with level and text deriving (Eq, Show, Typeable) instance NFData XNode where rnf (XText s) = rnf s rnf (XTag qn cs) = rnf qn `seq` rnf cs rnf (XAttr qn) = rnf qn rnf (XCharRef i) = rnf i rnf (XEntityRef n) = rnf n rnf (XCmt c) = rnf c rnf (XCdata s) = rnf s rnf (XPi qn ts) = rnf qn `seq` rnf ts rnf (XDTD de al) = rnf de `seq` rnf al rnf (XBlob b) = BS.length b `seq` () rnf (XError n e) = rnf n `seq` rnf e instance WNFData XNode where rwnf (XText s) = rwnf s rwnf (XTag qn cs) = rwnf qn `seq` rwnf cs rwnf (XAttr qn) = rwnf qn rwnf (XCharRef i) = i `seq` () rwnf (XEntityRef n) = rwnf n rwnf (XCmt c) = rwnf c rwnf (XCdata s) = rwnf s rwnf (XPi qn ts) = rwnf qn `seq` rwnf ts rwnf (XDTD de al) = rwnf de `seq` rwnfAttributes al rwnf (XBlob _b) = () -- BS.length b `seq` () -- lazy bytestrings are not evaluated rwnf (XError n e) = n `seq` rwnf e -- | Evaluate an assoc list of strings rwnfAttributes :: Attributes -> () rwnfAttributes [] = () rwnfAttributes ((k, v) : as) = rwnf k `seq` rwnf v `seq` rwnfAttributes as instance Binary XNode where put (XText s) = put ( 0::Word8) >> put s put (XTag qn cs) = put ( 6::Word8) >> put qn >> put cs put (XAttr qn) = put ( 8::Word8) >> put qn put (XCharRef i) = put ( 1::Word8) >> put i put (XEntityRef n) = put ( 2::Word8) >> put n put (XCmt c) = put ( 3::Word8) >> put c put (XCdata s) = put ( 4::Word8) >> put s put (XPi qn ts) = put ( 5::Word8) >> put qn >> put ts put (XDTD de al) = put ( 7::Word8) >> put de >> put al put (XError n e) = put ( 9::Word8) >> put n >> put e put (XBlob b) = put (10::Word8) >> put b get = do tag <- getWord8 case tag of 0 -> get >>= return . XText 1 -> get >>= return . XCharRef 2 -> get >>= return . XEntityRef 3 -> get >>= return . XCmt 4 -> get >>= return . XCdata 5 -> do qn <- get get >>= return . XPi qn 6 -> do qn <- get get >>= return . XTag qn 7 -> do de <- get get >>= return . XDTD de 8 -> get >>= return . XAttr 9 -> do n <- get get >>= return . XError n 10 -> get >>= return . XBlob _ -> error "XNode.get: error while decoding XNode" -- ----------------------------------------------------------------------------- -- -- DTDElem -- | Represents a DTD element data DTDElem = DOCTYPE -- ^ attr: name, system, public, XDTD elems as children | ELEMENT -- ^ attr: name, kind -- -- name: element name -- -- kind: \"EMPTY\" | \"ANY\" | \"\#PCDATA\" | children | mixed | CONTENT -- ^ element content -- -- attr: kind, modifier -- -- modifier: \"\" | \"?\" | \"*\" | \"+\" -- -- kind: seq | choice | ATTLIST -- ^ attributes: -- name - name of element -- -- value - name of attribute -- -- type: \"CDATA\" | \"ID\" | \"IDREF\" | \"IDREFS\" | \"ENTITY\" | \"ENTITIES\" | -- -- \"NMTOKEN\" | \"NMTOKENS\" |\"NOTATION\" | \"ENUMTYPE\" -- -- kind: \"#REQUIRED\" | \"#IMPLIED\" | \"DEFAULT\" | ENTITY -- ^ for entity declarations | PENTITY -- ^ for parameter entity declarations | NOTATION -- ^ for notations | CONDSECT -- ^ for INCLUDEs, IGNOREs and peRefs: attr: type -- -- type = INCLUDE, IGNORE or %...; | NAME -- ^ attr: name -- -- for lists of names in notation types or nmtokens in enumeration types | PEREF -- ^ for Parameter Entity References in DTDs deriving (Eq, Ord, Enum, Show, Read, Typeable) instance NFData DTDElem where rnf x = seq x () instance WNFData DTDElem instance Binary DTDElem where put de = put ((toEnum . fromEnum $ de)::Word8) -- DTDElem is not yet instance of Enum get = do tag <- getWord8 return $! (toEnum . fromEnum $ tag) -- ----------------------------------------------------------------------------- -- | Binary large object implemented as a lazy bytestring type Blob = BS.ByteString blobToString :: Blob -> String blobToString = CS.unpack {-# INLINE blobToString #-} stringToBlob :: String -> Blob stringToBlob = CS.pack {-# INLINE stringToBlob #-} -- ----------------------------------------------------------------------------- -- | Attribute list -- -- used for storing option lists and features of DTD parts type Attributes = AssocList String String -- ----------------------------------------------------------------------------- -- -- Constants for error levels -- | no error, everything is ok c_ok :: Int c_ok = 0 -- | Error level for XError, type warning c_warn :: Int c_warn = c_ok + 1 -- | Error level for XError, type error c_err :: Int c_err = c_warn + 1 -- | Error level for XError, type fatal error c_fatal :: Int c_fatal = c_err + 1 -- ----------------------------------------------------------------------------- -- | data type for representing a set of nodes as a tree structure -- -- this structure is e.g. used to repesent the result of an XPath query -- such that the selected nodes can be processed or selected later in -- processing a document tree data XmlNodeSet = XNS { thisNode :: Bool -- ^ is this node part of the set ? , attrNodes :: [QName] -- ^ the set of attribute nodes , childNodes :: ChildNodes -- ^ the set of child nodes, a list of pairs of index and node set } deriving (Eq, Show, Typeable) type ChildNodes = [(Int, XmlNodeSet)] -- ----------------------------------------------------------------------------- hxt-9.3.1.15/src/Text/XML/HXT/DOM/XmlNode.hs0000644000000000000000000004201512465166667016125 0ustar0000000000000000{-# LANGUAGE FlexibleInstances, MultiParamTypeClasses, FunctionalDependencies #-} -- ------------------------------------------------------------ {- | Module : Text.XML.HXT.DOM.XmlNode Copyright : Copyright (C) 2008 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : stable Portability: portable Interface for XmlArrow to basic data types NTree and XmlTree If this module must be used in code working with arrows, it should be imported qualified e.g. @as XN@, to prevent name clashes. For code working on the \"node and tree level\" this module is the interface for writing code without using the constructor functions of 'XNode' and 'NTree' directly -} -- ------------------------------------------------------------ module Text.XML.HXT.DOM.XmlNode ( module Text.XML.HXT.DOM.XmlNode , module Data.Tree.Class , module Data.Tree.NTree.TypeDefs ) where import Control.Monad import Control.FlatSeq import Data.Function ( on ) import Data.Maybe ( fromMaybe , fromJust ) import Data.Tree.Class import Data.Tree.NTree.TypeDefs import Text.XML.HXT.DOM.Interface class XmlNode a where -- discriminating predicates isText :: a -> Bool isBlob :: a -> Bool isCharRef :: a -> Bool isEntityRef :: a -> Bool isCmt :: a -> Bool isCdata :: a -> Bool isPi :: a -> Bool isElem :: a -> Bool isRoot :: a -> Bool isDTD :: a -> Bool isAttr :: a -> Bool isError :: a -> Bool -- constructor functions for leave nodes mkText :: String -> a mkBlob :: Blob -> a mkCharRef :: Int -> a mkEntityRef :: String -> a mkCmt :: String -> a mkCdata :: String -> a mkPi :: QName -> XmlTrees -> a mkError :: Int -> String -> a -- selectors getText :: a -> Maybe String getBlob :: a -> Maybe Blob getCharRef :: a -> Maybe Int getEntityRef :: a -> Maybe String getCmt :: a -> Maybe String getCdata :: a -> Maybe String getPiName :: a -> Maybe QName getPiContent :: a -> Maybe XmlTrees getElemName :: a -> Maybe QName getAttrl :: a -> Maybe XmlTrees getDTDPart :: a -> Maybe DTDElem getDTDAttrl :: a -> Maybe Attributes getAttrName :: a -> Maybe QName getErrorLevel :: a -> Maybe Int getErrorMsg :: a -> Maybe String -- derived selectors getName :: a -> Maybe QName getQualifiedName :: a -> Maybe String getUniversalName :: a -> Maybe String getUniversalUri :: a -> Maybe String getLocalPart :: a -> Maybe String getNamePrefix :: a -> Maybe String getNamespaceUri :: a -> Maybe String -- "modifier" functions changeText :: (String -> String) -> a -> a changeBlob :: (Blob -> Blob) -> a -> a changeCmt :: (String -> String) -> a -> a changeName :: (QName -> QName) -> a -> a changeElemName :: (QName -> QName) -> a -> a changeAttrl :: (XmlTrees -> XmlTrees) -> a -> a changeAttrName :: (QName -> QName) -> a -> a changePiName :: (QName -> QName) -> a -> a changeDTDAttrl :: (Attributes -> Attributes) -> a -> a setText :: String -> a -> a setBlob :: Blob -> a -> a setCmt :: String -> a -> a setName :: QName -> a -> a setElemName :: QName -> a -> a setElemAttrl :: XmlTrees -> a -> a setAttrName :: QName -> a -> a setPiName :: QName -> a -> a setDTDAttrl :: Attributes -> a -> a -- default implementations getName n = getElemName n `mplus` getAttrName n `mplus` getPiName n getQualifiedName n = getName n >>= return . qualifiedName getUniversalName n = getName n >>= return . universalName getUniversalUri n = getName n >>= return . universalUri getLocalPart n = getName n >>= return . localPart getNamePrefix n = getName n >>= return . namePrefix getNamespaceUri n = getName n >>= return . namespaceUri setText = changeText . const setBlob = changeBlob . const setCmt = changeCmt . const setName = changeName . const setElemName = changeElemName . const setElemAttrl = changeAttrl . const setAttrName = changeAttrName . const setPiName = changePiName . const setDTDAttrl = changeDTDAttrl . const -- XNode and XmlTree are instances of XmlNode instance XmlNode XNode where isText (XText _) = True isText (XBlob _) = True isText _ = False {-# INLINE isText #-} isBlob (XBlob _) = True isBlob _ = False {-# INLINE isBlob #-} isCharRef (XCharRef _) = True isCharRef _ = False {-# INLINE isCharRef #-} isEntityRef (XEntityRef _) = True isEntityRef _ = False {-# INLINE isEntityRef #-} isCmt (XCmt _) = True isCmt _ = False {-# INLINE isCmt #-} isCdata (XCdata _) = True isCdata _ = False {-# INLINE isCdata #-} isPi (XPi _ _) = True isPi _ = False {-# INLINE isPi #-} isElem (XTag _ _) = True isElem _ = False {-# INLINE isElem #-} isRoot t = isElem t && fromMaybe "" (getQualifiedName t) == t_root isDTD (XDTD _ _) = True isDTD _ = False {-# INLINE isDTD #-} isAttr (XAttr _) = True isAttr _ = False {-# INLINE isAttr #-} isError (XError _ _) = True isError _ = False {-# INLINE isError #-} mkText = XText {-# INLINE mkText #-} mkBlob = XBlob {-# INLINE mkBlob #-} mkCharRef = XCharRef {-# INLINE mkCharRef #-} mkEntityRef = XEntityRef {-# INLINE mkEntityRef #-} mkCmt = XCmt {-# INLINE mkCmt #-} mkCdata = XCdata {-# INLINE mkCdata #-} mkPi = XPi {-# INLINE mkPi #-} mkError = XError {-# INLINE mkError #-} getText (XText t) = Just t getText (XBlob b) = Just . blobToString $ b getText _ = Nothing {-# INLINE getText #-} getBlob (XBlob b) = Just b getBlob _ = Nothing {-# INLINE getBlob #-} getCharRef (XCharRef c) = Just c getCharRef _ = Nothing {-# INLINE getCharRef #-} getEntityRef (XEntityRef e) = Just e getEntityRef _ = Nothing {-# INLINE getEntityRef #-} getCmt (XCmt c) = Just c getCmt _ = Nothing {-# INLINE getCmt #-} getCdata (XCdata d) = Just d getCdata _ = Nothing {-# INLINE getCdata #-} getPiName (XPi n _) = Just n getPiName _ = Nothing {-# INLINE getPiName #-} getPiContent (XPi _ c) = Just c getPiContent _ = Nothing {-# INLINE getPiContent #-} getElemName (XTag n _) = Just n getElemName _ = Nothing {-# INLINE getElemName #-} getAttrl (XTag _ al) = Just al getAttrl (XPi _ al) = Just al getAttrl _ = Nothing {-# INLINE getAttrl #-} getDTDPart (XDTD p _) = Just p getDTDPart _ = Nothing {-# INLINE getDTDPart #-} getDTDAttrl (XDTD _ al) = Just al getDTDAttrl _ = Nothing {-# INLINE getDTDAttrl #-} getAttrName (XAttr n) = Just n getAttrName _ = Nothing {-# INLINE getAttrName #-} getErrorLevel (XError l _) = Just l getErrorLevel _ = Nothing {-# INLINE getErrorLevel #-} getErrorMsg (XError _ m) = Just m getErrorMsg _ = Nothing {-# INLINE getErrorMsg #-} changeText cf (XText t) = XText . cf $ t changeText cf (XBlob b) = XText . cf . blobToString $ b changeText _ _ = error "changeText undefined" {-# INLINE changeText #-} changeBlob cf (XBlob b) = XBlob . cf $ b changeBlob _ _ = error "changeBlob undefined" {-# INLINE changeBlob #-} changeCmt cf (XCmt c) = XCmt . cf $ c changeCmt _ _ = error "changeCmt undefined" {-# INLINE changeCmt #-} changeName cf (XTag n al) = XTag (cf n) al changeName cf (XAttr n) = XAttr . cf $ n changeName cf (XPi n al) = XPi (cf n) al changeName _ _ = error "changeName undefined" {-# INLINE changeName #-} changeElemName cf (XTag n al) = XTag (cf n) al changeElemName _ _ = error "changeElemName undefined" {-# INLINE changeElemName #-} changeAttrl cf (XTag n al) = XTag n (cf al) changeAttrl cf (XPi n al) = XPi n (cf al) changeAttrl _ _ = error "changeAttrl undefined" {-# INLINE changeAttrl #-} changeAttrName cf (XAttr n) = XAttr . cf $ n changeAttrName _ _ = error "changeAttrName undefined" {-# INLINE changeAttrName #-} changePiName cf (XPi n al) = XPi (cf n) al changePiName _ _ = error "changePiName undefined" {-# INLINE changePiName #-} changeDTDAttrl cf (XDTD p al) = XDTD p (cf al) changeDTDAttrl _ _ = error "changeDTDAttrl undefined" {-# INLINE changeDTDAttrl #-} mkElementNode :: QName -> XmlTrees -> XNode mkElementNode = XTag {-# INLINE mkElementNode #-} mkAttrNode :: QName -> XNode mkAttrNode = XAttr {-# INLINE mkAttrNode #-} mkDTDNode :: DTDElem -> Attributes -> XNode mkDTDNode = XDTD {-# INLINE mkDTDNode #-} instance (XmlNode a, Tree t) => XmlNode (t a) where isText = isText . getNode {-# INLINE isText #-} isBlob = isBlob . getNode {-# INLINE isBlob #-} isCharRef = isCharRef . getNode {-# INLINE isCharRef #-} isEntityRef = isEntityRef . getNode {-# INLINE isEntityRef #-} isCmt = isCmt . getNode {-# INLINE isCmt #-} isCdata = isCdata . getNode {-# INLINE isCdata #-} isPi = isPi . getNode {-# INLINE isPi #-} isElem = isElem . getNode {-# INLINE isElem #-} isRoot = isRoot . getNode {-# INLINE isRoot #-} isDTD = isDTD . getNode {-# INLINE isDTD #-} isAttr = isAttr . getNode {-# INLINE isAttr #-} isError = isError . getNode {-# INLINE isError #-} mkText = mkLeaf . mkText {-# INLINE mkText #-} mkBlob = mkLeaf . mkBlob {-# INLINE mkBlob #-} mkCharRef = mkLeaf . mkCharRef {-# INLINE mkCharRef #-} mkEntityRef = mkLeaf . mkEntityRef {-# INLINE mkEntityRef #-} mkCmt = mkLeaf . mkCmt {-# INLINE mkCmt #-} mkCdata = mkLeaf . mkCdata {-# INLINE mkCdata #-} mkPi n = mkLeaf . mkPi n {-# INLINE mkPi #-} mkError l = mkLeaf . mkError l {-# INLINE mkError #-} getText = getText . getNode {-# INLINE getText #-} getBlob = getBlob . getNode {-# INLINE getBlob #-} getCharRef = getCharRef . getNode {-# INLINE getCharRef #-} getEntityRef = getEntityRef . getNode {-# INLINE getEntityRef #-} getCmt = getCmt . getNode {-# INLINE getCmt #-} getCdata = getCdata . getNode {-# INLINE getCdata #-} getPiName = getPiName . getNode {-# INLINE getPiName #-} getPiContent = getPiContent . getNode {-# INLINE getPiContent #-} getElemName = getElemName . getNode {-# INLINE getElemName #-} getAttrl = getAttrl . getNode {-# INLINE getAttrl #-} getDTDPart = getDTDPart . getNode {-# INLINE getDTDPart #-} getDTDAttrl = getDTDAttrl . getNode {-# INLINE getDTDAttrl #-} getAttrName = getAttrName . getNode {-# INLINE getAttrName #-} getErrorLevel = getErrorLevel . getNode {-# INLINE getErrorLevel #-} getErrorMsg = getErrorMsg . getNode {-# INLINE getErrorMsg #-} changeText = changeNode . changeText {-# INLINE changeText #-} changeBlob = changeNode . changeBlob {-# INLINE changeBlob #-} changeCmt = changeNode . changeCmt {-# INLINE changeCmt #-} changeName = changeNode . changeName {-# INLINE changeName #-} changeElemName = changeNode . changeElemName {-# INLINE changeElemName #-} changeAttrl = changeNode . changeAttrl {-# INLINE changeAttrl #-} changeAttrName = changeNode . changeAttrName {-# INLINE changeAttrName #-} changePiName = changeNode . changePiName {-# INLINE changePiName #-} changeDTDAttrl = changeNode . changeDTDAttrl {-# INLINE changeDTDAttrl #-} mkElement :: QName -> XmlTrees -> XmlTrees -> XmlTree mkElement n al = mkTree (mkElementNode n al) {-# INLINE mkElement #-} mkRoot :: XmlTrees -> XmlTrees -> XmlTree mkRoot al = mkTree (mkElementNode (mkName t_root) al) mkAttr :: QName -> XmlTrees -> XmlTree mkAttr n = mkTree (mkAttrNode n) {-# INLINE mkAttr #-} mkDTDElem :: DTDElem -> Attributes -> XmlTrees -> XmlTree mkDTDElem e al = mkTree (mkDTDNode e al) addAttr :: XmlTree -> XmlTrees -> XmlTrees addAttr a al | isAttr a = add al | otherwise = al where an = (qualifiedName . fromJust . getAttrName) a add [] = [a] add (a1:al1) | isAttr a1 && (qualifiedName . fromJust . getAttrName) a1 == an = a : al1 | otherwise = a1 : add al1 mergeAttrl :: XmlTrees -> XmlTrees -> XmlTrees mergeAttrl = foldr addAttr -- ------------------------------------------------------------ -- | weak normalform versions of constructors mkElement' :: QName -> XmlTrees -> XmlTrees -> XmlTree mkElement' n al cl = id $!! mkElement n al cl {-# INLINE mkElement' #-} mkRoot' :: XmlTrees -> XmlTrees -> XmlTree mkRoot' al cl = id $!! mkRoot al cl {-# INLINE mkRoot' #-} mkAttr' :: QName -> XmlTrees -> XmlTree mkAttr' n av = id $!! mkAttr n av {-# INLINE mkAttr' #-} mkText' :: String -> XmlTree mkText' t = id $!! mkText t {-# INLINE mkText' #-} mkCharRef' :: Int -> XmlTree mkCharRef' i = id $!! mkCharRef i {-# INLINE mkCharRef' #-} mkEntityRef' :: String -> XmlTree mkEntityRef' n = id $!! mkEntityRef n {-# INLINE mkEntityRef' #-} mkCmt' :: String -> XmlTree mkCmt' c = id $!! mkCmt c {-# INLINE mkCmt' #-} mkCdata' :: String -> XmlTree mkCdata' d = id $!! mkCdata d {-# INLINE mkCdata' #-} mkPi' :: QName -> XmlTrees -> XmlTree mkPi' n v = id $!! mkPi n v {-# INLINE mkPi' #-} mkError' :: Int -> String -> XmlTree mkError' l m = id $!! mkError l m {-# INLINE mkError' #-} mkDTDElem' :: DTDElem -> Attributes -> XmlTrees -> XmlTree mkDTDElem' e al cl = id $!! mkDTDElem e al cl {-# INLINE mkDTDElem' #-} -- ------------------------------------------------------------ toText :: XmlTree -> XmlTree toText t | isCharRef t = mkText . (:[]) . toEnum . fromJust . getCharRef $ t | isCdata t = mkText . fromJust . getCdata $ t | otherwise = t concText :: XmlTree -> XmlTree -> XmlTrees concText t1 t2 | isText t1 && isText t2 = (:[]) . mkText $ fromJust (getText t1) ++ fromJust (getText t2) | otherwise = [t1, t2] mergeText :: XmlTree -> XmlTree -> XmlTrees mergeText = concText `on` toText -- ------------------------------------------------------------ hxt-9.3.1.15/src/Text/XML/HXT/DOM/Util.hs0000644000000000000000000001751312465166667015501 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Text.XML.HXT.DOM.Util Copyright : Copyright (C) 2008 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : stable Portability: portable Little useful things for strings, lists and other values -} -- ------------------------------------------------------------ module Text.XML.HXT.DOM.Util ( stringTrim , stringToLower , stringToUpper , stringAll , stringFirst , stringLast , normalizeNumber , normalizeWhitespace , normalizeBlanks , escapeURI , textEscapeXml , stringEscapeXml , attrEscapeXml , stringToInt , stringToHexString , charToHexString , intToHexString , hexStringToInt , decimalStringToInt , doubles , singles , noDoubles , swap , partitionEither , toMaybe , uncurry3 , uncurry4 ) where import Data.Char import Data.List import Data.Maybe -- ------------------------------------------------------------ -- | -- remove leading and trailing whitespace with standard Haskell predicate isSpace stringTrim :: String -> String stringTrim = reverse . dropWhile isSpace . reverse . dropWhile isSpace -- | -- convert string to uppercase with standard Haskell toUpper function stringToUpper :: String -> String stringToUpper = map toUpper -- | -- convert string to lowercase with standard Haskell toLower function stringToLower :: String -> String stringToLower = map toLower -- | find all positions where a string occurs within another string stringAll :: (Eq a) => [a] -> [a] -> [Int] stringAll x = map fst . filter ((x `isPrefixOf`) . snd) . zip [0..] . tails -- | find the position of the first occurence of a string stringFirst :: (Eq a) => [a] -> [a] -> Maybe Int stringFirst x = listToMaybe . stringAll x -- | find the position of the last occurence of a string stringLast :: (Eq a) => [a] -> [a] -> Maybe Int stringLast x = listToMaybe . reverse . stringAll x -- ------------------------------------------------------------ -- | Removes leading \/ trailing whitespaces and leading zeros normalizeNumber :: String -> String normalizeNumber = reverse . dropWhile (== ' ') . reverse . dropWhile (\x -> x == '0' || x == ' ') -- | Reduce whitespace sequences to a single whitespace normalizeWhitespace :: String -> String normalizeWhitespace = unwords . words -- | replace all whitespace chars by blanks normalizeBlanks :: String -> String normalizeBlanks = map (\ x -> if isSpace x then ' ' else x) -- ------------------------------------------------------------ -- | Escape all disallowed characters in URI -- references (see ) escapeURI :: String -> String escapeURI ref = concatMap replace ref where notAllowed :: Char -> Bool notAllowed c = c < '\31' || c `elem` ['\DEL', ' ', '<', '>', '\"', '{', '}', '|', '\\', '^', '`' ] replace :: Char -> String replace c | notAllowed c = '%' : charToHexString c | otherwise = [c] -- ------------------------------------------------------------ escapeXml :: String -> String -> String escapeXml escSet = concatMap esc where esc c | c `elem` escSet = "&#" ++ show (fromEnum c) ++ ";" | otherwise = [c] -- | -- escape XML chars <, >, ", and ampercent by transforming them into character references -- -- see also : 'attrEscapeXml' stringEscapeXml :: String -> String stringEscapeXml = escapeXml "<>\"\'&" -- | -- escape XML chars < and ampercent by transforming them into character references, used for escaping text nodes -- -- see also : 'attrEscapeXml' textEscapeXml :: String -> String textEscapeXml = escapeXml "<&" -- | -- escape XML chars in attribute values, same as stringEscapeXml, but none blank whitespace -- is also escaped -- -- see also : 'stringEscapeXml' attrEscapeXml :: String -> String attrEscapeXml = escapeXml "<>\"\'&\n\r\t" stringToInt :: Int -> String -> Int stringToInt base digits = sign * (foldl acc 0 $ concatMap digToInt digits1) where splitSign ('-' : ds) = ((-1), ds) splitSign ('+' : ds) = ( 1 , ds) splitSign ds = ( 1 , ds) (sign, digits1) = splitSign digits digToInt c | c >= '0' && c <= '9' = [ord c - ord '0'] | c >= 'A' && c <= 'Z' = [ord c - ord 'A' + 10] | c >= 'a' && c <= 'z' = [ord c - ord 'a' + 10] | otherwise = [] acc i1 i0 = i1 * base + i0 -- | -- convert a string of hexadecimal digits into an Int hexStringToInt :: String -> Int hexStringToInt = stringToInt 16 -- | -- convert a string of digits into an Int decimalStringToInt :: String -> Int decimalStringToInt = stringToInt 10 -- | -- convert a string into a hexadecimal string applying charToHexString -- -- see also : 'charToHexString' stringToHexString :: String -> String stringToHexString = concatMap charToHexString -- | -- convert a char (byte) into a 2-digit hexadecimal string -- -- see also : 'stringToHexString', 'intToHexString' charToHexString :: Char -> String charToHexString c = [ fourBitsToChar (c' `div` 16) , fourBitsToChar (c' `mod` 16) ] where c' = fromEnum c -- | -- convert a none negative Int into a hexadecimal string -- -- see also : 'charToHexString' intToHexString :: Int -> String intToHexString i | i == 0 = "0" | i > 0 = intToStr i | otherwise = error ("intToHexString: negative argument " ++ show i) where intToStr 0 = "" intToStr i' = intToStr (i' `div` 16) ++ [fourBitsToChar (i' `mod` 16)] fourBitsToChar :: Int -> Char fourBitsToChar i = "0123456789ABCDEF" !! i -- ------------------------------------------------------------ -- | -- take all elements of a list which occur more than once. The result does not contain doubles. -- (doubles . doubles == doubles) doubles :: Eq a => [a] -> [a] doubles = doubles' [] where doubles' acc [] = acc doubles' acc (e : s) | e `elem` s && e `notElem` acc = doubles' (e:acc) s | otherwise = doubles' acc s -- | -- drop all elements from a list which occur more than once. singles :: Eq a => [a] -> [a] singles = singles' [] where singles' acc [] = acc singles' acc (e : s) | e `elem` s || e `elem` acc = singles' acc s | otherwise = singles' (e : acc) s -- | -- remove duplicates from list noDoubles :: Eq a => [a] -> [a] noDoubles [] = [] noDoubles (e : s) | e `elem` s = noDoubles s | otherwise = e : noDoubles s -- ------------------------------------------------------------ swap :: (a,b) -> (b,a) swap (x,y) = (y,x) partitionEither :: [Either a b] -> ([a], [b]) partitionEither = foldr (\x ~(ls,rs) -> either (\l -> (l:ls,rs)) (\r -> (ls,r:rs)) x) ([],[]) toMaybe :: Bool -> a -> Maybe a toMaybe False _ = Nothing toMaybe True x = Just x -- ------------------------------------------------------------ -- | mothers little helpers for to much curry uncurry3 :: (a -> b -> c -> d) -> (a, b, c) -> d uncurry3 f ~(a, b, c) = f a b c uncurry4 :: (a -> b -> c -> d -> e) -> (a, b, c, d) -> e uncurry4 f ~(a, b, c, d) = f a b c d -- ------------------------------------------------------------ hxt-9.3.1.15/src/Text/XML/HXT/DOM/ShowXml.hs0000644000000000000000000004430212465166667016161 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Text.XML.HXT.DOM.ShowXml Copyright : Copyright (C) 2008-9 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : stable Portability: portable XML tree conversion to external string representation -} -- ------------------------------------------------------------ module Text.XML.HXT.DOM.ShowXml ( xshow , xshowBlob , xshow' , xshow'' ) where import Prelude hiding (showChar, showString) import Data.Maybe import Data.Tree.Class import Data.Tree.NTree.TypeDefs import Text.XML.HXT.DOM.TypeDefs import Text.XML.HXT.DOM.XmlKeywords import Text.XML.HXT.DOM.XmlNode (getDTDAttrl, mkDTDElem) -- ----------------------------------------------------------------------------- -- -- the toString conversion functions -- | -- convert a list of trees into a string -- -- see also : 'xmlTreesToText' for filter version, 'Text.XML.HXT.Parser.XmlParsec.xread' for the inverse operation xshow :: XmlTrees -> String xshow [(NTree (XText s) _)] = s -- special case optimisation xshow [(NTree (XBlob b) _)] = blobToString b -- special case optimisation xshow ts = showXmlTrees showString showString ts "" -- | convert an XML tree into a binary large object (a bytestring) xshowBlob :: XmlTrees -> Blob xshowBlob [(NTree (XBlob b) _)] = b -- special case optimisation xshowBlob [(NTree (XText s) _)] = stringToBlob s -- special case optimisation xshowBlob ts = stringToBlob $ xshow ts -- | -- convert a list of trees into a blob. -- -- Apply a quoting function for XML quoting of content, -- a 2. quoting funtion for attribute values -- and an encoding function after tree conversion xshow' :: (Char -> StringFct) -> (Char -> StringFct) -> (Char -> StringFct) -> XmlTrees -> Blob xshow' cquot aquot enc ts = stringToBlob $ (concatMap' enc (showTrees ts "")) "" where showTrees = showXmlTrees (concatMap' cquot) (concatMap' aquot) xshow'' :: (Char -> StringFct) -> (Char -> StringFct) -> XmlTrees -> String xshow'' cquot aquot ts = showTrees ts "" where showTrees = showXmlTrees (concatMap' cquot) (concatMap' aquot) -- ------------------------------------------------------------ type StringFct = String -> String -- ------------------------------------------------------------ showXmlTrees :: (String -> StringFct) -> (String -> StringFct) -> XmlTrees -> StringFct showXmlTrees cf af = showTrees where -- ------------------------------------------------------------ showTrees :: XmlTrees -> StringFct showTrees = foldr (.) id . map showXmlTree {-# INLINE showTrees #-} showTrees' :: XmlTrees -> StringFct showTrees' = foldr (\ x y -> x . showNL . y) id . map showXmlTree {-# INLINE showTrees' #-} -- ------------------------------------------------------------ showXmlTree :: XmlTree -> StringFct showXmlTree (NTree (XText s) _) -- common cases first = cf s showXmlTree (NTree (XTag t al) []) = showLt . showQName t . showTrees al . showSlash . showGt showXmlTree (NTree (XTag t al) cs) = showLt . showQName t . showTrees al . showGt . showTrees cs . showLt . showSlash . showQName t . showGt showXmlTree (NTree (XAttr an) cs) = showBlank . showQName an . showEq . showQuot . af (xshow cs) . showQuot showXmlTree (NTree (XBlob b) _) = cf . blobToString $ b showXmlTree (NTree (XCharRef i) _) = showString "&#" . showString (show i) . showChar ';' showXmlTree (NTree (XEntityRef r) _) = showString "&" . showString r . showChar ';' showXmlTree (NTree (XCmt c) _) = showString "" showXmlTree (NTree (XCdata d) _) = showString "" showXmlTree (NTree (XPi n al) _) = showString "" where showPiAttr :: XmlTree -> StringFct showPiAttr a@(NTree (XAttr an) cs) | qualifiedName an == a_value = showBlank . showTrees cs | otherwise = showXmlTree a showPiAttr a = showXmlTree a -- id showXmlTree (NTree (XDTD de al) cs) = showXmlDTD de al cs showXmlTree (NTree (XError l e) _) = showString "" -- ------------------------------------------------------------ showXmlDTD :: DTDElem -> Attributes -> XmlTrees -> StringFct showXmlDTD DOCTYPE al cs = showString "" where showInternalDTD [] = id showInternalDTD ds = showString " [\n" . showTrees' ds . showChar ']' showXmlDTD ELEMENT al cs = showString "" showXmlDTD ATTLIST al cs = showString " ( showPEAttr . fromMaybe [] . getDTDAttrl . head ) cs Just a -> ( showString a . showAttrType (lookup1 a_type al) . showAttrKind (lookup1 a_kind al) ) ) ) . showString " >" where showAttrType t | t == k_peref = showBlank . showPEAttr al | t == k_enumeration = showAttrEnum | t == k_notation = showBlank . showString k_notation . showAttrEnum | otherwise = showBlank . showString t showAttrEnum = showString " (" . foldr1 (\ s1 s2 -> s1 . showString " | " . s2) (map (getEnum . fromMaybe [] . getDTDAttrl) cs) . showString ")" where getEnum :: Attributes -> StringFct getEnum l = showAttr a_name l . showPEAttr l showAttrKind k | k == k_default = showBlank . showQuoteString (lookup1 a_default al) | k == k_fixed = showBlank . showString k_fixed . showBlank . showQuoteString (lookup1 a_default al) | k == "" = id | otherwise = showBlank . showString k showXmlDTD NOTATION al _cs = showString "" showXmlDTD PENTITY al cs = showEntity "% " al cs showXmlDTD ENTITY al cs = showEntity "" al cs showXmlDTD PEREF al _cs = showPEAttr al showXmlDTD CONDSECT _ (c1 : cs) = showString "" showXmlDTD CONTENT al cs = showContent (mkDTDElem CONTENT al cs) showXmlDTD NAME al _cs = showAttr a_name al showXmlDTD de al _cs = showString "NOT YET IMPLEMETED: " . showString (show de) . showBlank . showString (show al) . showString " [...]\n" -- ------------------------------------------------------------ showEntity :: String -> Attributes -> XmlTrees -> StringFct showEntity kind al cs = showString "" showEntityValue :: XmlTrees -> StringFct showEntityValue [] = id showEntityValue cs = showBlank . showQuot . af (xshow cs) . showQuot -- ------------------------------------------------------------ showContent :: XmlTree -> StringFct showContent (NTree (XDTD de al) cs) = cont2String de where cont2String :: DTDElem -> StringFct cont2String NAME = showAttr a_name al cont2String PEREF = showPEAttr al cont2String CONTENT = showLpar . foldr1 (combine (lookup1 a_kind al)) (map showContent cs) . showRpar . showAttr a_modifier al cont2String n = error ("cont2string " ++ show n ++ " is undefined") combine k s1 s2 = s1 . showString ( if k == v_seq then ", " else " | " ) . s2 showContent n = showXmlTree n -- ------------------------------------------------------------ showElemType :: String -> XmlTrees -> StringFct showElemType t cs | t == v_pcdata = showLpar . showString v_pcdata . showRpar | t == v_mixed && (not . null) cs = showLpar . showString v_pcdata . ( foldr (.) id . map (mixedContent . selAttrl . getNode) ) cs1 . showRpar . showAttr a_modifier al1 | t == v_mixed -- incorrect tree, e.g. after erronius pe substitution = showLpar . showRpar | t == v_children && (not . null) cs = showContent (head cs) | t == v_children = showLpar . showRpar | t == k_peref = foldr (.) id . map showContent $ cs | otherwise = showString t where [(NTree (XDTD CONTENT al1) cs1)] = cs mixedContent :: Attributes -> StringFct mixedContent l = showString " | " . showAttr a_name l . showPEAttr l selAttrl (XDTD _ as) = as selAttrl (XText tex) = [(a_name, tex)] selAttrl _ = [] -- ------------------------------------------------------------ showQName :: QName -> StringFct showQName = qualifiedName' {-# INLINE showQName #-} -- ------------------------------------------------------------ showQuoteString :: String -> StringFct showQuoteString s = showQuot . showString s . showQuot -- ------------------------------------------------------------ showAttr :: String -> Attributes -> StringFct showAttr k al = showString (fromMaybe "" . lookup k $ al) -- ------------------------------------------------------------ showPEAttr :: Attributes -> StringFct showPEAttr al = showPE (lookup a_peref al) where showPE (Just pe) = showChar '%' . showString pe . showChar ';' showPE Nothing = id -- ------------------------------------------------------------ showExternalId :: Attributes -> StringFct showExternalId al = id2Str (lookup k_system al) (lookup k_public al) where id2Str Nothing Nothing = id id2Str (Just s) Nothing = showBlank . showString k_system . showBlank . showQuoteString s id2Str Nothing (Just p) = showBlank . showString k_public . showBlank . showQuoteString p id2Str (Just s) (Just p) = showBlank . showString k_public . showBlank . showQuoteString p . showBlank . showQuoteString s -- ------------------------------------------------------------ showNData :: Attributes -> StringFct showNData al = nd2Str (lookup k_ndata al) where nd2Str Nothing = id nd2Str (Just v) = showBlank . showString k_ndata . showBlank . showString v -- ------------------------------------------------------------ showBlank, showEq, showLt, showGt, showSlash, showQuot, showLpar, showRpar, showNL :: StringFct showBlank = showChar ' ' {-# INLINE showBlank #-} showEq = showChar '=' {-# INLINE showEq #-} showLt = showChar '<' {-# INLINE showLt #-} showGt = showChar '>' {-# INLINE showGt #-} showSlash = showChar '/' {-# INLINE showSlash #-} showQuot = showChar '\"' {-# INLINE showQuot #-} showLpar = showChar '(' {-# INLINE showLpar #-} showRpar = showChar ')' {-# INLINE showRpar #-} showNL = showChar '\n' {-# INLINE showNL #-} showChar :: Char -> StringFct showChar = (:) {-# INLINE showChar #-} showString :: String -> StringFct showString = (++) {-# INLINE showString #-} concatMap' :: (Char -> StringFct) -> String -> StringFct concatMap' f = foldr (\ x r -> f x . r) id {-# INLINE concatMap' #-} -- ----------------------------------------------------------------------------- hxt-9.3.1.15/src/Text/XML/HXT/DOM/Interface.hs0000644000000000000000000000204712465166667016460 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Text.XML.HXT.DOM.Interface Copyright : Copyright (C) 2008 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : stable Portability: portable The interface to the primitive DOM data types and constants and utility functions -} -- ------------------------------------------------------------ module Text.XML.HXT.DOM.Interface ( module Text.XML.HXT.DOM.XmlKeywords , module Text.XML.HXT.DOM.TypeDefs , module Text.XML.HXT.DOM.Util , module Text.XML.HXT.DOM.MimeTypes , module Data.String.EncodingNames ) where import Text.XML.HXT.DOM.XmlKeywords -- constants import Text.XML.HXT.DOM.TypeDefs -- XML Tree types import Text.XML.HXT.DOM.Util import Text.XML.HXT.DOM.MimeTypes -- mime types related stuff import Data.String.EncodingNames -- char encoding names for readDocument -- ------------------------------------------------------------ hxt-9.3.1.15/src/Text/XML/HXT/DOM/MimeTypes.hs0000644000000000000000000001113112465166667016466 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Text.XML.HXT.DOM.MimeTypes Copyright : Copyright (C) 2008 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : experimental Portability: portable mime type related data and functions -} -- ------------------------------------------------------------ module Text.XML.HXT.DOM.MimeTypes where import Control.Monad ( mplus ) import qualified Data.ByteString as B import qualified Data.ByteString.Char8 as C import Data.Char import Data.List import qualified Data.Map as M import Data.Maybe import Text.XML.HXT.DOM.MimeTypeDefaults -- ------------------------------------------------------------ type MimeTypeTable = M.Map String String -- ------------------------------------------------------------ -- mime types -- -- see RFC \"http:\/\/www.rfc-editor.org\/rfc\/rfc3023.txt\" application_xhtml, application_xml, application_xml_external_parsed_entity, application_xml_dtd, text_html, text_pdf, text_plain, text_xdtd, text_xml, text_xml_external_parsed_entity :: String application_xhtml = "application/xhtml+xml" application_xml = "application/xml" application_xml_external_parsed_entity = "application/xml-external-parsed-entity" application_xml_dtd = "application/xml-dtd" text_html = "text/html" text_pdf = "text/pdf" text_plain = "text/plain" text_xdtd = "text/x-dtd" text_xml = "text/xml" text_xml_external_parsed_entity = "text/xml-external-parsed-entity" isTextMimeType :: String -> Bool isTextMimeType = ("text/" `isPrefixOf`) isHtmlMimeType :: String -> Bool isHtmlMimeType t = t == text_html isXmlMimeType :: String -> Bool isXmlMimeType t = ( t `elem` [ application_xhtml , application_xml , application_xml_external_parsed_entity , application_xml_dtd , text_xml , text_xml_external_parsed_entity , text_xdtd ] || "+xml" `isSuffixOf` t -- application/mathml+xml ) -- or image/svg+xml defaultMimeTypeTable :: MimeTypeTable defaultMimeTypeTable = M.fromList mimeTypeDefaults extensionToMimeType :: String -> MimeTypeTable -> String extensionToMimeType e = fromMaybe "" . lookupMime where lookupMime t = M.lookup e t -- try exact match `mplus` M.lookup (map toLower e) t -- else try lowercase match `mplus` M.lookup (map toUpper e) t -- else try uppercase match -- ------------------------------------------------------------ readMimeTypeTable :: FilePath -> IO MimeTypeTable readMimeTypeTable inp = do cb <- B.readFile inp return . M.fromList . parseMimeTypeTable . C.unpack $ cb parseMimeTypeTable :: String -> [(String, String)] parseMimeTypeTable = concat . map buildPairs . map words . filter (not . ("#" `isPrefixOf`)) . filter (not . all (isSpace)) . lines where buildPairs :: [String] -> [(String, String)] buildPairs [] = [] buildPairs (mt:exts) = map (\ x -> (x, mt)) $ exts -- ------------------------------------------------------------ hxt-9.3.1.15/src/Text/XML/HXT/DOM/MimeTypeDefaults.hs0000644000000000000000000005451512465166667020010 0ustar0000000000000000-- | default mime type table -- -- this file is generated from file /etc/mime.types module Text.XML.HXT.DOM.MimeTypeDefaults where -- | the table with the mapping from file name extensions to mime types mimeTypeDefaults :: [(String, String)] mimeTypeDefaults = [ ("123", "application/vnd.lotus-1-2-3") , ("3ds", "image/x-3ds") , ("3g2", "video/x-3gpp2") , ("3gp", "video/3gpp") , ("669", "audio/x-mod") , ("BAY", "image/x-dcraw") , ("BLEND", "application/x-blender") , ("BMQ", "image/x-dcraw") , ("C", "text/x-c++src") , ("CR2", "image/x-dcraw") , ("CRW", "image/x-dcraw") , ("CS1", "image/x-dcraw") , ("CSSL", "text/css") , ("DC2", "image/x-dcraw") , ("DCR", "image/x-dcraw") , ("FFF", "image/x-dcraw") , ("K25", "image/x-dcraw") , ("KDC", "image/x-dcraw") , ("MOS", "image/x-dcraw") , ("MRW", "image/x-dcraw") , ("NEF", "image/x-dcraw") , ("NSV", "video/x-nsv") , ("ORF", "image/x-dcraw") , ("PAR2", "application/x-par2") , ("PEF", "image/x-dcraw") , ("RAF", "image/x-dcraw") , ("RDC", "image/x-dcraw") , ("SRF", "image/x-dcraw") , ("TTC", "application/x-font-ttf") , ("X3F", "image/x-dcraw") , ("XM", "audio/x-mod") , ("Z", "application/x-compress") , ("a", "application/x-archive") , ("aac", "audio/x-aac") , ("abw", "application/x-abiword") , ("abw.CRASHED", "application/x-abiword") , ("abw.gz", "application/x-abiword") , ("ac3", "audio/ac3") , ("adb", "text/x-adasrc") , ("ads", "text/x-adasrc") , ("afm", "application/x-font-afm") , ("ag", "image/x-applix-graphics") , ("ai", "application/illustrator") , ("aif", "audio/x-aiff") , ("aif", "audio/x-aiff") , ("aifc", "audio/x-aiff") , ("aiff", "audio/x-aiff") , ("aiff", "audio/x-aiff") , ("al", "application/x-perl") , ("anim[1-9j]", "video/x-anim") , ("aop", "application/x-frontline") , ("arj", "application/x-arj") , ("as", "application/x-applix-spreadsheet") , ("asax", "application/x-asax") , ("asc", "text/plain") , ("ascx", "application/x-ascx") , ("asf", "video/x-ms-asf") , ("ashx", "application/x-ashx") , ("asix", "application/x-asix") , ("asmx", "application/x-asmx") , ("asp", "application/x-asp") , ("aspx", "application/x-aspx") , ("asx", "video/x-ms-asf") , ("au", "audio/basic") , ("avi", "video/x-msvideo") , ("aw", "application/x-applix-word") , ("axd", "application/x-axd") , ("bak", "application/x-trash") , ("bay", "image/x-dcraw") , ("bcpio", "application/x-bcpio") , ("bdf", "application/x-font-bdf") , ("bib", "text/x-bibtex") , ("bin", "application/octet-stream") , ("bin", "application/x-stuffit") , ("blend", "application/x-blender") , ("blender", "application/x-blender") , ("bmp", "image/bmp") , ("bmq", "image/x-dcraw") , ("boo", "text/x-boo") , ("bz", "application/x-bzip") , ("bz", "application/x-bzip") , ("bz2", "application/x-bzip") , ("bz2", "application/x-bzip") , ("c", "text/x-csrc") , ("c++", "text/x-c++src") , ("caves", "application/x-gnome-stones") , ("cc", "text/x-c++src") , ("cdf", "application/x-netcdf") , ("cdr", "application/vnd.corel-draw") , ("cer", "application/x-x509-ca-cert") , ("cert", "application/x-x509-ca-cert") , ("cgi", "application/x-cgi") , ("cgm", "image/cgm") , ("chm", "application/x-chm") , ("chrt", "application/x-kchart") , ("cht", "application/chemtool") , ("class", "application/x-java") , ("cls", "text/x-tex") , ("cmbx", "application/x-cmbx") , ("config", "application/x-config") , ("connection", "application/x-gnome-db-connection") , ("cpio", "application/x-cpio") , ("cpio.gz", "application/x-cpio-compressed") , ("cpp", "text/x-c++src") , ("cr2", "image/x-dcraw") , ("crt", "application/x-x509-ca-cert") , ("crw", "image/x-dcraw") , ("cs", "text/x-csharp") , ("cs1", "image/x-dcraw") , ("csh", "application/x-csh") , ("css", "text/css") , ("csv", "text/x-comma-separated-values") , ("cue", "application/x-cue") , ("cur", "image/x-win-bitmap") , ("cxx", "text/x-c++src") , ("d", "text/x-dsrc") , ("dat", "video/mpeg") , ("database", "application/x-gnome-db-database") , ("dbf", "application/x-dbase") , ("dc", "application/x-dc-rom") , ("dc2", "image/x-dcraw") , ("dcl", "text/x-dcl") , ("dcm", "application/dicom") , ("dcr", "image/x-dcraw") , ("deb", "application/x-deb") , ("der", "application/x-x509-ca-cert") , ("desktop", "application/x-desktop") , ("devhelp", "application/x-devhelp") , ("dia", "application/x-dia-diagram") , ("dif", "video/dv") , ("diff", "text/x-patch") , ("disco", "application/x-disco") , ("display", "application/x-gdesklets-display") , ("djv", "image/vnd.djvu") , ("djvu", "image/vnd.djvu") , ("doc", "application/msword") , ("docbook", "application/docbook+xml") , ("dsl", "text/x-dsl") , ("dtd", "text/x-dtd") , ("dv", "video/dv") , ("dvi", "application/x-dvi") , ("dwg", "image/vnd.dwg") , ("dxf", "image/vnd.dxf") , ("ear", "application/x-java-archive") , ("egon", "application/x-egon") , ("el", "text/x-emacs-lisp") , ("eps", "image/x-eps") , ("epsf", "image/x-eps") , ("epsi", "image/x-eps") , ("etheme", "application/x-e-theme") , ("etx", "text/x-setext") , ("exe", "application/x-executable") , ("exe", "application/x-ms-dos-executable") , ("ez", "application/andrew-inset") , ("f", "text/x-fortran") , ("fff", "image/x-dcraw") , ("fig", "image/x-xfig") , ("fits", "image/x-fits") , ("flac", "audio/x-flac") , ("flc", "video/x-flic") , ("fli", "video/x-flic") , ("flw", "application/x-kivio") , ("fo", "text/x-xslfo") , ("g3", "image/fax-g3") , ("gb", "application/x-gameboy-rom") , ("gcrd", "text/directory") , ("gen", "application/x-genesis-rom") , ("gf", "application/x-tex-gf") , ("gg", "application/x-sms-rom") , ("gif", "image/gif") , ("glabels", "application/x-glabels") , ("glade", "application/x-glade") , ("gmo", "application/x-gettext-translation") , ("gnc", "application/x-gnucash") , ("gnucash", "application/x-gnucash") , ("gnumeric", "application/x-gnumeric") , ("gpg", "application/pgp-encrypted") , ("gra", "application/x-graphite") , ("gsf", "application/x-font-type1") , ("gtar", "application/x-gtar") , ("gz", "application/x-gzip") , ("h", "text/x-chdr") , ("h++", "text/x-chdr") , ("hdf", "application/x-hdf") , ("hh", "text/x-c++hdr") , ("hp", "text/x-chdr") , ("hpgl", "application/vnd.hp-hpgl") , ("hs", "text/x-haskell") , ("htm", "text/html") , ("html", "text/html") , ("ica", "application/x-ica") , ("icb", "image/x-icb") , ("ico", "image/x-ico") , ("ics", "text/calendar") , ("idl", "text/x-idl") , ("ief", "image/ief") , ("iff", "image/x-iff") , ("il", "text/x-msil") , ("ilbm", "image/x-ilbm") , ("iso", "application/x-cd-image") , ("it", "audio/x-it") , ("jam", "application/x-jamin") , ("jar", "application/x-jar") , ("jar", "application/x-java-archive") , ("java", "text/x-java") , ("jng", "image/x-jng") , ("jnlp", "application/x-java-jnlp-file") , ("jp2", "image/jpeg2000") , ("jpe", "image/jpeg") , ("jpeg", "image/jpeg") , ("jpg", "image/jpeg") , ("jpr", "application/x-jbuilder-project") , ("jpx", "application/x-jbuilder-project") , ("js", "application/x-javascript") , ("js", "text/x-js") , ("k", "application/x-tex-pk") , ("k25", "image/x-dcraw") , ("karbon", "application/x-karbon") , ("kdc", "image/x-dcraw") , ("kdelnk", "application/x-desktop") , ("kfo", "application/x-kformula") , ("kil", "application/x-killustrator") , ("kino", "application/x-smil") , ("kon", "application/x-kontour") , ("kpm", "application/x-kpovmodeler") , ("kpr", "application/x-kpresenter") , ("kpt", "application/x-kpresenter") , ("kra", "application/x-krita") , ("ksp", "application/x-kspread") , ("kud", "application/x-kugar") , ("kwd", "application/x-kword") , ("kwt", "application/x-kword") , ("la", "application/x-shared-library-la") , ("lha", "application/x-lha") , ("lhs", "text/x-literate-haskell") , ("lhz", "application/x-lhz") , ("log", "text/x-log") , ("ltx", "text/x-tex") , ("lwo", "image/x-lwo") , ("lwob", "image/x-lwo") , ("lws", "image/x-lws") , ("lyx", "application/x-lyx") , ("lzh", "application/x-lha") , ("lzh", "application/x-lha") , ("lzo", "application/x-lzop") , ("m", "text/x-objcsrc") , ("m15", "audio/x-mod") , ("m3u", "audio/x-mpegurl") , ("m4a", "audio/x-m4a") , ("man", "application/x-troff-man") , ("master", "application/x-master-page") , ("md", "application/x-genesis-rom") , ("mdp", "application/x-mdp") , ("mds", "application/x-mds") , ("mdsx", "application/x-mdsx") , ("me", "text/x-troff-me") , ("mergeant", "application/x-mergeant") , ("mgp", "application/x-magicpoint") , ("mid", "audio/midi") , ("midi", "audio/midi") , ("mif", "application/x-mif") , ("mkv", "application/x-matroska") , ("mm", "text/x-troff-mm") , ("mml", "text/mathml") , ("mng", "video/x-mng") , ("moc", "text/x-moc") , ("mod", "audio/x-mod") , ("moov", "video/quicktime") , ("mos", "image/x-dcraw") , ("mov", "video/quicktime") , ("movie", "video/x-sgi-movie") , ("mp2", "video/mpeg") , ("mp3", "audio/mpeg") , ("mpe", "video/mpeg") , ("mpeg", "video/mpeg") , ("mpg", "video/mpeg") , ("mps", "application/x-mps") , ("mrproject", "application/x-planner") , ("mrw", "image/x-dcraw") , ("ms", "text/x-troff-ms") , ("msod", "image/x-msod") , ("msx", "application/x-msx-rom") , ("mtm", "audio/x-mod") , ("n", "text/x-nemerle") , ("n64", "application/x-n64-rom") , ("nb", "application/mathematica") , ("nc", "application/x-netcdf") , ("nef", "image/x-dcraw") , ("nes", "application/x-nes-rom") , ("nsv", "video/x-nsv") , ("o", "application/x-object") , ("obj", "application/x-tgif") , ("oda", "application/oda") , ("odb", "application/vnd.oasis.opendocument.database") , ("odc", "application/vnd.oasis.opendocument.chart") , ("odf", "application/vnd.oasis.opendocument.formula") , ("odg", "application/vnd.oasis.opendocument.graphics") , ("odi", "application/vnd.oasis.opendocument.image") , ("odm", "application/vnd.oasis.opendocument.text-master") , ("odp", "application/vnd.oasis.opendocument.presentation") , ("ods", "application/vnd.oasis.opendocument.spreadsheet") , ("odt", "application/vnd.oasis.opendocument.text") , ("ogg", "application/ogg") , ("old", "application/x-trash") , ("oleo", "application/x-oleo") , ("orf", "image/x-dcraw") , ("otg", "application/vnd.oasis.opendocument.graphics-template") , ("oth", "application/vnd.oasis.opendocument.text-web") , ("otp", "application/vnd.oasis.opendocument.presentation-template") , ("ots", "application/vnd.oasis.opendocument.spreadsheet-template") , ("ott", "application/vnd.oasis.opendocument.text-template") , ("p", "text/x-pascal") , ("p12", "application/x-pkcs12") , ("p7s", "application/pkcs7-signature") , ("par2", "application/x-par2") , ("pas", "text/x-pascal") , ("patch", "text/x-patch") , ("pbm", "image/x-portable-bitmap") , ("pcd", "image/x-photo-cd") , ("pcf", "application/x-font-pcf") , ("pcf.Z", "application/x-font-type1") , ("pcf.gz", "application/x-font-pcf") , ("pcl", "application/vnd.hp-pcl") , ("pdb", "application/vnd.palm") , ("pdb", "application/x-palm-database") , ("pdf", "application/pdf") , ("pef", "image/x-dcraw") , ("pem", "application/x-x509-ca-cert") , ("perl", "application/x-perl") , ("pfa", "application/x-font-type1") , ("pfb", "application/x-font-type1") , ("pfx", "application/x-pkcs12") , ("pgm", "image/x-portable-graymap") , ("pgn", "application/x-chess-pgn") , ("pgp", "application/pgp") , ("pgp", "application/pgp-encrypted") , ("php", "application/x-php") , ("php3", "application/x-php") , ("php4", "application/x-php") , ("pict", "image/x-pict") , ("pict1", "image/x-pict") , ("pict2", "image/x-pict") , ("pkr", "application/pgp-keys") , ("pl", "application/x-perl") , ("planner", "application/x-planner") , ("pln", "application/x-planperfect") , ("pls", "audio/x-scpls") , ("pls", "audio/x-scpls") , ("pm", "application/x-perl") , ("png", "image/png") , ("pnm", "image/x-portable-anymap") , ("po", "text/x-gettext-translation") , ("pot", "application/vnd.ms-powerpoint") , ("pot", "text/x-gettext-translation-template") , ("ppm", "image/x-portable-pixmap") , ("pps", "application/vnd.ms-powerpoint") , ("ppt", "application/vnd.ms-powerpoint") , ("ppz", "application/vnd.ms-powerpoint") , ("prc", "application/x-palm-database") , ("prj", "application/x-anjuta-project") , ("prjx", "application/x-prjx") , ("ps", "application/postscript") , ("ps.gz", "application/x-gzpostscript") , ("psd", "image/x-psd") , ("psf", "application/x-font-linux-psf") , ("psid", "audio/prs.sid") , ("pto", "application/x-ptoptimizer-script") , ("pw", "application/x-pw") , ("py", "text/x-python") , ("pyc", "application/x-python-bytecode") , ("pyo", "application/x-python-bytecode") , ("qif", "application/x-qw") , ("qt", "video/quicktime") , ("qtvr", "video/quicktime") , ("ra", "audio/vnd.rn-realaudio") , ("ra", "audio/x-pn-realaudio") , ("raf", "image/x-dcraw") , ("ram", "audio/x-pn-realaudio") , ("ram", "audio/x-pn-realaudio") , ("rar", "application/x-rar") , ("rar", "application/x-rar-compressed") , ("ras", "image/x-cmu-raster") , ("rdc", "image/x-dcraw") , ("rdf", "text/rdf") , ("rdp", "application/x-rdp") , ("rej", "application/x-reject") , ("rem", "application/x-remoting") , ("resources", "application/x-resources") , ("resx", "application/x-resourcesx") , ("rgb", "image/x-rgb") , ("rle", "image/rle") , ("rm", "application/vnd.rn-realmedia") , ("rm", "audio/x-pn-realaudio") , ("rmm", "audio/x-pn-realaudio") , ("rms", "application/vnd.rn-realmedia-secure") , ("rmvb", "application/vnd.rn-realmedia-vbr") , ("rng", "text/x-rng") , ("roff", "application/x-troff") , ("rpm", "application/x-rpm") , ("rss", "text/rss") , ("rt", "text/vnd.rn-realtext") , ("rtf", "application/rtf") , ("rtx", "text/richtext") , ("rv", "video/vnd.rn-realvideo") , ("s3m", "audio/x-s3m") , ("sam", "application/x-amipro") , ("sc", "application/x-sc") , ("scd", "application/x-scribus") , ("scd.gz", "application/x-scribus") , ("scm", "text/x-scheme") , ("sda", "application/vnd.stardivision.draw") , ("sdc", "application/vnd.stardivision.calc") , ("sdd", "application/vnd.stardivision.impress") , ("sdp", "application/sdp") , ("sdp", "application/vnd.stardivision.impress") , ("sds", "application/vnd.stardivision.chart") , ("sdw", "application/vnd.stardivision.writer") , ("sgi", "image/x-sgi") , ("sgl", "application/vnd.stardivision.writer") , ("sgm", "text/sgml") , ("sgml", "text/sgml") , ("sh", "application/x-shellscript") , ("shar", "application/x-shar") , ("siag", "application/x-siag") , ("sid", "audio/prs.sid") , ("sig", "application/pgp-signature") , ("sik", "application/x-trash") , ("sit", "application/stuffit") , ("sit", "application/x-stuffit") , ("skr", "application/pgp-keys") , ("sla", "application/x-scribus") , ("sla.gz", "application/x-scribus") , ("slk", "text/spreadsheet") , ("smd", "application/vnd.stardivision.mail") , ("smf", "application/vnd.stardivision.math") , ("smi", "application/smil") , ("smi", "application/x-smil") , ("smil", "application/smil") , ("smil", "application/x-smil") , ("sml", "application/smil") , ("sms", "application/x-sms-rom") , ("snd", "audio/basic") , ("so", "application/x-sharedlib") , ("soap", "application/x-soap-remoting") , ("spd", "application/x-font-speedo") , ("sql", "text/x-sql") , ("src", "application/x-wais-source") , ("srf", "image/x-dcraw") , ("ssm", "application/x-streamingmedia") , ("stc", "application/vnd.sun.xml.calc.template") , ("std", "application/vnd.sun.xml.draw.template") , ("sti", "application/vnd.sun.xml.impress.template") , ("stm", "audio/x-stm") , ("stw", "application/vnd.sun.xml.writer.template") , ("sty", "text/x-tex") , ("sun", "image/x-sun-raster") , ("sv4cpio", "application/x-sv4cpio") , ("sv4crc", "application/x-sv4crc") , ("svg", "image/svg+xml") , ("swf", "application/x-shockwave-flash") , ("sxc", "application/vnd.sun.xml.calc") , ("sxd", "application/vnd.sun.xml.draw") , ("sxg", "application/vnd.sun.xml.writer.global") , ("sxi", "application/vnd.sun.xml.impress") , ("sxm", "application/vnd.sun.xml.math") , ("sxw", "application/vnd.sun.xml.writer") , ("sylk", "text/spreadsheet") , ("t", "application/x-troff") , ("tar", "application/x-tar") , ("tar.Z", "application/x-compressed-tar") , ("tar.Z", "application/x-tarz") , ("tar.bz", "application/x-bzip-compressed-tar") , ("tar.bz", "application/x-bzip-compressed-tar") , ("tar.bz2", "application/x-bzip-compressed-tar") , ("tar.bz2", "application/x-bzip-compressed-tar") , ("tar.gz", "application/x-compressed-tar") , ("tar.gz", "application/x-compressed-tar") , ("tar.lzo", "application/x-lzop-compressed-tar") , ("tar.lzo", "application/x-tzo") , ("taz", "application/x-compressed-tar") , ("tbz", "application/x-bzip-compressed-tar") , ("tbz2", "application/x-bzip-compressed-tar") , ("tcl", "text/x-tcl") , ("tex", "text/x-tex") , ("texi", "text/x-texinfo") , ("texinfo", "text/x-texinfo") , ("tga", "image/x-tga") , ("tgz", "application/x-compressed-tar") , ("tgz", "application/x-compressed-tar") , ("theme", "application/x-theme") , ("tif", "image/tiff") , ("tiff", "image/tiff") , ("tk", "text/x-tcl") , ("tm", "text/x-texmacs") , ("toc", "application/x-toc") , ("torrent", "application/x-bittorrent") , ("tr", "application/x-troff") , ("ts", "application/x-linguist") , ("ts", "text/x-texmacs") , ("tsv", "text/tab-separated-values") , ("ttc", "application/x-font-ttf") , ("ttf", "application/x-font-ttf") , ("txt", "text/plain") , ("tzo", "application/x-lzop-compressed-tar") , ("tzo", "application/x-tzo") , ("ui", "application/x-designer") , ("uil", "text/x-uil") , ("ult", "audio/x-mod") , ("uni", "audio/x-mod") , ("uri", "text/x-uri") , ("url", "text/x-uri") , ("ustar", "application/x-ustar") , ("vb", "text/x-vb") , ("vcf", "text/directory") , ("vcs", "text/calendar") , ("vct", "text/directory") , ("vob", "video/mpeg") , ("voc", "audio/x-voc") , ("vor", "application/vnd.stardivision.writer") , ("war", "application/x-java-archive") , ("wav", "audio/x-wav") , ("wb1", "application/x-quattro-pro") , ("wb1", "application/x-quattropro") , ("wb2", "application/x-quattro-pro") , ("wb2", "application/x-quattropro") , ("wb3", "application/x-quattro-pro") , ("wb3", "application/x-quattropro") , ("wk1", "application/vnd.lotus-1-2-3") , ("wk3", "application/vnd.lotus-1-2-3") , ("wk4", "application/vnd.lotus-1-2-3") , ("wks", "application/vnd.lotus-1-2-3") , ("wmf", "image/x-wmf") , ("wml", "text/vnd.wap.wml") , ("wmv", "video/x-ms-wmv") , ("wpd", "application/vnd.wordperfect") , ("wpg", "application/x-wpg") , ("wri", "application/x-mswrite") , ("wrl", "model/vrml") , ("wsdl", "application/x-wsdl") , ("x3f", "image/x-dcraw") , ("xac", "application/x-gnucash") , ("xbel", "application/x-xbel") , ("xbm", "image/x-xbitmap") , ("xcf", "image/x-xcf") , ("xcf.bz2", "image/x-compressed-xcf") , ("xcf.gz", "image/x-compressed-xcf") , ("xds", "text/x-xds") , ("xhtml", "application/xhtml+xml") , ("xi", "audio/x-xi") , ("xla", "application/vnd.ms-excel") , ("xlc", "application/vnd.ms-excel") , ("xld", "application/vnd.ms-excel") , ("xll", "application/vnd.ms-excel") , ("xlm", "application/vnd.ms-excel") , ("xls", "application/vnd.ms-excel") , ("xlt", "application/vnd.ms-excel") , ("xlw", "application/vnd.ms-excel") , ("xm", "audio/x-xm") , ("xmi", "text/x-xmi") , ("xml", "text/xml") , ("xpl", "audio/x-scpls") , ("xpm", "image/x-xpixmap") , ("xsl", "text/x-xsl") , ("xsl", "text/x-xslt") , ("xslfo", "text/x-xslfo") , ("xslt", "text/x-xslt") , ("xul", "application/vnd.mozilla.xul+xml") , ("xwd", "image/x-xwindowdump") , ("zabw", "application/x-abiword") , ("zip", "application/zip") , ("zoo", "application/x-zoo") ] hxt-9.3.1.15/src/Text/XML/HXT/DOM/QualifiedName.hs0000644000000000000000000005451112465166667017267 0ustar0000000000000000{-# LANGUAGE DeriveDataTypeable #-} -- ------------------------------------------------------------ {- | Module : Text.XML.HXT.DOM.QualifiedName Copyright : Copyright (C) 2011 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : stable Portability: portable The types and functions for qualified names -} -- ------------------------------------------------------------ module Text.XML.HXT.DOM.QualifiedName ( QName , XName(unXN) , NsEnv , mkQName , mkName , mkNsName , mkSNsName , mkPrefixLocalPart , equivQName , equivUri , equalQNameBy , namePrefix , localPart , namespaceUri , newXName , nullXName , isNullXName , newQName , mkQName' , namePrefix' , localPart' , namespaceUri' , setNamePrefix' , setLocalPart' , setNamespaceUri' , qualifiedName , qualifiedName' , universalName , universalUri , buildUniversalName , normalizeNsUri , setNamespace -- namespace related functions , isNCName , isWellformedQualifiedName , isWellformedQName , isWellformedNSDecl , isWellformedNameSpaceName , isNameSpaceName , isDeclaredNamespace , xmlNamespaceXName , xmlXName , xmlnsNamespaceXName , xmlnsXName , xmlnsQN , toNsEnv ) where {- import Debug.Trace -} import Control.Arrow ((***)) import Control.DeepSeq import Control.FlatSeq import Data.AssocList import Data.Binary import Data.Char (toLower) import Data.IORef import Data.List (isPrefixOf) import qualified Data.Map as M import Data.Typeable import System.IO.Unsafe (unsafePerformIO) import Text.XML.HXT.DOM.XmlKeywords (a_xml, a_xmlns, xmlNamespace, xmlnsNamespace) import Data.Char.Properties.XMLCharProps (isXmlNCNameChar, isXmlNCNameStartChar) -- ----------------------------------------------------------------------------- -- | XML names are represented by Strings, but these strings do not mix up with normal strings. -- Names are always reduced to normal form, and they are stored internally in a name cache -- for sharing equal names by the same data structure data XName = XN { _idXN :: ! Int -- for optimization of equality test, see Eq instance , unXN :: String } deriving (Typeable) instance Eq XName where (XN id1 _) == (XN id2 _) = id1 == id2 instance Ord XName where compare (XN _ n1) (XN _ n2) = compare n1 n2 {- instance Read XName where readsPrec p str = [ (newXName x, y) | (x, y) <- readsPrec p str ] instance Show XName where show (XN _ s) = show s -} instance NFData XName where rnf (XN _ s) = rnf s instance WNFData XName where rwnf (XN _ s) = rnf s instance Binary XName where put (XN _ s) = put s get = do s <- get return $! newXName s ----------------------------------------------------------------------------- -- | -- Type for the namespace association list, used when propagating namespaces by -- modifying the 'QName' values in a tree type NsEnv = AssocList XName XName ----------------------------------------------------------------------------- -- | -- Namespace support for element and attribute names. -- -- A qualified name consists of a name prefix, a local name -- and a namespace uri. -- All modules, which are not namespace aware, use only the 'localPart' component. -- When dealing with namespaces, the document tree must be processed by 'Text.XML.HXT.Arrow.Namespace.propagateNamespaces' -- to split names of structure \"prefix:localPart\" and label the name with the apropriate namespace uri data QName = QN { localPart' :: ! XName , namePrefix' :: ! XName , namespaceUri' :: ! XName } deriving (Typeable) -- ----------------------------------------------------------------------------- -- | Two QNames are equal if (1. case) namespaces are both empty and the qualified names -- (prefix:localpart) are the same or (2. case) namespaces are set and namespaces and -- local parts are equal instance Eq QName where (QN lp1 px1 ns1) == (QN lp2 px2 ns2) | ns1 /= ns2 = False -- namespaces are set and differ | not (isNullXName ns1) = lp1 == lp2 -- namespaces are set and are equal: local parts must be equal | otherwise = lp1 == lp2 -- no namespaces are set: local parts must be equal && -- and prefixes are not set or they are equal px1 == px2 instance Ord QName where compare (QN lp1 px1 ns1) (QN lp2 px2 ns2) | isNullXName ns1 && isNullXName ns2 -- no namespaces set: px is significant = compare (px1, lp1) (px2, lp2) | otherwise -- namespace aware cmp: ns is significant, px is irrelevant = compare (lp1, ns1) (lp2, ns2) instance NFData QName where rnf x = seq x () instance WNFData QName instance Show QName where show = showQN -- ----------------------------------------------------------------------------- instance Binary QName where put (QN lp px ns) = put (unXN px) >> put (unXN lp) >> put (unXN ns) get = do px <- get lp <- get ns <- get return $! newNsName lp px ns -- ^^ -- strict apply !!! -- build the QNames strict, else the name sharing optimization will not be in effect -- ----------------------------------------------------------------------------- isNullXName :: XName -> Bool isNullXName = (== nullXName) {-# INLINE isNullXName #-} namePrefix :: QName -> String namePrefix = unXN . namePrefix' {-# INLINE namePrefix #-} localPart :: QName -> String localPart = unXN . localPart' {-# INLINE localPart #-} namespaceUri :: QName -> String namespaceUri = unXN . namespaceUri' {-# INLINE namespaceUri #-} -- ------------------------------------------------------------ -- | set name prefix setNamespaceUri' :: XName -> QName -> QName setNamespaceUri' ns (QN lp px _ns) = newQName lp px ns -- | set local part setLocalPart' :: XName -> QName -> QName setLocalPart' lp (QN _lp px ns) = newQName lp px ns -- | set name prefix setNamePrefix' :: XName -> QName -> QName setNamePrefix' px (QN lp _px ns) = newQName lp px ns -- ------------------------------------------------------------ -- | -- builds the full name \"prefix:localPart\", if prefix is not null, else the local part is the result qualifiedName :: QName -> String qualifiedName (QN lp px _ns) | isNullXName px = unXN lp | otherwise = unXN px ++ (':' : unXN lp) -- | functional list version of qualifiedName used in xshow qualifiedName' :: QName -> String -> String qualifiedName' (QN lp px _ns) | isNullXName px = (unXN lp ++) | otherwise = (unXN px ++) . (':' :) . (unXN lp ++) -- | -- builds the \"universal\" name, that is the namespace uri surrounded with \"{\" and \"}\" followed by the local part -- (specialisation of 'buildUniversalName') universalName :: QName -> String universalName = buildUniversalName (\ ns lp -> '{' : ns ++ '}' : lp) -- | -- builds an \"universal\" uri, that is the namespace uri followed by the local part. This is usefull for RDF applications, -- where the subject, predicate and object often are concatenated from namespace uri and local part -- (specialisation of 'buildUniversalName') universalUri :: QName -> String universalUri = buildUniversalName (++) -- | -- builds a string from the namespace uri and the local part. If the namespace uri is empty, the local part is returned, else -- namespace uri and local part are combined with the combining function given by the first parameter buildUniversalName :: (String -> String -> String) -> QName -> String buildUniversalName bf n@(QN _lp _px ns) | isNullXName ns = localPart n | otherwise = unXN ns `bf` localPart n showQN :: QName -> String showQN n | null ns = show $ qualifiedName n | otherwise = show $ "{" ++ ns ++ "}" ++ qualifiedName n where ns = namespaceUri n -- ------------------------------------------------------------ -- -- internal XName functions mkQName' :: XName -> XName -> XName -> QName mkQName' px lp ns = newQName lp px ns {-# DEPRECATED mkQName' "use newQName instead with lp px ns param seq " #-} -- ------------------------------------------------------------ -- | -- constructs a simple name, with prefix and localPart but without a namespace uri. -- -- see also 'mkQName', 'mkName' mkPrefixLocalPart :: String -> String -> QName mkPrefixLocalPart px lp | null px = newLpName lp | otherwise = newPxName lp px -- | -- constructs a simple, namespace unaware name. -- If the name is in @prefix:localpart@ form and the prefix is not empty -- the name is split internally into -- a prefix and a local part. mkName :: String -> QName mkName n | (':' `elem` n) && not (null px) -- more restrictive: isWellformedQualifiedName n = newPxName lp px | otherwise = newLpName n where (px, (_ : lp)) = span (/= ':') n -- | -- constructs a complete qualified name with 'namePrefix', 'localPart' and 'namespaceUri'. -- This function can be used to build not wellformed prefix:localpart names. -- The XPath module uses wildcard names like @xxx:*@. These must be build with 'mkQName' -- and not with mkName. mkQName :: String -> String -> String -> QName mkQName px lp ns | null ns = mkPrefixLocalPart px lp | otherwise = newNsName lp px ns -- ------------------------------------------------------------ -- | -- old name for 'mkName' mkSNsName :: String -> QName mkSNsName = mkName {-# DEPRECATED mkSNsName "use mkName instead" #-} -- | -- constructs a simple, namespace aware name, with prefix:localPart as first parameter, -- namspace uri as second. -- -- see also 'mkName', 'mkPrefixLocalPart' {- mkNsName :: String -> String -> QName mkNsName n ns = trace ("mkNsName: " ++ show n ++ " " ++ show ns) (mkNsName' n ns) -} mkNsName :: String -> String -> QName mkNsName n ns | null ns = qn | otherwise = setNamespaceUri' ns' qn where qn = mkName n ns' = newXName ns -- ------------------------------------------------------------ -- | Equivalent QNames are defined as follows: The URIs are normalized before comparison. -- Comparison is done with 'equalQNameBy' and 'equivUri' equivQName :: QName -> QName -> Bool equivQName = equalQNameBy equivUri -- | Comparison of normalized namespace URIs using 'normalizeNsUri' equivUri :: String -> String -> Bool equivUri x y = normalizeNsUri x == normalizeNsUri y -- | Sometimes a weaker equality relation than 'equalQName' is appropriate, e.g no case significance in names, ... -- a name normalization function can be applied to the strings before comparing. Called by 'equalQName' and -- 'equivQName' equalQNameBy :: (String -> String -> Bool) -> QName -> QName -> Bool equalQNameBy equiv q1 q2 = localPart q1 == localPart q2 && (namespaceUri q1 `equiv` namespaceUri q2) -- | Normalization of URIs: Normalization is done by conversion into lowercase letters. A trailing \"\/\" is ignored normalizeNsUri :: String -> String normalizeNsUri = map toLower . stripSlash where stripSlash "" = "" stripSlash s | last s == '/' = init s | otherwise = s -- ----------------------------------------------------------------------------- -- Namespace predicates -- | -- Compute the name prefix and the namespace uri for a qualified name. -- -- This function does not test whether the name is a wellformed qualified name. -- see Namespaces in XML Rule [6] to [8]. Error checking is done with separate functions, -- see 'isWellformedQName' and 'isWellformedQualifiedName' for error checking. setNamespace :: NsEnv -> QName -> QName setNamespace env n@(QN lp px _ns) = maybe n (\ ns -> newQName lp px ns) . lookup px $ env -- ----------------------------------------------------------------------------- -- -- | -- test for wellformed NCName, rule [4] XML Namespaces isNCName :: String -> Bool isNCName [] = False isNCName n = and ( zipWith ($) (isXmlNCNameStartChar : repeat isXmlNCNameChar) n ) -- | -- test for wellformed QName, rule [6] XML Namespaces -- predicate is used in filter 'valdateNamespaces'. isWellformedQualifiedName :: String -> Bool isWellformedQualifiedName s | null lp = isNCName px | otherwise = isNCName px && isNCName (tail lp) where (px, lp) = span (/= ':') s -- | -- test for wellformed QName values. -- A QName is wellformed, if the local part is a NCName, the namePrefix, if not empty, is also a NCName. -- predicate is used in filter 'valdateNamespaces'. isWellformedQName :: QName -> Bool isWellformedQName (QN lp px _ns) = (isNCName . unXN) lp -- rule [8] XML Namespaces && ( isNullXName px || (isNCName . unXN) px -- rule [7] XML Namespaces ) -- | -- test whether an attribute name is a namesapce declaration name. -- If this is not the case True is the result, else -- the name must be a well formed namespace name: -- All namespace prefixes starting with \"xml\" are reserved for XML related definitions. -- predicate is used in filter 'valdateNamespaces'. isWellformedNSDecl :: QName -> Bool isWellformedNSDecl n = not (isNameSpaceName n) || isWellformedNameSpaceName n -- | -- test for a namespace name to be well formed isWellformedNameSpaceName :: QName -> Bool isWellformedNameSpaceName n@(QN lp px _ns) | isNullXName px = lp == xmlnsXName | otherwise = px == xmlnsXName && not (null lp') && not (a_xml `isPrefixOf` lp') where lp' = localPart n -- | -- test whether a name is a namespace declaration attribute name isNameSpaceName :: QName -> Bool isNameSpaceName (QN lp px _ns) | isNullXName px = lp == xmlnsXName | otherwise = px == xmlnsXName -- | -- -- predicate is used in filter 'valdateNamespaces'. isDeclaredNamespace :: QName -> Bool isDeclaredNamespace (QN _lp px ns) | isNullXName px = True -- no namespace used | px == xmlnsXName = ns == xmlnsNamespaceXName -- "xmlns" has a predefined namespace uri | px == xmlXName = ns == xmlNamespaceXName -- "xml" has a predefiend namespace" | otherwise = not (isNullXName ns) -- namespace values are not empty -- ----------------------------------------------------------------------------- toNsEnv :: AssocList String String -> NsEnv toNsEnv = map (newXName *** newXName) -- ----------------------------------------------------------------------------- -- the name and string cache data NameCache = NC { _newXN :: ! Int -- next free name id , _xnCache :: ! (M.Map String XName) , _qnCache :: ! (M.Map (XName, XName, XName) QName) -- we need another type than QName } -- for the key because of the unusable -- Eq instance of QName type ChangeNameCache r = NameCache -> (NameCache, r) -- ------------------------------------------------------------ -- | the internal cache for QNames (and name strings) theNameCache :: IORef NameCache theNameCache = unsafePerformIO (newIORef $ initialCache) {-# NOINLINE theNameCache #-} initialXNames :: [XName] nullXName , xmlnsNamespaceXName , xmlnsXName , xmlNamespaceXName , xmlXName :: XName initialXNames@ [ nullXName , xmlnsNamespaceXName , xmlnsXName , xmlNamespaceXName , xmlXName ] = zipWith XN [0..] $ [ "" , xmlnsNamespace , a_xmlns , xmlNamespace , a_xml ] initialQNames :: [QName] xmlnsQN :: QName initialQNames@ [xmlnsQN] = [QN xmlnsXName nullXName xmlnsNamespaceXName] initialCache :: NameCache initialCache = NC (length initialXNames) (M.fromList $ map (\ xn -> (unXN xn, xn)) initialXNames) (M.fromList $ map (\ qn@(QN lp px ns) -> ((lp, px, ns), qn)) initialQNames) -- ------------------------------------------------------------ changeNameCache :: NFData r => ChangeNameCache r -> r changeNameCache action = unsafePerformIO changeNameCache' where action' c = let r = action c in fst r `seq` r -- eval name cache to whnf changeNameCache' = do -- putStrLn "modify cache" res <- atomicModifyIORef theNameCache action' -- putStrLn "cache modified" return res {-# NOINLINE changeNameCache #-} newXName' :: String -> ChangeNameCache XName newXName' n c@(NC nxn xm qm) = case M.lookup n xm of Just xn -> (c, xn) Nothing -> let nxn' = nxn + 1 in let xn = (XN nxn n) in let xm' = M.insert n xn xm in -- trace ("newXName: XN " ++ show nxn ++ " " ++ show n) $ rnf xn `seq` (NC nxn' xm' qm, xn) newQName' :: XName -> XName -> XName -> ChangeNameCache QName newQName' lp px ns c@(NC nxn xm qm) = case M.lookup q' qm of Just qn -> -- trace ("oldQName: " ++ show qn) $ -- log evaluation sequence (c, qn) Nothing -> let qm' = M.insert q' q qm in -- trace ("newQName: " ++ show q) $ -- log insertion of a new QName q `seq` (NC nxn xm qm', q) where q' = (lp, px, ns) q = QN lp px ns andThen :: ChangeNameCache r1 -> (r1 -> ChangeNameCache r2) -> ChangeNameCache r2 andThen a1 a2 c0 = let (c1, r1) = a1 c0 in (a2 r1) c1 newXName :: String -> XName newXName n = changeNameCache $ newXName' n newQName :: XName -> XName -> XName -> QName newQName lp px ns = lp `seq` px `seq` ns `seq` -- XNames must be evaluated, else MVar blocks ( changeNameCache $ newQName' lp px ns ) newLpName :: String -> QName newLpName lp = changeNameCache $ newXName' lp `andThen` \ lp' -> newQName' lp' nullXName nullXName newPxName :: String -> String -> QName newPxName lp px = changeNameCache $ newXName' lp `andThen` \ lp' -> newXName' px `andThen` \ px' -> newQName' lp' px' nullXName newNsName :: String -> String -> String -> QName newNsName lp px ns = changeNameCache $ newXName' lp `andThen` \ lp' -> newXName' px `andThen` \ px' -> newXName' ns `andThen` \ ns' -> newQName' lp' px' ns' ----------------------------------------------------------------------------- hxt-9.3.1.15/src/Text/XML/HXT/IO/0000755000000000000000000000000012465166667014111 5ustar0000000000000000hxt-9.3.1.15/src/Text/XML/HXT/IO/GetFILE.hs0000644000000000000000000000736112465166667015633 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Text.XML.HXT.IO.GetFILE Copyright : Copyright (C) 2008 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : stable Portability: portable The GET method for file protocol -} -- ------------------------------------------------------------ module Text.XML.HXT.IO.GetFILE ( getStdinCont , getCont ) where import Control.Exception ( try ) import qualified Data.ByteString.Lazy as B import Network.URI ( unEscapeString ) import System.IO.Error ( ioeGetErrorString ) import System.Directory ( doesFileExist -- , getPermissions -- , readable ) import Text.XML.HXT.DOM.XmlKeywords -- ------------------------------------------------------------ getStdinCont :: Bool -> IO (Either ([(String, String)], String) B.ByteString) getStdinCont strictInput = do c <- try ( do cb <- B.getContents if strictInput then B.length cb `seq` return cb else return cb ) return (either readErr Right c) where readErr e = Left ( [ (transferStatus, "999") , (transferMessage, msg) ] , msg ) where msg = "stdin read error: " ++ es es = ioeGetErrorString e getCont :: Bool -> String -> IO (Either ([(String, String)], String) B.ByteString) getCont strictInput source = do -- preliminary source'' <- checkFile source' case source'' of Nothing -> return $ fileErr "file not found" Just fn -> do -- perm <- getPermissions fn -- getPermission may fail -- if not (readable perm) if False then return $ fileErr "file not readable" else do c <- try $ do cb <- B.readFile fn if strictInput then B.length `seq` return cb else return cb return (either readErr Right c) where source' = drivePath $ source readErr e = fileErr (ioeGetErrorString e) fileErr msg0 = Left ( [ (transferStatus, "999") , (transferMessage, msg) ] , msg ) where msg = "file read error: " ++ show msg0 ++ " when accessing " ++ show source' -- remove leading / if file starts with windows drive letter, e.g. /c:/windows -> c:/windows drivePath ('/' : file@(d : ':' : _more)) | d `elem` ['A'..'Z'] || d `elem` ['a'..'z'] = file drivePath file = file -- | check whether file exists, if not -- try to unescape filename and check again -- return the existing filename checkFile :: String -> IO (Maybe String) checkFile fn = do exists <- doesFileExist fn if exists then return (Just fn) else do exists' <- doesFileExist fn' return ( if exists' then Just fn' else Nothing ) where fn' = unEscapeString fn -- ------------------------------------------------------------ hxt-9.3.1.15/src/Text/XML/HXT/Arrow/0000755000000000000000000000000012465166667014674 5ustar0000000000000000hxt-9.3.1.15/src/Text/XML/HXT/Arrow/Edit.hs0000644000000000000000000006120412465166667016120 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Text.XML.HXT.Arrow.Edit Copyright : Copyright (C) 2011 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : stable Portability: portable common edit arrows -} -- ------------------------------------------------------------ module Text.XML.HXT.Arrow.Edit ( canonicalizeAllNodes , canonicalizeForXPath , canonicalizeContents , collapseAllXText , collapseXText , xshowEscapeXml , escapeXmlRefs , escapeHtmlRefs , haskellRepOfXmlDoc , treeRepOfXmlDoc , addHeadlineToXmlDoc , indentDoc , numberLinesInXmlDoc , preventEmptyElements , removeComment , removeAllComment , removeWhiteSpace , removeAllWhiteSpace , removeDocWhiteSpace , transfCdata , transfAllCdata , transfCharRef , transfAllCharRef , substAllXHTMLEntityRefs , substXHTMLEntityRef , rememberDTDAttrl , addDefaultDTDecl , hasXmlPi , addXmlPi , addXmlPiEncoding , addDoctypeDecl , addXHtmlDoctypeStrict , addXHtmlDoctypeTransitional , addXHtmlDoctypeFrameset ) where import Control.Arrow import Control.Arrow.ArrowIf import Control.Arrow.ArrowList import Control.Arrow.ArrowTree import Control.Arrow.ListArrow import Control.Arrow.NTreeEdit import Data.Char.Properties.XMLCharProps (isXmlSpaceChar) import Text.XML.HXT.Arrow.XmlArrow import Text.XML.HXT.DOM.FormatXmlTree (formatXmlTree) import Text.XML.HXT.DOM.Interface import qualified Text.XML.HXT.DOM.ShowXml as XS import qualified Text.XML.HXT.DOM.XmlNode as XN import Text.XML.HXT.Parser.HtmlParsec (emptyHtmlTags) import Text.XML.HXT.Parser.XhtmlEntities (xhtmlEntities) import Text.XML.HXT.Parser.XmlEntities (xmlEntities) import Data.List (isPrefixOf) import qualified Data.Map as M import Data.Maybe -- ------------------------------------------------------------ -- | -- Applies some "Canonical XML" rules to a document tree. -- -- The rules differ slightly for canonical XML and XPath in handling of comments -- -- Note: This is not the whole canonicalization as it is specified by the W3C -- Recommendation. Adding attribute defaults or sorting attributes in lexicographic -- order is done by the @transform@ function of module @Text.XML.HXT.Validator.Validation@. -- Replacing entities or line feed normalization is done by the parser. -- -- -- Not implemented yet: -- -- - Whitespace within start and end tags is normalized -- -- - Special characters in attribute values and character content are replaced by character references -- -- see 'canonicalizeAllNodes' and 'canonicalizeForXPath' canonicalizeTree' :: LA XmlTree XmlTree -> LA XmlTree XmlTree canonicalizeTree' toBeRemoved = ( processChildren ( (none `when` (isText <+> isXmlPi)) -- remove XML PI and all text around XML root element >>> (deep isPi `when` isDTD) -- remove DTD parts, except PIs whithin DTD ) `when` isRoot ) >>> canonicalizeNodes toBeRemoved canonicalizeNodes :: LA XmlTree XmlTree -> LA XmlTree XmlTree canonicalizeNodes toBeRemoved = editNTreeA $ [ toBeRemoved :-> none , ( isElem >>> getAttrl >>> getChildren >>> isCharRef ) -- canonicalize attribute list :-> ( processAttrl ( processChildren transfCharRef >>> collapseXText' -- combine text in attribute values ) >>> ( collapseXText' -- and combine text in content `when` (getChildren >>. has2XText) ) ) , ( isElem >>> (getChildren >>. has2XText) ) :-> collapseXText' -- combine text in content , isCharRef :-> ( getCharRef >>> arr (\ i -> [toEnum i]) >>> mkText ) , isCdata :-> ( getCdata >>> mkText ) ] -- | -- Applies some "Canonical XML" rules to a document tree. -- -- The rule differ slightly for canonical XML and XPath in handling of comments -- -- Note: This is not the whole canonicalization as it is specified by the W3C -- Recommendation. Adding attribute defaults or sorting attributes in lexicographic -- order is done by the @transform@ function of module @Text.XML.HXT.Validator.Validation@. -- Replacing entities or line feed normalization is done by the parser. -- -- Rules: remove DTD parts, processing instructions, comments and substitute char refs in attribute -- values and text -- -- Not implemented yet: -- -- - Whitespace within start and end tags is normalized -- -- - Special characters in attribute values and character content are replaced by character references canonicalizeAllNodes :: ArrowList a => a XmlTree XmlTree canonicalizeAllNodes = fromLA $ canonicalizeTree' isCmt -- remove comment {-# INLINE canonicalizeAllNodes #-} -- | -- Canonicalize a tree for XPath -- Like 'canonicalizeAllNodes' but comment nodes are not removed -- -- see 'canonicalizeAllNodes' canonicalizeForXPath :: ArrowList a => a XmlTree XmlTree canonicalizeForXPath = fromLA $ canonicalizeTree' none -- comment remains there {-# INLINE canonicalizeForXPath #-} -- | -- Canonicalize the contents of a document -- -- substitutes all char refs in text and attribute values, -- removes CDATA section and combines all sequences of resulting text -- nodes into a single text node -- -- see 'canonicalizeAllNodes' canonicalizeContents :: ArrowList a => a XmlTree XmlTree canonicalizeContents = fromLA $ canonicalizeNodes none {-# INLINE canonicalizeContents #-} -- ------------------------------------------------------------ has2XText :: XmlTrees -> XmlTrees has2XText ts0@(t1 : ts1@(t2 : ts2)) | XN.isText t1 = if XN.isText t2 then ts0 else has2XText ts2 | otherwise = has2XText ts1 has2XText _ = [] collapseXText' :: LA XmlTree XmlTree collapseXText' = replaceChildren ( listA getChildren >>> arrL (foldr mergeText' []) ) where mergeText' :: XmlTree -> XmlTrees -> XmlTrees mergeText' t1 (t2 : ts2) | XN.isText t1 && XN.isText t2 = let s1 = fromJust . XN.getText $ t1 s2 = fromJust . XN.getText $ t2 t = XN.mkText (s1 ++ s2) in t : ts2 mergeText' t1 ts = t1 : ts -- | -- Collects sequences of text nodes in the list of children of a node into one single text node. -- This is useful, e.g. after char and entity reference substitution collapseXText :: ArrowList a => a XmlTree XmlTree collapseXText = fromLA collapseXText' -- | -- Applies collapseXText recursively. -- -- -- see also : 'collapseXText' collapseAllXText :: ArrowList a => a XmlTree XmlTree collapseAllXText = fromLA $ processBottomUp collapseXText' -- ------------------------------------------------------------ -- | apply an arrow to the input and convert the resulting XML trees into an XML escaped string -- -- This is a save variant for converting a tree into an XML string representation -- that is parsable with 'Text.XML.HXT.Arrow.ReadDocument'. -- It is implemented with 'Text.XML.HXT.Arrow.XmlArrow.xshow', -- but xshow does no XML escaping. The XML escaping is done with -- 'Text.XML.HXT.Arrow.Edit.escapeXmlDoc' before xshow is applied. -- -- So the following law holds -- -- > xshowEscapeXml f >>> xread == f xshowEscapeXml :: ArrowXml a => a n XmlTree -> a n String xshowEscapeXml f = f >. (uncurry XS.xshow'' escapeXmlRefs) -- ------------------------------------------------------------ -- | -- escape XmlText, -- transform all special XML chars into char- or entity- refs type EntityRefTable = M.Map Int String xmlEntityRefTable , xhtmlEntityRefTable :: EntityRefTable xmlEntityRefTable = buildEntityRefTable $ xmlEntities xhtmlEntityRefTable = buildEntityRefTable $ xhtmlEntities buildEntityRefTable :: [(String, Int)] -> EntityRefTable buildEntityRefTable = M.fromList . map (\ (x,y) -> (y,x) ) type EntitySubstTable = M.Map String String xhtmlEntitySubstTable :: EntitySubstTable xhtmlEntitySubstTable = M.fromList . map (second $ (:[]) . toEnum) $ xhtmlEntities -- ------------------------------------------------------------ substXHTMLEntityRef :: LA XmlTree XmlTree substXHTMLEntityRef = ( getEntityRef >>> arrL subst >>> mkText ) `orElse` this where subst name = maybe [] (:[]) $ M.lookup name xhtmlEntitySubstTable substAllXHTMLEntityRefs :: ArrowXml a => a XmlTree XmlTree substAllXHTMLEntityRefs = fromLA $ processBottomUp substXHTMLEntityRef -- ------------------------------------------------------------ escapeXmlRefs :: (Char -> String -> String, Char -> String -> String) escapeXmlRefs = (cquote, aquote) where cquote c | c `elem` "<&" = ('&' :) . ((lookupRef c xmlEntityRefTable) ++) . (';' :) | otherwise = (c :) aquote c | c `elem` "<>\"\'&\n\r\t" = ('&' :) . ((lookupRef c xmlEntityRefTable) ++) . (';' :) | otherwise = (c :) escapeHtmlRefs :: (Char -> String -> String, Char -> String -> String) escapeHtmlRefs = (cquote, aquote) where cquote c | isHtmlTextEsc c = ('&' :) . ((lookupRef c xhtmlEntityRefTable) ++) . (';' :) | otherwise = (c :) aquote c | isHtmlAttrEsc c = ('&' :) . ((lookupRef c xhtmlEntityRefTable) ++) . (';' :) | otherwise = (c :) isHtmlTextEsc c = c >= toEnum(128) || ( c `elem` "<&" ) isHtmlAttrEsc c = c >= toEnum(128) || ( c `elem` "<>\"\'&\n\r\t" ) lookupRef :: Char -> EntityRefTable -> String lookupRef c = fromMaybe ('#' : show (fromEnum c)) . M.lookup (fromEnum c) {-# INLINE lookupRef #-} -- ------------------------------------------------------------ preventEmptyElements :: ArrowList a => [String] -> Bool -> a XmlTree XmlTree preventEmptyElements ns isHtml = fromLA $ editNTreeA [ ( isElem >>> isNoneEmpty >>> neg getChildren ) :-> replaceChildren (txt "") ] where isNoneEmpty | not (null ns) = hasNameWith (localPart >>> (`elem` ns)) | isHtml = hasNameWith (localPart >>> (`notElem` emptyHtmlTags)) | otherwise = this -- ------------------------------------------------------------ -- | -- convert a document into a Haskell representation (with show). -- -- Useful for debugging and trace output. -- see also : 'treeRepOfXmlDoc', 'numberLinesInXmlDoc' haskellRepOfXmlDoc :: ArrowList a => a XmlTree XmlTree haskellRepOfXmlDoc = fromLA $ root [getAttrl] [show ^>> mkText] -- | -- convert a document into a text and add line numbers to the text representation. -- -- Result is a root node with a single text node as child. -- Useful for debugging and trace output. -- see also : 'haskellRepOfXmlDoc', 'treeRepOfXmlDoc' numberLinesInXmlDoc :: ArrowList a => a XmlTree XmlTree numberLinesInXmlDoc = fromLA $ processChildren (changeText numberLines) where numberLines :: String -> String numberLines str = concat $ zipWith (\ n l -> lineNr n ++ l ++ "\n") [1..] (lines str) where lineNr :: Int -> String lineNr n = (reverse (take 6 (reverse (show n) ++ replicate 6 ' '))) ++ " " -- | -- convert a document into a text representation in tree form. -- -- Useful for debugging and trace output. -- see also : 'haskellRepOfXmlDoc', 'numberLinesInXmlDoc' treeRepOfXmlDoc :: ArrowList a => a XmlTree XmlTree treeRepOfXmlDoc = fromLA $ root [getAttrl] [formatXmlTree ^>> mkText] addHeadlineToXmlDoc :: ArrowXml a => a XmlTree XmlTree addHeadlineToXmlDoc = fromLA $ ( addTitle $< (getAttrValue a_source >>^ formatTitle) ) where addTitle str = replaceChildren ( txt str <+> getChildren <+> txt "\n" ) formatTitle str = "\n" ++ headline ++ "\n" ++ underline ++ "\n\n" where headline = "content of: " ++ str underline = map (const '=') headline -- ------------------------------------------------------------ -- | -- remove a Comment node removeComment :: ArrowXml a => a XmlTree XmlTree removeComment = none `when` isCmt -- | -- remove all comments in a tree recursively removeAllComment :: ArrowXml a => a XmlTree XmlTree removeAllComment = fromLA $ editNTreeA [isCmt :-> none] -- ------------------------------------------------------------ -- | -- simple filter for removing whitespace. -- -- no check on sigificant whitespace, e.g. in HTML \-elements, is done. -- -- -- see also : 'removeAllWhiteSpace', 'removeDocWhiteSpace' removeWhiteSpace :: ArrowXml a => a XmlTree XmlTree removeWhiteSpace = fromLA $ none `when` isWhiteSpace -- | -- simple recursive filter for removing all whitespace. -- -- removes all text nodes in a tree that consist only of whitespace. -- -- -- see also : 'removeWhiteSpace', 'removeDocWhiteSpace' removeAllWhiteSpace :: ArrowXml a => a XmlTree XmlTree removeAllWhiteSpace = fromLA $ editNTreeA [isWhiteSpace :-> none] -- fromLA $ processBottomUp removeWhiteSpace' -- less efficient -- ------------------------------------------------------------ -- | -- filter for removing all not significant whitespace. -- -- the tree traversed for removing whitespace between elements, -- that was inserted for indentation and readability. -- whitespace is only removed at places, where it's not significat -- preserving whitespace may be controlled in a document tree -- by a tag attribute @xml:space@ -- -- allowed values for this attribute are @default | preserve@ -- -- input is root node of the document to be cleaned up, -- output the semantically equivalent simplified tree -- -- -- see also : 'indentDoc', 'removeAllWhiteSpace' removeDocWhiteSpace :: ArrowXml a => a XmlTree XmlTree removeDocWhiteSpace = fromLA $ removeRootWhiteSpace removeRootWhiteSpace :: LA XmlTree XmlTree removeRootWhiteSpace = processChildren processRootElement `when` isRoot where processRootElement :: LA XmlTree XmlTree processRootElement = removeWhiteSpace >>> processChild where processChild = choiceA [ isDTD :-> removeAllWhiteSpace -- whitespace in DTD is redundant , this :-> replaceChildren ( getChildren >>. indentTrees insertNothing False 1 ) ] -- ------------------------------------------------------------ -- | -- filter for indenting a document tree for pretty printing. -- -- the tree is traversed for inserting whitespace for tag indentation. -- -- whitespace is only inserted or changed at places, where it isn't significant, -- is's not inserted between tags and text containing non whitespace chars. -- -- whitespace is only inserted or changed at places, where it's not significant. -- preserving whitespace may be controlled in a document tree -- by a tag attribute @xml:space@ -- -- allowed values for this attribute are @default | preserve@. -- -- input is a complete document tree or a document fragment -- result is the semantically equivalent formatted tree. -- -- -- see also : 'removeDocWhiteSpace' indentDoc :: ArrowXml a => a XmlTree XmlTree indentDoc = fromLA $ ( ( isRoot `guards` indentRoot ) `orElse` (root [] [this] >>> indentRoot >>> getChildren) ) -- ------------------------------------------------------------ indentRoot :: LA XmlTree XmlTree indentRoot = processChildren indentRootChildren where indentRootChildren = removeText >>> indentChild >>> insertNL where removeText = none `when` isText insertNL = this <+> txt "\n" indentChild = ( replaceChildren ( getChildren >>. indentTrees (insertIndentation 2) False 1 ) `whenNot` isDTD ) -- ------------------------------------------------------------ -- -- copied from EditFilter and rewritten for arrows -- to remove dependency to the filter module indentTrees :: (Int -> LA XmlTree XmlTree) -> Bool -> Int -> XmlTrees -> XmlTrees indentTrees _ _ _ [] = [] indentTrees indentFilter preserveSpace level ts = runLAs lsf ls ++ indentRest rs where runLAs f l = runLA (constL l >>> f) undefined (ls, rs) = break XN.isElem ts isSignificant :: Bool isSignificant = preserveSpace || (not . null . runLAs isSignificantPart) ls isSignificantPart :: LA XmlTree XmlTree isSignificantPart = catA [ isText `guards` neg isWhiteSpace , isCdata , isCharRef , isEntityRef ] lsf :: LA XmlTree XmlTree lsf | isSignificant = this | otherwise = (none `when` isWhiteSpace) >>> (indentFilter level <+> this) indentRest :: XmlTrees -> XmlTrees indentRest [] | isSignificant = [] | otherwise = runLA (indentFilter (level - 1)) undefined indentRest (t':ts') = runLA ( ( indentElem >>> lsf ) `when` isElem ) t' ++ ( if null ts' then indentRest else indentTrees indentFilter preserveSpace level ) ts' where indentElem = replaceChildren ( getChildren >>. indentChildren ) xmlSpaceAttrValue :: String xmlSpaceAttrValue = concat . runLA (getAttrValue "xml:space") $ t' preserveSpace' :: Bool preserveSpace' = ( fromMaybe preserveSpace . lookup xmlSpaceAttrValue ) [ ("preserve", True) , ("default", False) ] indentChildren :: XmlTrees -> XmlTrees indentChildren cs' | all (maybe False (all isXmlSpaceChar) . XN.getText) cs' = [] | otherwise = indentTrees indentFilter preserveSpace' (level + 1) cs' -- filter for indenting elements insertIndentation :: Int -> Int -> LA a XmlTree insertIndentation indentWidth level = txt ('\n' : replicate (level * indentWidth) ' ') -- filter for removing all whitespace insertNothing :: Int -> LA a XmlTree insertNothing _ = none -- ------------------------------------------------------------ -- | -- converts a CDATA section into normal text nodes transfCdata :: ArrowXml a => a XmlTree XmlTree transfCdata = fromLA $ (getCdata >>> mkText) `when` isCdata -- | -- converts CDATA sections in whole document tree into normal text nodes transfAllCdata :: ArrowXml a => a XmlTree XmlTree transfAllCdata = fromLA $ editNTreeA [isCdata :-> (getCdata >>> mkText)] -- | -- converts a character reference to normal text transfCharRef :: ArrowXml a => a XmlTree XmlTree transfCharRef = fromLA $ ( getCharRef >>> arr (\ i -> [toEnum i]) >>> mkText ) `when` isCharRef -- | -- recursively converts all character references to normal text transfAllCharRef :: ArrowXml a => a XmlTree XmlTree transfAllCharRef = fromLA $ editNTreeA [isCharRef :-> (getCharRef >>> arr (\ i -> [toEnum i]) >>> mkText)] -- ------------------------------------------------------------ rememberDTDAttrl :: ArrowList a => a XmlTree XmlTree rememberDTDAttrl = fromLA $ ( ( addDTDAttrl $< ( getChildren >>> isDTDDoctype >>> getDTDAttrl ) ) `orElse` this ) where addDTDAttrl al = seqA . map (uncurry addAttr) . map (first (dtdPrefix ++)) $ al addDefaultDTDecl :: ArrowList a => a XmlTree XmlTree addDefaultDTDecl = fromLA $ ( addDTD $< listA (getAttrl >>> (getName &&& xshow getChildren) >>> hasDtdPrefix) ) where hasDtdPrefix = isA (fst >>> (dtdPrefix `isPrefixOf`)) >>> arr (first (drop (length dtdPrefix))) addDTD [] = this addDTD al = replaceChildren ( mkDTDDoctype al none <+> txt "\n" <+> ( getChildren >>> (none `when` isDTDDoctype) ) -- remove old DTD decl ) -- ------------------------------------------------------------ hasXmlPi :: ArrowXml a => a XmlTree XmlTree hasXmlPi = fromLA ( getChildren >>> isPi >>> hasName t_xml ) -- | add an \ processing instruction -- if it's not already there addXmlPi :: ArrowXml a => a XmlTree XmlTree addXmlPi = fromLA ( insertChildrenAt 0 ( ( mkPi (mkName t_xml) none >>> addAttr a_version "1.0" ) <+> txt "\n" ) `whenNot` hasXmlPi ) -- | add an encoding spec to the \ processing instruction addXmlPiEncoding :: ArrowXml a => String -> a XmlTree XmlTree addXmlPiEncoding enc = fromLA $ processChildren ( addAttr a_encoding enc `when` ( isPi >>> hasName t_xml ) ) -- | add an XHTML strict doctype declaration to a document addXHtmlDoctypeStrict , addXHtmlDoctypeTransitional , addXHtmlDoctypeFrameset :: ArrowXml a => a XmlTree XmlTree -- | add an XHTML strict doctype declaration to a document addXHtmlDoctypeStrict = addDoctypeDecl "html" "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd" -- | add an XHTML transitional doctype declaration to a document addXHtmlDoctypeTransitional = addDoctypeDecl "html" "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd" -- | add an XHTML frameset doctype declaration to a document addXHtmlDoctypeFrameset = addDoctypeDecl "html" "-//W3C//DTD XHTML 1.0 Frameset//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd" -- | add a doctype declaration to a document -- -- The arguments are the root element name, the PUBLIC id and the SYSTEM id addDoctypeDecl :: ArrowXml a => String -> String -> String -> a XmlTree XmlTree addDoctypeDecl rootElem public system = fromLA $ replaceChildren ( mkDTDDoctype ( ( if null public then id else ( (k_public, public) : ) ) . ( if null system then id else ( (k_system, system) : ) ) $ [ (a_name, rootElem) ] ) none <+> txt "\n" <+> getChildren ) -- ------------------------------------------------------------ hxt-9.3.1.15/src/Text/XML/HXT/Arrow/GeneralEntitySubstitution.hs0000644000000000000000000003154712465166667022451 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Text.XML.HXT.Arrow.GeneralEntitySubstitution Copyright : Copyright (C) 2005 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : experimental Portability: portable general entity substitution -} -- ------------------------------------------------------------ module Text.XML.HXT.Arrow.GeneralEntitySubstitution ( processGeneralEntities ) where import Control.Arrow -- arrow classes import Control.Arrow.ArrowList import Control.Arrow.ArrowIf import Control.Arrow.ArrowTree import Text.XML.HXT.DOM.Interface import Text.XML.HXT.Arrow.XmlArrow import Text.XML.HXT.Arrow.XmlState import Text.XML.HXT.Arrow.ParserInterface ( parseXmlEntityValueAsAttrValue , parseXmlEntityValueAsContent ) import Text.XML.HXT.Arrow.Edit ( transfCharRef ) import Text.XML.HXT.Arrow.DocumentInput ( getXmlEntityContents ) import qualified Data.Map as M ( Map , empty , lookup , insert ) -- ------------------------------------------------------------ data GEContext = ReferenceInContent | ReferenceInAttributeValue | ReferenceInEntityValue -- or OccursInAttributeValue -- not used during substitution but during validation -- or ReferenceInDTD -- not used: syntax check detects errors type GESubstArrow = GEContext -> RecList -> GEArrow XmlTree XmlTree type GEArrow b c = IOStateArrow GEEnv b c type RecList = [String] -- ------------------------------------------------------------ newtype GEEnv = GEEnv (M.Map String GESubstArrow) emptyGeEnv :: GEEnv emptyGeEnv = GEEnv M.empty lookupGeEnv :: String -> GEEnv -> Maybe GESubstArrow lookupGeEnv k (GEEnv env) = M.lookup k env addGeEntry :: String -> GESubstArrow -> GEEnv -> GEEnv addGeEntry k a (GEEnv env) = GEEnv $ M.insert k a env -- ------------------------------------------------------------ -- | -- substitution of general entities -- -- input: a complete document tree including root node processGeneralEntities :: IOStateArrow s XmlTree XmlTree processGeneralEntities = ( traceMsg 1 "processGeneralEntities: collect and substitute general entities" >>> withOtherUserState emptyGeEnv (processChildren (processGeneralEntity ReferenceInContent [])) >>> setDocumentStatusFromSystemState "in general entity processing" >>> traceTree >>> traceSource ) `when` documentStatusOk processGeneralEntity :: GESubstArrow processGeneralEntity context recl = choiceA [ isElem :-> ( processAttrl (processChildren substEntitiesInAttrValue) >>> processChildren (processGeneralEntity context recl) ) , isEntityRef :-> substEntityRef , isDTDDoctype :-> processChildren (processGeneralEntity context recl) , isDTDEntity :-> addEntityDecl , isDTDAttlist :-> substEntitiesInAttrDefaultValue , this :-> this ] where addEntityDecl :: GEArrow XmlTree XmlTree addEntityDecl = perform ( choiceA [ isIntern :-> addInternalEntity -- don't change sequence of cases , isExtern :-> addExternalEntity , isUnparsed :-> addUnparsedEntity ] ) where isIntern = none `when` hasDTDAttr k_system isExtern = none `when` hasDTDAttr k_ndata isUnparsed = this addInternalEntity :: GEArrow XmlTree b addInternalEntity = insertInternal $<< ( ( getDTDAttrValue a_name >>> traceValue 2 (("processGeneralEntity: general entity definition for " ++) . show) ) &&& xshow (getChildren >>> isText) ) where insertInternal entity contents = insertEntity (substInternal contents) entity >>> none addExternalEntity :: GEArrow XmlTree b addExternalEntity = insertExternal $<< ( ( getDTDAttrValue a_name >>> traceValue 2 (("processGeneralEntity: external entity definition for " ++) . show) ) &&& getDTDAttrValue a_url -- the absolute URL, not the relative in attr: k_system ) where insertExternal entity uri = insertEntity (substExternalParsed1Time uri) entity >>> none addUnparsedEntity :: GEArrow XmlTree b addUnparsedEntity = getDTDAttrValue a_name >>> traceValue 2 (("processGeneralEntity: unparsed entity definition for " ++) . show) >>> applyA (arr (insertEntity substUnparsed)) >>> none insertEntity :: (String -> GESubstArrow) -> String -> GEArrow b b insertEntity fct entity = ( getUserState >>> applyA (arr checkDefined) ) `guards` addEntity fct entity where checkDefined geEnv = maybe ok alreadyDefined . lookupGeEnv entity $ geEnv where ok = this alreadyDefined _ = issueWarn ("entity " ++ show entity ++ " already defined, repeated definition ignored") >>> none addEntity :: (String -> GESubstArrow) -> String -> GEArrow b b addEntity fct entity = changeUserState ins where ins _ geEnv = addGeEntry entity (fct entity) geEnv substEntitiesInAttrDefaultValue :: GEArrow XmlTree XmlTree substEntitiesInAttrDefaultValue = applyA ( xshow ( getDTDAttrValue a_default -- parse the default value >>> -- substitute entities mkText -- and convert value into a string >>> parseXmlEntityValueAsAttrValue "default value of attribute" >>> filterErrorMsg >>> substEntitiesInAttrValue ) >>> arr (setDTDAttrValue a_default) ) `when` hasDTDAttr a_default substEntitiesInAttrValue :: GEArrow XmlTree XmlTree substEntitiesInAttrValue = ( processGeneralEntity ReferenceInAttributeValue recl `when` isEntityRef ) >>> changeText normalizeWhiteSpace >>> transfCharRef where normalizeWhiteSpace = map ( \c -> if c `elem` "\n\t\r" then ' ' else c ) substEntityRef :: GEArrow XmlTree XmlTree substEntityRef = applyA ( ( ( getEntityRef -- get the entity name and the env >>> -- and compute the arrow to be applied traceValue 2 (("processGeneralEntity: entity reference for entity " ++) . show) >>> traceMsg 3 ("recursion list = " ++ show recl) ) &&& getUserState ) >>> arr2 substA ) where substA :: String -> GEEnv -> GEArrow XmlTree XmlTree substA entity geEnv = maybe entityNotFound entityFound . lookupGeEnv entity $ geEnv where errMsg msg = issueErr msg entityNotFound = errMsg ("general entity reference \"&" ++ entity ++ ";\" not processed, no definition found, (forward reference?)") entityFound fct | entity `elem` recl = errMsg ("general entity reference \"&" ++ entity ++ ";\" not processed, cyclic definition") | otherwise = fct context recl substExternalParsed1Time :: String -> String -> GESubstArrow substExternalParsed1Time uri entity cx rl = perform ( traceMsg 2 ("substExternalParsed1Time: read and parse external parsed entity " ++ show entity) >>> runInLocalURIContext ( root [sattr a_source uri] [] -- uri must be an absolute uri >>> -- abs uri is computed during parameter entity handling getXmlEntityContents >>> processExternalEntityContents ) >>> applyA ( arr $ \ s -> addEntity (substExternalParsed s) entity ) ) >>> processGeneralEntity cx rl where processExternalEntityContents :: IOStateArrow s XmlTree String processExternalEntityContents = ( ( ( documentStatusOk -- reading entity succeeded >>> -- with content stored in a text node (getChildren >>> isText) ) `guards` this ) `orElse` issueErr ("illegal value for external parsed entity " ++ show entity) ) >>> xshow (getChildren >>> isText) substExternalParsed :: String -> String -> GESubstArrow substExternalParsed s entity ReferenceInContent rl = includedIfValidating s rl entity substExternalParsed _ entity ReferenceInAttributeValue _ = forbidden entity "external parsed general" "in attribute value" substExternalParsed _ _ ReferenceInEntityValue _ = bypassed substInternal :: String -> String -> GESubstArrow substInternal s entity ReferenceInContent rl = included s rl entity substInternal s entity ReferenceInAttributeValue rl = includedInLiteral s rl entity substInternal _ _ ReferenceInEntityValue _ = bypassed substUnparsed :: String -> GESubstArrow substUnparsed entity ReferenceInContent _ = forbidden entity "unparsed" "content" substUnparsed entity ReferenceInAttributeValue _ = forbidden entity "unparsed" "attribute value" substUnparsed entity ReferenceInEntityValue _ = forbidden entity "unparsed" "entity value" -- XML 1.0 chapter 4.4.2 included :: String -> RecList -> String -> GEArrow XmlTree XmlTree included s rl entity = traceMsg 3 ("substituting general entity " ++ show entity ++ " with value " ++ show s) >>> txt s >>> parseXmlEntityValueAsContent ("substituting general entity " ++ show entity ++ " in contents") >>> filterErrorMsg >>> processGeneralEntity context (entity : rl) -- XML 1.0 chapter 4.4.3 includedIfValidating :: String -> RecList -> String -> GEArrow XmlTree XmlTree includedIfValidating = included -- XML 1.0 chapter 4.4.4 forbidden :: String -> String -> String -> GEArrow XmlTree XmlTree forbidden entity msg cx = issueErr ("reference of " ++ msg ++ show entity ++ " forbidden in " ++ cx) -- XML 1.0 chapter 4.4.5 includedInLiteral :: String -> RecList -> String -> GEArrow XmlTree XmlTree includedInLiteral s rl entity = txt s >>> parseXmlEntityValueAsAttrValue ("substituting general entity " ++ show entity ++ " in attribute value") >>> filterErrorMsg >>> processGeneralEntity context (entity : rl) -- XML 1.0 chapter 4.4.7 bypassed :: GEArrow XmlTree XmlTree bypassed = this -- ------------------------------------------------------------ hxt-9.3.1.15/src/Text/XML/HXT/Arrow/XmlArrow.hs0000644000000000000000000006744712465166667017025 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Text.XML.HXT.Arrow.XmlArrow Copyright : Copyright (C) 2011 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : experimental Portability: portable Basic arrows for processing XML documents All arrows use IO and a global state for options, errorhandling, ... -} -- ------------------------------------------------------------ module Text.XML.HXT.Arrow.XmlArrow ( module Text.XML.HXT.Arrow.XmlArrow ) where import Control.Arrow -- classes import Control.Arrow.ArrowList import Control.Arrow.ArrowIf import Control.Arrow.ArrowTree import Control.Arrow.ListArrow -- arrow types import Control.Arrow.StateListArrow import Control.Arrow.IOListArrow import Control.Arrow.IOStateListArrow import Data.Char.Properties.XMLCharProps ( isXmlSpaceChar ) import Data.Maybe import Text.XML.HXT.DOM.Interface import qualified Text.XML.HXT.DOM.XmlNode as XN import qualified Text.XML.HXT.DOM.ShowXml as XS -- ------------------------------------------------------------ {- | Arrows for processing 'Text.XML.HXT.DOM.TypeDefs.XmlTree's These arrows can be grouped into predicates, selectors, constructors, and transformers. All predicates (tests) act like 'Control.Arrow.ArrowIf.none' for failure and 'Control.Arrow.ArrowIf.this' for success. A logical and can be formed by @ a1 >>> a2 @, a locical or by @ a1 \<+\> a2 @. Selector arrows will fail, when applied to wrong input, e.g. selecting the text of a node with 'getText' will fail when applied to a none text node. Edit arrows will remain the input unchanged, when applied to wrong argument, e.g. editing the content of a text node with 'changeText' applied to an element node will return the unchanged element node. -} infixl 7 += class (Arrow a, ArrowList a, ArrowTree a) => ArrowXml a where -- discriminating predicates -- | test for text nodes isText :: a XmlTree XmlTree isText = isA XN.isText {-# INLINE isText #-} isBlob :: a XmlTree XmlTree isBlob = isA XN.isBlob {-# INLINE isBlob #-} -- | test for char reference, used during parsing isCharRef :: a XmlTree XmlTree isCharRef = isA XN.isCharRef {-# INLINE isCharRef #-} -- | test for entity reference, used during parsing isEntityRef :: a XmlTree XmlTree isEntityRef = isA XN.isEntityRef {-# INLINE isEntityRef #-} -- | test for comment isCmt :: a XmlTree XmlTree isCmt = isA XN.isCmt {-# INLINE isCmt #-} -- | test for CDATA section, used during parsing isCdata :: a XmlTree XmlTree isCdata = isA XN.isCdata {-# INLINE isCdata #-} -- | test for processing instruction isPi :: a XmlTree XmlTree isPi = isA XN.isPi {-# INLINE isPi #-} -- | test for processing instruction \ isXmlPi :: a XmlTree XmlTree isXmlPi = isPi >>> hasName "xml" -- | test for element isElem :: a XmlTree XmlTree isElem = isA XN.isElem {-# INLINE isElem #-} -- | test for DTD part, used during parsing isDTD :: a XmlTree XmlTree isDTD = isA XN.isDTD {-# INLINE isDTD #-} -- | test for attribute tree isAttr :: a XmlTree XmlTree isAttr = isA XN.isAttr {-# INLINE isAttr #-} -- | test for error message isError :: a XmlTree XmlTree isError = isA XN.isError {-# INLINE isError #-} -- | test for root node (element with name \"\/\") isRoot :: a XmlTree XmlTree isRoot = isA XN.isRoot {-# INLINE isRoot #-} -- | test for text nodes with text, for which a predicate holds -- -- example: @hasText (all (\`elem\` \" \\t\\n\"))@ check for text nodes with only whitespace content hasText :: (String -> Bool) -> a XmlTree XmlTree hasText p = (isText >>> getText >>> isA p) `guards` this -- | test for text nodes with only white space -- -- implemented with 'hasTest' isWhiteSpace :: a XmlTree XmlTree isWhiteSpace = hasText (all isXmlSpaceChar) {-# INLINE isWhiteSpace #-} -- | -- test whether a node (element, attribute, pi) has a name with a special property hasNameWith :: (QName -> Bool) -> a XmlTree XmlTree hasNameWith p = (getQName >>> isA p) `guards` this {-# INLINE hasNameWith #-} -- | -- test whether a node (element, attribute, pi) has a specific qualified name -- useful only after namespace propagation hasQName :: QName -> a XmlTree XmlTree hasQName n = (getQName >>> isA (== n)) `guards` this {-# INLINE hasQName #-} -- | -- test whether a node has a specific name (prefix:localPart or localPart), -- generally useful, even without namespace handling hasName :: String -> a XmlTree XmlTree hasName n = (getName >>> isA (== n)) `guards` this {-# INLINE hasName #-} -- | -- test whether a node has a specific name as local part, -- useful only after namespace propagation hasLocalPart :: String -> a XmlTree XmlTree hasLocalPart n = (getLocalPart >>> isA (== n)) `guards` this {-# INLINE hasLocalPart #-} -- | -- test whether a node has a specific name prefix, -- useful only after namespace propagation hasNamePrefix :: String -> a XmlTree XmlTree hasNamePrefix n = (getNamePrefix >>> isA (== n)) `guards` this {-# INLINE hasNamePrefix #-} -- | -- test whether a node has a specific namespace URI -- useful only after namespace propagation hasNamespaceUri :: String -> a XmlTree XmlTree hasNamespaceUri n = (getNamespaceUri >>> isA (== n)) `guards` this {-# INLINE hasNamespaceUri #-} -- | -- test whether an element node has an attribute node with a specific name hasAttr :: String -> a XmlTree XmlTree hasAttr n = (getAttrl >>> hasName n) `guards` this {-# INLINE hasAttr #-} -- | -- test whether an element node has an attribute node with a specific qualified name hasQAttr :: QName -> a XmlTree XmlTree hasQAttr n = (getAttrl >>> hasQName n) `guards` this {-# INLINE hasQAttr #-} -- | -- test whether an element node has an attribute with a specific value hasAttrValue :: String -> (String -> Bool) -> a XmlTree XmlTree hasAttrValue n p = (getAttrl >>> hasName n >>> xshow getChildren >>> isA p) `guards` this -- | -- test whether an element node has an attribute with a qualified name and a specific value hasQAttrValue :: QName -> (String -> Bool) -> a XmlTree XmlTree hasQAttrValue n p = (getAttrl >>> hasQName n >>> xshow getChildren >>> isA p) `guards` this -- constructor arrows ------------------------------------------------------------ -- | text node construction arrow mkText :: a String XmlTree mkText = arr XN.mkText {-# INLINE mkText #-} -- | blob node construction arrow mkBlob :: a Blob XmlTree mkBlob = arr XN.mkBlob {-# INLINE mkBlob #-} -- | char reference construction arrow, useful for document output mkCharRef :: a Int XmlTree mkCharRef = arr XN.mkCharRef {-# INLINE mkCharRef #-} -- | entity reference construction arrow, useful for document output mkEntityRef :: a String XmlTree mkEntityRef = arr XN.mkEntityRef {-# INLINE mkEntityRef #-} -- | comment node construction, useful for document output mkCmt :: a String XmlTree mkCmt = arr XN.mkCmt {-# INLINE mkCmt #-} -- | CDATA construction, useful for document output mkCdata :: a String XmlTree mkCdata = arr XN.mkCdata {-# INLINE mkCdata #-} -- | error node construction, useful only internally mkError :: Int -> a String XmlTree mkError level = arr (XN.mkError level) -- | element construction: -- | the attributes and the content of the element are computed by applying arrows -- to the input mkElement :: QName -> a n XmlTree -> a n XmlTree -> a n XmlTree mkElement n af cf = (listA af &&& listA cf) >>> arr2 (\ al cl -> XN.mkElement n al cl) -- | attribute node construction: -- | the attribute value is computed by applying an arrow to the input mkAttr :: QName -> a n XmlTree -> a n XmlTree mkAttr qn f = listA f >>> arr (XN.mkAttr qn) -- | processing instruction construction: -- | the content of the processing instruction is computed by applying an arrow to the input mkPi :: QName -> a n XmlTree -> a n XmlTree mkPi qn f = listA f >>> arr (XN.mkPi qn) -- convenient arrows for constructors -------------------------------------------------- -- | convenient arrow for element construction, more comfortable variant of 'mkElement' -- -- example for simplifying 'mkElement' : -- -- > mkElement qn (a1 <+> ... <+> ai) (c1 <+> ... <+> cj) -- -- equals -- -- > mkqelem qn [a1,...,ai] [c1,...,cj] mkqelem :: QName -> [a n XmlTree] -> [a n XmlTree] -> a n XmlTree mkqelem n afs cfs = mkElement n (catA afs) (catA cfs) {-# INLINE mkqelem #-} -- | convenient arrow for element construction with strings instead of qualified names as element names, see also 'mkElement' and 'mkelem' mkelem :: String -> [a n XmlTree] -> [a n XmlTree] -> a n XmlTree mkelem n afs cfs = mkElement (mkName n) (catA afs) (catA cfs) {-# INLINE mkelem #-} -- | convenient arrow for element constrution with attributes but without content, simple variant of 'mkelem' and 'mkElement' aelem :: String -> [a n XmlTree] -> a n XmlTree aelem n afs = catA afs >. \ al -> XN.mkElement (mkName n) al [] {-# INLINE aelem #-} -- | convenient arrow for simple element constrution without attributes, simple variant of 'mkelem' and 'mkElement' selem :: String -> [a n XmlTree] -> a n XmlTree selem n cfs = catA cfs >. XN.mkElement (mkName n) [] {-# INLINE selem #-} -- | convenient arrow for constrution of empty elements without attributes, simple variant of 'mkelem' and 'mkElement' eelem :: String -> a n XmlTree eelem n = constA (XN.mkElement (mkName n) [] []) {-# INLINE eelem #-} -- | construction of an element node with name \"\/\" for document roots root :: [a n XmlTree] -> [a n XmlTree] -> a n XmlTree root = mkelem t_root {-# INLINE root #-} -- | alias for 'mkAttr' qattr :: QName -> a n XmlTree -> a n XmlTree qattr = mkAttr {-# INLINE qattr #-} -- | convenient arrow for attribute constrution, simple variant of 'mkAttr' attr :: String -> a n XmlTree -> a n XmlTree attr = mkAttr . mkName {-# INLINE attr #-} -- constant arrows (ignoring the input) for tree construction ------------------------------ -- | constant arrow for text nodes txt :: String -> a n XmlTree txt = constA . XN.mkText {-# INLINE txt #-} -- | constant arrow for blob nodes blb :: Blob -> a n XmlTree blb = constA . XN.mkBlob {-# INLINE blb #-} -- | constant arrow for char reference nodes charRef :: Int -> a n XmlTree charRef = constA . XN.mkCharRef {-# INLINE charRef #-} -- | constant arrow for entity reference nodes entityRef :: String -> a n XmlTree entityRef = constA . XN.mkEntityRef {-# INLINE entityRef #-} -- | constant arrow for comment cmt :: String -> a n XmlTree cmt = constA . XN.mkCmt {-# INLINE cmt #-} -- | constant arrow for warning warn :: String -> a n XmlTree warn = constA . (XN.mkError c_warn) {-# INLINE warn #-} -- | constant arrow for errors err :: String -> a n XmlTree err = constA . (XN.mkError c_err) {-# INLINE err #-} -- | constant arrow for fatal errors fatal :: String -> a n XmlTree fatal = constA . (XN.mkError c_fatal) {-# INLINE fatal #-} -- | constant arrow for simple processing instructions, see 'mkPi' spi :: String -> String -> a n XmlTree spi piName piCont = constA (XN.mkPi (mkName piName) [XN.mkAttr (mkName a_value) [XN.mkText piCont]]) {-# INLINE spi #-} -- | constant arrow for attribute nodes, attribute name is a qualified name and value is a text, -- | see also 'mkAttr', 'qattr', 'attr' sqattr :: QName -> String -> a n XmlTree sqattr an av = constA (XN.mkAttr an [XN.mkText av]) {-# INLINE sqattr #-} -- | constant arrow for attribute nodes, attribute name and value are -- | given by parameters, see 'mkAttr' sattr :: String -> String -> a n XmlTree sattr an av = constA (XN.mkAttr (mkName an) [XN.mkText av]) {-# INLINE sattr #-} -- selector arrows -------------------------------------------------- -- | select the text of a text node getText :: a XmlTree String getText = arrL (maybeToList . XN.getText) {-# INLINE getText #-} -- | select the value of a char reference getCharRef :: a XmlTree Int getCharRef = arrL (maybeToList . XN.getCharRef) {-# INLINE getCharRef #-} -- | select the name of a entity reference node getEntityRef :: a XmlTree String getEntityRef = arrL (maybeToList . XN.getEntityRef) {-# INLINE getEntityRef #-} -- | select the comment of a comment node getCmt :: a XmlTree String getCmt = arrL (maybeToList . XN.getCmt) {-# INLINE getCmt #-} -- | select the content of a CDATA node getCdata :: a XmlTree String getCdata = arrL (maybeToList . XN.getCdata) {-# INLINE getCdata #-} -- | select the name of a processing instruction getPiName :: a XmlTree QName getPiName = arrL (maybeToList . XN.getPiName) {-# INLINE getPiName #-} -- | select the content of a processing instruction getPiContent :: a XmlTree XmlTree getPiContent = arrL (fromMaybe [] . XN.getPiContent) {-# INLINE getPiContent #-} -- | select the name of an element node getElemName :: a XmlTree QName getElemName = arrL (maybeToList . XN.getElemName) {-# INLINE getElemName #-} -- | select the attribute list of an element node getAttrl :: a XmlTree XmlTree getAttrl = arrL (fromMaybe [] . XN.getAttrl) {-# INLINE getAttrl #-} -- | select the DTD type of a DTD node getDTDPart :: a XmlTree DTDElem getDTDPart = arrL (maybeToList . XN.getDTDPart) {-# INLINE getDTDPart #-} -- | select the DTD attributes of a DTD node getDTDAttrl :: a XmlTree Attributes getDTDAttrl = arrL (maybeToList . XN.getDTDAttrl) {-# INLINE getDTDAttrl #-} -- | select the name of an attribute getAttrName :: a XmlTree QName getAttrName = arrL (maybeToList . XN.getAttrName) {-# INLINE getAttrName #-} -- | select the error level (c_warn, c_err, c_fatal) from an error node getErrorLevel :: a XmlTree Int getErrorLevel = arrL (maybeToList . XN.getErrorLevel) {-# INLINE getErrorLevel #-} -- | select the error message from an error node getErrorMsg :: a XmlTree String getErrorMsg = arrL (maybeToList . XN.getErrorMsg) {-# INLINE getErrorMsg #-} -- | select the qualified name from an element, attribute or pi getQName :: a XmlTree QName getQName = arrL (maybeToList . XN.getName) {-# INLINE getQName #-} -- | select the prefix:localPart or localPart from an element, attribute or pi getName :: a XmlTree String getName = arrL (maybeToList . XN.getQualifiedName) {-# INLINE getName #-} -- | select the univeral name ({namespace URI} ++ localPart) getUniversalName :: a XmlTree String getUniversalName = arrL (maybeToList . XN.getUniversalName) {-# INLINE getUniversalName #-} -- | select the univeral name (namespace URI ++ localPart) getUniversalUri :: a XmlTree String getUniversalUri = arrL (maybeToList . XN.getUniversalUri) {-# INLINE getUniversalUri #-} -- | select the local part getLocalPart :: a XmlTree String getLocalPart = arrL (maybeToList . XN.getLocalPart) {-# INLINE getLocalPart #-} -- | select the name prefix getNamePrefix :: a XmlTree String getNamePrefix = arrL (maybeToList . XN.getNamePrefix) {-# INLINE getNamePrefix #-} -- | select the namespace URI getNamespaceUri :: a XmlTree String getNamespaceUri = arrL (maybeToList . XN.getNamespaceUri) {-# INLINE getNamespaceUri #-} -- | select the value of an attribute of an element node, -- always succeeds with empty string as default value \"\" getAttrValue :: String -> a XmlTree String getAttrValue n = xshow (getAttrl >>> hasName n >>> getChildren) -- | like 'getAttrValue', but fails if the attribute does not exist getAttrValue0 :: String -> a XmlTree String getAttrValue0 n = getAttrl >>> hasName n >>> xshow getChildren -- | like 'getAttrValue', but select the value of an attribute given by a qualified name, -- always succeeds with empty string as default value \"\" getQAttrValue :: QName -> a XmlTree String getQAttrValue n = xshow (getAttrl >>> hasQName n >>> getChildren) -- | like 'getQAttrValue', but fails if attribute does not exist getQAttrValue0 :: QName -> a XmlTree String getQAttrValue0 n = getAttrl >>> hasQName n >>> xshow getChildren -- edit arrows -------------------------------------------------- -- | edit the string of a text node changeText :: (String -> String) -> a XmlTree XmlTree changeText cf = arr (XN.changeText cf) `when` isText -- | edit the blob of a blob node changeBlob :: (Blob -> Blob) -> a XmlTree XmlTree changeBlob cf = arr (XN.changeBlob cf) `when` isBlob -- | edit the comment string of a comment node changeCmt :: (String -> String) -> a XmlTree XmlTree changeCmt cf = arr (XN.changeCmt cf) `when` isCmt -- | edit an element-, attribute- or pi- name changeQName :: (QName -> QName) -> a XmlTree XmlTree changeQName cf = arr (XN.changeName cf) `when` getQName -- | edit an element name changeElemName :: (QName -> QName) -> a XmlTree XmlTree changeElemName cf = arr (XN.changeElemName cf) `when` isElem -- | edit an attribute name changeAttrName :: (QName -> QName) -> a XmlTree XmlTree changeAttrName cf = arr (XN.changeAttrName cf) `when` isAttr -- | edit a pi name changePiName :: (QName -> QName) -> a XmlTree XmlTree changePiName cf = arr (XN.changePiName cf) `when` isPi -- | edit an attribute value changeAttrValue :: (String -> String) -> a XmlTree XmlTree changeAttrValue cf = replaceChildren ( xshow getChildren >>> arr cf >>> mkText ) `when` isAttr -- | edit an attribute list of an element node changeAttrl :: (XmlTrees -> XmlTrees -> XmlTrees) -> a XmlTree XmlTree -> a XmlTree XmlTree changeAttrl cf f = ( ( listA f &&& this ) >>> arr2 changeAL ) `when` ( isElem <+> isPi ) where changeAL as x = XN.changeAttrl (\ xs -> cf xs as) x -- | replace an element, attribute or pi name setQName :: QName -> a XmlTree XmlTree setQName n = changeQName (const n) {-# INLINE setQName #-} -- | replace an element name setElemName :: QName -> a XmlTree XmlTree setElemName n = changeElemName (const n) {-# INLINE setElemName #-} -- | replace an attribute name setAttrName :: QName -> a XmlTree XmlTree setAttrName n = changeAttrName (const n) {-# INLINE setAttrName #-} -- | replace an element name setPiName :: QName -> a XmlTree XmlTree setPiName n = changePiName (const n) {-# INLINE setPiName #-} -- | replace an atribute list of an element node setAttrl :: a XmlTree XmlTree -> a XmlTree XmlTree setAttrl = changeAttrl (const id) -- (\ x y -> y) {-# INLINE setAttrl #-} -- | add a list of attributes to an element addAttrl :: a XmlTree XmlTree -> a XmlTree XmlTree addAttrl = changeAttrl (XN.mergeAttrl) {-# INLINE addAttrl #-} -- | add (or replace) an attribute addAttr :: String -> String -> a XmlTree XmlTree addAttr an av = addAttrl (sattr an av) {-# INLINE addAttr #-} -- | remove an attribute removeAttr :: String -> a XmlTree XmlTree removeAttr an = processAttrl (none `when` hasName an) -- | remove an attribute with a qualified name removeQAttr :: QName -> a XmlTree XmlTree removeQAttr an = processAttrl (none `when` hasQName an) -- | process the attributes of an element node with an arrow processAttrl :: a XmlTree XmlTree -> a XmlTree XmlTree processAttrl f = setAttrl (getAttrl >>> f) -- | process a whole tree inclusive attribute list of element nodes -- see also: 'Control.Arrow.ArrowTree.processTopDown' processTopDownWithAttrl :: a XmlTree XmlTree -> a XmlTree XmlTree processTopDownWithAttrl f = processTopDown ( f >>> ( processAttrl (processTopDown f) `when` isElem)) -- | convenient op for adding attributes or children to a node -- -- usage: @ tf += cf @ -- -- the @tf@ arrow computes an element node, and all trees computed by @cf@ are -- added to this node, if a tree is an attribute, it is inserted in the attribute list -- else it is appended to the content list. -- -- attention: do not build long content list this way because '+=' is implemented by ++ -- -- examples: -- -- > eelem "a" -- > += sattr "href" "page.html" -- > += sattr "name" "here" -- > += txt "look here" -- -- is the same as -- -- > mkelem [ sattr "href" "page.html" -- > , sattr "name" "here" -- > ] -- > [ txt "look here" ] -- -- and results in the XML fragment: \look here\<\/a\> -- -- advantage of the '+=' operator is, that attributes and content can be added -- any time step by step. -- if @tf@ computes a whole list of trees, e.g. a list of \"td\" or \"tr\" elements, -- the attributes or content is added to all trees. useful for adding \"class\" or \"style\" attributes -- to table elements. (+=) :: a b XmlTree -> a b XmlTree -> a b XmlTree tf += cf = (tf &&& listA cf) >>> arr2 addChildren where addChildren :: XmlTree -> XmlTrees -> XmlTree addChildren t cs = foldl addChild t cs addChild :: XmlTree -> XmlTree -> XmlTree addChild t c | not (XN.isElem t) = t | XN.isAttr c = XN.changeAttrl (XN.addAttr c) t | otherwise = XN.changeChildren (++ [c]) t -- | apply an arrow to the input and convert the resulting XML trees into a string representation xshow :: a n XmlTree -> a n String xshow f = f >. XS.xshow {-# INLINE xshow #-} -- | apply an arrow to the input and convert the resulting XML trees into a string representation xshowBlob :: a n XmlTree -> a n Blob xshowBlob f = f >. XS.xshowBlob {-# INLINE xshowBlob #-} {- | Document Type Definition arrows These are separated, because they are not needed for document processing, only when processing the DTD, e.g. for generating access funtions for the toolbox from a DTD (se example DTDtoHaskell in the examples directory) -} class (ArrowXml a) => ArrowDTD a where isDTDDoctype :: a XmlTree XmlTree isDTDDoctype = isA (maybe False (== DOCTYPE ) . XN.getDTDPart) isDTDElement :: a XmlTree XmlTree isDTDElement = isA (maybe False (== ELEMENT ) . XN.getDTDPart) isDTDContent :: a XmlTree XmlTree isDTDContent = isA (maybe False (== CONTENT ) . XN.getDTDPart) isDTDAttlist :: a XmlTree XmlTree isDTDAttlist = isA (maybe False (== ATTLIST ) . XN.getDTDPart) isDTDEntity :: a XmlTree XmlTree isDTDEntity = isA (maybe False (== ENTITY ) . XN.getDTDPart) isDTDPEntity :: a XmlTree XmlTree isDTDPEntity = isA (maybe False (== PENTITY ) . XN.getDTDPart) isDTDNotation :: a XmlTree XmlTree isDTDNotation = isA (maybe False (== NOTATION) . XN.getDTDPart) isDTDCondSect :: a XmlTree XmlTree isDTDCondSect = isA (maybe False (== CONDSECT) . XN.getDTDPart) isDTDName :: a XmlTree XmlTree isDTDName = isA (maybe False (== NAME ) . XN.getDTDPart) isDTDPERef :: a XmlTree XmlTree isDTDPERef = isA (maybe False (== PEREF ) . XN.getDTDPart) hasDTDAttr :: String -> a XmlTree XmlTree hasDTDAttr n = isA (isJust . lookup n . fromMaybe [] . XN.getDTDAttrl) getDTDAttrValue :: String -> a XmlTree String getDTDAttrValue n = arrL (maybeToList . lookup n . fromMaybe [] . XN.getDTDAttrl) setDTDAttrValue :: String -> String -> a XmlTree XmlTree setDTDAttrValue n v = arr (XN.changeDTDAttrl (addEntry n v)) `when` isDTD mkDTDElem :: DTDElem -> Attributes -> a n XmlTree -> a n XmlTree mkDTDElem e al cf = listA cf >>> arr (XN.mkDTDElem e al) mkDTDDoctype :: Attributes -> a n XmlTree -> a n XmlTree mkDTDDoctype = mkDTDElem DOCTYPE mkDTDElement :: Attributes -> a n XmlTree mkDTDElement al = mkDTDElem ELEMENT al none mkDTDEntity :: Attributes -> a n XmlTree mkDTDEntity al = mkDTDElem ENTITY al none mkDTDPEntity :: Attributes -> a n XmlTree mkDTDPEntity al = mkDTDElem PENTITY al none instance ArrowXml LA instance ArrowXml (SLA s) instance ArrowXml IOLA instance ArrowXml (IOSLA s) instance ArrowDTD LA instance ArrowDTD (SLA s) instance ArrowDTD IOLA instance ArrowDTD (IOSLA s) -- ------------------------------------------------------------ hxt-9.3.1.15/src/Text/XML/HXT/Arrow/DocumentOutput.hs0000644000000000000000000002350312465166667020232 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Text.XML.HXT.Arrow.DocumentOutput Copyright : Copyright (C) 2005-9 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : experimental Portability: portable State arrows for document output -} -- ------------------------------------------------------------ module Text.XML.HXT.Arrow.DocumentOutput ( putXmlDocument , putXmlTree , putXmlSource , encodeDocument , encodeDocument' ) where import Control.Arrow import Control.Arrow.ArrowExc import Control.Arrow.ArrowIf import Control.Arrow.ArrowIO import Control.Arrow.ArrowList import Control.Arrow.ArrowTree import Control.Arrow.ListArrow import qualified Data.ByteString.Lazy as BS import Data.Maybe import Data.String.Unicode (getOutputEncodingFct') import Text.XML.HXT.DOM.Interface import qualified Text.XML.HXT.DOM.ShowXml as XS import Text.XML.HXT.Arrow.Edit (addHeadlineToXmlDoc, addXmlPi, addXmlPiEncoding, escapeHtmlRefs, escapeXmlRefs, indentDoc, numberLinesInXmlDoc, treeRepOfXmlDoc) import Text.XML.HXT.Arrow.XmlArrow import Text.XML.HXT.Arrow.XmlState import Text.XML.HXT.Arrow.XmlState.TypeDefs import System.IO (Handle, IOMode (..), hClose, hPutStrLn, hSetBinaryMode, openBinaryFile, openFile, stdout) -- ------------------------------------------------------------ -- -- | Write the contents of a document tree into an output stream (file or stdout). -- -- If textMode is set, writing is done with Haskell string output, else (default) -- writing is done with lazy ByteString output putXmlDocument :: Bool -> String -> IOStateArrow s XmlTree XmlTree putXmlDocument textMode dst = perform putDoc where putDoc = ( if textMode then ( xshow getChildren >>> tryA (arrIO (\ s -> hPutDocument (\h -> hPutStrLn h s))) ) else ( xshowBlob getChildren >>> tryA (arrIO (\ s -> hPutDocument (\h -> do BS.hPutStr h s BS.hPutStr h (stringToBlob "\n") ) ) ) ) ) >>> ( ( traceMsg 1 ("io error, document not written to " ++ outFile) >>> arr show >>> mkError c_fatal >>> filterErrorMsg ) ||| ( traceMsg 2 ("document written to " ++ outFile ++ ", textMode = " ++ show textMode) >>> none ) ) where isStdout = null dst || dst == "-" outFile = if isStdout then "stdout" else show dst hPutDocument :: (Handle -> IO ()) -> IO () hPutDocument action | isStdout = do hSetBinaryMode stdout (not textMode) action stdout hSetBinaryMode stdout False | otherwise = do handle <- ( if textMode then openFile else openBinaryFile ) dst WriteMode action handle hClose handle -- | -- write the tree representation of a document to a file putXmlTree :: String -> IOStateArrow s XmlTree XmlTree putXmlTree dst = perform ( treeRepOfXmlDoc >>> addHeadlineToXmlDoc >>> putXmlDocument True dst ) -- | -- write a document with indentaion and line numers putXmlSource :: String -> IOStateArrow s XmlTree XmlTree putXmlSource dst = perform ( (this ) `whenNot` isRoot >>> indentDoc >>> numberLinesInXmlDoc >>> addHeadlineToXmlDoc >>> putXmlDocument True dst ) -- ------------------------------------------------------------ getEncodingParam :: IOStateArrow s XmlTree String getEncodingParam = catA [ getSysVar theOutputEncoding -- 4. guess: take output encoding parameter from global state , getSysVar theInputEncoding -- 5. guess: take encoding parameter from global state , constA utf8 -- default : utf8 ] >. (head . filter (not . null)) getOutputEncoding :: String -> IOStateArrow s XmlTree String getOutputEncoding defaultEnc = getEC $< getEncodingParam where getEC enc' = fromLA $ getOutputEncoding' defaultEnc enc' encodeDocument :: Bool -> Bool -> String -> IOStateArrow s XmlTree XmlTree encodeDocument quoteXml supressXmlPi defaultEnc = encode $< getOutputEncoding defaultEnc where encode enc = traceMsg 2 ("encodeDocument: encoding is " ++ show enc) >>> ( encodeDocument' quoteXml supressXmlPi enc `orElse` ( issueFatal ("encoding scheme not supported: " ++ show enc) >>> setDocumentStatusFromSystemState "encoding document" ) ) -- ------------------------------------------------------------ isBinaryDoc :: LA XmlTree XmlTree isBinaryDoc = ( ( getAttrValue transferMimeType >>^ stringToLower ) >>> isA (\ t -> not (null t || isTextMimeType t || isXmlMimeType t)) ) `guards` this getOutputEncoding' :: String -> String -> LA XmlTree String getOutputEncoding' defaultEnc defaultEnc2 = catA [ isBinaryDoc >>> -- 0. guess: binary data found: no encoding at all constA isoLatin1 -- the content should usually be a blob -- this handling is like the decoding in DocumentInput, -- there nothing is decoded for non text or non xml contents , getChildren -- 1. guess: evaluate >>> ( ( isPi >>> hasName t_xml ) `guards` getAttrValue a_encoding ) , constA defaultEnc -- 2. guess: explicit parameter, may be "" , getAttrValue a_output_encoding -- 3. guess: take output encoding parameter in root node , constA defaultEnc2 -- default : UNICODE or utf8 ] >. (head . filter (not . null)) -- make the filter deterministic: take 1. entry from list of guesses encodeDocument' :: ArrowXml a => Bool -> Bool -> String -> a XmlTree XmlTree encodeDocument' quoteXml supressXmlPi defaultEnc = fromLA (encode $< getOutputEncoding' defaultEnc utf8) where encode :: String -> LA XmlTree XmlTree encode encodingScheme | encodingScheme == unicodeString = replaceChildren ( (getChildren >. XS.xshow'' cQuot aQuot) >>> mkText ) | isNothing encodeFct = none | otherwise = ( if supressXmlPi then processChildren (none `when` isXmlPi) else ( addXmlPi >>> addXmlPiEncoding encodingScheme ) ) >>> ( isLatin1Blob `orElse` encodeDoc (fromJust encodeFct) ) >>> addAttr a_output_encoding encodingScheme where (cQuot, aQuot) | quoteXml = escapeXmlRefs | otherwise = escapeHtmlRefs encodeFct = getOutputEncodingFct' encodingScheme encodeDoc ef = replaceChildren ( xshowBlobWithEnc cQuot aQuot ef getChildren >>> mkBlob ) xshowBlobWithEnc cenc aenc enc f = f >. XS.xshow' cenc aenc enc -- if encoding scheme is isolatin1 and the contents is a single blob (bytestring) -- the encoding is the identity. -- This optimization enables processing (copying) of none XML contents -- without any conversions from and to strings isLatin1Blob | encodingScheme /= isoLatin1 = none | otherwise = childIsSingleBlob `guards` this where childIsSingleBlob = listA getChildren >>> isA (length >>> (== 1)) >>> unlistA >>> isBlob -- ------------------------------------------------------------ hxt-9.3.1.15/src/Text/XML/HXT/Arrow/XmlOptions.hs0000644000000000000000000003303512465166667017350 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Text.XML.HXT.Arrow.XmlOptions Copyright : Copyright (C) 2010 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : stable Portability: portable system configuration and common options options -} -- ------------------------------------------------------------ module Text.XML.HXT.Arrow.XmlOptions where import Text.XML.HXT.DOM.Interface import Text.XML.HXT.Arrow.XmlState.TypeDefs import Text.XML.HXT.Arrow.XmlState.SystemConfig import Data.Maybe import System.Console.GetOpt -- ------------------------------------------------------------ -- -- | -- commonly useful options for XML input -- -- can be used for option definition with haskell getopt -- -- defines options: 'a_trace', 'a_proxy', 'a_use_curl', 'a_do_not_use_curl', 'a_options_curl', 'a_encoding', -- 'a_issue_errors', 'a_do_not_issue_errors', 'a_parse_html', 'a_parse_by_mimetype', 'a_issue_warnings', 'a_do_not_issue_warnings', -- 'a_parse_xml', 'a_validate', 'a_do_not_validate', 'a_canonicalize', 'a_do_not_canonicalize', --- 'a_preserve_comment', 'a_do_not_preserve_comment', 'a_check_namespaces', 'a_do_not_check_namespaces', -- 'a_remove_whitespace', 'a_do_not_remove_whitespace' inputOptions :: [OptDescr SysConfig] inputOptions = [ Option "t" [a_trace] (OptArg trc "LEVEL") "trace level (0-4), default 1" , Option "p" [a_proxy] (ReqArg withProxy "PROXY") "proxy for http access (e.g. \"www-cache:3128\")" , Option "" [a_redirect] (NoArg (withRedirect True)) "automatically follow redirected URIs" , Option "" [a_no_redirect] (NoArg (withRedirect False)) "switch off following redirected URIs" , Option "" [a_default_baseuri] (ReqArg withDefaultBaseURI "URI") "default base URI, default: \"file:////\"" , Option "e" [a_encoding] (ReqArg withInputEncoding "CHARSET") ( "default document encoding (" ++ utf8 ++ ", " ++ isoLatin1 ++ ", " ++ usAscii ++ ", ...)" ) , Option "" [a_mime_types] (ReqArg withMimeTypeFile "FILE") "set mime type configuration file, e.g. \"/etc/mime.types\"" , Option "" [a_issue_errors] (NoArg (withErrors True)) "issue all error messages on stderr (default)" , Option "" [a_do_not_issue_errors] (NoArg (withErrors False)) "ignore all error messages" , Option "" [a_ignore_encoding_errors] (NoArg (withEncodingErrors False)) "ignore encoding errors" , Option "" [a_ignore_none_xml_contents] (NoArg (withIgnoreNoneXmlContents True)) "discards all contents of none XML/HTML documents, only the meta info remains in the doc tree" , Option "" [a_accept_mimetypes] (ReqArg withMT "MIMETYPES") "only accept documents matching the given comma separated list of mimetype specs" , Option "H" [a_parse_html] (NoArg (withParseHTML True)) "parse input as HTML, try to interprete everything as HTML, no validation" , Option "M" [a_parse_by_mimetype] (NoArg (withParseByMimeType True)) "parse dependent on mime type: text/html as HTML, text/xml and text/xhtml and others as XML, else no parse" , Option "" [a_parse_xml] (NoArg (withParseHTML False)) "parse input as XML, (default)" , Option "" [a_strict_input] (NoArg (withStrictInput True)) "read input files strictly, this ensures closing the files correctly even if not read completely" , Option "" [a_issue_warnings] (NoArg (withWarnings True)) "issue warnings, when parsing HTML (default)" , Option "Q" [a_do_not_issue_warnings] (NoArg (withWarnings False)) "ignore warnings, when parsing HTML" , Option "" [a_validate] (NoArg (withValidate True)) "document validation when parsing XML (default)" , Option "w" [a_do_not_validate] (NoArg (withValidate False)) "only wellformed check, no validation" , Option "" [a_subst_dtd_entities] (NoArg (withSubstDTDEntities True)) "entities defined in DTD are substituted when parsing XML (default)" , Option "" [a_do_not_subst_dtd_entities] (NoArg (withSubstDTDEntities False)) "entities defined in DTD are NOT substituted when parsing XML" , Option "" [a_subst_html_entities] (NoArg (withSubstHTMLEntities True)) "entities defined in XHTML are substituted when parsing XML, only in effect when prev. option is switched off" , Option "" [a_do_not_subst_html_entities] (NoArg (withSubstHTMLEntities False)) "only entities predefined in XML are substituted when parsing XML (default)" , Option "" [a_canonicalize] (NoArg (withCanonicalize True)) "canonicalize document, remove DTD, comment, transform CDATA, CharRef's, ... (default)" , Option "c" [a_do_not_canonicalize] (NoArg (withCanonicalize False)) "do not canonicalize document, don't remove DTD, comment, don't transform CDATA, CharRef's, ..." , Option "C" [a_preserve_comment] (NoArg (withPreserveComment True)) "don't remove comments during canonicalisation" , Option "" [a_do_not_preserve_comment] (NoArg (withPreserveComment False)) "remove comments during canonicalisation (default)" , Option "n" [a_check_namespaces] (NoArg (withCheckNamespaces True)) "tag tree with namespace information and check namespaces" , Option "" [a_do_not_check_namespaces] (NoArg (withCheckNamespaces False)) "ignore namespaces (default)" , Option "r" [a_remove_whitespace] (NoArg (withRemoveWS True)) "remove redundant whitespace, simplifies tree and processing" , Option "" [a_do_not_remove_whitespace] (NoArg (withRemoveWS False)) "don't remove redundant whitespace (default)" ] where withMT = withAcceptedMimeTypes . words . map (\ x -> if x == ',' then ' ' else x) trc = withTrace . max 0 . min 9 . (read :: String -> Int) . ('0':) . filter (`elem` "0123456789") . fromMaybe v_1 -- | -- commonly useful options for XML output -- -- defines options: 'a_indent', 'a_output_encoding', 'a_output_html' and others outputOptions :: [OptDescr SysConfig] outputOptions = [ Option "f" [a_output_file] (ReqArg (withSysAttr a_output_file) "FILE") "output file for resulting document (default: stdout)" , Option "i" [a_indent] (NoArg (withIndent True)) "indent XML output for readability" , Option "o" [a_output_encoding] (ReqArg withOutputEncoding "CHARSET") ( "encoding of output (" ++ utf8 ++ ", " ++ isoLatin1 ++ ", " ++ usAscii ++ ")" ) , Option "" [a_output_xml] (NoArg withOutputXML ) "output of none ASCII chars as HTMl entity references" , Option "" [a_output_html] (NoArg withOutputHTML ) "output of none ASCII chars as HTMl entity references" , Option "" [a_output_xhtml] (NoArg withOutputXHTML ) "output of HTML elements with empty content (script, ...) done in format
instead of " , Option "" [a_output_plain] (NoArg withOutputPLAIN ) "output of HTML elements with empty content (script, ...) done in format instead of " , Option "" [a_no_xml_pi] (NoArg (withXmlPi False)) ("output without processing instruction, useful in combination with --" ++ show a_output_html) , Option "" [a_no_empty_elem_for] (ReqArg (withNoEmptyElemFor . words') "NAMES") "output of empty elements done in format only for given list of element names" , Option "" [a_add_default_dtd] (NoArg (withAddDefaultDTD True)) "add the document type declaration given in the input document" , Option "" [a_text_mode] (NoArg (withTextMode True)) "output in text mode" ] where words' = words . map (\ c -> if c == ',' then ' ' else c) -- | -- commonly useful options -- -- defines options: 'a_verbose', 'a_help' generalOptions :: [OptDescr SysConfig] generalOptions = [ Option "v" [a_verbose] (NoArg (withSysAttr a_verbose v_1)) "verbose output" , Option "h?" [a_help] (NoArg (withSysAttr a_help v_1)) "this message" ] -- | -- defines 'a_version' option versionOptions :: [OptDescr SysConfig] versionOptions = [ Option "V" [a_version] (NoArg (withSysAttr a_version v_1)) "show program version" ] -- | -- debug output options showOptions :: [OptDescr SysConfig] showOptions = [ Option "" [a_show_tree] (NoArg (withShowTree True)) "output tree representation instead of document source" , Option "" [a_show_haskell] (NoArg (withShowHaskell True)) "output internal Haskell representation instead of document source" ] -- ------------------------------------------------------------ a_accept_mimetypes, a_add_default_dtd, a_canonicalize, a_check_namespaces, a_collect_errors, a_default_baseuri, a_do_not_canonicalize, a_do_not_check_namespaces, a_do_not_issue_errors, a_do_not_issue_warnings, a_do_not_preserve_comment, a_do_not_remove_whitespace, a_do_not_subst_dtd_entities, a_do_not_subst_html_entities, a_do_not_validate, a_error, a_error_log, a_help, a_if_modified_since, a_if_unmodified_since, a_ignore_encoding_errors, a_ignore_none_xml_contents, a_indent, a_issue_errors, a_issue_warnings, a_mime_types, a_no_empty_elements, a_no_empty_elem_for, a_no_redirect, a_no_xml_pi, a_output_file, a_output_xml, a_output_html, a_output_xhtml, a_output_plain, a_parse_by_mimetype, a_parse_html, a_parse_xml, a_preserve_comment, a_proxy, a_redirect, a_remove_whitespace, a_show_haskell, a_show_tree, a_strict_input, a_subst_dtd_entities, a_subst_html_entities, a_text_mode, a_trace, a_validate, a_verbose :: String a_accept_mimetypes = "accept-mimetypes" a_add_default_dtd = "add-default-dtd" a_canonicalize = "canonicalize" a_check_namespaces = "check-namespaces" a_collect_errors = "collect-errors" a_default_baseuri = "default-base-URI" a_do_not_canonicalize = "do-not-canonicalize" a_do_not_check_namespaces = "do-not-check-namespaces" a_do_not_issue_errors = "do-not-issue-errors" a_do_not_issue_warnings = "do-not-issue-warnings" a_do_not_preserve_comment = "do-not-preserve-comment" a_do_not_remove_whitespace = "do-not-remove-whitespace" a_do_not_subst_dtd_entities = "do-not-subst-dtd-entities" a_do_not_subst_html_entities = "do-not-subst-html-entities" a_do_not_validate = "do-not-validate" a_error = "error" a_error_log = "errorLog" a_help = "help" a_if_modified_since = "if-modified-since" a_if_unmodified_since = "if-unmodified-since" a_ignore_encoding_errors = "ignore-encoding-errors" a_ignore_none_xml_contents = "ignore-none-xml-contents" a_indent = "indent" a_issue_warnings = "issue-warnings" a_issue_errors = "issue-errors" a_mime_types = "mimetypes" a_no_empty_elements = "no-empty-elements" a_no_empty_elem_for = "no-empty-elem-for" a_no_redirect = "no-redirect" a_no_xml_pi = "no-xml-pi" a_output_file = "output-file" a_output_html = "output-html" a_output_xhtml = "output-xhtml" a_output_xml = "output-xml" a_output_plain = "output-plain" a_parse_by_mimetype = "parse-by-mimetype" a_parse_html = "parse-html" a_parse_xml = "parse-xml" a_preserve_comment = "preserve-comment" a_proxy = "proxy" a_redirect = "redirect" a_remove_whitespace = "remove-whitespace" a_show_haskell = "show-haskell" a_show_tree = "show-tree" a_strict_input = "strict-input" a_subst_dtd_entities = "subst-dtd-entities" a_subst_html_entities = "subst-html-entities" a_text_mode = "text-mode" a_trace = "trace" a_validate = "validate" a_verbose = "verbose" -- ------------------------------------------------------------ -- | -- select options from a predefined list of option desciptions selectOptions :: [String] -> [OptDescr a] -> [OptDescr a] selectOptions ol os = concat . map (\ on -> filter (\ (Option _ ons _ _) -> on `elem` ons) os) $ ol removeOptions :: [String] -> [OptDescr a] -> [OptDescr a] removeOptions ol os = filter (\ (Option _ ons _ _) -> not . any (`elem` ol) $ ons ) os -- ------------------------------------------------------------ hxt-9.3.1.15/src/Text/XML/HXT/Arrow/XmlState.hs0000644000000000000000000000675412465166667017005 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Text.XML.HXT.Arrow.XmlState Copyright : Copyright (C) 2010 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : stable Portability: portable the interface for the basic state maipulation functions -} -- ------------------------------------------------------------ module Text.XML.HXT.Arrow.XmlState ( -- * Data Types XIOState , XIOSysState , IOStateArrow , IOSArrow , SysConfig , SysConfigList , -- * User State Manipulation getUserState , setUserState , changeUserState , withExtendedUserState , withOtherUserState , withoutUserState , -- * Run IO State arrows runX , -- * Global System State Configuration and Access configSysVars , setSysAttr , unsetSysAttr , getSysAttr , getAllSysAttrs , setSysAttrString , setSysAttrInt , getSysAttrInt , getConfigAttr , -- * Error Handling clearErrStatus , setErrStatus , getErrStatus , setErrMsgStatus , setErrorMsgHandler , errorMsgStderr , errorMsgCollect , errorMsgStderrAndCollect , errorMsgIgnore , getErrorMessages , filterErrorMsg , issueWarn , issueErr , issueFatal , issueExc , setDocumentStatus , setDocumentStatusFromSystemState , documentStatusOk , -- * Tracing setTraceLevel , getTraceLevel , withTraceLevel , setTraceCmd , getTraceCmd , trace , traceMsg , traceValue , traceString , traceSource , traceTree , traceDoc , -- * Document Base setBaseURI , getBaseURI , changeBaseURI , setDefaultBaseURI , getDefaultBaseURI , runInLocalURIContext , -- * URI Manipulation expandURIString , expandURI , mkAbsURI , getFragmentFromURI , getPathFromURI , getPortFromURI , getQueryFromURI , getRegNameFromURI , getSchemeFromURI , getUserInfoFromURI , -- * Mime Type Handling getMimeTypeTable , setMimeTypeTable , setMimeTypeTableFromFile , -- * System Configuration and Options yes , no , withAcceptedMimeTypes , withAddDefaultDTD , withSysAttr , withCanonicalize , withCompression , withCheckNamespaces , withDefaultBaseURI , withStrictDeserialize , withEncodingErrors , withErrors , withFileMimeType , withIgnoreNoneXmlContents , withIndent , withInputEncoding , withInputOption , withInputOptions , withMimeTypeFile , withMimeTypeHandler , withNoEmptyElemFor , withXmlPi , withOutputEncoding , withOutputXML , withOutputHTML , withOutputXHTML , withOutputPLAIN , withParseByMimeType , withParseHTML , withPreserveComment , withProxy , withRedirect , withRemoveWS , withShowHaskell , withShowTree , withStrictInput , withSubstDTDEntities , withSubstHTMLEntities , withTextMode , withTrace , withValidate , withWarnings ) where import Text.XML.HXT.Arrow.XmlState.ErrorHandling import Text.XML.HXT.Arrow.XmlState.MimeTypeTable import Text.XML.HXT.Arrow.XmlState.RunIOStateArrow import Text.XML.HXT.Arrow.XmlState.SystemConfig import Text.XML.HXT.Arrow.XmlState.TraceHandling import Text.XML.HXT.Arrow.XmlState.TypeDefs import Text.XML.HXT.Arrow.XmlState.URIHandling -- ------------------------------------------------------------ hxt-9.3.1.15/src/Text/XML/HXT/Arrow/Binary.hs0000644000000000000000000000616412465166667016463 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Text.XML.HXT.Arrow.Binary Copyright : Copyright (C) 2008 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : experimental Portability: portable De-/Serialisation arrows for XmlTrees and other arbitrary values with a Binary instance -} -- ------------------------------------------------------------ module Text.XML.HXT.Arrow.Binary ( readBinaryValue , writeBinaryValue ) where import Control.Arrow () import Control.Arrow.ArrowExc import Control.Arrow.ArrowIO import Control.Arrow.ArrowList import Data.Binary import qualified Data.ByteString.Lazy as B import System.IO (IOMode (..), hClose, openBinaryFile) import Text.XML.HXT.Arrow.XmlState.ErrorHandling import Text.XML.HXT.Arrow.XmlState.TypeDefs -- ------------------------------------------------------------ readBinaryValue :: (Binary a) => String -> IOStateArrow s b a readBinaryValue file = (uncurry $ decodeBinaryValue file) $< getSysVar ( theStrictDeserialize .&&&. theBinaryDeCompression ) -- | Read a serialied value from a file, optionally decompress it and decode the value -- In case of an error, the error message is issued and the arrow fails decodeBinaryValue :: (Binary a) => String -> Bool -> DeCompressionFct -> IOStateArrow s b a decodeBinaryValue file strict decompress = arrIO0 dec `catchA` issueExc "readBinaryValue" where dec = ( if strict then readItAll else B.readFile file ) >>= return . decode . decompress readItAll = do h <- openBinaryFile file ReadMode c <- B.hGetContents h B.length c `seq` do hClose h return c -- hack: force reading whole file and close it immediately -- | Serialize a value, optionally compress it, and write it to a file. -- In case of an error, the error message is issued and the arrow fails writeBinaryValue :: (Binary a) => String -> IOStateArrow s a () writeBinaryValue file = flip encodeBinaryValue file $< getSysVar theBinaryCompression encodeBinaryValue :: (Binary a) => CompressionFct -> String -> IOStateArrow s a () encodeBinaryValue compress file = arrIO enc `catchA` issueExc "writeBinaryXmlTree" where enc = B.writeFile file . compress . encode -- ------------------------------------------------------------ hxt-9.3.1.15/src/Text/XML/HXT/Arrow/ReadDocument.hs0000644000000000000000000005110612465166667017605 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Text.XML.HXT.Arrow.ReadDocument Copyright : Copyright (C) 2005-2013 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : stable Portability: portable Compound arrows for reading an XML\/HTML document or an XML\/HTML string -} -- ------------------------------------------------------------ module Text.XML.HXT.Arrow.ReadDocument ( readDocument , readFromDocument , readString , readFromString , hread , hreadDoc , xread , xreadDoc ) where import Control.Arrow.ListArrows import Data.Maybe ( fromMaybe ) import qualified Data.Map as M import Text.XML.HXT.DOM.Interface import Text.XML.HXT.Arrow.XmlArrow import Text.XML.HXT.Arrow.Edit ( canonicalizeAllNodes , canonicalizeForXPath , canonicalizeContents , rememberDTDAttrl , removeDocWhiteSpace ) import qualified Text.XML.HXT.Arrow.ParserInterface as PI import Text.XML.HXT.Arrow.ProcessDocument ( getDocumentContents , parseXmlDocument , parseXmlDocumentWithExpat , parseHtmlDocument , propagateAndValidateNamespaces , andValidateNamespaces ) import Text.XML.HXT.Arrow.XmlState import Text.XML.HXT.Arrow.XmlState.TypeDefs -- ------------------------------------------------------------ -- {- | the main document input filter this filter can be configured by a list of configuration options, a value of type 'Text.XML.HXT.XmlState.TypeDefs.SysConfig' for all available options see module 'Text.XML.HXT.Arrow.XmlState.SystemConfig' - @withValidate yes\/no@ : switch on\/off DTD validation. Only for XML parsed documents, not for HTML parsing. - @withSubstDTDEntities yes\/no@ : switch on\/off entity substitution for general entities defined in DTD validation. Default is @yes@. Switching this option and the validation off can lead to faster parsing, in that case reading the DTD documents is not longer necessary. Only used with XML parsed documents, not with HTML parsing. - @withSubstHTMLEntities yes\/no@ : switch on\/off entity substitution for general entities defined in HTML validation. Default is @no@. Switching this option on and the validation and substDTDEntities off can lead to faster parsing, in that case reading the DTD documents is not longer necessary, HTML general entities are still substituted. Only used with XML parsed documents, not with HTML parsing. - @withParseHTML yes\/no@ : switch on HTML parsing. - @withParseByMimeType yes\/no@ : select XML\/HTML parser by document mime type. text\/xml and text\/xhtml are parsed as XML, text\/html as HTML. - @withCheckNamespaces yes\/no@ : Switch on\/off namespace propagation and checking - @withInputEncoding \@ : Set default encoding. - @withTagSoup@ : use light weight and lazy parser based on tagsoup lib. This is only available when package hxt-tagsoup is installed and the source contains an @import Text.XML.HXT.TagSoup@. - @withRelaxNG \@ : validate document with Relax NG, the parameter is for the schema URI. This implies using XML parser, no validation against DTD, and canonicalisation. - @withCurl [\...]@ : Use the libCurl binding for HTTP access. This is only available when package hxt-curl is installed and the source contains an @import Text.XML.HXT.Curl@. - @withHTTP [\...]@ : Use the Haskell HTTP package for HTTP access. This is only available when package hxt-http is installed and the source contains an @import Text.XML.HXT.HTTP@. examples: > readDocument [] "test.xml" reads and validates a document \"test.xml\", no namespace propagation, only canonicalization is performed > ... > import Text.XML.HXT.Curl > ... > > readDocument [ withValidate no > , withInputEncoding isoLatin1 > , withParseByMimeType yes > , withCurl [] > ] "http://localhost/test.php" reads document \"test.php\", parses it as HTML or XML depending on the mimetype given from the server, but without validation, default encoding 'isoLatin1'. HTTP access is done via libCurl. > readDocument [ withParseHTML yes > , withInputEncoding isoLatin1 > ] "" reads a HTML document from standard input, no validation is done when parsing HTML, default encoding is 'isoLatin1', > readDocument [ withInputEncoding isoLatin1 > , withValidate no > , withMimeTypeFile "/etc/mime.types" > , withStrictInput yes > ] "test.svg" reads an SVG document from \"test.svg\", sets the mime type by looking in the system mimetype config file, default encoding is 'isoLatin1', > ... > import Text.XML.HXT.Curl > import Text.XML.HXT.TagSoup > ... > > readDocument [ withParseHTML yes > , withTagSoup > , withProxy "www-cache:3128" > , withCurl [] > , withWarnings no > ] "http://www.haskell.org/" reads Haskell homepage with HTML parser, ignoring any warnings (at the time of writing, there were some HTML errors), with http access via libCurl interface and proxy \"www-cache\" at port 3128, parsing is done with tagsoup HTML parser. This requires packages \"hxt-curl\" and \"hxt-tagsoup\" to be installed > readDocument [ withValidate yes > , withCheckNamespaces yes > , withRemoveWS yes > , withTrace 2 > , withHTTP [] > ] "http://www.w3c.org/" read w3c home page (xhtml), validate and check namespaces, remove whitespace between tags, trace activities with level 2. HTTP access is done with Haskell HTTP package > readDocument [ withValidate no > , withSubstDTDEntities no > ... > ] "http://www.w3c.org/" read w3c home page (xhtml), but without accessing the DTD given in that document. Only the predefined XML general entity refs are substituted. > readDocument [ withValidate no > , withSubstDTDEntities no > , withSubstHTMLEntities yes > ... > ] "http://www.w3c.org/" same as above, but with substituion of all general entity refs defined in XHTML. for minimal complete examples see 'Text.XML.HXT.Arrow.WriteDocument.writeDocument' and 'runX', the main starting point for running an XML arrow. -} readDocument :: SysConfigList -> String -> IOStateArrow s b XmlTree readDocument config src = localSysEnv $ readDocument' config src readDocument' :: SysConfigList -> String -> IOStateArrow s b XmlTree readDocument' config src = configSysVars config >>> readD $< getSysVar theWithCache where readD True = constA undefined -- just for generalizing the signature to: IOStateArrow s b XmlTree >>> -- instead of IOStateArrow s XmlTree XmlTree (withoutUserState $< (getSysVar theCacheRead >>^ ($ src))) readD False = readDocument'' src readDocument'' :: String -> IOStateArrow s b XmlTree readDocument'' src = getDocumentContents src >>> ( processDoc $<< ( getMimeType &&& getSysVar (theParseByMimeType .&&&. theParseHTML .&&&. theAcceptedMimeTypes .&&&. theRelaxValidate .&&&. theXmlSchemaValidate ) ) ) >>> traceMsg 1 ("readDocument: " ++ show src ++ " processed") >>> traceSource >>> traceTree where processNoneEmptyDoc p = ifA (fromLA hasEmptyBody) (replaceChildren none) p where hasEmptyBody = hasAttrValue transferStatus (/= "200") -- test on empty response body for not o.k. responses `guards` -- e.g. 3xx status values ( neg getChildren <+> ( getChildren >>> isWhiteSpace ) ) getMimeType = getAttrValue transferMimeType >>^ stringToLower applyMimeTypeHandler mt = withoutUserState (applyMTH $< getSysVar theMimeTypeHandlers) where applyMTH mtTable = fromMaybe none $ fmap (\ f -> processNoneEmptyDoc (traceMimeStart >>> f >>> traceMimeEnd) ) $ M.lookup mt mtTable traceMimeStart = traceMsg 2 $ "readDocument: calling user defined document parser" traceMimeEnd = traceMsg 2 $ "readDocument: user defined document parser finished" processDoc mimeType options = traceMsg 1 (unwords [ "readDocument:", show src , "(mime type:", show mimeType, ") will be processed" ] ) >>> ( applyMimeTypeHandler mimeType -- try user defined document handlers `orElse` processDoc' mimeType options ) processDoc' mimeType ( parseByMimeType , ( parseHtml , ( acceptedMimeTypes , ( validateWithRelax , validateWithXmlSchema )))) = ( if isAcceptedMimeType acceptedMimeTypes mimeType then ( processNoneEmptyDoc ( ( parse $< getSysVar (theValidate .&&&. theSubstDTDEntities .&&&. theSubstHTMLEntities .&&&. theIgnoreNoneXmlContents .&&&. theTagSoup .&&&. theExpat ) ) >>> ( if isXmlOrHtml then ( ( checknamespaces $< getSysVar (theCheckNamespaces .&&&. theTagSoup ) ) >>> rememberDTDAttrl >>> ( canonicalize $< getSysVar (thePreserveComment .&&&. theCanonicalize .&&&. theTagSoup ) ) >>> ( whitespace $< getSysVar (theRemoveWS .&&&. theTagSoup ) ) >>> relaxOrXmlSchema ) else this ) ) ) else ( traceMsg 1 (unwords [ "readDocument:", show src , "mime type:", show mimeType, "not accepted"]) >>> replaceChildren none -- remove contents of not accepted mimetype ) ) where isAcceptedMimeType :: [String] -> String -> Bool isAcceptedMimeType mts mt | null mts || null mt = True | otherwise = foldr (matchMt mt') False $ mts' where mt' = parseMt mt mts' = map parseMt $ mts parseMt = break (== '/') >>> second (drop 1) matchMt (ma,mi) (mas,mis) r = ( (ma == mas || mas == "*") && (mi == mis || mis == "*") ) || r parse ( validate , ( substDTD , ( substHTML , ( removeNoneXml , ( withTagSoup' , withExpat' ))))) | not isXmlOrHtml = if removeNoneXml then replaceChildren none -- don't parse, if mime type is not XML nor HTML else this -- but remove contents when option is set | isHtml || withTagSoup' = configSysVar (setS theLowerCaseNames isHtml) >>> parseHtmlDocument -- parse as HTML or with tagsoup XML | isXml = if withExpat' then parseXmlDocumentWithExpat else parseXmlDocument validate substDTD substHTML validateWithRelax -- parse as XML | otherwise = this -- suppress warning checknamespaces (withNamespaces, withTagSoup') | withNamespaces && withTagSoup' = andValidateNamespaces -- propagation is done in tagsoup | withNamespaces || validateWithRelax || validateWithXmlSchema = propagateAndValidateNamespaces -- RelaxNG and XML Schema require correct namespaces | otherwise = this canonicalize (preserveCmt, (canonicalize', withTagSoup')) | withTagSoup' = this -- tagsoup already removes redundant stuff | validateWithRelax || validateWithXmlSchema = canonicalizeAllNodes -- no comments in schema validation | canonicalize' && preserveCmt = canonicalizeForXPath | canonicalize' = canonicalizeAllNodes | otherwise = this relaxOrXmlSchema | validateWithXmlSchema = withoutUserState $< getSysVar theXmlSchemaValidator | validateWithRelax = withoutUserState $< getSysVar theRelaxValidator | otherwise = this whitespace (removeWS, withTagSoup') | ( removeWS || validateWithXmlSchema -- XML Schema does not like WS ) && not withTagSoup' = removeDocWhiteSpace -- tagsoup already removes whitespace | otherwise = this isHtml = ( not parseByMimeType && parseHtml ) -- force HTML || ( parseByMimeType && isHtmlMimeType mimeType ) isXml = ( not parseByMimeType && not parseHtml ) || ( parseByMimeType && ( isXmlMimeType mimeType || null mimeType ) -- mime type is XML or not known ) isXmlOrHtml = isHtml || isXml -- ------------------------------------------------------------ -- | -- the arrow version of 'readDocument', the arrow input is the source URI readFromDocument :: SysConfigList -> IOStateArrow s String XmlTree readFromDocument config = applyA ( arr $ readDocument config ) -- ------------------------------------------------------------ -- | -- read a document that is stored in a normal Haskell String -- -- the same function as readDocument, but the parameter forms the input. -- All options available for 'readDocument' are applicable for readString, -- except input encoding options. -- -- Encoding: No decoding is done, the String argument is taken as Unicode string -- All decoding must be done before calling readString, even if the -- XML document contains an encoding spec. readString :: SysConfigList -> String -> IOStateArrow s b XmlTree readString config content = readDocument config (stringProtocol ++ content) -- ------------------------------------------------------------ -- | -- the arrow version of 'readString', the arrow input is the source URI readFromString :: SysConfigList -> IOStateArrow s String XmlTree readFromString config = applyA ( arr $ readString config ) -- ------------------------------------------------------------ -- | -- parse a string as HTML content, substitute all HTML entity refs and canonicalize tree. -- (substitute char refs, ...). Errors are ignored. -- -- This arrow delegates all work to the parseHtmlContent parser in module HtmlParser. -- -- This is a simpler version of 'readFromString' without any options, -- but it does not run in the IO monad. hread :: ArrowXml a => a String XmlTree hread = fromLA $ PI.hread -- substHtmlEntityRefs is done in parser >>> -- as well as subst HTML char refs editNTreeA [isError :-> none] -- ignores all errors >>> canonicalizeContents -- combine text nodes, substitute char refs -- comments are not removed -- | like hread, but accepts a whole document, not a HTML content hreadDoc :: ArrowXml a => a String XmlTree hreadDoc = fromLA $ root [] [PI.hreadDoc] -- substHtmlEntityRefs is done in parser >>> -- as well as subst HTML char refs editNTreeA [isError :-> none] -- ignores all errors >>> canonicalizeForXPath -- remove DTD spec and text in content of root node -- and do a canonicalizeContents >>> getChildren -- ------------------------------------------------------------ -- | -- parse a string as XML CONTENT, (no xml decl or doctype decls are allowed), -- substitute all predefined XML entity refs and canonicalize tree -- This xread arrow delegates all work to the xread parser function in module XmlParsec xread :: ArrowXml a => a String XmlTree xread = PI.xreadCont -- | -- a more general version of xread which -- parses a whole document including a prolog -- (xml decl, doctype decl) and processing -- instructions. Doctype decls remain uninterpreted, -- but are in the list of results trees. xreadDoc :: ArrowXml a => a String XmlTree xreadDoc = PI.xreadDoc {- -- the old version, where the parser does not subst char refs and cdata xread = root [] [parseXmlContent] -- substXmlEntityRefs is done in parser >>> canonicalizeContents >>> getChildren -- -} -- ------------------------------------------------------------ hxt-9.3.1.15/src/Text/XML/HXT/Arrow/ParserInterface.hs0000644000000000000000000000555212465166667020314 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Text.XML.HXT.Arrow.ParserInterface Copyright : Copyright (C) 2010 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : stable Portability: portable interface to the HXT XML and DTD parsers -} -- ------------------------------------------------------------ module Text.XML.HXT.Arrow.ParserInterface ( module Text.XML.HXT.Arrow.ParserInterface ) where import Control.Arrow.ArrowList import Text.XML.HXT.DOM.Interface import Text.XML.HXT.Arrow.XmlArrow import qualified Text.XML.HXT.Parser.HtmlParsec as HP import qualified Text.XML.HXT.Parser.XmlParsec as XP import qualified Text.XML.HXT.Parser.XmlDTDParser as DP -- ------------------------------------------------------------ parseXmlDoc :: ArrowXml a => a (String, String) XmlTree parseXmlDoc = arr2L XP.parseXmlDocument parseXmlDTDPart :: ArrowXml a => a (String, XmlTree) XmlTree parseXmlDTDPart = arr2L XP.parseXmlDTDPart xreadCont :: ArrowXml a => a String XmlTree xreadCont = arrL XP.xread xreadDoc :: ArrowXml a => a String XmlTree xreadDoc = arrL XP.xreadDoc parseXmlEntityEncodingSpec , parseXmlDocEncodingSpec , removeEncodingSpec :: ArrowXml a => a XmlTree XmlTree parseXmlDocEncodingSpec = arrL XP.parseXmlDocEncodingSpec parseXmlEntityEncodingSpec = arrL XP.parseXmlEntityEncodingSpec removeEncodingSpec = arrL XP.removeEncodingSpec parseXmlDTDdeclPart :: ArrowXml a => a XmlTree XmlTree parseXmlDTDdeclPart = arrL DP.parseXmlDTDdeclPart parseXmlDTDdecl :: ArrowXml a => a XmlTree XmlTree parseXmlDTDdecl = arrL DP.parseXmlDTDdecl parseXmlDTDEntityValue :: ArrowXml a => a XmlTree XmlTree parseXmlDTDEntityValue = arrL DP.parseXmlDTDEntityValue parseXmlEntityValueAsContent :: ArrowXml a => String -> a XmlTree XmlTree parseXmlEntityValueAsContent = arrL . XP.parseXmlEntityValueAsContent parseXmlEntityValueAsAttrValue :: ArrowXml a => String -> a XmlTree XmlTree parseXmlEntityValueAsAttrValue = arrL . XP.parseXmlEntityValueAsAttrValue -- ------------------------------------------------------------ parseHtmlDoc :: ArrowList a => a (String, String) XmlTree parseHtmlDoc = arr2L HP.parseHtmlDocument hread :: ArrowList a => a String XmlTree hread = arrL HP.parseHtmlContent hreadDoc :: ArrowList a => a String XmlTree hreadDoc = arrL $ HP.parseHtmlDocument "string" -- ------------------------------------------------------------ hxt-9.3.1.15/src/Text/XML/HXT/Arrow/XmlRegex.hs0000644000000000000000000003236312465166667016772 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Text.XML.HXT.Arrow.XmlRegex Copyright : Copyright (C) 2008 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : experimental Portability: portable Regular Expression Matcher working on lists of XmlTrees It's intended to import this module with an explicit import declaration for not spoiling the namespace with these somewhat special arrows -} -- ------------------------------------------------------------ module Text.XML.HXT.Arrow.XmlRegex ( XmlRegex , mkZero , mkUnit , mkPrim , mkPrim' , mkPrimA , mkDot , mkStar , mkAlt , mkAlts , mkSeq , mkSeqs , mkRep , mkRng , mkOpt , mkPerm , mkPerms , mkMerge , nullable , delta , matchXmlRegex , splitXmlRegex , scanXmlRegex , matchRegexA , splitRegexA , scanRegexA ) where import Control.Arrow.ListArrows import Data.Maybe import Text.XML.HXT.DOM.Interface import Text.XML.HXT.DOM.ShowXml (xshow) -- ------------------------------------------------------------ -- the exported regex arrows -- | check whether a sequence of XmlTrees match an Xml regular expression -- -- The arrow for 'matchXmlRegex'. -- -- The expession is build up from simple arrows acting as predicate ('mkPrimA') for -- an XmlTree and of the usual cobinators for sequence ('mkSeq'), repetition -- ('mkStar', mkRep', 'mkRng') and choice ('mkAlt', 'mkOpt') matchRegexA :: XmlRegex -> LA XmlTree XmlTree -> LA XmlTree XmlTrees matchRegexA re ts = ts >>. (\ s -> maybe [s] (const []) . matchXmlRegex re $ s) -- | split the sequence of trees computed by the filter a into -- -- The arrow for 'splitXmlRegex'. -- -- a first part matching the regex and a rest, -- if a prefix of the input sequence does not match the regex, the arrow fails -- else the pair containing the result lists is returned splitRegexA :: XmlRegex -> LA XmlTree XmlTree -> LA XmlTree (XmlTrees, XmlTrees) splitRegexA re ts = ts >>. (maybeToList . splitXmlRegex re) -- | scan the input sequence with a regex and give the result as a list of lists of trees back -- the regex must at least match one input tree, so the empty sequence should not match the regex -- -- The arrow for 'scanXmlRegex'. scanRegexA :: XmlRegex -> LA XmlTree XmlTree -> LA XmlTree XmlTrees scanRegexA re ts = ts >>. (fromMaybe [] . scanXmlRegex re) -- ------------------------------------------------------------ data XmlRegex = Zero String | Unit | Sym (XmlTree -> Bool) String -- optional external repr. of predicate | Dot | Star XmlRegex | Alt XmlRegex XmlRegex | Seq XmlRegex XmlRegex | Rep Int XmlRegex -- 1 or more repetitions | Rng Int Int XmlRegex -- n..m repetitions | Perm XmlRegex XmlRegex | Merge XmlRegex XmlRegex -- ------------------------------------------------------------ {- just for documentation class Inv a where inv :: a -> Bool instance Inv XmlRegex where inv (Zero _) = True inv Unit = True inv (Sym p _) = p holds for some XmlTrees inv Dot = True inv (Star e) = inv e inv (Alt e1 e2) = inv e1 && inv e2 inv (Seq e1 e2) = inv e1 && inv e2 inv (Rep i e) = i > 0 && inv e inv (Rng i j e) = (i < j || (i == j && i > 1)) && inv e inv (Perm e1 e2) = inv e1 && inv e2 -} -- ------------------------------------------------------------ -- -- smart constructors mkZero :: String -> XmlRegex mkZero = Zero mkUnit :: XmlRegex mkUnit = Unit mkPrim :: (XmlTree -> Bool) -> XmlRegex mkPrim p = Sym p "" mkPrim' :: (XmlTree -> Bool) -> String -> XmlRegex mkPrim' = Sym mkPrimA :: LA XmlTree XmlTree -> XmlRegex mkPrimA a = mkPrim (not . null . runLA a) mkDot :: XmlRegex mkDot = Dot mkStar :: XmlRegex -> XmlRegex mkStar (Zero _) = mkUnit -- {}* == () mkStar e@Unit = e -- ()* == () mkStar e@(Star _e1) = e -- (r*)* == r* mkStar (Rep 1 e1) = mkStar e1 -- (r+)* == r* mkStar e@(Alt _ _) = Star (rmStar e) -- (a*|b)* == (a|b)* mkStar e = Star e rmStar :: XmlRegex -> XmlRegex rmStar (Alt e1 e2) = mkAlt (rmStar e1) (rmStar e2) rmStar (Star e1) = rmStar e1 rmStar (Rep 1 e1) = rmStar e1 rmStar e1 = e1 mkAlt :: XmlRegex -> XmlRegex -> XmlRegex mkAlt e1 (Zero _) = e1 -- e1 u {} = e1 mkAlt (Zero _) e2 = e2 -- {} u e2 = e2 mkAlt e1@(Star Dot) _e2 = e1 -- A* u e1 = A* mkAlt _e1 e2@(Star Dot) = e2 -- e1 u A* = A* mkAlt (Sym p1 e1) (Sym p2 e2) = mkPrim' (\ x -> p1 x || p2 x) (e e1 e2) -- melting of predicates where e "" x2 = x2 e x1 "" = x1 e x1 x2 = x1 ++ "|" ++ x2 mkAlt e1 e2@(Sym _ _) = mkAlt e2 e1 -- symmetry: predicates always first mkAlt e1@(Sym _ _) (Alt e2@(Sym _ _) e3) = mkAlt (mkAlt e1 e2) e3 -- prepare melting of predicates mkAlt (Alt e1 e2) e3 = mkAlt e1 (mkAlt e2 e3) -- associativity mkAlt e1 e2 = Alt e1 e2 mkAlts :: [XmlRegex] -> XmlRegex mkAlts = foldr mkAlt (mkZero "") mkSeq :: XmlRegex -> XmlRegex -> XmlRegex mkSeq e1@(Zero _) _e2 = e1 mkSeq _e1 e2@(Zero _) = e2 mkSeq Unit e2 = e2 mkSeq e1 Unit = e1 mkSeq (Seq e1 e2) e3 = mkSeq e1 (mkSeq e2 e3) mkSeq e1 e2 = Seq e1 e2 mkSeqs :: [XmlRegex] -> XmlRegex mkSeqs = foldr mkSeq mkUnit mkRep :: Int -> XmlRegex -> XmlRegex mkRep 0 e = mkStar e mkRep _ e@(Zero _) = e mkRep _ e@Unit = e mkRep i e = Rep i e mkRng :: Int -> Int -> XmlRegex -> XmlRegex mkRng 0 0 _e = mkUnit mkRng 1 1 e = e mkRng lb ub _e | lb > ub = Zero $ "illegal range " ++ show lb ++ ".." ++ show ub mkRng _l _u e@(Zero _) = e mkRng _l _u e@Unit = e mkRng lb ub e = Rng lb ub e mkOpt :: XmlRegex -> XmlRegex mkOpt = mkRng 0 1 mkPerm :: XmlRegex -> XmlRegex -> XmlRegex mkPerm e1@(Zero _) _ = e1 mkPerm _ e2@(Zero _) = e2 mkPerm Unit e2 = e2 mkPerm e1 Unit = e1 mkPerm e1 e2 = Perm e1 e2 mkPerms :: [XmlRegex] -> XmlRegex mkPerms = foldr mkPerm mkUnit mkMerge :: XmlRegex -> XmlRegex -> XmlRegex mkMerge e1@(Zero _) _ = e1 mkMerge _ e2@(Zero _) = e2 mkMerge Unit e2 = e2 mkMerge e1 Unit = e1 mkMerge e1 e2 = Merge e1 e2 -- ------------------------------------------------------------ instance Show XmlRegex where show (Zero s) = "{err:" ++ s ++ "}" show Unit = "()" show (Sym _p "") = "" show (Sym _p r ) = r show Dot = "." show (Star e) = "(" ++ show e ++ ")*" show (Alt e1 e2) = "(" ++ show e1 ++ "|" ++ show e2 ++ ")" show (Seq e1 e2) = show e1 ++ show e2 show (Rep 1 e) = "(" ++ show e ++ ")+" show (Rep i e) = "(" ++ show e ++ "){" ++ show i ++ ",}" show (Rng 0 1 e) = "(" ++ show e ++ ")?" show (Rng i j e) = "(" ++ show e ++ "){" ++ show i ++ "," ++ show j ++ "}" show (Perm e1 e2) = "(" ++ show e1 ++ show e2 ++ "|" ++ show e2 ++ show e1 ++ ")" show (Merge e1 e2) = "(" ++ show e1 ++ "&" ++ show e2 ++ ")" -- ------------------------------------------------------------ unexpected :: XmlTree -> String -> String unexpected t e = emsg e ++ (cut 80 . xshow) [t] where emsg "" = "unexpected: " emsg s = "expected: " ++ s ++ ", but got: " cut n s | null rest = s' | otherwise = s' ++ "..." where (s', rest) = splitAt n s -- ------------------------------------------------------------ nullable :: XmlRegex -> Bool nullable (Zero _) = False nullable Unit = True nullable (Sym _p _) = False -- assumption: p holds for at least one tree nullable Dot = False nullable (Star _) = True nullable (Alt e1 e2) = nullable e1 || nullable e2 nullable (Seq e1 e2) = nullable e1 && nullable e2 nullable (Rep _i e) = nullable e nullable (Rng i _ e) = i == 0 || nullable e nullable (Perm e1 e2) = nullable e1 && nullable e2 nullable (Merge e1 e2) = nullable e1 && nullable e2 -- ------------------------------------------------------------ delta :: XmlRegex -> XmlTree -> XmlRegex delta e@(Zero _) _ = e delta Unit c = mkZero $ unexpected c "" delta (Sym p e) c | p c = mkUnit | otherwise = mkZero $ unexpected c e delta Dot _ = mkUnit delta e@(Star e1) c = mkSeq (delta e1 c) e delta (Alt e1 e2) c = mkAlt (delta e1 c) (delta e2 c) delta (Seq e1 e2) c | nullable e1 = mkAlt (mkSeq (delta e1 c) e2) (delta e2 c) | otherwise = mkSeq (delta e1 c) e2 delta (Rep i e) c = mkSeq (delta e c) (mkRep (i-1) e) delta (Rng i j e) c = mkSeq (delta e c) (mkRng ((i-1) `max` 0) (j-1) e) delta (Perm e1 e2) c = case e1' of (Zero _) -> mkPerm e1 (delta e2 c) _ -> mkPerm e1' e2 where e1' = delta e1 c delta (Merge e1 e2) c = mkAlt (mkMerge (delta e1 c) e2) (mkMerge e1 (delta e2 c)) -- ------------------------------------------------------------ delta' :: XmlRegex -> XmlTrees -> XmlRegex delta' = foldl delta -- | match a sequence of XML trees with a regular expression over trees -- -- If the input matches, the result is Nothing, else Just an error message is returned matchXmlRegex :: XmlRegex -> XmlTrees -> Maybe String matchXmlRegex e = res . delta' e where res (Zero er) = Just er res re | nullable re = Nothing -- o.k. | otherwise = Just $ "input does not match " ++ show e -- ------------------------------------------------------------ -- | split a sequence of XML trees into a pair of a a matching prefix and a rest -- -- If there is no matching prefix, Nothing is returned splitXmlRegex :: XmlRegex -> XmlTrees -> Maybe (XmlTrees, XmlTrees) splitXmlRegex re = splitXmlRegex' re [] splitXmlRegex' :: XmlRegex -> XmlTrees -> XmlTrees -> Maybe (XmlTrees, XmlTrees) splitXmlRegex' re res [] | nullable re = Just (reverse res, []) | otherwise = Nothing splitXmlRegex' (Zero _) _ _ = Nothing splitXmlRegex' re res xs@(x:xs') | isJust res' = res' | nullable re = Just (reverse res, xs) | otherwise = Nothing where re' = delta re x res' = splitXmlRegex' re' (x:res) xs' -- ------------------------------------------------------------ -- | scan a sequence of XML trees and split it into parts matching the given regex -- -- If the parts cannot be split because of a missing match, or because of the -- empty sequence as match, Nothing is returned scanXmlRegex :: XmlRegex -> XmlTrees -> Maybe [XmlTrees] scanXmlRegex re ts = scanXmlRegex' re (splitXmlRegex re ts) scanXmlRegex' :: XmlRegex -> Maybe (XmlTrees, XmlTrees) -> Maybe [XmlTrees] scanXmlRegex' _ Nothing = Nothing scanXmlRegex' _ (Just (rs, [])) = Just [rs] scanXmlRegex' _ (Just ([], _)) = Nothing -- re is nullable (the empty word matches), nothing split off -- would give infinite list of empty lists scanXmlRegex' re (Just (rs, rest)) | isNothing res = Nothing | otherwise = Just (rs : fromJust res) where res = scanXmlRegex' re (splitXmlRegex re rest) -- ------------------------------------------------------------ hxt-9.3.1.15/src/Text/XML/HXT/Arrow/DTDProcessing.hs0000644000000000000000000004451412465166667017710 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Text.XML.HXT.Arrow.DTDProcessing Copyright : Copyright (C) 2005 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : experimental Portability: portable DTD processing function for including external parts of a DTD parameter entity substitution and general entity substitution Implemtation completely done with arrows -} -- ------------------------------------------------------------ module Text.XML.HXT.Arrow.DTDProcessing ( processDTD ) where import Control.Arrow -- arrow classes import Control.Arrow.ArrowList import Control.Arrow.ArrowIf import Control.Arrow.ArrowTree import Text.XML.HXT.DOM.Interface import qualified Text.XML.HXT.DOM.XmlNode as XN import Text.XML.HXT.Arrow.XmlArrow import Text.XML.HXT.Arrow.XmlState import Text.XML.HXT.Arrow.ParserInterface ( parseXmlDTDdecl , parseXmlDTDdeclPart , parseXmlDTDEntityValue , parseXmlDTDPart ) import Text.XML.HXT.Arrow.Edit ( transfCharRef ) import Text.XML.HXT.Arrow.DocumentInput ( getXmlEntityContents ) import Data.Maybe import qualified Data.Map as M ( Map , empty , lookup , insert ) -- ------------------------------------------------------------ -- data DTDPart = Internal | External deriving (Eq) type RecList = [String] type DTDStateArrow b c = IOStateArrow PEEnv b c -- ------------------------------------------------------------ newtype PEEnv = PEEnv (M.Map String XmlTree) emptyPeEnv :: PEEnv emptyPeEnv = PEEnv M.empty lookupPeEnv :: String -> PEEnv -> Maybe XmlTree lookupPeEnv k (PEEnv env) = M.lookup k env addPeEntry :: String -> XmlTree -> PEEnv -> PEEnv addPeEntry k a (PEEnv env) = PEEnv $ M.insert k a env getPeValue :: DTDStateArrow String XmlTree getPeValue = (this &&& getUserState) >>> arrL (\ (n, env) -> maybeToList . lookupPeEnv n $ env) addPe :: String -> DTDStateArrow XmlTree XmlTree addPe n = traceMsg 2 ("substParamEntity: add entity " ++ show n ++ " to env") >>> changeUserState ins where ins t peEnv = addPeEntry n t peEnv -- ------------------------------------------------------------ -- | -- a filter for DTD processing -- -- inclusion of external parts of DTD, -- parameter entity substitution -- conditional section evaluation -- -- input tree must represent a complete document including root node processDTD :: IOStateArrow s XmlTree XmlTree processDTD = runInLocalURIContext ( processRoot >>> traceTree >>> traceSource ) `when` ( isRoot >>> getChildren ) where processRoot :: IOStateArrow s XmlTree XmlTree processRoot = ( traceMsg 1 ("processDTD: process parameter entities") >>> setSysAttrString a_standalone "" >>> processChildren substParamEntities >>> setDocumentStatusFromSystemState "in XML DTD processing" >>> traceMsg 1 ("processDTD: parameter entities processed") ) `when` documentStatusOk substParamEntities :: IOStateArrow s XmlTree XmlTree substParamEntities = withOtherUserState emptyPeEnv processParamEntities `when` isDTDDoctype where processParamEntities :: DTDStateArrow XmlTree XmlTree processParamEntities = mergeEntities $<<< ( listA processPredef &&& listA processInt &&& listA (runInLocalURIContext processExt) ) where mergeEntities dtdPre dtdInt dtdExt = replaceChildren (arrL $ const $ foldl1 mergeDTDs [dtdPre, dtdInt, dtdExt]) processPredef = predefDTDPart >>> substParamEntity Internal [] processInt = getChildren >>> substParamEntity Internal [] processExt = externalDTDPart >>> substParamEntity External [] mergeDTDs :: XmlTrees -> XmlTrees -> XmlTrees mergeDTDs dtdInt dtdExt = dtdInt ++ (filter (filterDTDNodes dtdInt) dtdExt) filterDTDNodes :: XmlTrees -> XmlTree -> Bool filterDTDNodes dtdPart t = not (any (filterDTDNode t) dtdPart) filterDTDNode :: XmlTree -> XmlTree -> Bool filterDTDNode t1 t2 = fromMaybe False $ do dp1 <- XN.getDTDPart t1 dp2 <- XN.getDTDPart t2 al1 <- XN.getDTDAttrl t1 al2 <- XN.getDTDAttrl t2 return ( dp1 == dp2 && ( dp1 `elem` [ELEMENT, NOTATION, ENTITY, ATTLIST] ) && ( lookup a_name al1 == lookup a_name al2 ) && ( dp1 /= ATTLIST || lookup a_value al1 == lookup a_value al2 ) ) substParamEntity :: DTDPart -> RecList -> DTDStateArrow XmlTree XmlTree substParamEntity loc recList = choiceA [ isDTDEntity :-> ( traceDTD "ENTITY declaration before DTD declaration parsing" >>> processChildren (substPeRefsInDTDdecl recList) >>> parseXmlDTDdecl >>> substPeRefsInEntityValue >>> traceDTD "ENTITY declaration after PE substitution" >>> processEntityDecl >>> traceDTD "ENTITY declaration after DTD declaration parsing" ) , ( isDTDElement <+> isDTDAttlist <+> isDTDNotation ) :-> ( traceDTD "DTD declaration before PE substitution" >>> processChildren (substPeRefsInDTDdecl recList) >>> parseXmlDTDdecl >>> traceDTD "DTD declaration after DTD declaration parsing" ) , isDTDPERef :-> substPeRefsInDTDpart recList , isDTDCondSect :-> ( if loc == Internal then issueErr "conditional sections in internal part of the DTD is not allowed" else evalCondSect $< getDTDAttrValue a_value ) , isCmt :-> none , this :-> this ] where processEntityDecl :: DTDStateArrow XmlTree XmlTree processEntityDecl = choiceA [ isDTDEntity :-> ( ifA (hasDTDAttr k_system) processExternalEntity processInternalEntity ) , isDTDPEntity :-> ( processParamEntity $< getDTDAttrValue a_name ) , this :-> none ] where processExternalEntity :: DTDStateArrow XmlTree XmlTree -- processing external entities is delayed until first usage processExternalEntity -- only the current base uri must be remembered = setDTDAttrValue a_url $< ( getDTDAttrValue k_system >>> mkAbsURI ) processInternalEntity :: DTDStateArrow XmlTree XmlTree processInternalEntity = this -- everything is already done in substPeRefsInEntityValue processParamEntity :: String -> DTDStateArrow XmlTree XmlTree processParamEntity peName = ifA (constA peName >>> getPeValue) ( issueWarn ("parameter entity " ++ show peName ++ " already defined") >>> none -- second def must be ignored ) ( ( ifA ( hasDTDAttr k_system ) -- is external param entity ? ( setDTDAttrValue a_url $< -- store absolut url ( getDTDAttrValue k_system >>> mkAbsURI ) ) -- this is too early, pe may be not referenced and file may be not there -- ( runInLocalURIContext getExternalParamEntityValue ) ( this ) -- everything is already done in substPeRefsInEntityValue ) >>> addPe peName ) substPERef :: String -> DTDStateArrow XmlTree XmlTree substPERef pn = choiceA [ isUndefinedRef :-> issueErr ("parameter entity " ++ show pn ++ " not found (forward reference?)") , isInternalRef :-> issueErr ("a parameter entity reference of " ++ show pn ++ " occurs in the internal subset of the DTD") , isUnreadExternalRef :-> ( perform ( peVal -- load the external pe value >>> -- update the pe env getExternalParamEntityValue pn -- and try again >>> addPe pn ) >>> substPERef pn ) , this :-> substPE ] `when` isDTDPERef where peVal = constA pn >>> getPeValue isUnreadExternalRef = ( peVal >>> getDTDAttrValue a_url >>> isA (not . null) ) `guards` this isInternalRef = none -- isA (const (loc == Internal)) -- TODO: check this restriction, it seams rather meaningless isUndefinedRef = neg peVal substPE = replaceChildren (peVal >>> getChildren) -- store PE value in children component substPeRefsInEntityValue :: DTDStateArrow XmlTree XmlTree substPeRefsInEntityValue = ( ( replaceChildren ( xshow ( getChildren -- substitute char entites >>> -- and parameter references transfCharRef -- combine all pieces to a single string >>> -- as the new entity value substPeRefsInValue [] ) >>> mkText ) ) `whenNot` hasDTDAttr k_system -- only apply for internal entities ) `when` ( isDTDEntity <+> isDTDPEntity ) -- only apply for entity declarations substPeRefsInDTDpart :: RecList -> DTDStateArrow XmlTree XmlTree substPeRefsInDTDpart rl = recursionCheck "DTD part" rl subst where subst :: RecList -> String -> DTDStateArrow XmlTree XmlTree subst recl pn = substPERef pn >>> traceDTD "substPeRefsInDTDdecl: before parseXmlDTDPart" >>> ( runInPeContext ( getChildren >>> ( (constA ("parameter entity: " ++ pn)) &&& this ) >>> parseXmlDTDPart >>> traceDTD "substPeRefsInDTDpart: after parseXmlDTDPart" >>> substParamEntity loc (pn : recl) ) `when` isDTDPERef ) substPeRefsInDTDdecl :: RecList -> DTDStateArrow XmlTree XmlTree substPeRefsInDTDdecl rl = recursionCheck "DTD declaration" rl subst where subst :: RecList -> String -> DTDStateArrow XmlTree XmlTree subst recl pn = substPERef pn >>> traceDTD "substPeRefsInDTDdecl: before parseXmlDTDdeclPart" >>> ( runInPeContext ( parseXmlDTDdeclPart >>> traceDTD "substPeRefsInDTDdecl: after parseXmlDTDdeclPart" >>> processChildren ( substPeRefsInDTDdecl (pn : recl) ) ) `when` isDTDPERef ) substPeRefsInValue :: RecList -> DTDStateArrow XmlTree XmlTree substPeRefsInValue rl = recursionCheck "entity value" rl subst where subst :: RecList -> String -> DTDStateArrow XmlTree XmlTree subst recl pn = substPERef pn >>> parseXmlDTDEntityValue >>> -- transfCharRef this must be done somewhere else -- >>> substPeRefsInValue (pn : recl) substPeRefsInCondSect :: RecList -> DTDStateArrow XmlTree XmlTree substPeRefsInCondSect rl = recursionCheck "conditional section" rl subst where subst :: RecList -> String -> DTDStateArrow XmlTree XmlTree subst recl pn = substPERef pn >>> traceDTD "substPeRefsInCondSect: parseXmlDTDdeclPart" >>> runInPeContext ( parseXmlDTDdeclPart >>> traceDTD "substPeRefsInCondSect: after parseXmlDTDdeclPart" >>> processChildren ( substPeRefsInCondSect (pn : recl) ) ) recursionCheck :: String -> RecList -> (RecList -> String -> DTDStateArrow XmlTree XmlTree) -> DTDStateArrow XmlTree XmlTree recursionCheck wher rl subst = ( recusiveSubst $< getDTDAttrValue a_peref ) `when` isDTDPERef where recusiveSubst name | name `elem` rl = issueErr ("recursive call of parameter entity " ++ show name ++ " in " ++ wher) | otherwise = subst rl name runInPeContext :: DTDStateArrow XmlTree XmlTree -> DTDStateArrow XmlTree XmlTree runInPeContext f = ( runWithNewBase $< getDTDAttrValue a_url ) `orElse` f where runWithNewBase base = runInLocalURIContext ( perform (constA base >>> setBaseURI) >>> f ) evalCondSect :: String -> DTDStateArrow XmlTree XmlTree evalCondSect content = traceDTD "evalCondSect: process conditional section" >>> processChildren (substPeRefsInCondSect []) >>> parseXmlDTDdecl >>> ( hasText (== k_include) `guards` ( ( constA "conditional section" &&& txt content ) >>> parseXmlDTDPart >>> traceMsg 2 "evalCond: include DTD part" >>> substParamEntity External recList ) ) predefDTDPart :: DTDStateArrow XmlTree XmlTree predefDTDPart = ( constA "predefined entities" &&& ( constA predefinedEntities >>> mkText) ) >>> parseXmlDTDPart where predefinedEntities :: String predefinedEntities = concat [ "" , "" , "" , "" , "" ] externalDTDPart :: DTDStateArrow XmlTree XmlTree externalDTDPart = isDTDDoctype `guards` ( hasDTDAttr k_system `guards` ( getExternalDTDPart $< getDTDAttrValue k_system ) ) getExternalDTDPart :: String -> DTDStateArrow XmlTree XmlTree getExternalDTDPart src = root [sattr a_source src] [] >>> getXmlEntityContents >>> replaceChildren ( ( constA src &&& getChildren ) >>> parseXmlDTDPart ) >>> traceDoc "processExternalDTD: parsing DTD part done" >>> getChildren getExternalParamEntityValue :: String -> DTDStateArrow XmlTree XmlTree getExternalParamEntityValue pn = isDTDPEntity `guards` ( setEntityValue $< ( listA ( getEntityValue $< getDTDAttrValue a_url ) ) ) where getEntityValue :: String -> DTDStateArrow XmlTree XmlTree getEntityValue url = root [sattr a_source url] [] >>> runInLocalURIContext getXmlEntityContents >>> traceMsg 2 ("getExternalParamEntityValue: contents read for " ++ show pn ++ " from " ++ show url) >>> getChildren setEntityValue :: XmlTrees -> DTDStateArrow XmlTree XmlTree setEntityValue res | null res = issueErr ("illegal external parameter entity value for entity %" ++ pn ++";") | otherwise = replaceChildren (constL res) >>> setDTDAttrValue a_url "" -- mark entity as read traceDTD :: String -> DTDStateArrow XmlTree XmlTree traceDTD msg = traceMsg 3 msg >>> traceTree -- ------------------------------------------------------------ hxt-9.3.1.15/src/Text/XML/HXT/Arrow/Pickle.hs0000644000000000000000000002166112465166667016445 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Text.XML.HXT.Arrow.Pickle Copyright : Copyright (C) 2005 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : experimental Portability: portable Pickler functions for converting between user defined data types and XmlTree data. Usefull for persistent storage and retreival of arbitray data as XML documents This module is an adaptation of the pickler combinators developed by Andrew Kennedy ( http:\/\/research.microsoft.com\/~akenn\/fun\/picklercombinators.pdf ) The difference to Kennedys approach is that the target is not a list of Chars but a list of XmlTrees. The basic picklers will convert data into XML text nodes. New are the picklers for creating elements and attributes. One extension was neccessary: The unpickling may fail. Therefore the unpickler has a Maybe result type. Failure is used to unpickle optional elements (Maybe data) and lists of arbitray length There is an example program demonstrating the use of the picklers for a none trivial data structure. (see \"examples\/arrows\/pickle\" directory) -} -- ------------------------------------------------------------ module Text.XML.HXT.Arrow.Pickle ( xpickleDocument -- from this module Text.XML.HXT.Arrow.Pickle , xunpickleDocument , xpickleWriteDTD , xpickleDTD , checkPickler , xpickleVal , xunpickleVal , thePicklerDTD , a_addDTD -- from Text.XML.HXT.Arrow.Pickle.Xml , pickleDoc , unpickleDoc , unpickleDoc' , showPickled , PU(..) , XmlPickler(..) , xp4Tuple , xp5Tuple , xp6Tuple , xp7Tuple , xp8Tuple , xp9Tuple , xp10Tuple , xp11Tuple , xp12Tuple , xp13Tuple , xp14Tuple , xp15Tuple , xp16Tuple , xp17Tuple , xp18Tuple , xp19Tuple , xp20Tuple , xp21Tuple , xp22Tuple , xp23Tuple , xp24Tuple , xpAddFixedAttr , xpAddNSDecl , xpAlt , xpAttr , xpAttrFixed , xpAttrImplied , xpAttrNS , xpCheckEmpty , xpCheckEmptyAttributes , xpCheckEmptyContents , xpTextAttr , xpChoice , xpDefault , xpElem , xpElemNS , xpElemWithAttrValue , xpFilterAttr , xpFilterCont , xpInt , xpLift , xpLiftEither , xpLiftMaybe , xpList , xpList1 , xpMap , xpOption , xpPair , xpPrim , xpSeq , xpSeq' , xpText , xpText0 , xpTextDT , xpText0DT , xpTree , xpTrees , xpTriple , xpUnit , xpWrap , xpWrapEither , xpWrapMaybe , xpXmlText , xpZero -- from Text.XML.HXT.Arrow.Pickle.Schema , Schema , Schemas , DataTypeDescr ) where import Control.Arrow.ListArrows import Text.XML.HXT.DOM.Interface import Text.XML.HXT.Arrow.ReadDocument import Text.XML.HXT.Arrow.WriteDocument import Text.XML.HXT.Arrow.XmlArrow import Text.XML.HXT.Arrow.XmlState import Text.XML.HXT.Arrow.XmlState.TypeDefs import Text.XML.HXT.Arrow.Pickle.Xml import Text.XML.HXT.Arrow.Pickle.Schema import Text.XML.HXT.Arrow.Pickle.DTD -- ------------------------------------------------------------ -- the arrow interface for pickling and unpickling -- | store an arbitray value in a persistent XML document -- -- The pickler converts a value into an XML tree, this is written out with -- 'Text.XML.HXT.Arrow.writeDocument'. The option list is passed to 'Text.XML.HXT.Arrow.writeDocument' -- -- An option evaluated by this arrow is 'a_addDTD'. -- If 'a_addDTD' is set ('v_1'), the pickler DTD is added as an inline DTD into the document. xpickleDocument :: PU a -> SysConfigList -> String -> IOStateArrow s a XmlTree xpickleDocument xp config dest = localSysEnv $ configSysVars config >>> xpickleVal xp >>> traceMsg 1 "xpickleVal applied" >>> ifA ( getSysAttr a_addDTD >>> isA (== v_1) ) ( replaceChildren ( (constA undefined >>> xpickleDTD xp >>> getChildren) <+> getChildren ) ) this >>> writeDocument [] dest -- | Option for generating and adding DTD when document is pickled a_addDTD :: String a_addDTD = "addDTD" -- | read an arbitray value from an XML document -- -- The document is read with 'Text.XML.HXT.Arrow.readDocument'. Options are passed -- to 'Text.XML.HXT.Arrow.readDocument'. The conversion from XmlTree is done with the -- pickler. -- -- @ xpickleDocument xp al dest >>> xunpickleDocument xp al' dest @ is the identity arrow -- when applied with the appropriate options. When during pickling indentation is switched on, -- the whitespace must be removed during unpickling. xunpickleDocument :: PU a -> SysConfigList -> String -> IOStateArrow s b a xunpickleDocument xp conf src = readDocument conf src >>> traceMsg 1 ("xunpickleVal for " ++ show src ++ " started") >>> xunpickleVal xp >>> traceMsg 1 ("xunpickleVal for " ++ show src ++ " finished") -- | Write out the DTD generated out of a pickler. Calls 'xpicklerDTD' xpickleWriteDTD :: PU b -> SysConfigList -> String -> IOStateArrow s b XmlTree xpickleWriteDTD xp config dest = xpickleDTD xp >>> writeDocument config dest -- | The arrow for generating the DTD out of a pickler -- -- A DTD is generated from a pickler and check for consistency. -- Errors concerning the DTD are issued. xpickleDTD :: PU b -> IOStateArrow s b XmlTree xpickleDTD xp = root [] [ constL (thePicklerDTD xp) >>> filterErrorMsg ] -- | An arrow for checking picklers -- -- A value is transformed into an XML document by a given pickler, -- the associated DTD is extracted from the pickler and checked, -- the document including the DTD is tranlated into a string, -- this string is read and validated against the included DTD, -- and unpickled. -- The last step is the equality with the input. -- -- If the check succeeds, the arrow works like this, else it fails. checkPickler :: Eq a => PU a -> IOStateArrow s a a checkPickler xp = ( ( ( ( xpickleVal xp >>> replaceChildren ( (constA undefined >>> xpickleDTD xp >>> getChildren) <+> getChildren ) >>> writeDocumentToString [] >>> readFromString [withValidate True] >>> xunpickleVal xp ) &&& this ) >>> isA (uncurry (==)) ) `guards` this ) `orElse` issueErr "pickle/unpickle combinators failed" -- | The arrow version of the pickler function xpickleVal :: ArrowXml a => PU b -> a b XmlTree xpickleVal xp = arr (pickleDoc xp) -- | The arrow version of the unpickler function {- old version, runs outside IO xunpickleVal :: ArrowXml a => PU b -> a XmlTree b xunpickleVal xp = ( processChildren (none `whenNot` isElem) -- remove all stuff surrounding the root element `when` isRoot ) >>> arrL (maybeToList . unpickleDoc xp) -- -} xunpickleVal :: PU b -> IOStateArrow s XmlTree b xunpickleVal xp = ( processChildren (none `whenNot` isElem) -- remove all stuff surrounding the root element `when` isRoot ) >>> arr (unpickleDoc' xp) >>> ( ( (issueFatal $< arr ("document unpickling failed\n" ++)) >>> none ) ||| this ) -- | Compute the associated DTD of a pickler thePicklerDTD :: PU b -> XmlTrees thePicklerDTD = dtdDescrToXml . dtdDescr . theSchema -- ------------------------------------------------------------ hxt-9.3.1.15/src/Text/XML/HXT/Arrow/ProcessDocument.hs0000644000000000000000000002741012465166667020351 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Text.XML.HXT.Arrow.ProcessDocument Copyright : Copyright (C) 2011 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : experimental Portability: portable Compound arrows for reading, parsing, validating and writing XML documents All arrows use IO and a global state for options, errorhandling, ... -} -- ------------------------------------------------------------ module Text.XML.HXT.Arrow.ProcessDocument ( parseXmlDocument , parseXmlDocumentWithExpat , parseHtmlDocument , validateDocument , propagateAndValidateNamespaces , andValidateNamespaces , getDocumentContents ) where import Control.Arrow import Control.Arrow.ArrowIf import Control.Arrow.ArrowList import Control.Arrow.ArrowTree import Control.Arrow.ListArrow (fromLA) import Control.Arrow.NTreeEdit import Text.XML.HXT.DOM.Interface import Text.XML.HXT.Arrow.XmlArrow import Text.XML.HXT.Arrow.XmlState import Text.XML.HXT.Arrow.XmlState.TypeDefs import Text.XML.HXT.Arrow.ParserInterface (parseHtmlDoc, parseXmlDoc) import Text.XML.HXT.Arrow.Edit (substAllXHTMLEntityRefs, transfAllCharRef) import Text.XML.HXT.Arrow.GeneralEntitySubstitution (processGeneralEntities) import Text.XML.HXT.Arrow.DTDProcessing (processDTD) import Text.XML.HXT.Arrow.DocumentInput (getXmlContents) import Text.XML.HXT.Arrow.Namespace (propagateNamespaces, validateNamespaces) import Text.XML.HXT.DTDValidation.Validation (generalEntitiesDefined, getDTDSubset, transform, validate) -- ------------------------------------------------------------ {- | XML parser Input tree must be a root tree with a text tree as child containing the document to be parsed. The parser generates from the input string a tree of a wellformed XML document, processes the DTD (parameter substitution, conditional DTD parts, ...) and substitutes all general entity references. Next step is character reference substitution. Last step is the document validation. Validation can be controlled by an extra parameter. Example: > parseXmlDocument True -- parse and validate document > > parseXmlDocument False -- only parse document, don't validate This parser is useful for applications processing correct XML documents. -} parseXmlDocument :: Bool -> Bool -> Bool -> Bool -> IOStateArrow s XmlTree XmlTree parseXmlDocument validateD substDTD substHTML validateRX = ( replaceChildren ( ( getAttrValue a_source &&& xshow getChildren ) >>> parseXmlDoc >>> filterErrorMsg ) >>> setDocumentStatusFromSystemState "parse XML document" >>> ( ifA (fromLA getDTDSubset) ( processDTDandEntities >>> ( if validate' -- validation only possible if there is a DTD then validateDocument else this ) ) ( if validate' -- validation only consists of checking -- for undefined entity refs -- predefined XML entity refs are substituted -- in the XML parser into char refs -- so there is no need for an entity substitution then traceMsg 2 "checkUndefinedEntityRefs: looking for undefined entity refs" >>> perform checkUndefinedEntityRefs >>> traceMsg 2 "checkUndefinedEntityRefs: looking for undefined entity refs done" >>> setDocumentStatusFromSystemState "decoding document" else this ) ) ) `when` documentStatusOk where validate' = validateD && not validateRX processDTDandEntities = ( if validateD || substDTD then processDTD else this ) >>> ( if substDTD then ( processGeneralEntities -- DTD contains general entity definitions `when` fromLA generalEntitiesDefined ) else if substHTML then substAllXHTMLEntityRefs else this ) >>> transfAllCharRef checkUndefinedEntityRefs :: IOStateArrow s XmlTree XmlTree checkUndefinedEntityRefs = deep isEntityRef >>> getEntityRef >>> arr (\ en -> "general entity reference \"&" ++ en ++ ";\" is undefined") >>> mkError c_err >>> filterErrorMsg -- ------------------------------------------------------------ parseXmlDocumentWithExpat :: IOStateArrow s XmlTree XmlTree parseXmlDocumentWithExpat = ( withoutUserState $< getSysVar theExpatParser ) `when` documentStatusOk -- ------------------------------------------------------------ {- | HTML parser Input tree must be a root tree with a text tree as child containing the document to be parsed. The parser tries to parse everything as HTML, if the HTML document is not wellformed XML or if errors occur, warnings are generated. The warnings can be issued, or suppressed. Example: @ parseHtmlDocument True @ : parse document and issue warnings This parser is useful for applications like web crawlers, where the pages may contain arbitray errors, but the application is only interested in parts of the document, e.g. the plain text. -} parseHtmlDocument :: IOStateArrow s XmlTree XmlTree parseHtmlDocument = ( perform ( getAttrValue a_source >>> traceValue 1 (("parseHtmlDoc: parse HTML document " ++) . show) ) >>> ( parseHtml $< getSysVar (theTagSoup .&&&. theExpat) ) >>> ( removeWarnings $< getSysVar (theWarnings .&&&. theTagSoup) ) >>> setDocumentStatusFromSystemState "parse HTML document" >>> traceTree >>> traceSource >>> perform ( getAttrValue a_source >>> traceValue 1 (\ src -> "parse HTML document " ++ show src ++ " finished") ) ) `when` documentStatusOk where parseHtml (withTagSoup', withExpat') | withExpat' = withoutUserState $< getSysVar theExpatParser | withTagSoup' = withoutUserState $< getSysVar theTagSoupParser | otherwise = traceMsg 1 ("parse document with parsec HTML parser") >>> replaceChildren ( ( getAttrValue a_source -- get source name &&& xshow getChildren ) -- get string to be parsed >>> parseHtmlDoc -- run parser, entity substituion is done in parser ) removeWarnings (warnings, withTagSoup') | warnings = processTopDownWithAttrl -- remove warnings inserted by parser and entity subst filterErrorMsg | withTagSoup' = this -- warnings are not generated in tagsoup | otherwise = fromLA $ editNTreeA [isError :-> none] -- remove all warnings from document -- ------------------------------------------------------------ {- | Document validation Input must be a complete document tree. The document is validated with respect to the DTD spec. Only useful for XML documents containing a DTD. If the document is valid, it is transformed with respect to the DTD, normalization of attribute values, adding default values, sorting attributes by name,... If no error was found, result is the normalized tree, else the error status is set in the list of attributes of the root node \"\/\" and the document content is removed from the tree. -} validateDocument :: IOStateArrow s XmlTree XmlTree validateDocument = ( traceMsg 1 "validating document" >>> perform ( validateDoc >>> filterErrorMsg ) >>> setDocumentStatusFromSystemState "document validation" >>> traceMsg 1 "document validated, transforming doc with respect to DTD" >>> transformDoc >>> traceMsg 1 "document transformed" >>> traceSource >>> traceTree ) `when` documentStatusOk -- ------------------------------------------------------------ {- | Namespace propagation Input must be a complete document tree. The namespace declarations are evaluated and all element and attribute names are processed by splitting the name into prefix, local part and namespace URI. Naames are checked with respect to the XML namespace definition If no error was found, result is the unchanged input tree, else the error status is set in the list of attributes of the root node \"\/\" and the document content is removed from the tree. -} propagateAndValidateNamespaces :: IOStateArrow s XmlTree XmlTree propagateAndValidateNamespaces = ( traceMsg 1 "propagating namespaces" >>> propagateNamespaces >>> traceDoc "propagating namespaces done" >>> andValidateNamespaces ) `when` documentStatusOk andValidateNamespaces :: IOStateArrow s XmlTree XmlTree andValidateNamespaces = ( traceMsg 1 "validating namespaces" >>> ( setDocumentStatusFromSystemState "namespace propagation" `when` ( validateNamespaces >>> perform filterErrorMsg ) ) >>> traceMsg 1 "namespace validation finished" ) `when` documentStatusOk -- ------------------------------------------------------------ {- | creates a new document root, adds all options as attributes to the document root and calls 'getXmlContents'. If the document name is the empty string, the document will be read from standard input. For supported protocols see 'Text.XML.HXT.Arrow.DocumentInput.getXmlContents' -} getDocumentContents :: String -> IOStateArrow s b XmlTree getDocumentContents src = root [] [] >>> addAttr a_source src >>> traceMsg 1 ("readDocument: start processing document " ++ show src) >>> getXmlContents -- ------------------------------------------------------------ validateDoc :: ArrowList a => a XmlTree XmlTree validateDoc = fromLA ( validate `when` getDTDSubset -- validate only when DTD decl is present ) transformDoc :: ArrowList a => a XmlTree XmlTree transformDoc = fromLA transform -- ------------------------------------------------------------ hxt-9.3.1.15/src/Text/XML/HXT/Arrow/Namespace.hs0000644000000000000000000004052512465166667017132 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Text.XML.HXT.Arrow.Namespace Copyright : Copyright (C) 2005-2008 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : experimental Portability: portable namespace specific arrows -} -- ------------------------------------------------------------ module Text.XML.HXT.Arrow.Namespace ( attachNsEnv , cleanupNamespaces , collectNamespaceDecl , collectPrefixUriPairs , isNamespaceDeclAttr , getNamespaceDecl , processWithNsEnv , processWithNsEnvWithoutAttrl , propagateNamespaces , uniqueNamespaces , uniqueNamespacesFromDeclAndQNames , validateNamespaces ) where import Control.Arrow -- arrow classes import Control.Arrow.ArrowList import Control.Arrow.ArrowIf import Control.Arrow.ArrowTree import Control.Arrow.ListArrow import Text.XML.HXT.DOM.Interface import Text.XML.HXT.Arrow.XmlArrow import Data.Maybe ( isNothing , fromJust ) import Data.List ( nub ) -- ------------------------------------------------------------ -- | test whether an attribute node contains an XML Namespace declaration isNamespaceDeclAttr :: ArrowXml a => a XmlTree XmlTree isNamespaceDeclAttr = fromLA $ (getAttrName >>> isA isNameSpaceName) `guards` this {-# INLINE isNamespaceDeclAttr #-} -- | get the namespace prefix and the namespace URI out of -- an attribute tree with a namespace declaration (see 'isNamespaceDeclAttr') -- for all other nodes this arrow fails getNamespaceDecl :: ArrowXml a => a XmlTree (String, String) getNamespaceDecl = fromLA $ isNamespaceDeclAttr >>> ( ( getAttrName >>> arr getNsPrefix ) &&& xshow getChildren ) where getNsPrefix = drop 6 . qualifiedName -- drop "xmlns:" -- ------------------------------------------------------------ -- | collect all namespace declarations contained in a document -- -- apply 'getNamespaceDecl' to a whole XmlTree collectNamespaceDecl :: LA XmlTree (String, String) collectNamespaceDecl = multi getAttrl >>> getNamespaceDecl -- | collect all (namePrefix, namespaceUri) pairs from a tree -- -- all qualified names are inspected, whether a namespace uri is defined, -- for these uris the prefix and uri is returned. This arrow is useful for -- namespace cleanup, e.g. for documents generated with XSLT. It can be used -- together with 'collectNamespaceDecl' to 'cleanupNamespaces' collectPrefixUriPairs :: LA XmlTree (String, String) collectPrefixUriPairs = multi (isElem <+> getAttrl <+> isPi) >>> getQName >>> arrL getPrefixUri where getPrefixUri :: QName -> [(String, String)] getPrefixUri n | null uri = [] | px == a_xmlns || px == a_xml = [] -- these ones are reserved an predefined | otherwise = [(namePrefix n, uri)] where uri = namespaceUri n px = namePrefix n -- ------------------------------------------------------------ -- | generate unique namespaces and add all namespace declarations to all top nodes containing a namespace declaration -- Usually the top node containing namespace declarations is the root node, but this isn't mandatory. -- -- Calls 'cleanupNamespaces' with 'collectNamespaceDecl' uniqueNamespaces :: ArrowXml a => a XmlTree XmlTree uniqueNamespaces = fromLA $ cleanupNamespaces' collectNamespaceDecl -- | generate unique namespaces and add all namespace declarations for all prefix-uri pairs in all qualified names -- -- useful for cleanup of namespaces in generated documents. -- Calls 'cleanupNamespaces' with @ collectNamespaceDecl \<+> collectPrefixUriPairs @ uniqueNamespacesFromDeclAndQNames :: ArrowXml a => a XmlTree XmlTree uniqueNamespacesFromDeclAndQNames = fromLA $ cleanupNamespaces' ( collectNamespaceDecl <+> collectPrefixUriPairs ) cleanupNamespaces' :: LA XmlTree (String, String) -> LA XmlTree XmlTree cleanupNamespaces' collectNamespaces = processTopDownUntil ( hasNamespaceDecl `guards` cleanupNamespaces collectNamespaces ) where hasNamespaceDecl = isElem >>> getAttrl >>> isNamespaceDeclAttr -- | does the real work for namespace cleanup. -- -- The parameter is used for collecting namespace uris and prefixes from the input tree cleanupNamespaces :: LA XmlTree (String, String) -> LA XmlTree XmlTree cleanupNamespaces collectNamespaces = renameNamespaces $< (listA collectNamespaces >>^ (toNsEnv >>> nub)) where renameNamespaces :: NsEnv -> LA XmlTree XmlTree renameNamespaces env = processBottomUp ( processAttrl ( ( none `when` isNamespaceDeclAttr ) -- remove all namespace declarations >>> changeQName renamePrefix -- update namespace prefix of attribute names, if namespace uri is set ) >>> changeQName renamePrefix -- update namespace prefix of element names ) >>> attachEnv env1 -- add all namespaces as attributes to the root node attribute list where renamePrefix :: QName -> QName renamePrefix n | isNullXName uri = n | isNothing newPx = n | otherwise = setNamePrefix' (fromJust newPx) n where uri = namespaceUri' n newPx = lookup uri revEnv1 revEnv1 = map (\ (x, y) -> (y, x)) env1 env1 :: NsEnv env1 = newEnv [] uris uris :: [XName] uris = nub . map snd $ env genPrefixes :: [XName] genPrefixes = map (newXName . ("ns" ++) . show) [(0::Int)..] newEnv :: NsEnv -> [XName] -> NsEnv newEnv env' [] = env' newEnv env' (uri:rest) = newEnv env'' rest where env'' = (prefix, uri) : env' prefix = head (filter notAlreadyUsed $ preferedPrefixes ++ genPrefixes) preferedPrefixes = map fst . filter ((==uri).snd) $ env notAlreadyUsed s = isNothing . lookup s $ env' -- ------------------------------------------------------------ -- | auxiliary arrow for processing with a namespace environment -- -- process a document tree with an arrow, containing always the -- valid namespace environment as extra parameter. -- The namespace environment is implemented as a 'Data.AssocList.AssocList'. -- Processing of attributes can be controlled by a boolean parameter processWithNsEnv1 :: ArrowXml a => Bool -> (NsEnv -> a XmlTree XmlTree) -> NsEnv -> a XmlTree XmlTree processWithNsEnv1 withAttr f env = ifA isElem -- the test is just an optimization ( processWithExtendedEnv $< arr (extendEnv env) ) -- only element nodes contain namespace declarations ( processWithExtendedEnv env ) where processWithExtendedEnv env' = f env' -- apply the env filter >>> ( ( if withAttr then processAttrl (f env') -- apply the env to all attributes else this ) >>> processChildren (processWithNsEnv f env') -- apply the env recursively to all children ) `when` isElem -- attrl and children only need processing for elem nodes extendEnv :: NsEnv -> XmlTree -> NsEnv extendEnv env' t' = addEntries (toNsEnv newDecls) env' where newDecls = runLA ( getAttrl >>> getNamespaceDecl ) t' -- ------------------------------------------------------------ -- | process a document tree with an arrow, containing always the -- valid namespace environment as extra parameter. -- -- The namespace environment is implemented as a 'Data.AssocList.AssocList' processWithNsEnv :: ArrowXml a => (NsEnv -> a XmlTree XmlTree) -> NsEnv -> a XmlTree XmlTree processWithNsEnv = processWithNsEnv1 True -- | process all element nodes of a document tree with an arrow, containing always the -- valid namespace environment as extra parameter. Attribute lists are not processed. -- -- See also: 'processWithNsEnv' processWithNsEnvWithoutAttrl :: ArrowXml a => (NsEnv -> a XmlTree XmlTree) -> NsEnv -> a XmlTree XmlTree processWithNsEnvWithoutAttrl = processWithNsEnv1 False -- ----------------------------------------------------------------------------- -- | attach all valid namespace declarations to the attribute list of element nodes. -- -- This arrow is useful for document processing, that requires access to all namespace -- declarations at any element node, but which cannot be done with a simple 'processWithNsEnv'. attachNsEnv :: ArrowXml a => NsEnv -> a XmlTree XmlTree attachNsEnv initialEnv = fromLA $ processWithNsEnvWithoutAttrl attachEnv initialEnv where attachEnv :: NsEnv -> LA XmlTree XmlTree attachEnv env = ( processAttrl (none `when` isNamespaceDeclAttr) >>> addAttrl (catA nsAttrl) ) `when` isElem where nsAttrl :: [LA XmlTree XmlTree] nsAttrl = map nsDeclToAttr env nsDeclToAttr :: (XName, XName) -> LA XmlTree XmlTree nsDeclToAttr (n, uri) = mkAttr qn (txt (unXN uri)) where qn :: QName qn | isNullXName n = newQName xmlnsXName nullXName xmlnsNamespaceXName | otherwise = newQName n xmlnsXName xmlnsNamespaceXName -- ----------------------------------------------------------------------------- -- | -- propagate all namespace declarations \"xmlns:ns=...\" to all element and attribute nodes of a document. -- -- This arrow does not check for illegal use of namespaces. -- The real work is done by 'propagateNamespaceEnv'. -- -- The arrow may be applied repeatedly if neccessary. propagateNamespaces :: ArrowXml a => a XmlTree XmlTree propagateNamespaces = fromLA $ propagateNamespaceEnv [ (xmlXName, xmlNamespaceXName) , (xmlnsXName, xmlnsNamespaceXName) ] -- | -- attaches the namespace info given by the namespace table -- to a tag node and its attributes and children. propagateNamespaceEnv :: NsEnv -> LA XmlTree XmlTree propagateNamespaceEnv = processWithNsEnv addNamespaceUri where addNamespaceUri :: NsEnv -> LA XmlTree XmlTree addNamespaceUri env' = choiceA [ isElem :-> changeElemName (setNamespace env') , isAttr :-> attachNamespaceUriToAttr env' , isPi :-> changePiName (setNamespace env') , this :-> this ] attachNamespaceUriToAttr :: NsEnv -> LA XmlTree XmlTree attachNamespaceUriToAttr attrEnv = ( ( getQName >>> isA (not . null . namePrefix) ) `guards` changeAttrName (setNamespace attrEnv) ) `orElse` ( changeAttrName (const xmlnsQN) `when` hasName a_xmlns ) -- ----------------------------------------------------------------------------- -- | -- validate the namespace constraints in a whole tree. -- -- Result is the list of errors concerning namespaces. -- Predicates 'isWellformedQName', 'isWellformedQualifiedName', 'isDeclaredNamespace' -- and 'isWellformedNSDecl' are applied to the appropriate elements and attributes. validateNamespaces :: ArrowXml a => a XmlTree XmlTree validateNamespaces = fromLA validateNamespaces1 validateNamespaces1 :: LA XmlTree XmlTree validateNamespaces1 = choiceA [ isRoot :-> ( getChildren >>> validateNamespaces1 ) -- root is correct by definition , this :-> multi validate1Namespaces ] -- | -- a single node for namespace constrains. validate1Namespaces :: LA XmlTree XmlTree validate1Namespaces = choiceA [ isElem :-> catA [ ( getQName >>> isA ( not . isWellformedQName ) ) `guards` nsError (\ n -> "element name " ++ show n ++ " is not a wellformed qualified name" ) , ( getQName >>> isA ( not . isDeclaredNamespace ) ) `guards` nsError (\ n -> "namespace for prefix in element name " ++ show n ++ " is undefined" ) , doubleOcc $< ( (getAttrl >>> getUniversalName) >>. doubles ) , getAttrl >>> validate1Namespaces ] , isAttr :-> catA [ ( getQName >>> isA ( not . isWellformedQName ) ) `guards` nsError (\ n -> "attribute name " ++ show n ++ " is not a wellformed qualified name" ) , ( getQName >>> isA ( not . isDeclaredNamespace ) ) `guards` nsError (\ n -> "namespace for prefix in attribute name " ++ show n ++ " is undefined" ) , ( hasNamePrefix a_xmlns >>> xshow getChildren >>> isA null ) `guards` nsError (\ n -> "namespace value of namespace declaration for " ++ show n ++ " has no value" ) , ( getQName >>> isA (not . isWellformedNSDecl ) ) `guards` nsError (\ n -> "illegal namespace declaration for name " ++ show n ++ " starting with reserved prefix " ++ show "xml" ) ] , isDTD :-> catA [ isDTDDoctype <+> isDTDAttlist <+> isDTDElement <+> isDTDName >>> getDTDAttrValue a_name >>> ( isA (not . isWellformedQualifiedName) `guards` nsErr (\ n -> "a DTD part contains a not wellformed qualified Name: " ++ show n) ) , isDTDAttlist >>> getDTDAttrValue a_value >>> ( isA (not . isWellformedQualifiedName) `guards` nsErr (\ n -> "an ATTLIST declaration contains as attribute name a not wellformed qualified Name: " ++ show n) ) , isDTDEntity <+> isDTDPEntity <+> isDTDNotation >>> getDTDAttrValue a_name >>> ( isA (not . isNCName) `guards` nsErr (\ n -> "an entity or notation declaration contains a not wellformed NCName: " ++ show n) ) ] , isPi :-> catA [ getName >>> ( isA (not . isNCName) `guards` nsErr (\ n -> "a PI contains a not wellformed NCName: " ++ show n) ) ] ] where nsError :: (QName -> String) -> LA XmlTree XmlTree nsError msg = getQName >>> nsErr msg nsErr :: (a -> String) -> LA a XmlTree nsErr msg = arr msg >>> mkError c_err doubleOcc :: String -> LA XmlTree XmlTree doubleOcc an = nsError (\ n -> "multiple occurences of universal name for attributes of tag " ++ show n ++ " : " ++ show an ) -- ------------------------------------------------------------ hxt-9.3.1.15/src/Text/XML/HXT/Arrow/DocumentInput.hs0000644000000000000000000003424412465166667020035 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Text.XML.HXT.Arrow.DocumentInput Copyright : Copyright (C) 2005 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : experimental Portability: portable State arrows for document input -} -- ------------------------------------------------------------ module Text.XML.HXT.Arrow.DocumentInput ( getXmlContents , getXmlEntityContents , getEncoding , getTextEncoding , decodeDocument , addInputError ) where import Control.Arrow import Control.Arrow.ArrowIf import Control.Arrow.ArrowIO import Control.Arrow.ArrowList import Control.Arrow.ArrowTree import Control.Arrow.ListArrow import Data.List (isPrefixOf) import Data.String.Unicode (getDecodingFct, guessEncoding, normalizeNL) import System.FilePath (takeExtension) import qualified Text.XML.HXT.IO.GetFILE as FILE import Text.XML.HXT.DOM.Interface import Text.XML.HXT.Arrow.ParserInterface (parseXmlDocEncodingSpec, parseXmlEntityEncodingSpec, removeEncodingSpec) import Text.XML.HXT.Arrow.XmlArrow import Text.XML.HXT.Arrow.XmlState import Text.XML.HXT.Arrow.XmlState.TypeDefs -- ---------------------------------------------------------- protocolHandlers :: AssocList String (IOStateArrow s XmlTree XmlTree) protocolHandlers = [ ("file", getFileContents) , ("http", getHttpContents) , ("https", getHttpContents) , ("stdin", getStdinContents) ] getProtocolHandler :: IOStateArrow s String (IOStateArrow s XmlTree XmlTree) getProtocolHandler = arr (\ s -> lookupDef getUnsupported s protocolHandlers) getUnsupported :: IOStateArrow s XmlTree XmlTree getUnsupported = perform ( getAttrValue a_source >>> arr (("unsupported protocol in URI " ++) . show) >>> applyA (arr issueFatal) ) >>> setDocumentStatusFromSystemState "accessing documents" getStringContents :: IOStateArrow s XmlTree XmlTree getStringContents = setCont $< getAttrValue a_source >>> addAttr transferMessage "OK" >>> addAttr transferStatus "200" where setCont contents = replaceChildren (txt contents') >>> addAttr transferURI (take 7 contents) -- the "string:" prefix is stored, this is required by setBaseURIFromDoc >>> addAttr a_source (show . prefix 48 $ contents') -- a quoted prefix of the content, max 48 chars is taken as source name where contents' = drop (length stringProtocol) contents prefix l s | length s' > l = take (l - 3) s' ++ "..." | otherwise = s' where s' = take (l + 1) s getFileContents :: IOStateArrow s XmlTree XmlTree getFileContents = applyA ( ( getSysVar theStrictInput &&& ( getAttrValue transferURI >>> getPathFromURI ) ) >>> traceValue 2 (\ (b, f) -> "read file " ++ show f ++ " (strict input = " ++ show b ++ ")") >>> arrIO (uncurry FILE.getCont) >>> ( arr (uncurry addInputError) -- io error occured ||| arr addTxtContent -- content read ) ) >>> addMimeType getStdinContents :: IOStateArrow s XmlTree XmlTree getStdinContents = applyA ( getSysVar theStrictInput >>> arrIO FILE.getStdinCont >>> ( arr (uncurry addInputError) -- io error occured ||| arr addTxtContent -- content read ) ) addInputError :: Attributes -> String -> IOStateArrow s XmlTree XmlTree addInputError al e = issueFatal e >>> seqA (map (uncurry addAttr) al) >>> setDocumentStatusFromSystemState "accessing documents" addMimeType :: IOStateArrow s XmlTree XmlTree addMimeType = addMime $< ( ( getSysVar theFileMimeType >>> isA (not . null) ) `orElse` ( getAttrValue transferURI >>> ( uriToMime $< getMimeTypeTable ) ) ) where addMime mt = addAttr transferMimeType mt uriToMime mtt = arr $ ( \ uri -> extensionToMimeType (drop 1 . takeExtension $ uri) mtt ) addTxtContent :: Blob -> IOStateArrow s XmlTree XmlTree addTxtContent bc = replaceChildren (blb bc) >>> addAttr transferMessage "OK" >>> addAttr transferStatus "200" getHttpContents :: IOStateArrow s XmlTree XmlTree getHttpContents = withoutUserState $ applyA $ getSysVar theHttpHandler getContentsFromString :: IOStateArrow s XmlTree XmlTree getContentsFromString = ( getAttrValue a_source >>> isA (isPrefixOf stringProtocol) ) `guards` getStringContents getContentsFromDoc :: IOStateArrow s XmlTree XmlTree getContentsFromDoc = ( ( addTransferURI $< getBaseURI >>> getCont ) `when` ( setAbsURI $< ( getAttrValue a_source >>^ ( \ src-> (if null src then "stdin:" else src) ) -- empty document name -> read from stdin ) ) ) >>> setDocumentStatusFromSystemState "getContentsFromDoc" where setAbsURI src = ifA ( constA src >>> changeBaseURI ) this ( issueFatal ("illegal URI : " ++ show src) ) addTransferURI uri = addAttr transferURI uri getCont = applyA ( getBaseURI -- compute the handler and call it >>> traceValue 2 (("getContentsFromDoc: reading " ++) . show) >>> getSchemeFromURI >>> getProtocolHandler ) `orElse` this -- don't change tree, when no handler can be found setBaseURIFromDoc :: IOStateArrow s XmlTree XmlTree setBaseURIFromDoc = perform ( getAttrValue transferURI >>> isA (isPrefixOf stringProtocol) -- do not change base URI when reading from a string >>> setBaseURI ) {- | Read the content of a document. This routine is usually called from 'Text.XML.HXT.Arrow.ProcessDocument.getDocumentContents'. The input must be a root node (constructed with 'Text.XML.HXT.Arrow.XmlArrow.root'), usually without children. The attribute list contains all input parameters, e.g. URI or source file name, encoding preferences, ... If the source name is empty, the input is read from standard input. The source is transformed into an absolute URI. If the source is a relative URI, or a file name, it is expanded into an absolut URI with respect to the current base URI. The default base URI is of protocol \"file\" and points to the current working directory. The currently supported protocols are \"http\", \"file\", \"stdin\" and \"string\". The latter two are internal protocols. An uri of the form \"stdin:\" stands for the content of the standard input stream. \"string:some text\" means, that \"some text\" is taken as input. This internal protocol is used for reading from normal 'String' values. -} getXmlContents :: IOStateArrow s XmlTree XmlTree getXmlContents = getXmlContents' parseXmlDocEncodingSpec >>> setBaseURIFromDoc getXmlEntityContents :: IOStateArrow s XmlTree XmlTree getXmlEntityContents = traceMsg 2 "getXmlEntityContents" >>> addAttr transferMimeType text_xml_external_parsed_entity -- the default transfer mimetype >>> getXmlContents' parseXmlEntityEncodingSpec >>> addAttr transferMimeType text_xml_external_parsed_entity >>> processChildren ( removeEncodingSpec >>> changeText normalizeNL -- newline normalization must be done here ) -- the following calls of the parsers don't do this >>> setBaseURIFromDoc >>> traceMsg 2 "getXmlEntityContents done" getXmlContents' :: IOStateArrow s XmlTree XmlTree -> IOStateArrow s XmlTree XmlTree getXmlContents' parseEncodingSpec = ( getContentsFromString -- no decoding done for string: protocol `orElse` ( getContentsFromDoc >>> choiceA [ isXmlHtmlDoc :-> ( parseEncodingSpec >>> filterErrorMsg >>> decodeDocument ) , isTextDoc :-> decodeDocument , this :-> this ] >>> perform ( getAttrValue transferURI >>> traceValue 1 (("getXmlContents: content read and decoded for " ++) . show) ) >>> traceDoc "getXmlContents'" ) ) `when` isRoot isMimeDoc :: (String -> Bool) -> IOStateArrow s XmlTree XmlTree isMimeDoc isMT = fromLA $ ( ( getAttrValue transferMimeType >>^ stringToLower ) >>> isA (\ t -> null t || isMT t) ) `guards` this isTextDoc, isXmlHtmlDoc :: IOStateArrow s XmlTree XmlTree isTextDoc = isMimeDoc isTextMimeType isXmlHtmlDoc = isMimeDoc (\ mt -> isHtmlMimeType mt || isXmlMimeType mt) -- ------------------------------------------------------------ getEncoding :: IOStateArrow s XmlTree String getEncoding = catA [ xshow getChildren -- 1. guess: guess encoding by looking at the first few bytes >>> arr guessEncoding , getAttrValue transferEncoding -- 2. guess: take the transfer encoding , getAttrValue a_encoding -- 3. guess: take encoding parameter in root node , getSysVar theInputEncoding -- 4. guess: take encoding parameter in global state , constA utf8 -- default : utf8 ] >. (head . filter (not . null)) -- make the filter deterministic: take 1. entry from list of guesses getTextEncoding :: IOStateArrow s XmlTree String getTextEncoding = catA [ getAttrValue transferEncoding -- 1. guess: take the transfer encoding , getAttrValue a_encoding -- 2. guess: take encoding parameter in root node , getSysVar theInputEncoding -- 3. guess: take encoding parameter in global state , constA isoLatin1 -- default : no encoding ] >. (head . filter (not . null)) -- make the filter deterministic: take 1. entry from list of guesses decodeDocument :: IOStateArrow s XmlTree XmlTree decodeDocument = choiceA [ ( isRoot >>> isXmlHtmlDoc ) :-> ( decodeX $< getSysVar theExpat) , ( isRoot >>> isTextDoc ) :-> ( decodeArr $< getTextEncoding ) , this :-> this ] where decodeX :: Bool -> IOStateArrow s XmlTree XmlTree decodeX False = decodeArr $< getEncoding decodeX True = noDecode $< getEncoding -- parse with expat noDecode enc = traceMsg 2 ("no decoding (done by expat): encoding is " ++ show enc) >>> addAttr transferEncoding enc decodeArr :: String -> IOStateArrow s XmlTree XmlTree decodeArr enc = maybe notFound found . getDecodingFct $ enc where found df = traceMsg 2 ("decodeDocument: encoding is " ++ show enc) >>> ( decodeText df $< getSysVar theEncodingErrors ) >>> addAttr transferEncoding enc notFound = issueFatal ("encoding scheme not supported: " ++ show enc) >>> setDocumentStatusFromSystemState "decoding document" {- just for performance test decodeText _ _ = this -} decodeText df withEncErrors = processChildren ( getText -- get the document content -- the following 3 lines -- don't seem to raise the space problem in decodeText -- space is allocated in blobToString and in parsec >>> arr df -- decode the text, result is (string, [errMsg]) >>> ( ( fst ^>> mkText ) -- take decoded string and build text node <+> ( if withEncErrors then ( arrL snd -- take the error messages >>> arr ((enc ++) . (" encoding error" ++)) -- prefix with enc error >>> applyA (arr issueErr) -- build issueErr arrow and apply >>> none -- neccessary for type match with <+> ) else none ) ) ) -- ------------------------------------------------------------ hxt-9.3.1.15/src/Text/XML/HXT/Arrow/WriteDocument.hs0000644000000000000000000002273212465166667020027 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Text.XML.HXT.Arrow.WriteDocument Copyright : Copyright (C) 2005-9 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : experimental Portability: portable Compound arrow for writing XML documents -} -- ------------------------------------------------------------ module Text.XML.HXT.Arrow.WriteDocument ( writeDocument , writeDocument' , writeDocumentToString , prepareContents ) where import Control.Arrow -- arrow classes import Control.Arrow.ArrowList import Control.Arrow.ArrowIf import Control.Arrow.ArrowTree import Text.XML.HXT.DOM.Interface import Text.XML.HXT.Arrow.XmlArrow import Text.XML.HXT.Arrow.XmlState import Text.XML.HXT.Arrow.XmlState.TypeDefs import Text.XML.HXT.Arrow.XmlState.RunIOStateArrow ( initialSysState ) import Text.XML.HXT.Arrow.Edit ( haskellRepOfXmlDoc , indentDoc , addDefaultDTDecl , preventEmptyElements , removeDocWhiteSpace , treeRepOfXmlDoc ) import Text.XML.HXT.Arrow.DocumentOutput ( putXmlDocument , encodeDocument , encodeDocument' ) -- ------------------------------------------------------------ -- {- | the main filter for writing documents this filter can be configured by an option list like 'Text.XML.HXT.Arrow.ReadDocument.readDocument' usage: @ writeDocument optionList destination @ if @ destination @ is the empty string or \"-\", stdout is used as output device for available options see 'Text.XML.HXT.Arrow.XmlState.SystemConfig' - @withOutputXML@ : (default) issue XML: quote special XML chars \>,\<,\",\',& where neccessary add XML processing instruction and encode document with respect to output encoding, - @withOutputHTML@ : issue HTML: translate all special XML chars and all HTML chars with a corresponding entity reference into entity references. Do not generate empty elements, e.g. @@. The short form introduces trouble in various browsers. - @withOutputXHTML@ : same as @withOutputHTML@, but all none ASCII chars are substituted by char references. - @withOutputPLAIN@ : Do not substitute any chars. This is useful when generating something else than XML/HTML, e.g. Haskell source code. - @withXmlPi yes/no@ : Add a @@ processing instruction to the beginning of the document. Default is yes. - @withAddDefaultDTD@ : if the document to be written was build by reading another document containing a Document Type Declaration, this DTD is inserted into the output document (default: no insert) - @withShowTree yes/no@ : show DOM tree representation of document (for debugging) - @withShowHaskell yes/no@ : show Haskell representaion of document (for debugging) a minimal main program for copying a document has the following structure: > module Main > where > > import Text.XML.HXT.Core > > main :: IO () > main > = do > runX ( readDocument [] "hello.xml" > >>> > writeDocument [] "bye.xml" > ) > return () an example for copying a document from the web to standard output with global trace level 1, input trace level 2, output encoding isoLatin1, and evaluation of error code is: > module Main > where > > import Text.XML.HXT.Core > import Text.XML.HXT.Curl > -- or > -- import Text.XML.HXT.HTTP > import System.Exit > > main :: IO () > main > = do > [rc] <- runX > ( configSysVars [ withTrace 1 -- set the defaults for all read-, > , withCurl [] -- write- and other operations > -- or withHTTP [] > ] > >>> > readDocument [ withTrace 2 -- use these additional > , withParseHTML yes -- options only for this read > ] > "http://www.haskell.org/" > >>> > writeDocument [ withOutputEncoding isoLatin1 > ] > "" -- output to stdout > >>> > getErrStatus > ) > exitWith ( if rc >= c_err > then ExitFailure 1 > else ExitSuccess > ) -} writeDocument :: SysConfigList -> String -> IOStateArrow s XmlTree XmlTree writeDocument config dst = localSysEnv $ configSysVars config >>> perform ( (flip writeDocument') dst $< getSysVar theTextMode ) writeDocument' :: Bool -> String -> IOStateArrow s XmlTree XmlTree writeDocument' textMode dst = ( traceMsg 1 ("writeDocument: destination is " ++ show dst) >>> ( (flip prepareContents) encodeDocument $< getSysVar idS ) >>> traceDoc "document after encoding" >>> putXmlDocument textMode dst >>> traceMsg 1 "writeDocument: finished" ) `when` documentStatusOk -- ------------------------------------------------------------ -- | -- Convert a document into a string. Formating is done the same way -- and with the same options as in 'writeDocument'. Default output encoding is -- no encoding, that means the result is a normal unicode encode haskell string. -- The default may be overwritten with the 'Text.XML.HXT.Arrow.XmlState.SystemConfig.withOutputEncoding' option. -- The XML PI can be suppressed by the 'Text.XML.HXT.XmlKeywords.a_no_xml_pi' option. -- -- This arrow fails, when the encoding scheme is not supported. -- The arrow is pure, it does not run in the IO monad. -- The XML PI is suppressed, if not explicitly turned on with an -- option @ (a_no_xml_pi, v_0) @ writeDocumentToString :: ArrowXml a => SysConfigList -> a XmlTree String writeDocumentToString config = prepareContents ( foldr (>>>) id (withOutputEncoding unicodeString : withXmlPi no : config ) $ initialSysState ) encodeDocument' >>> xshow getChildren -- ------------------------------------------------------------ -- | -- indent and format output prepareContents :: ArrowXml a => XIOSysState -> (Bool -> Bool -> String -> a XmlTree XmlTree) -> a XmlTree XmlTree prepareContents config encodeDoc = indent >>> addDtd >>> format where indent' = getS theIndent config removeWS' = getS theRemoveWS config showTree' = getS theShowTree config showHaskell' = getS theShowHaskell config outHtml' = getS theOutputFmt config == HTMLoutput outXhtml' = getS theOutputFmt config == XHTMLoutput outXml' = getS theOutputFmt config == XMLoutput noPi' = not $ getS theXmlPi config noEEsFor' = getS theNoEmptyElemFor config addDDTD' = getS theAddDefaultDTD config outEnc' = getS theOutputEncoding config addDtd | addDDTD' = addDefaultDTDecl | otherwise = this indent | indent' = indentDoc -- document indentation | removeWS' = removeDocWhiteSpace -- remove all whitespace between tags | otherwise = this format | showTree' = treeRepOfXmlDoc | showHaskell' = haskellRepOfXmlDoc | outHtml' = preventEmptyElements noEEsFor' True >>> encodeDoc -- convert doc into text with respect to output encoding with ASCII as default False noPi' ( if null outEnc' then usAscii else outEnc' ) | outXhtml' = preventEmptyElements noEEsFor' True >>> encodeDoc -- convert doc into text with respect to output encoding True noPi' outEnc' | outXml' = ( if null noEEsFor' then this else preventEmptyElements noEEsFor' False ) >>> encodeDoc -- convert doc into text with respect to output encoding True noPi' outEnc' | otherwise = this -- ------------------------------------------------------------ hxt-9.3.1.15/src/Text/XML/HXT/Arrow/XmlState/0000755000000000000000000000000012465166667016435 5ustar0000000000000000hxt-9.3.1.15/src/Text/XML/HXT/Arrow/XmlState/RunIOStateArrow.hs0000644000000000000000000002716412465166667022013 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Text.XML.HXT.Arrow.XmlState.RunIOStateArrow Copyright : Copyright (C) 2010 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : stable Portability: portable run an io state arrow -} -- ------------------------------------------------------------ module Text.XML.HXT.Arrow.XmlState.RunIOStateArrow where import Control.Arrow -- arrow classes import Control.Arrow.ArrowList import Control.Arrow.IOStateListArrow import Data.Map ( empty ) import Text.XML.HXT.DOM.Interface import Text.XML.HXT.Arrow.XmlArrow import Text.XML.HXT.Arrow.XmlState.ErrorHandling import Text.XML.HXT.Arrow.XmlState.TraceHandling import Text.XML.HXT.Arrow.XmlState.TypeDefs -- ------------------------------------------------------------ -- | -- apply an 'IOSArrow' to an empty root node with 'initialState' () as initial state -- -- the main entry point for running a state arrow with IO -- -- when running @ runX f@ an empty XML root node is applied to @f@. -- usually @f@ will start with a constant arrow (ignoring the input), e.g. a 'Text.XML.HXT.Arrow.ReadDocument.readDocument' arrow. -- -- for usage see examples with 'Text.XML.HXT.Arrow.WriteDocument.writeDocument' -- -- if input has to be feed into the arrow use 'Control.Arrow.IOStateListArrow.runIOSLA' like in @ runIOSLA f emptyX inputDoc @ runX :: IOSArrow XmlTree c -> IO [c] runX = runXIOState (initialState ()) runXIOState :: XIOState s -> IOStateArrow s XmlTree c -> IO [c] runXIOState s0 f = do (_finalState, res) <- runIOSLA (emptyRoot >>> f) s0 undefined return res where emptyRoot = root [] [] -- | the default global state, used as initial state when running an 'IOSArrow' with 'runIOSLA' or -- 'runX' initialState :: us -> XIOState us initialState s = XIOState { xioSysState = initialSysState , xioUserState = s } -- ------------------------------------------------------------ initialSysState :: XIOSysState initialSysState = XIOSys { xioSysWriter = initialSysWriter , xioSysEnv = initialSysEnv } initialSysWriter :: XIOSysWriter initialSysWriter = XIOwrt { xioErrorStatus = c_ok , xioErrorMsgList = [] , xioExpatErrors = none , xioRelaxNoOfErrors = 0 , xioRelaxDefineId = 0 , xioRelaxAttrList = [] } initialSysEnv :: XIOSysEnv initialSysEnv = XIOEnv { xioTraceLevel = 0 , xioTraceCmd = traceOutputToStderr , xioErrorMsgHandler = errorOutputToStderr , xioErrorMsgCollect = False , xioBaseURI = "" , xioDefaultBaseURI = "" , xioAttrList = [] , xioInputConfig = initialInputConfig , xioParseConfig = initialParseConfig , xioOutputConfig = initialOutputConfig , xioRelaxConfig = initialRelaxConfig , xioXmlSchemaConfig = initialXmlSchemaConfig , xioCacheConfig = initialCacheConfig } initialInputConfig :: XIOInputConfig initialInputConfig = XIOIcgf { xioStrictInput = False , xioEncodingErrors = True , xioInputEncoding = "" , xioHttpHandler = dummyHTTPHandler , xioInputOptions = [] , xioRedirect = False , xioProxy = "" } initialParseConfig :: XIOParseConfig initialParseConfig = XIOPcfg { xioMimeTypes = defaultMimeTypeTable , xioMimeTypeHandlers = empty , xioMimeTypeFile = "" , xioAcceptedMimeTypes = [] , xioFileMimeType = "" , xioWarnings = True , xioRemoveWS = False , xioParseByMimeType = False , xioParseHTML = False , xioLowerCaseNames = False , xioTagSoup = False , xioPreserveComment = False , xioValidate = True , xioSubstDTDEntities = True , xioSubstHTMLEntities = False , xioCheckNamespaces = False , xioCanonicalize = True , xioIgnoreNoneXmlContents = False , xioTagSoupParser = dummyTagSoupParser , xioExpat = False , xioExpatParser = dummyExpatParser } initialOutputConfig :: XIOOutputConfig initialOutputConfig = XIOOcfg { xioIndent = False , xioOutputEncoding = "" , xioOutputFmt = XMLoutput , xioXmlPi = True , xioNoEmptyElemFor = [] , xioAddDefaultDTD = False , xioTextMode = False , xioShowTree = False , xioShowHaskell = False } initialRelaxConfig :: XIORelaxConfig initialRelaxConfig = XIORxc { xioRelaxValidate = False , xioRelaxSchema = "" , xioRelaxCheckRestr = True , xioRelaxValidateExtRef = True , xioRelaxValidateInclude = True , xioRelaxCollectErrors = True , xioRelaxValidator = dummyRelaxValidator } initialXmlSchemaConfig :: XIOXmlSchemaConfig initialXmlSchemaConfig = XIOScc { xioXmlSchemaValidate = False , xioXmlSchemaSchema = "" , xioXmlSchemaValidator = dummyXmlSchemaValidator } initialCacheConfig :: XIOCacheConfig initialCacheConfig = XIOCch { xioBinaryCompression = id , xioBinaryDeCompression = id , xioWithCache = False , xioCacheDir = "" , xioDocumentAge = 0 , xioCache404Err = False , xioCacheRead = dummyCacheRead , xioStrictDeserialize = False } -- ------------------------------------------------------------ dummyHTTPHandler :: IOSArrow XmlTree XmlTree dummyHTTPHandler = ( issueFatal $ unlines $ [ "HTTP handler not configured," , "please install package hxt-curl and use 'withCurl' config option" , "or install package hxt-http and use 'withHTTP' config option" ] ) >>> addAttr transferMessage "HTTP handler not configured" >>> addAttr transferStatus "999" dummyTagSoupParser :: IOSArrow b b dummyTagSoupParser = issueFatal $ unlines $ [ "TagSoup parser not configured," , "please install package hxt-tagsoup" , " and use 'withTagSoup' parser config option from this package" ] dummyExpatParser :: IOSArrow b b dummyExpatParser = issueFatal $ unlines $ [ "Expat parser not configured," , "please install package hxt-expat" , " and use 'withExpat' parser config option from this package" ] dummyRelaxValidator :: IOSArrow b b dummyRelaxValidator = issueFatal $ unlines $ [ "RelaxNG validator not configured," , "please install package hxt-relaxng" , " and use 'withRelaxNG' config option from this package" ] dummyXmlSchemaValidator :: IOSArrow b b dummyXmlSchemaValidator = issueFatal $ unlines $ [ "XML Schema validator not configured," , "please install package hxt-xmlschema" , " and use 'withXmlSchema' config option from this package" ] dummyCacheRead :: String -> IOSArrow b b dummyCacheRead = const $ issueFatal $ unlines $ [ "Document cache not configured," , "please install package hxt-cache and use 'withCache' config option" ] -- ------------------------------------------------------------ getConfigAttr :: String -> SysConfigList -> String getConfigAttr n c = lookup1 n $ tl where s = (foldr (>>>) id c) initialSysState tl = getS theAttrList s -- ---------------------------------------- theSysConfigComp :: Selector XIOSysState a -> Selector SysConfig a theSysConfigComp sel = S { getS = \ cf -> getS sel (cf initialSysState) , setS = \ val cf -> setS sel val . cf } -- ------------------------------------------------------------ hxt-9.3.1.15/src/Text/XML/HXT/Arrow/XmlState/TraceHandling.hs0000644000000000000000000001240712465166667021500 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Text.XML.HXT.Arrow.XmlState.TraceHandling Copyright : Copyright (C) 2010 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : stable Portability: portable the trace arrows -} -- ------------------------------------------------------------ module Text.XML.HXT.Arrow.XmlState.TraceHandling where import Control.Arrow -- arrow classes import Control.Arrow.ArrowList import Control.Arrow.ArrowIf import Control.Arrow.ArrowTree import Control.Arrow.ArrowIO import System.IO ( hPutStrLn , hFlush , stderr ) import Text.XML.HXT.DOM.Interface import Text.XML.HXT.Arrow.XmlArrow import Text.XML.HXT.Arrow.XmlState.TypeDefs import Text.XML.HXT.Arrow.XmlState.SystemConfig import Text.XML.HXT.Arrow.Edit ( addHeadlineToXmlDoc , treeRepOfXmlDoc , indentDoc ) -- ------------------------------------------------------------ -- | set the global trace level setTraceLevel :: Int -> IOStateArrow s b b setTraceLevel l = configSysVar $ withTrace l -- | read the global trace level getTraceLevel :: IOStateArrow s b Int getTraceLevel = getSysVar theTraceLevel -- | set the global trace command. This command does the trace output setTraceCmd :: (Int -> String -> IO ()) -> IOStateArrow s b b setTraceCmd c = configSysVar $ setS theTraceCmd c -- | acces the command for trace output getTraceCmd :: IOStateArrow a b (Int -> String -> IO ()) getTraceCmd = getSysVar theTraceCmd -- | run an arrow with a given trace level, the old trace level is restored after the arrow execution withTraceLevel :: Int -> IOStateArrow s b c -> IOStateArrow s b c withTraceLevel level f = localSysEnv $ setTraceLevel level >>> f -- | apply a trace arrow and issue message to stderr trace :: Int -> IOStateArrow s b String -> IOStateArrow s b b trace level trc = perform ( trc >>> ( getTraceCmd &&& this ) >>> arrIO (\ (cmd, msg) -> cmd level msg) ) `when` ( getTraceLevel >>> isA (>= level) ) -- | trace the current value transfered in a sequence of arrows. -- -- The value is formated by a string conversion function. This is a substitute for -- the old and less general traceString function traceValue :: Int -> (b -> String) -> IOStateArrow s b b traceValue level trc = trace level (arr $ (('-' : "- (" ++ show level ++ ") ") ++) . trc) -- | an old alias for 'traceValue' traceString :: Int -> (b -> String) -> IOStateArrow s b b traceString = traceValue -- | issue a string message as trace traceMsg :: Int -> String -> IOStateArrow s b b traceMsg level msg = traceValue level (const msg) -- | issue the source representation of a document if trace level >= 3 -- -- for better readability the source is formated with indentDoc traceSource :: IOStateArrow s XmlTree XmlTree traceSource = trace 3 $ xshow $ choiceA [ isRoot :-> ( indentDoc >>> getChildren ) , isElem :-> ( root [] [this] >>> indentDoc >>> getChildren >>> isElem ) , this :-> this ] -- | issue the tree representation of a document if trace level >= 4 traceTree :: IOStateArrow s XmlTree XmlTree traceTree = trace 4 $ xshow $ treeRepOfXmlDoc >>> addHeadlineToXmlDoc >>> getChildren -- | trace a main computation step -- issue a message when trace level >= 1, issue document source if level >= 3, issue tree when level is >= 4 traceDoc :: String -> IOStateArrow s XmlTree XmlTree traceDoc msg = traceMsg 1 msg >>> traceSource >>> traceTree -- ---------------------------------------------------------- traceOutputToStderr :: Int -> String -> IO () traceOutputToStderr _level msg = do hPutStrLn stderr msg hFlush stderr -- ---------------------------------------------------------- hxt-9.3.1.15/src/Text/XML/HXT/Arrow/XmlState/TypeDefs.hs0000644000000000000000000011777612465166667020537 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Text.XML.HXT.Arrow.XmlState.TypeDefs Copyright : Copyright (C) 2010 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : stable Portability: portable the basic state arrows for XML processing A state is needed for global processing options, like encoding options, document base URI, trace levels and error message handling The state is separated into a user defined state and a system state. The system state contains variables for error message handling, for tracing, for the document base for accessing XML documents with relative references, e.g. DTDs, and a global key value store. This assoc list has strings as keys and lists of XmlTrees as values. It is used to store arbitrary XML and text values, e.g. user defined global options. The user defined part of the store is in the default case empty, defined as (). It can be extended with an arbitray data type -} -- ------------------------------------------------------------ module Text.XML.HXT.Arrow.XmlState.TypeDefs ( module Text.XML.HXT.Arrow.XmlState.TypeDefs , Selector(..) , chgS , idS , (.&&&.) ) where import Control.Arrow import Control.Arrow.ArrowList import Control.Arrow.IOStateListArrow import Control.DeepSeq import Data.ByteString.Lazy (ByteString) import Data.Char (isDigit) import Data.Function.Selector (Selector (..), chgS, idS, (.&&&.)) import qualified Data.Map as M import Text.XML.HXT.DOM.Interface -- ------------------------------------------------------------ {- datatypes -} -- | -- state datatype consists of a system state and a user state -- the user state is not fixed data XIOState us = XIOState { xioSysState :: ! XIOSysState , xioUserState :: ! us } instance (NFData us) => NFData (XIOState us) where rnf (XIOState sys usr) = rnf sys `seq` rnf usr -- | -- The arrow type for stateful arrows type IOStateArrow s b c = IOSLA (XIOState s) b c -- | -- The arrow for stateful arrows with no user defined state type IOSArrow b c = IOStateArrow () b c -- ------------------------------------------------------------ -- user state functions -- | read the user defined part of the state getUserState :: IOStateArrow s b s getUserState = IOSLA $ \ s _ -> return (s, [xioUserState s]) -- | change the user defined part of the state changeUserState :: (b -> s -> s) -> IOStateArrow s b b changeUserState cf = IOSLA $ \ s v -> let s' = s { xioUserState = cf v (xioUserState s) } in return (s', [v]) -- | set the user defined part of the state setUserState :: IOStateArrow s s s setUserState = changeUserState const -- | extend user state -- -- Run an arrow with an extended user state component, The old component -- is stored together with a new one in a pair, the arrow is executed with this -- extended state, and the augmented state component is removed form the state -- when the arrow has finished its execution withExtendedUserState :: s1 -> IOStateArrow (s1, s0) b c -> IOStateArrow s0 b c withExtendedUserState initS1 f = IOSLA $ \ s0 x -> do ~(finalS, res) <- runIOSLA f ( XIOState { xioSysState = xioSysState s0 , xioUserState = (initS1, xioUserState s0) } ) x return ( XIOState { xioSysState = xioSysState finalS , xioUserState = snd (xioUserState finalS) } , res ) -- | change the type of user state -- -- This conversion is useful, when running a state arrow with another -- structure of the user state, e.g. with () when executing some IO arrows withOtherUserState :: s1 -> IOStateArrow s1 b c -> IOStateArrow s0 b c withOtherUserState s1 f = IOSLA $ \ s x -> do (s', res) <- runIOSLA f ( XIOState { xioSysState = xioSysState s , xioUserState = s1 } ) x return ( XIOState { xioSysState = xioSysState s' , xioUserState = xioUserState s } , res ) withoutUserState :: IOSArrow b c -> IOStateArrow s0 b c withoutUserState = withOtherUserState () -- ------------------------------------------------------------ -- system state structure and acces functions -- | -- predefined system state data type with all components for the -- system functions, like trace, error handling, ... data XIOSysState = XIOSys { xioSysWriter :: ! XIOSysWriter , xioSysEnv :: ! XIOSysEnv } instance NFData XIOSysState where rnf x = seq x () -- all fields of interest are strict data XIOSysWriter = XIOwrt { xioErrorStatus :: ! Int , xioErrorMsgList :: ! XmlTrees , xioExpatErrors :: IOSArrow XmlTree XmlTree , xioRelaxNoOfErrors :: ! Int , xioRelaxDefineId :: ! Int , xioRelaxAttrList :: AssocList String XmlTrees } data XIOSysEnv = XIOEnv { xioTraceLevel :: ! Int , xioTraceCmd :: Int -> String -> IO () , xioErrorMsgHandler :: String -> IO () , xioErrorMsgCollect :: ! Bool , xioBaseURI :: ! String , xioDefaultBaseURI :: ! String , xioAttrList :: ! Attributes , xioInputConfig :: ! XIOInputConfig , xioParseConfig :: ! XIOParseConfig , xioOutputConfig :: ! XIOOutputConfig , xioRelaxConfig :: ! XIORelaxConfig , xioXmlSchemaConfig :: ! XIOXmlSchemaConfig , xioCacheConfig :: ! XIOCacheConfig } data XIOInputConfig = XIOIcgf { xioStrictInput :: ! Bool , xioEncodingErrors :: ! Bool , xioInputEncoding :: String , xioHttpHandler :: IOSArrow XmlTree XmlTree , xioInputOptions :: ! Attributes , xioRedirect :: ! Bool , xioProxy :: String } data XIOParseConfig = XIOPcfg { xioMimeTypes :: MimeTypeTable , xioMimeTypeHandlers :: MimeTypeHandlers , xioMimeTypeFile :: String , xioAcceptedMimeTypes :: [String] , xioFileMimeType :: String , xioWarnings :: ! Bool , xioRemoveWS :: ! Bool , xioParseByMimeType :: ! Bool , xioParseHTML :: ! Bool , xioLowerCaseNames :: ! Bool , xioPreserveComment :: ! Bool , xioValidate :: ! Bool , xioSubstDTDEntities :: ! Bool , xioSubstHTMLEntities :: ! Bool , xioCheckNamespaces :: ! Bool , xioCanonicalize :: ! Bool , xioIgnoreNoneXmlContents :: ! Bool , xioTagSoup :: ! Bool , xioTagSoupParser :: IOSArrow XmlTree XmlTree , xioExpat :: ! Bool , xioExpatParser :: IOSArrow XmlTree XmlTree } data XIOOutputConfig = XIOOcfg { xioIndent :: ! Bool , xioOutputEncoding :: ! String , xioOutputFmt :: ! XIOXoutConfig , xioXmlPi :: ! Bool , xioNoEmptyElemFor :: ! [String] , xioAddDefaultDTD :: ! Bool , xioTextMode :: ! Bool , xioShowTree :: ! Bool , xioShowHaskell :: ! Bool } data XIOXoutConfig = XMLoutput | XHTMLoutput | HTMLoutput | PLAINoutput deriving (Eq) data XIORelaxConfig = XIORxc { xioRelaxValidate :: ! Bool , xioRelaxSchema :: String , xioRelaxCheckRestr :: ! Bool , xioRelaxValidateExtRef :: ! Bool , xioRelaxValidateInclude :: ! Bool , xioRelaxCollectErrors :: ! Bool , xioRelaxValidator :: IOSArrow XmlTree XmlTree } data XIOXmlSchemaConfig = XIOScc { xioXmlSchemaValidate :: ! Bool , xioXmlSchemaSchema :: String , xioXmlSchemaValidator :: IOSArrow XmlTree XmlTree } data XIOCacheConfig = XIOCch { xioBinaryCompression :: CompressionFct , xioBinaryDeCompression :: DeCompressionFct , xioWithCache :: ! Bool , xioCacheDir :: ! String , xioDocumentAge :: ! Int , xioCache404Err :: ! Bool , xioCacheRead :: String -> IOSArrow XmlTree XmlTree , xioStrictDeserialize :: ! Bool } type MimeTypeHandlers = M.Map String (IOSArrow XmlTree XmlTree) type CompressionFct = ByteString -> ByteString type DeCompressionFct = ByteString -> ByteString type SysConfig = XIOSysState -> XIOSysState type SysConfigList = [SysConfig] -- ---------------------------------------- theSysState :: Selector (XIOState us) XIOSysState theSysState = S { getS = xioSysState , setS = \ x s -> s { xioSysState = x} } theUserState :: Selector (XIOState us) us theUserState = S { getS = xioUserState , setS = \ x s -> s { xioUserState = x} } -- ---------------------------------------- theSysWriter :: Selector XIOSysState XIOSysWriter theSysWriter = S { getS = xioSysWriter , setS = \ x s -> s { xioSysWriter = x} } theErrorStatus :: Selector XIOSysState Int theErrorStatus = theSysWriter >>> S { getS = xioErrorStatus , setS = \ x s -> s { xioErrorStatus = x } } theErrorMsgList :: Selector XIOSysState XmlTrees theErrorMsgList = theSysWriter >>> S { getS = xioErrorMsgList , setS = \ x s -> s { xioErrorMsgList = x } } theRelaxNoOfErrors :: Selector XIOSysState Int theRelaxNoOfErrors = theSysWriter >>> S { getS = xioRelaxNoOfErrors , setS = \ x s -> s { xioRelaxNoOfErrors = x} } theRelaxDefineId :: Selector XIOSysState Int theRelaxDefineId = theSysWriter >>> S { getS = xioRelaxDefineId , setS = \ x s -> s { xioRelaxDefineId = x} } theRelaxAttrList :: Selector XIOSysState (AssocList String XmlTrees) theRelaxAttrList = theSysWriter >>> S { getS = xioRelaxAttrList , setS = \ x s -> s { xioRelaxAttrList = x} } -- ---------------------------------------- theSysEnv :: Selector XIOSysState XIOSysEnv theSysEnv = S { getS = xioSysEnv , setS = \ x s -> s { xioSysEnv = x} } theInputConfig :: Selector XIOSysState XIOInputConfig theInputConfig = theSysEnv >>> S { getS = xioInputConfig , setS = \ x s -> s { xioInputConfig = x} } theStrictInput :: Selector XIOSysState Bool theStrictInput = theInputConfig >>> S { getS = xioStrictInput , setS = \ x s -> s { xioStrictInput = x} } theEncodingErrors :: Selector XIOSysState Bool theEncodingErrors = theInputConfig >>> S { getS = xioEncodingErrors , setS = \ x s -> s { xioEncodingErrors = x} } theInputEncoding :: Selector XIOSysState String theInputEncoding = theInputConfig >>> S { getS = xioInputEncoding , setS = \ x s -> s { xioInputEncoding = x} } theHttpHandler :: Selector XIOSysState (IOSArrow XmlTree XmlTree) theHttpHandler = theInputConfig >>> S { getS = xioHttpHandler , setS = \ x s -> s { xioHttpHandler = x} } theInputOptions :: Selector XIOSysState Attributes theInputOptions = theInputConfig >>> S { getS = xioInputOptions , setS = \ x s -> s { xioInputOptions = x} } theRedirect :: Selector XIOSysState Bool theRedirect = theInputConfig >>> S { getS = xioRedirect , setS = \ x s -> s { xioRedirect = x} } theProxy :: Selector XIOSysState String theProxy = theInputConfig >>> S { getS = xioProxy , setS = \ x s -> s { xioProxy = x} } -- ---------------------------------------- theOutputConfig :: Selector XIOSysState XIOOutputConfig theOutputConfig = theSysEnv >>> S { getS = xioOutputConfig , setS = \ x s -> s { xioOutputConfig = x} } theIndent :: Selector XIOSysState Bool theIndent = theOutputConfig >>> S { getS = xioIndent , setS = \ x s -> s { xioIndent = x} } theOutputEncoding :: Selector XIOSysState String theOutputEncoding = theOutputConfig >>> S { getS = xioOutputEncoding , setS = \ x s -> s { xioOutputEncoding = x} } theOutputFmt :: Selector XIOSysState XIOXoutConfig theOutputFmt = theOutputConfig >>> S { getS = xioOutputFmt , setS = \ x s -> s { xioOutputFmt = x} } theXmlPi :: Selector XIOSysState Bool theXmlPi = theOutputConfig >>> S { getS = xioXmlPi , setS = \ x s -> s { xioXmlPi = x} } theNoEmptyElemFor :: Selector XIOSysState [String] theNoEmptyElemFor = theOutputConfig >>> S { getS = xioNoEmptyElemFor , setS = \ x s -> s { xioNoEmptyElemFor = x} } theAddDefaultDTD :: Selector XIOSysState Bool theAddDefaultDTD = theOutputConfig >>> S { getS = xioAddDefaultDTD , setS = \ x s -> s { xioAddDefaultDTD = x} } theTextMode :: Selector XIOSysState Bool theTextMode = theOutputConfig >>> S { getS = xioTextMode , setS = \ x s -> s { xioTextMode = x} } theShowTree :: Selector XIOSysState Bool theShowTree = theOutputConfig >>> S { getS = xioShowTree , setS = \ x s -> s { xioShowTree = x} } theShowHaskell :: Selector XIOSysState Bool theShowHaskell = theOutputConfig >>> S { getS = xioShowHaskell , setS = \ x s -> s { xioShowHaskell = x} } -- ---------------------------------------- theRelaxConfig :: Selector XIOSysState XIORelaxConfig theRelaxConfig = theSysEnv >>> S { getS = xioRelaxConfig , setS = \ x s -> s { xioRelaxConfig = x} } theRelaxValidate :: Selector XIOSysState Bool theRelaxValidate = theRelaxConfig >>> S { getS = xioRelaxValidate , setS = \ x s -> s { xioRelaxValidate = x} } theRelaxSchema :: Selector XIOSysState String theRelaxSchema = theRelaxConfig >>> S { getS = xioRelaxSchema , setS = \ x s -> s { xioRelaxSchema = x} } theRelaxCheckRestr :: Selector XIOSysState Bool theRelaxCheckRestr = theRelaxConfig >>> S { getS = xioRelaxCheckRestr , setS = \ x s -> s { xioRelaxCheckRestr = x} } theRelaxValidateExtRef :: Selector XIOSysState Bool theRelaxValidateExtRef = theRelaxConfig >>> S { getS = xioRelaxValidateExtRef , setS = \ x s -> s { xioRelaxValidateExtRef = x} } theRelaxValidateInclude :: Selector XIOSysState Bool theRelaxValidateInclude = theRelaxConfig >>> S { getS = xioRelaxValidateInclude , setS = \ x s -> s { xioRelaxValidateInclude = x} } theRelaxCollectErrors :: Selector XIOSysState Bool theRelaxCollectErrors = theRelaxConfig >>> S { getS = xioRelaxCollectErrors , setS = \ x s -> s { xioRelaxCollectErrors = x} } theRelaxValidator :: Selector XIOSysState (IOSArrow XmlTree XmlTree) theRelaxValidator = theRelaxConfig >>> S { getS = xioRelaxValidator , setS = \ x s -> s { xioRelaxValidator = x} } -- ---------------------------------------- theXmlSchemaConfig :: Selector XIOSysState XIOXmlSchemaConfig theXmlSchemaConfig = theSysEnv >>> S { getS = xioXmlSchemaConfig , setS = \ x s -> s { xioXmlSchemaConfig = x} } theXmlSchemaValidate :: Selector XIOSysState Bool theXmlSchemaValidate = theXmlSchemaConfig >>> S { getS = xioXmlSchemaValidate , setS = \ x s -> s { xioXmlSchemaValidate = x} } theXmlSchemaSchema :: Selector XIOSysState String theXmlSchemaSchema = theXmlSchemaConfig >>> S { getS = xioXmlSchemaSchema , setS = \ x s -> s { xioXmlSchemaSchema = x} } theXmlSchemaValidator :: Selector XIOSysState (IOSArrow XmlTree XmlTree) theXmlSchemaValidator = theXmlSchemaConfig >>> S { getS = xioXmlSchemaValidator , setS = \ x s -> s { xioXmlSchemaValidator = x} } -- ---------------------------------------- theParseConfig :: Selector XIOSysState XIOParseConfig theParseConfig = theSysEnv >>> S { getS = xioParseConfig , setS = \ x s -> s { xioParseConfig = x} } theErrorMsgHandler :: Selector XIOSysState (String -> IO ()) theErrorMsgHandler = theSysEnv >>> S { getS = xioErrorMsgHandler , setS = \ x s -> s { xioErrorMsgHandler = x } } theErrorMsgCollect :: Selector XIOSysState Bool theErrorMsgCollect = theSysEnv >>> S { getS = xioErrorMsgCollect , setS = \ x s -> s { xioErrorMsgCollect = x } } theBaseURI :: Selector XIOSysState String theBaseURI = theSysEnv >>> S { getS = xioBaseURI , setS = \ x s -> s { xioBaseURI = x } } theDefaultBaseURI :: Selector XIOSysState String theDefaultBaseURI = theSysEnv >>> S { getS = xioDefaultBaseURI , setS = \ x s -> s { xioDefaultBaseURI = x } } theTraceLevel :: Selector XIOSysState Int theTraceLevel = theSysEnv >>> S { getS = xioTraceLevel , setS = \ x s -> s { xioTraceLevel = x } } theTraceCmd :: Selector XIOSysState (Int -> String -> IO ()) theTraceCmd = theSysEnv >>> S { getS = xioTraceCmd , setS = \ x s -> s { xioTraceCmd = x } } theTrace :: Selector XIOSysState (Int, Int -> String -> IO ()) theTrace = theTraceLevel .&&&. theTraceCmd theAttrList :: Selector XIOSysState Attributes theAttrList = theSysEnv >>> S { getS = xioAttrList , setS = \ x s -> s { xioAttrList = x } } theMimeTypes :: Selector XIOSysState MimeTypeTable theMimeTypes = theParseConfig >>> S { getS = xioMimeTypes , setS = \ x s -> s { xioMimeTypes = x } } theMimeTypeHandlers :: Selector XIOSysState MimeTypeHandlers theMimeTypeHandlers = theParseConfig >>> S { getS = xioMimeTypeHandlers , setS = \ x s -> s { xioMimeTypeHandlers = x } } theMimeTypeFile :: Selector XIOSysState String theMimeTypeFile = theParseConfig >>> S { getS = xioMimeTypeFile , setS = \ x s -> s { xioMimeTypeFile = x } } theAcceptedMimeTypes :: Selector XIOSysState [String] theAcceptedMimeTypes = theParseConfig >>> S { getS = xioAcceptedMimeTypes , setS = \ x s -> s { xioAcceptedMimeTypes = x } } theFileMimeType :: Selector XIOSysState String theFileMimeType = theParseConfig >>> S { getS = xioFileMimeType , setS = \ x s -> s { xioFileMimeType = x } } theWarnings :: Selector XIOSysState Bool theWarnings = theParseConfig >>> S { getS = xioWarnings , setS = \ x s -> s { xioWarnings = x } } theRemoveWS :: Selector XIOSysState Bool theRemoveWS = theParseConfig >>> S { getS = xioRemoveWS , setS = \ x s -> s { xioRemoveWS = x } } thePreserveComment :: Selector XIOSysState Bool thePreserveComment = theParseConfig >>> S { getS = xioPreserveComment , setS = \ x s -> s { xioPreserveComment = x } } theParseByMimeType :: Selector XIOSysState Bool theParseByMimeType = theParseConfig >>> S { getS = xioParseByMimeType , setS = \ x s -> s { xioParseByMimeType = x } } theParseHTML :: Selector XIOSysState Bool theParseHTML = theParseConfig >>> S { getS = xioParseHTML , setS = \ x s -> s { xioParseHTML = x } } theLowerCaseNames :: Selector XIOSysState Bool theLowerCaseNames = theParseConfig >>> S { getS = xioLowerCaseNames , setS = \ x s -> s { xioLowerCaseNames = x } } theValidate :: Selector XIOSysState Bool theValidate = theParseConfig >>> S { getS = xioValidate , setS = \ x s -> s { xioValidate = x } } theSubstDTDEntities :: Selector XIOSysState Bool theSubstDTDEntities = theParseConfig >>> S { getS = xioSubstDTDEntities , setS = \ x s -> s { xioSubstDTDEntities = x } } theSubstHTMLEntities :: Selector XIOSysState Bool theSubstHTMLEntities = theParseConfig >>> S { getS = xioSubstHTMLEntities , setS = \ x s -> s { xioSubstHTMLEntities = x } } theCheckNamespaces :: Selector XIOSysState Bool theCheckNamespaces = theParseConfig >>> S { getS = xioCheckNamespaces , setS = \ x s -> s { xioCheckNamespaces = x } } theCanonicalize :: Selector XIOSysState Bool theCanonicalize = theParseConfig >>> S { getS = xioCanonicalize , setS = \ x s -> s { xioCanonicalize = x } } theIgnoreNoneXmlContents :: Selector XIOSysState Bool theIgnoreNoneXmlContents = theParseConfig >>> S { getS = xioIgnoreNoneXmlContents , setS = \ x s -> s { xioIgnoreNoneXmlContents = x } } theTagSoup :: Selector XIOSysState Bool theTagSoup = theParseConfig >>> S { getS = xioTagSoup , setS = \ x s -> s { xioTagSoup = x } } theTagSoupParser :: Selector XIOSysState (IOSArrow XmlTree XmlTree) theTagSoupParser = theParseConfig >>> S { getS = xioTagSoupParser , setS = \ x s -> s { xioTagSoupParser = x } } theExpat :: Selector XIOSysState Bool theExpat = theParseConfig >>> S { getS = xioExpat , setS = \ x s -> s { xioExpat = x } } theExpatParser :: Selector XIOSysState (IOSArrow XmlTree XmlTree) theExpatParser = theParseConfig >>> S { getS = xioExpatParser , setS = \ x s -> s { xioExpatParser = x } } theExpatErrors :: Selector XIOSysState (IOSArrow XmlTree XmlTree) theExpatErrors = theSysWriter >>> S { getS = xioExpatErrors , setS = \ x s -> s { xioExpatErrors = x } } -- ---------------------------------------- theCacheConfig :: Selector XIOSysState XIOCacheConfig theCacheConfig = theSysEnv >>> S { getS = xioCacheConfig , setS = \ x s -> s { xioCacheConfig = x} } theBinaryCompression :: Selector XIOSysState (ByteString -> ByteString) theBinaryCompression = theCacheConfig >>> S { getS = xioBinaryCompression , setS = \ x s -> s { xioBinaryCompression = x} } theBinaryDeCompression :: Selector XIOSysState (ByteString -> ByteString) theBinaryDeCompression = theCacheConfig >>> S { getS = xioBinaryDeCompression , setS = \ x s -> s { xioBinaryDeCompression = x} } theWithCache :: Selector XIOSysState Bool theWithCache = theCacheConfig >>> S { getS = xioWithCache , setS = \ x s -> s { xioWithCache = x} } theCacheDir :: Selector XIOSysState String theCacheDir = theCacheConfig >>> S { getS = xioCacheDir , setS = \ x s -> s { xioCacheDir = x} } theDocumentAge :: Selector XIOSysState Int theDocumentAge = theCacheConfig >>> S { getS = xioDocumentAge , setS = \ x s -> s { xioDocumentAge = x} } theCache404Err :: Selector XIOSysState Bool theCache404Err = theCacheConfig >>> S { getS = xioCache404Err , setS = \ x s -> s { xioCache404Err = x} } theCacheRead :: Selector XIOSysState (String -> IOSArrow XmlTree XmlTree) theCacheRead = theCacheConfig >>> S { getS = xioCacheRead , setS = \ x s -> s { xioCacheRead = x} } theStrictDeserialize :: Selector XIOSysState Bool theStrictDeserialize = theCacheConfig >>> S { getS = xioStrictDeserialize , setS = \ x s -> s { xioStrictDeserialize = x} } -- ------------------------------------------------------------ getSysVar :: Selector XIOSysState c -> IOStateArrow s b c getSysVar sel = IOSLA $ \ s _x -> return (s, (:[]) . getS (theSysState >>> sel) $ s) setSysVar :: Selector XIOSysState c -> IOStateArrow s c c setSysVar sel = (\ v -> configSysVar $ setS sel v) $< this chgSysVar :: Selector XIOSysState c -> (b -> c -> c) -> IOStateArrow s b b chgSysVar sel op = (\ v -> configSysVar $ chgS sel (op v)) $< this configSysVar :: SysConfig -> IOStateArrow s c c configSysVar cf = IOSLA $ \ s v -> return (chgS theSysState cf s, [v]) configSysVars :: SysConfigList -> IOStateArrow s c c configSysVars cfs = configSysVar $ foldr (>>>) id $ cfs localSysVar :: Selector XIOSysState c -> IOStateArrow s a b -> IOStateArrow s a b localSysVar sel f = IOSLA $ \ s0 v -> let sel' = theSysState >>> sel in let c0 = getS sel' s0 in do (s1, res) <- runIOSLA f s0 v return (setS sel' c0 s1, res) localSysEnv :: IOStateArrow s a b -> IOStateArrow s a b localSysEnv = localSysVar theSysEnv incrSysVar :: Selector XIOSysState Int -> IOStateArrow s a Int incrSysVar cnt = getSysVar cnt >>> arr (+1) >>> setSysVar cnt >>> arr (\ x -> x - 1) -- ------------------------------ -- | store a string in global state under a given attribute name setSysAttr :: String -> IOStateArrow s String String setSysAttr n = chgSysVar theAttrList (addEntry n) -- | remove an entry in global state, arrow input remains unchanged unsetSysAttr :: String -> IOStateArrow s b b unsetSysAttr n = configSysVar $ chgS theAttrList (delEntry n) -- | read an attribute value from global state getSysAttr :: String -> IOStateArrow s b String getSysAttr n = getSysVar theAttrList >>^ lookup1 n -- | read all attributes from global state getAllSysAttrs :: IOStateArrow s b Attributes getAllSysAttrs = getSysVar theAttrList setSysAttrString :: String -> String -> IOStateArrow s b b setSysAttrString n v = perform ( constA v >>> setSysAttr n ) -- | store an int value in global state setSysAttrInt :: String -> Int -> IOStateArrow s b b setSysAttrInt n v = setSysAttrString n (show v) -- | read an int value from global state -- -- > getSysAttrInt 0 myIntAttr getSysAttrInt :: Int -> String -> IOStateArrow s b Int getSysAttrInt def n = getSysAttr n >>^ toInt def toInt :: Int -> String -> Int toInt def s | not (null s) && all isDigit s = read s | otherwise = def -- ------------------------------------------------------------ hxt-9.3.1.15/src/Text/XML/HXT/Arrow/XmlState/MimeTypeTable.hs0000644000000000000000000000431712465166667021477 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Text.XML.HXT.Arrow.XmlState.MimeTypeTable Copyright : Copyright (C) 2010 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : stable Portability: portable the mime type configuration functions -} -- ------------------------------------------------------------ module Text.XML.HXT.Arrow.XmlState.MimeTypeTable where import Control.Arrow -- arrow classes import Control.Arrow.ArrowList import Control.Arrow.ArrowIO import Text.XML.HXT.DOM.Interface import Text.XML.HXT.Arrow.XmlState.TypeDefs -- ------------------------------------------------------------ -- | set the table mapping of file extensions to mime types in the system state -- -- Default table is defined in 'Text.XML.HXT.DOM.MimeTypeDefaults'. -- This table is used when reading loacl files, (file: protocol) to determine the mime type setMimeTypeTable :: MimeTypeTable -> IOStateArrow s b b setMimeTypeTable mtt = configSysVar $ setS (theMimeTypes .&&&. theMimeTypeFile) (mtt, "") -- | set the table mapping of file extensions to mime types by an external config file -- -- The config file must follow the conventions of /etc/mime.types on a debian linux system, -- that means all empty lines and all lines starting with a # are ignored. The other lines -- must consist of a mime type followed by a possible empty list of extensions. -- The list of extenstions and mime types overwrites the default list in the system state -- of the IOStateArrow setMimeTypeTableFromFile :: FilePath -> IOStateArrow s b b setMimeTypeTableFromFile file = configSysVar $ setS theMimeTypeFile file -- | read the system mimetype table getMimeTypeTable :: IOStateArrow s b MimeTypeTable getMimeTypeTable = getMime $< getSysVar (theMimeTypes .&&&. theMimeTypeFile) where getMime (mtt, "") = constA mtt getMime (_, mtf) = perform (setMimeTypeTable $< arrIO0 ( readMimeTypeTable mtf)) >>> getMimeTypeTable -- ------------------------------------------------------------ hxt-9.3.1.15/src/Text/XML/HXT/Arrow/XmlState/URIHandling.hs0000644000000000000000000002107312465166667021100 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Text.XML.HXT.Arrow.XmlState.URIHandling Copyright : Copyright (C) 2010 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : stable Portability: portable the basic state arrows for URI handling -} -- ------------------------------------------------------------ module Text.XML.HXT.Arrow.XmlState.URIHandling where import Control.Arrow -- arrow classes import Control.Arrow.ArrowList import Control.Arrow.ArrowIf import Control.Arrow.ArrowIO import Control.Monad ( mzero , mplus ) import Text.XML.HXT.Arrow.XmlArrow import Text.XML.HXT.Arrow.XmlState.TypeDefs import Text.XML.HXT.Arrow.XmlState.TraceHandling import Data.Maybe import Network.URI ( URI , escapeURIChar , isUnescapedInURI , nonStrictRelativeTo , parseURIReference , uriAuthority , uriFragment , uriPath , uriPort , uriQuery , uriRegName , uriScheme , uriUserInfo ) import System.Directory ( getCurrentDirectory ) -- ------------------------------------------------------------ -- | set the base URI of a document, used e.g. for reading includes, e.g. external entities, -- the input must be an absolute URI setBaseURI :: IOStateArrow s String String setBaseURI = setSysVar theBaseURI >>> traceValue 2 (("setBaseURI: new base URI is " ++) . show) -- | read the base URI from the globale state getBaseURI :: IOStateArrow s b String getBaseURI = getSysVar theBaseURI >>> ( ( getDefaultBaseURI >>> setBaseURI >>> getBaseURI ) `when` isA null -- set and get it, if not yet done ) -- | change the base URI with a possibly relative URI, can be used for -- evaluating the xml:base attribute. Returns the new absolute base URI. -- Fails, if input is not parsable with parseURIReference -- -- see also: 'setBaseURI', 'mkAbsURI' changeBaseURI :: IOStateArrow s String String changeBaseURI = mkAbsURI >>> setBaseURI -- | set the default base URI, if parameter is null, the system base (@ file:\/\/\/\\/ @) is used, -- else the parameter, must be called before any document is read setDefaultBaseURI :: String -> IOStateArrow s b String setDefaultBaseURI base = ( if null base then arrIO getDir else constA base ) >>> setSysVar theDefaultBaseURI >>> traceValue 2 (("setDefaultBaseURI: new default base URI is " ++) . show) where getDir _ = do cwd <- getCurrentDirectory return ("file://" ++ normalize cwd ++ "/") -- under Windows getCurrentDirectory returns something like: "c:\path\to\file" -- backslaches are not allowed in URIs and paths must start with a / -- so this is transformed into "/c:/path/to/file" normalize wd'@(d : ':' : _) | d `elem` ['A'..'Z'] || d `elem` ['a'..'z'] = '/' : concatMap win32ToUriChar wd' normalize wd' = concatMap escapeNonUriChar wd' win32ToUriChar '\\' = "/" win32ToUriChar c = escapeNonUriChar c escapeNonUriChar c = escapeURIChar isUnescapedInURI c -- from Network.URI -- | get the default base URI getDefaultBaseURI :: IOStateArrow s b String getDefaultBaseURI = getSysVar theDefaultBaseURI -- read default uri in system state >>> ( ( setDefaultBaseURI "" -- set the default uri in system state >>> getDefaultBaseURI ) `when` isA null ) -- when uri not yet set -- ------------------------------------------------------------ -- | remember base uri, run an arrow and restore the base URI, used with external entity substitution runInLocalURIContext :: IOStateArrow s b c -> IOStateArrow s b c runInLocalURIContext f = localSysVar theBaseURI f -- ---------------------------------------------------------- -- | parse a URI reference, in case of a failure, -- try to escape unescaped chars, convert backslashes to slashes for windows paths, -- and try parsing again parseURIReference' :: String -> Maybe URI parseURIReference' uri = parseURIReference uri `mplus` ( if unesc then parseURIReference uri' else mzero ) where unesc = not . all isUnescapedInURI $ uri escape '\\' = "/" escape c = escapeURIChar isUnescapedInURI c uri' = concatMap escape uri -- | compute the absolut URI for a given URI and a base URI expandURIString :: String -> String -> Maybe String expandURIString uri base = do base' <- parseURIReference' base uri' <- parseURIReference' uri -- abs' <- nonStrictRelativeTo uri' base' let abs' = nonStrictRelativeTo uri' base' return $ show abs' -- | arrow variant of 'expandURIString', fails if 'expandURIString' returns Nothing expandURI :: ArrowXml a => a (String, String) String expandURI = arrL (maybeToList . uncurry expandURIString) -- | arrow for expanding an input URI into an absolute URI using global base URI, fails if input is not a legal URI mkAbsURI :: IOStateArrow s String String mkAbsURI = ( this &&& getBaseURI ) >>> expandURI -- | arrow for selecting the scheme (protocol) of the URI, fails if input is not a legal URI. -- -- See Network.URI for URI components getSchemeFromURI :: ArrowList a => a String String getSchemeFromURI = getPartFromURI scheme where scheme = init . uriScheme -- | arrow for selecting the registered name (host) of the URI, fails if input is not a legal URI getRegNameFromURI :: ArrowList a => a String String getRegNameFromURI = getPartFromURI host where host = maybe "" uriRegName . uriAuthority -- | arrow for selecting the port number of the URI without leading \':\', fails if input is not a legal URI getPortFromURI :: ArrowList a => a String String getPortFromURI = getPartFromURI port where port = dropWhile (==':') . maybe "" uriPort . uriAuthority -- | arrow for selecting the user info of the URI without trailing \'\@\', fails if input is not a legal URI getUserInfoFromURI :: ArrowList a => a String String getUserInfoFromURI = getPartFromURI ui where ui = reverse . dropWhile (=='@') . reverse . maybe "" uriUserInfo . uriAuthority -- | arrow for computing the path component of an URI, fails if input is not a legal URI getPathFromURI :: ArrowList a => a String String getPathFromURI = getPartFromURI uriPath -- | arrow for computing the query component of an URI, fails if input is not a legal URI getQueryFromURI :: ArrowList a => a String String getQueryFromURI = getPartFromURI uriQuery -- | arrow for computing the fragment component of an URI, fails if input is not a legal URI getFragmentFromURI :: ArrowList a => a String String getFragmentFromURI = getPartFromURI uriFragment -- | arrow for computing the path component of an URI, fails if input is not a legal URI getPartFromURI :: ArrowList a => (URI -> String) -> a String String getPartFromURI sel = arrL (maybeToList . getPart) where getPart s = do uri <- parseURIReference' s return (sel uri) -- ------------------------------------------------------------ hxt-9.3.1.15/src/Text/XML/HXT/Arrow/XmlState/SystemConfig.hs0000644000000000000000000002510412465166667021405 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Text.XML.HXT.Arrow.XmlState.SystemConfig Copyright : Copyright (C) 2010 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : stable Portability: portable system configuration and common options options -} -- ------------------------------------------------------------ module Text.XML.HXT.Arrow.XmlState.SystemConfig where import Control.Arrow import Data.Map ( insert ) import Text.XML.HXT.DOM.Interface import Text.XML.HXT.Arrow.XmlState.ErrorHandling import Text.XML.HXT.Arrow.XmlState.TypeDefs -- ------------------------------ -- config options -- | @withTace level@ : system option, set the trace level, (0..4) withTrace :: Int -> SysConfig withTrace = setS theTraceLevel -- | @withSysAttr key value@ : store an arbitarty key value pair in system state withSysAttr :: String -> String -> SysConfig withSysAttr n v = chgS theAttrList (addEntry n v) -- | Specify the set of accepted mime types. -- -- All contents of documents for which the mime type is not found in this list -- are discarded. withAcceptedMimeTypes :: [String] -> SysConfig withAcceptedMimeTypes = setS theAcceptedMimeTypes -- | Specify a content handler for documents of a given mime type withMimeTypeHandler :: String -> IOSArrow XmlTree XmlTree -> SysConfig withMimeTypeHandler mt pa = chgS theMimeTypeHandlers $ insert mt pa -- | @withMimeTypeFile filename@ : input option, -- set the mime type table for @file:@ documents by given file. -- The format of this config file must be in the syntax of a debian linux \"mime.types\" config file withMimeTypeFile :: String -> SysConfig withMimeTypeFile = setS theMimeTypeFile -- | Force a given mime type for all file contents. -- -- The mime type for file access will then not be computed by looking into a mime.types file withFileMimeType :: String -> SysConfig withFileMimeType = setS theFileMimeType -- | @withWarnings yes/no@ : system option, issue warnings during reading, HTML parsing and processing, -- default is 'yes' withWarnings :: Bool -> SysConfig withWarnings = setS theWarnings -- | @withErrors yes/no@ : system option for suppressing error messages, default is 'no' withErrors :: Bool -> SysConfig withErrors b = setS theErrorMsgHandler h where h | b = errorOutputToStderr | otherwise = const $ return () -- | @withRemoveWS yes/no@ : read and write option, remove all whitespace, used for document indentation, default is 'no' withRemoveWS :: Bool -> SysConfig withRemoveWS = setS theRemoveWS -- | @withPreserveComment yes/no@ : read option, preserve comments during canonicalization, default is 'no' withPreserveComment :: Bool -> SysConfig withPreserveComment = setS thePreserveComment -- | @withParseByMimeType yes/no@ : read option, select the parser by the mime type of the document -- (pulled out of the HTTP header). -- -- When the mime type is set to \"text\/html\" -- the configured HTML parser is taken, when it\'s set to -- \"text\/xml\" or \"text\/xhtml\" the configured XML parser is taken. -- If the mime type is something else, no further processing is performed, -- the contents is given back to the application in form of a single text node. -- If the default document encoding is set to isoLatin1, this even enables processing -- of arbitray binary data. withParseByMimeType :: Bool -> SysConfig withParseByMimeType = setS theParseByMimeType -- | @withParseHTML yes/no@: read option, use HTML parser, default is 'no' (use XML parser) withParseHTML :: Bool -> SysConfig withParseHTML = setS theParseHTML -- | @withValidate yes/no@: read option, validate document against DTD, default is 'yes' withValidate :: Bool -> SysConfig withValidate = setS theValidate -- | @withSubstDTDEntities yes/no@: read option, substitute general entities defined in DTD, default is 'yes'. -- switching this option and the validate option off can lead to faster parsing, because then -- there is no need to access the DTD withSubstDTDEntities :: Bool -> SysConfig withSubstDTDEntities = setS theSubstDTDEntities -- | @withSubstHTMLEntities yes/no@: read option, substitute general entities defined in HTML DTD, default is 'no'. -- switching this option on and the substDTDEntities and validate options off can lead to faster parsing -- because there is no need to access a DTD, but still the HTML general entities are substituted withSubstHTMLEntities :: Bool -> SysConfig withSubstHTMLEntities = setS theSubstHTMLEntities -- | @withCheckNamespaces yes/no@: read option, check namespaces, default is 'no' withCheckNamespaces :: Bool -> SysConfig withCheckNamespaces = setS theCheckNamespaces -- | @withCanonicalize yes/no@ : read option, canonicalize document, default is 'yes' withCanonicalize :: Bool -> SysConfig withCanonicalize = setS theCanonicalize -- | @withIgnoreNoneXmlContents yes\/no@ : input option, ignore document contents of none XML\/HTML documents. -- -- This option can be useful for implementing crawler like applications, e.g. an URL checker. -- In those cases net traffic can be reduced. withIgnoreNoneXmlContents :: Bool -> SysConfig withIgnoreNoneXmlContents = setS theIgnoreNoneXmlContents -- ------------------------------------------------------------ -- | @withStrictInput yes/no@ : input option, input of file and HTTP contents is read eagerly, default is 'no' withStrictInput :: Bool -> SysConfig withStrictInput = setS theStrictInput -- | @withEncodingErrors yes/no@ : input option, ignore all encoding errors, default is 'no' withEncodingErrors :: Bool -> SysConfig withEncodingErrors = setS theEncodingErrors -- | @withInputEncoding encodingName@ : input option -- -- Set default document encoding ('utf8', 'isoLatin1', 'usAscii', 'iso8859_2', ... , 'iso8859_16', ...). -- Only XML, HTML and text documents are decoded, -- default decoding for XML\/HTML is utf8, for text iso latin1 (no decoding). withInputEncoding :: String -> SysConfig withInputEncoding = setS theInputEncoding -- | @withDefaultBaseURI URI@ , input option, set the default base URI -- -- This option can be useful when parsing documents from stdin or contained in a string, and interpreting -- relative URIs within the document withDefaultBaseURI :: String -> SysConfig withDefaultBaseURI = setS theDefaultBaseURI withInputOption :: String -> String -> SysConfig withInputOption n v = chgS theInputOptions (addEntry n v) withInputOptions :: Attributes -> SysConfig withInputOptions = foldr (>>>) id . map (uncurry withInputOption) -- | @withRedirect yes/no@ : input option, automatically follow redirected URIs, default is 'yes' withRedirect :: Bool -> SysConfig withRedirect = setS theRedirect -- | @withProxy \"host:port\"@ : input option, configure a proxy for HTTP access, e.g. www-cache:3128 withProxy :: String -> SysConfig withProxy = setS theProxy -- ------------------------------------------------------------ -- | @withIndent yes/no@ : output option, indent document before output, default is 'no' withIndent :: Bool -> SysConfig withIndent = setS theIndent -- | @withOutputEncoding encoding@ , output option, -- default is the default input encoding or utf8, if input encoding is not set withOutputEncoding :: String -> SysConfig withOutputEncoding = setS theOutputEncoding -- | @withOutputXML@ : output option, default writing -- -- Default is writing XML: quote special XML chars \>,\<,\",\',& where neccessary, -- add XML processing instruction -- and encode document with respect to 'withOutputEncoding' withOutputXML :: SysConfig withOutputXML = setS theOutputFmt XMLoutput -- | Write XHTML: quote all special XML chars, use HTML entity refs or char refs for none ASCII chars withOutputHTML :: SysConfig withOutputHTML = setS theOutputFmt HTMLoutput -- | Write XML: quote only special XML chars, don't substitute chars by HTML entities, -- and don\'t generate empty elements for HTML elements, -- which may contain any contents, e.g. @@ instead of @