hxt-curl-9.1.1/0000755000000000000000000000000011701313554011456 5ustar0000000000000000hxt-curl-9.1.1/hxt-curl.cabal0000644000000000000000000000167011701313554014214 0ustar0000000000000000-- arch-tag: Haskell XML Toolbox main description file Name: hxt-curl Version: 9.1.1 Synopsis: LibCurl interface for HXT Description: LibCurl interface for HXT License: OtherLicense License-file: LICENSE Author: Uwe Schmidt Maintainer: Uwe Schmidt Stability: Stable Category: XML Homepage: http://www.fh-wedel.de/~si/HXmlToolbox/index.html Copyright: Copyright (c) 2010 Uwe Schmidt Build-type: Simple Cabal-version: >=1.6 library exposed-modules: Text.XML.HXT.Curl other-modules: Text.XML.HXT.Arrow.LibCurlInput, Text.XML.HXT.IO.GetHTTPLibCurl hs-source-dirs: src ghc-options: -Wall ghc-prof-options: -auto-all -caf-all extensions: build-depends: base >= 4 && < 5, parsec >= 2.1 && < 4, bytestring >= 0.9 && < 1, curl >= 1.3 && < 2, hxt >= 9.1 && < 10 hxt-curl-9.1.1/Setup.hs0000644000000000000000000000011011701313554013102 0ustar0000000000000000#!/usr/bin/env runhaskell import Distribution.Simple main = defaultMain hxt-curl-9.1.1/LICENSE0000644000000000000000000000212011701313554012456 0ustar0000000000000000The MIT License Copyright (c) 2005 Uwe Schmidt, Martin Schmidt, Torben Kuseler Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. hxt-curl-9.1.1/src/0000755000000000000000000000000011701313554012245 5ustar0000000000000000hxt-curl-9.1.1/src/Text/0000755000000000000000000000000011701313554013171 5ustar0000000000000000hxt-curl-9.1.1/src/Text/XML/0000755000000000000000000000000011701313554013631 5ustar0000000000000000hxt-curl-9.1.1/src/Text/XML/HXT/0000755000000000000000000000000011701313554014274 5ustar0000000000000000hxt-curl-9.1.1/src/Text/XML/HXT/Curl.hs0000644000000000000000000000110311701313554015530 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Text.XML.HXT.Curl Copyright : Copyright (C) 2010 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : stable Portability: portable libcurl input -} -- ------------------------------------------------------------ module Text.XML.HXT.Curl ( getLibCurlContents , a_use_curl , withCurl , curlOptions ) where import Text.XML.HXT.Arrow.LibCurlInput -- ---------------------------------------------------------- hxt-curl-9.1.1/src/Text/XML/HXT/Arrow/0000755000000000000000000000000011701313554015366 5ustar0000000000000000hxt-curl-9.1.1/src/Text/XML/HXT/Arrow/LibCurlInput.hs0000644000000000000000000000647311701313554020310 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Text.XML.HXT.Arrow.LibCurlInput Copyright : Copyright (C) 2005 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : experimental Portability: portable libcurl input -} -- ------------------------------------------------------------ module Text.XML.HXT.Arrow.LibCurlInput ( getLibCurlContents , a_use_curl , withCurl , curlOptions ) where import Control.Arrow -- arrow classes import Control.Arrow.ArrowList import Control.Arrow.ArrowTree import Control.Arrow.ArrowIO import qualified Data.ByteString.Lazy as B -- import qualified Data.ByteString.Lazy.Char8 as C import System.Console.GetOpt import Text.XML.HXT.Arrow.DocumentInput ( addInputError ) import qualified Text.XML.HXT.IO.GetHTTPLibCurl as LibCURL import Text.XML.HXT.DOM.Interface import Text.XML.HXT.Arrow.XmlArrow import Text.XML.HXT.Arrow.XmlState import Text.XML.HXT.Arrow.XmlState.TypeDefs import Text.XML.HXT.Arrow.XmlOptions ( a_proxy , a_redirect ) -- ---------------------------------------------------------- getLibCurlContents :: IOSArrow XmlTree XmlTree getLibCurlContents = getC $<< ( getAttrValue transferURI &&& getSysVar (theInputOptions .&&&. theRedirect .&&&. theProxy .&&&. theStrictInput ) ) where getC uri (options, (redirect, (proxy, strictInput))) = applyA ( ( traceMsg 2 ( "get HTTP via libcurl, uri=" ++ show uri ++ " options=" ++ show options' ) >>> arrIO0 ( LibCURL.getCont strictInput options' uri ) ) >>> ( arr (uncurry addInputError) ||| arr addContent ) ) where options' = (a_proxy, proxy) : (a_redirect, show . fromEnum $ redirect) : options addContent :: (Attributes, B.ByteString) -> IOSArrow XmlTree XmlTree addContent (al, bc) = replaceChildren (blb bc) -- add the contents >>> seqA (map (uncurry addAttr) al) -- add the meta info (HTTP headers, ...) -- ------------------------------------------------------------ a_use_curl :: String a_use_curl = "use-curl" withCurl :: Attributes -> SysConfig withCurl curlOpts = setS theHttpHandler getLibCurlContents >>> withInputOptions curlOpts curlOptions :: [OptDescr SysConfig] curlOptions = [ Option "" [a_use_curl] (NoArg (withCurl [])) "enable HTTP input with libcurl" ] -- ------------------------------------------------------------ hxt-curl-9.1.1/src/Text/XML/HXT/IO/0000755000000000000000000000000011701313554014603 5ustar0000000000000000hxt-curl-9.1.1/src/Text/XML/HXT/IO/GetHTTPLibCurl.hs0000644000000000000000000002640311701313554017640 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Text.XML.HXT.IO.GetHTTPLibCurl Copyright : Copyright (C) 2008 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : stable Portability: portable GET for http access with libcurl -} -- ------------------------------------------------------------ module Text.XML.HXT.IO.GetHTTPLibCurl ( getCont ) where import Control.Arrow ( first , (>>>) ) import Control.Concurrent.MVar import Control.Monad ( when ) import qualified Data.ByteString.Lazy as B import Data.Char ( isDigit , isSpace ) import Data.List ( isPrefixOf ) import Network.Curl import System.IO import System.IO.Unsafe ( unsafePerformIO ) import Text.ParserCombinators.Parsec ( parse ) import Text.XML.HXT.DOM.Util ( stringToLower ) import Text.XML.HXT.DOM.XmlKeywords ( transferStatus , transferMessage , transferVersion , httpPrefix ) import Text.XML.HXT.Arrow.XmlOptions ( a_proxy , a_redirect , a_if_modified_since , a_if_unmodified_since ) import Text.XML.HXT.Parser.ProtocolHandlerUtil ( parseContentType ) import Text.XML.HXT.Version -- ------------------------------------------------------------ -- -- the global flag for initializing curl in the 1. call -- this is a hack, but until now no better solution found isInitCurl :: MVar Bool isInitCurl = unsafePerformIO $ newMVar False {-# NOINLINE isInitCurl #-} initCurl :: IO () initCurl = do i <- takeMVar isInitCurl when (not i) ( curl_global_init 3 >> return () ) putMVar isInitCurl True -- ------------------------------------------------------------ -- The curl lib is not thread save curlResource :: MVar () curlResource = unsafePerformIO $ newMVar () {-# NOINLINE curlResource #-} requestCurl :: IO () requestCurl = takeMVar curlResource releaseCurl :: IO () releaseCurl = putMVar curlResource () -- ------------------------------------------------------------ -- -- the http protocol handler implemented by calling libcurl -- () -- via the curl binding -- -- This function tries to support mostly all curl options concerning HTTP requests. -- The naming convetion is as follows: A curl option must be prefixed by the string -- \"curl\" and then written exactly as described in the curl man page -- (). -- -- Example: -- -- > getCont [("--user-agent","My first HXT app"),("-e","http://the.referer.url/")] "http://..." -- -- will set the user agent and the referer URL for this request. getCont :: Bool -> [(String, String)] -> String -> IO (Either ([(String, String)], String) ([(String, String)], B.ByteString)) getCont strictInput options uri = do initCurl requestCurl resp <- curlGetResponse_ uri curlOptions let resp' = evalResponse resp resp' `seq` releaseCurl -- dumpResponse return resp' where _dumpResponse r = do hPutStrLn stderr $ show $ respCurlCode r hPutStrLn stderr $ show $ respStatus r hPutStrLn stderr $ respStatusLine r hPutStrLn stderr $ show $ respHeaders r hPutStrLn stderr $ respBody r curlOptions = defaultOptions ++ concatMap (uncurry copt) options ++ standardOptions defaultOptions -- these options may be overwritten = [ CurlUserAgent ("hxt/" ++ hxt_version ++ " via libcurl") , CurlFollowLocation True ] standardOptions -- these options can't be overwritten = [ CurlFailOnError False , CurlHeader False , CurlNoProgress True ] evalResponse r | rc /= CurlOK = Left ( [ mkH transferStatus "999" , mkH transferMessage $ "curl library rc: " ++ show rc ] , "curl library error when requesting URI " ++ show uri ++ ": (curl return code=" ++ show rc ++ ") " ) | rs < 200 && rs >= 300 = Left ( contentT rsh ++ headers , "http error when accessing URI " ++ show uri ++ ": " ++ show rsl ) | otherwise = Right ( contentT rsh ++ headers , body ) where body :: B.ByteString body | strictInput = B.length body' `seq` body' | otherwise = body' where body' = respBody r mkH x y = (x, dropWhile isSpace y) headers = map (\ (k, v) -> mkH (httpPrefix ++ stringToLower k) v) rsh ++ statusLine (words rsl) contentT = map (first stringToLower) -- all header names to lowercase >>> filter ((== "content-type") . fst) -- select content-type header >>> reverse -- when libcurl is called with automatic redirects, >>> -- there are more than one content-type headers take 1 -- take the last one, (if at leat one is found) >>> map snd -- select content-type value >>> map ( either (const []) id . parse parseContentType "" -- parse the content-type for mimetype and charset ) >>> concat statusLine (vers : _code : msg) -- the status line of the curl response can be an old one, -- e.g. in the case of a redirect, = [ mkH transferVersion vers -- so the return code is taken from that status field, , mkH transferMessage $ unwords msg -- which is contains the last status , mkH transferStatus $ show rs ] statusLine _ = [] rc = respCurlCode r rs = respStatus r rsl = respStatusLine r rsh = respHeaders r -- ------------------------------------------------------------ copt :: String -> String -> [CurlOption] copt k v | "curl-" `isPrefixOf` k = copt (drop 4 k) v -- throw away curl prefix | "--" `isPrefixOf` k = opt2copt (drop 2 k) v | k `elem` [ a_proxy , a_redirect] = opt2copt k v | otherwise = opt2copt k v opt2copt :: String -> String -> [CurlOption] opt2copt k v | k `elem` ["-A", "user-agent"] = [CurlUserAgent v] | k `elem` ["-b", "cookie"] = [CurlCookie v] | k == "connect-timeout" && isIntArg v = [CurlConnectTimeout $ read v] | k == "crlf" = [CurlCRLF $ isTrue v] | k `elem` ["-d", "data"] = [CurlPostFields $ lines v] | k `elem` ["-e", "referer"] = [CurlReferer v] | k `elem` ["-H", "header"] = [CurlHttpHeaders $ lines v] | k == "ignore-content-length" = [CurlIgnoreContentLength $ isTrue v] | k `elem` ["-I", "head"] = [CurlNoBody $ isTrue v] | k `elem` [ "-L" , "location" , a_redirect ] = [CurlFollowLocation $ isTrue v] | k == "max-filesize" && isIntArg v = [CurlMaxFileSizeLarge $ read v] | k `elem` ["-m", "max-time"] && isIntArg v = [CurlTimeoutMS $ read v] | k `elem` ["-n", "netrc"] = [CurlNetrcFile v] | k `elem` ["ssl-verify-peer"] && isIntArg v = [CurlSSLVerifyPeer $ read v] | k `elem` ["-R", "remote-time"] = [CurlFiletime $ isTrue v] | k `elem` ["-u", "user"] = [CurlUserPwd v] | k `elem` ["-U", "proxy-user"] = [CurlProxyUserPwd v] | k `elem` [ "-x" , "proxy" , a_proxy ] = proxyOptions | k `elem` ["-X", "request"] = [CurlCustomRequest v] | k `elem` ["-y", "speed-time"] && isIntArg v = [CurlLowSpeedTime $ read v] | k `elem` ["-Y", "speed-limit"] && isIntArg v = [CurlLowSpeed $ read v] | k == a_if_modified_since = [CurlHttpHeaders $ ["If-Modified-Since: " ++ v] ] | k == a_if_unmodified_since = [CurlHttpHeaders $ ["If-Unmodified-Since: " ++ v] ] -- CurlTimeValue seems to be buggy, therefore the above workaround | k `elem` [ "-z" , "time-cond" , a_if_modified_since ] = ifModifiedOptions | k == "max-redirs" && isIntArg v = [CurlMaxRedirs $ read v] | k `elem` ["-0", "http1.0"] = [CurlHttpVersion HttpVersion10] | otherwise = [] where ifModifiedOptions | "-" `isPrefixOf` v && isIntArg v' = [CurlTimeCondition TimeCondIfUnmodSince ,CurlTimeValue $ read v' ] | isIntArg v = [CurlTimeCondition TimeCondIfModSince ,CurlTimeValue $ read v' ] | otherwise = [] where v' = tail v proxyOptions = [ CurlProxyPort pport , CurlProxy phost ] where pport | isIntArg ppp = read ppp | otherwise = 1080 (phost, pp) = span (/=':') v ppp = drop 1 pp isTrue :: String -> Bool isTrue s = null s || (s `elem` ["1", "True", "true", "Yes", "yes"]) isIntArg :: String -> Bool isIntArg s = not (null s) && all isDigit s -- ------------------------------------------------------------