download-curl-0.1.4/Setup.lhs

#!/usr/bin/env runhaskell
> import Distribution.Simple
> main = defaultMain

download-curl-0.1.4/download-curl.cabal

name:            download-curl
version:         0.1.4
homepage:        http://code.haskell.org/~dons/code/download-curl
synopsis:        High-level file download based on URLs
description:
    High-level file download based on URLs
    .
    Download web content as strict or lazy bytestrings, strings, HTML
    tags, XML, RSS or Atom feeds, or JSON, using the curl network library.
    .
    Importing the library:
    .
    > import Network.Curl.Download
    .
    Loading a webpage as a "ByteString":
    .
    > doc <- openURI "http://haskell.org"
    .
    Loading from a file:
    .
    > doc <- openURI "file:///tmp/A.hs"
    .
    Loading an HTML page as a list of tags:
    .
    > tags <- openAsTags "http://haskell.org"
    .
    Loading an HTML page as XML:
    .
    > tags <- openAsXML "http://haskell.org"
    .
    Loading an RSS or Atom feed:
    .
    > feed <- openAsFeed "http://haskell.org"
    .
    These data types can then be processed further with the XML, Feed
    and TagSoup libraries.
    .
category:        Network
license:         BSD3
license-file:    LICENSE
copyright:       (c) 2008, Don Stewart
author:          Don Stewart
maintainer:      Don Stewart
cabal-version:   >= 1.2.0
build-type:      Simple
tested-with:     GHC ==6.8.2

flag small_base
  description: Build with new smaller base library

library
  exposed-modules: Network.Curl.Download
                   Network.Curl.Download.Lazy

  ghc-options:     -Wall -fno-warn-unused-do-bind

  if flag(small_base)
    build-depends: base >= 3 && < 6, bytestring
  else
    build-depends: base > 3 && < 4

  build-depends: curl, tagsoup >= 0.8 && < 0.13, feed, xml

download-curl-0.1.4/LICENSE

Copyright (c) 2008 Don Stewart

All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:

1. Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.

2. Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.

THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
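The following standalone program illustrates the workflow sketched in the
package description above. It is a minimal sketch and not a file shipped in
the package; the Main module, the output messages, and the example URL are
illustrative, and it assumes download-curl and its tagsoup dependency are
installed.

-- Example.hs (illustrative only, not part of the package)
module Main (main) where

import qualified Data.ByteString.Char8 as B
import Network.Curl.Download (openURI, openAsTags)

main :: IO ()
main = do
    -- Fetch a page as a strict ByteString; failures come back as Left.
    edoc <- openURI "http://haskell.org"
    case edoc of
        Left err  -> putStrLn ("Download failed: " ++ err)
        Right doc -> putStrLn ("Downloaded " ++ show (B.length doc) ++ " bytes")

    -- Fetch the same page pre-parsed into TagSoup tags.
    etags <- openAsTags "http://haskell.org"
    case etags of
        Left err   -> putStrLn ("Download failed: " ++ err)
        Right tags -> putStrLn ("Parsed " ++ show (length tags) ++ " tags")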
download-curl-0.1.4/Network/Curl/Download.hs

--------------------------------------------------------------------
-- |
-- Module    : Network.Curl.Download
-- Copyright : (c) Don Stewart
-- License   : BSD3
--
-- Maintainer: Don Stewart
-- Stability : provisional
-- Portability: posix
--
-- A binding to curl, an efficient, high-level library for
-- retrieving files using Uniform Resource Locators (URLs).
--
-- Content may be retrieved as strings, as a "ByteString", or parsed
-- as HTML tags, XML, or RSS and Atom feeds.
--
-- Error handling is encapsulated in the "Either" type.
--
--------------------------------------------------------------------

module Network.Curl.Download (
        -- * The basic interface to network content
          openURI
        , openURIString

        -- * Parsers for common formats
        , openAsTags
        , openAsXML
        , openAsFeed

        -- * A lower level interface
        , openURIWithOpts

    ) where

import Network.Curl
import Foreign
import Data.IORef

import qualified Data.ByteString.Internal as S
import qualified Data.ByteString.Char8    as Char8

-- Parsers
import qualified Text.HTML.TagSoup as TagSoup
import qualified Text.XML.Light    as XML
import qualified Text.Feed.Import  as Feed
import qualified Text.Feed.Types   as Feed

------------------------------------------------------------------------

-- | Download content specified by a url using curl, returning the
-- content as a strict "ByteString".
--
-- If an error occurs, "Left" is returned, with a
-- protocol-specific error string.
--
-- Examples:
--
-- > openURI "http://haskell.org"
--
openURI :: String -> IO (Either String S.ByteString)
openURI s = openURIWithOpts [] s

-- | Like 'openURI', but returns the result as a 'String'.
--
-- Examples:
--
-- > openURIString "http://haskell.org"
--
openURIString :: String -> IO (Either String String)
openURIString s = (fmap Char8.unpack) `fmap` openURI s

-- | Like 'openURI', but takes curl options.
--
-- Examples:
--
-- > openURIWithOpts [CurlPost True] "http://haskell.org"
--
openURIWithOpts :: [CurlOption] -> String -> IO (Either String S.ByteString)
openURIWithOpts opts s = case parseURL s of
    Nothing  -> return $ Left $ "Malformed url: " ++ s
    Just url -> do
        e <- getFile url opts
        return $ case e of
            Left err  -> Left $ "Failed to connect: " ++ err
            Right src -> Right src

------------------------------------------------------------------------
-- Parser interface:

-- | Download the content as for 'openURI', but return it as a list of
-- parsed tags using the tagsoup html parser.
--
openAsTags :: String -> IO (Either String [TagSoup.Tag String])
openAsTags s = (fmap TagSoup.parseTags) `fmap` openURIString s

-- | Download the content as for 'openURI', but return it as parsed XML,
-- using the xml-light parser.
--
openAsXML :: String -> IO (Either String [XML.Content])
openAsXML s = (fmap XML.parseXML) `fmap` openURIString s

-- | Download the content as for 'openURI', but return it as parsed RSS
-- or Atom content, using the feed library parser.
--
openAsFeed :: String -> IO (Either String Feed.Feed)
openAsFeed s = do
    e <- openURIString s
    return $ case e of
        Left err  -> Left err                    -- gluing Either -> Maybe
        Right src -> case Feed.parseFeedString src of
            Nothing   -> Left "Unable to parse feed"
            Just src' -> Right src'

------------------------------------------------------------------------
-- Internal:
--

newtype URL = URL String

parseURL :: String -> Maybe URL
parseURL s = Just (URL s) -- no parsing

getFile :: URL -> [CurlOption] -> IO (Either String S.ByteString)
getFile (URL url) flags = do
    h <- initialize
    let start = 1024
    buf <- mallocBytes start
    ref <- newIORef (P buf 0)
    setopt h (CurlFailOnError True)
    setDefaultSSLOpts h url
    setopt h (CurlURL url)
    setopt h (CurlWriteFunction (gather ref))
    mapM_ (setopt h) flags
    rc <- perform h
    P buf' sz <- readIORef ref
    if rc /= CurlOK
        then do
            free buf'
            return $ Left (show rc)
        else do
            fp <- newForeignPtr finalizerFree buf'
            return (Right $! S.fromForeignPtr fp 0 (fromIntegral sz))

-- A growable buffer: a pointer to the bytes received so far and their length.
data P = P !(Ptr Word8) !Int

gather :: IORef P -> WriteFunction
gather r = writer $ \(src, m) -> do
    P dest n <- readIORef r
    dest' <- reallocBytes dest (n + m)
    S.memcpy (dest' `plusPtr` n) src (fromIntegral m)
    writeIORef r (P dest' (n + m))

-- memcpy chunks of data into our bytestring.
writer :: ((Ptr Word8, Int) -> IO ()) -> WriteFunction
writer f src sz nelems _ = do
    let n' = sz * nelems
    f (castPtr src, fromIntegral n')
    return n'

download-curl-0.1.4/Network/Curl/Download/Lazy.hs

--------------------------------------------------------------------
-- |
-- Module    : Network.Curl.Download.Lazy
-- Copyright : (c) Don Stewart
-- License   : BSD3
--
-- Maintainer: Don Stewart
-- Stability : provisional
-- Portability: posix
--
-- A binding to curl, an efficient, high-level library for
-- retrieving files using Uniform Resource Locators (URLs).
--
-- Content may be retrieved as a lazy "ByteString".
--
-- Error handling is encapsulated in the "Either" type.
--
--------------------------------------------------------------------

module Network.Curl.Download.Lazy (
        -- * The basic lazy interface to network content
          openLazyURI
        , openLazyURIWithOpts

    ) where

import Network.Curl
import Foreign
import Data.IORef

import qualified Data.ByteString.Lazy.Internal as L
import qualified Data.ByteString.Internal      as S

------------------------------------------------------------------------

-- | Download content specified by a url using curl, returning the
-- content as a lazy "ByteString".
--
-- If an error occurs, "Left" is returned, with a
-- protocol-specific error string.
--
-- Examples:
--
-- > openLazyURI "http://haskell.org"
--
openLazyURI :: String -> IO (Either String L.ByteString)
openLazyURI s = openLazyURIWithOpts [] s

-- | Like 'openLazyURI', but takes curl options.
--
-- Examples:
--
-- > openLazyURIWithOpts [CurlPost True] "http://haskell.org"
--
openLazyURIWithOpts :: [CurlOption] -> String -> IO (Either String L.ByteString)
openLazyURIWithOpts opts s = case parseURL s of
    Nothing  -> return $ Left $ "Malformed url: " ++ s
    Just url -> do
        e <- getFile url opts
        return $ case e of
            Left err  -> Left $ "Failed to connect: " ++ err
            Right src -> Right src

------------------------------------------------------------------------
-- Internal:
--

newtype URL = URL String

parseURL :: String -> Maybe URL
parseURL s = Just (URL s) -- no parsing

getFile :: URL -> [CurlOption] -> IO (Either String L.ByteString)
getFile (URL url) flags = do
    h <- initialize
    ref <- newIORef L.Empty
    setopt h (CurlFailOnError True)
    setDefaultSSLOpts h url
    setopt h (CurlURL url)
    setopt h (CurlWriteFunction (gather ref))
    mapM_ (setopt h) flags
    rc <- perform h
    chunks <- readIORef ref
    return $ if rc /= CurlOK
        then Left (show rc)
        else Right $! rev'spine chunks

-- Prepend each incoming chunk to the accumulated lazy bytestring;
-- the spine is reversed once the transfer completes.
gather :: IORef L.ByteString -> WriteFunction
gather r = writer $ \chunk -> do
    chunks <- readIORef r
    let chunks' = L.Chunk chunk chunks
    writeIORef r $! chunks'

-- memcpy each block of data into a fresh strict bytestring chunk.
writer :: (S.ByteString -> IO ()) -> WriteFunction
writer f src sz nelems _ = do
    let n' = sz * nelems
    f =<< (S.create (fromIntegral n') $ \dest ->
            S.memcpy dest (castPtr src) (fromIntegral n'))
    return n'

-- reverse just the spine of a lazy bytestring
rev'spine :: L.ByteString -> L.ByteString
rev'spine l = rev l L.Empty
    where
        rev L.Empty        a = a
        rev (L.Chunk x xs) a = rev xs (L.Chunk x a)
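A small usage sketch for the lazy interface above: fetch a page with
openLazyURI and write the result to disk. This is illustrative only and not
part of the package; the Main module, file name, and URL are assumptions.

-- LazyExample.hs (illustrative only, not part of the package)
module Main (main) where

import qualified Data.ByteString.Lazy as L
import Network.Curl.Download.Lazy (openLazyURI)

main :: IO ()
main = do
    e <- openLazyURI "http://haskell.org"
    case e of
        Left err  -> putStrLn ("Download failed: " ++ err)
        Right doc -> do
            -- The result is built from the chunks accumulated by 'gather';
            -- writeFile emits them without flattening into one strict buffer.
            L.writeFile "haskell.org.html" doc
            putStrLn "Saved to haskell.org.html"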