hxt-cache-9.1.0/0000755000000000000000000000000012105727654011564 5ustar0000000000000000hxt-cache-9.1.0/LICENSE0000644000000000000000000000212012105727654012564 0ustar0000000000000000The MIT License Copyright (c) 2005 Uwe Schmidt, Martin Schmidt, Torben Kuseler Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. hxt-cache-9.1.0/hxt-cache.cabal0000644000000000000000000000275312105727654014423 0ustar0000000000000000Name: hxt-cache Version: 9.1.0 Synopsis: Cache for HXT XML Documents and other binary data Description: Extension for caching XML documents and other binary data in cache directory of the local filesystem . Changes from 9.0.2: dependency of old-time changed to time, dependecy of directory changed to >=1.2 to work with ghc-7.6 . License: OtherLicense License-file: LICENSE Author: Uwe Schmidt Maintainer: Uwe Schmidt Stability: Stable Category: XML Homepage: http://www.fh-wedel.de/~si/HXmlToolbox/index.html Copyright: Copyright (c) 2010 Uwe Schmidt Build-type: Simple Cabal-version: >=1.6 extra-source-files: test/TestXmlCache.hs examples/mini/Cache.hs library exposed-modules: Text.XML.HXT.Cache Text.XML.HXT.Arrow.XmlCache Control.Concurrent.ResourceTable hs-source-dirs: src ghc-options: -Wall ghc-prof-options: -auto-all -caf-all build-depends: base >= 4 && < 5, bytestring >= 0.9 && < 1, binary >= 0.5 && < 1, containers >= 0.2 && < 1, deepseq >= 1.1 && < 2, directory >= 1.2 && < 2, filepath >= 1.1 && < 2, hxt >= 9 && < 10, old-locale >= 1 && < 2, time >= 1.4 && < 2, unix >= 2.3 && < 3, SHA >= 1.4 && < 2 hxt-cache-9.1.0/Setup.lhs0000644000000000000000000000015712105727654013377 0ustar0000000000000000#!/usr/bin/runhaskell > module Main where > import Distribution.Simple > main :: IO () > main = defaultMain hxt-cache-9.1.0/src/0000755000000000000000000000000012105727654012353 5ustar0000000000000000hxt-cache-9.1.0/src/Control/0000755000000000000000000000000012105727654013773 5ustar0000000000000000hxt-cache-9.1.0/src/Control/Concurrent/0000755000000000000000000000000012105727654016115 5ustar0000000000000000hxt-cache-9.1.0/src/Control/Concurrent/ResourceTable.hs0000644000000000000000000000343412105727654021214 0ustar0000000000000000----------------------------------------------------------------------------- module Control.Concurrent.ResourceTable where import Control.Concurrent.MVar import qualified Data.Map as M import Data.Maybe ----------------------------------------------------------------------------- type ResourceTable a = MVar (M.Map a ResourceLock) type ResourceLock = (MVar (), Int) ----------------------------------------------------------------------------- requestResource :: (Ord a) => ResourceTable a -> a -> IO () requestResource theLocks r = do rt <- takeMVar theLocks (lk, cnt) <- case M.lookup r rt of Nothing -> do lk' <- newMVar () return (lk', 0) Just l -> return l putMVar theLocks $ M.insert r (lk, cnt + 1) rt takeMVar lk releaseResource :: (Ord a) => ResourceTable a -> a -> IO () releaseResource theLocks r = do rt <- takeMVar theLocks let (lk, cnt) = fromJust . M.lookup r $ rt putMVar theLocks $ if cnt == 1 then M.delete r rt else M.insert r (lk, cnt - 1) rt putMVar lk () newResourceTable :: IO (ResourceTable a) newResourceTable = newMVar M.empty {-# NOINLINE newResourceTable #-} ----------------------------------------------------------------------------- hxt-cache-9.1.0/src/Text/0000755000000000000000000000000012105727654013277 5ustar0000000000000000hxt-cache-9.1.0/src/Text/XML/0000755000000000000000000000000012105727654013737 5ustar0000000000000000hxt-cache-9.1.0/src/Text/XML/HXT/0000755000000000000000000000000012105727654014402 5ustar0000000000000000hxt-cache-9.1.0/src/Text/XML/HXT/Cache.hs0000644000000000000000000000111412105727654015736 0ustar0000000000000000-- ------------------------------------------------------------ {- | Module : Text.XML.HXT.Arrow.Cache Copyright : Copyright (C) 2010 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : experimental Portability: portable Caching of XML document trees and other binary data -} -- ------------------------------------------------------------ module Text.XML.HXT.Cache ( withCache , withoutCache ) where import Text.XML.HXT.Arrow.XmlCache -- ------------------------------------------------------------ hxt-cache-9.1.0/src/Text/XML/HXT/Arrow/0000755000000000000000000000000012105727654015474 5ustar0000000000000000hxt-cache-9.1.0/src/Text/XML/HXT/Arrow/XmlCache.hs0000644000000000000000000003327312105727654017524 0ustar0000000000000000{-# OPTIONS -fno-warn-unused-imports #-} -- ------------------------------------------------------------ {- | Module : Text.XML.HXT.Arrow.XmlCache Copyright : Copyright (C) 2009 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : experimental Portability: portable Caching of XML document trees and other binary data -} -- ------------------------------------------------------------ module Text.XML.HXT.Arrow.XmlCache ( withCache , withoutCache , isInCache , lookupCache , readCache , writeCache , sha1HashValue , sha1HashString ) where import Control.DeepSeq import Control.Concurrent.ResourceTable import Control.Exception ( SomeException , try ) import Data.Binary import qualified Data.ByteString.Lazy as B import Data.Char import Data.Either import Data.Maybe import Data.Digest.Pure.SHA import Data.Time import System.FilePath import System.Directory import System.IO import System.Locale import System.Posix ( touchFile ) -- import System.Time import System.IO.Unsafe ( unsafePerformIO ) import Text.XML.HXT.Core import Text.XML.HXT.Arrow.XmlState.TypeDefs import Text.XML.HXT.Arrow.Binary -- ------------------------------------------------------------ -- | withCache enables reading documents with caching. -- -- When the cache is configured and enabled, every document read and parsed is serialized and stored in binary -- form in the cache. When reading the same document again, it is just deserialized, no parsing is performed. -- -- The cache is configured by a path pointing to a directory for storing the documents, -- by a maximal time span in second for valid documents. After that time span, the documents are read again -- and the cache is updated. -- The flag contols, whether documents returning 404 or other errors will be cached. -- If set, the cache is even activated for 404 (not found) responses, default is false. -- -- The serialized documents can be compressed, e.g. with bzip, to save disk space and IO time. -- The compression can be configured by 'Text.XML.HXT.Arrow.XmlState.withCompression' -- -- example: -- -- > import Text.XML.HXT.Core -- > import Text.XML.HXT.Cache -- > import Codec.Compression.BZip (compress, decompress) -- > ... -- > readDocument [ withCache "/tmp/cache" 3600 False -- > , withCompression (compress, decompress) -- > , .... -- > ] "http://www.haskell.org/" -- > -- -- In the example the document is read and stored in binary serialized form under \/tmp\/cache. -- The cached document remains valid for the next hour. -- It is compressed, before written to disk. withCache :: String -> Int -> Bool -> SysConfig withCache cachePath documentAge cache404 = setS (theWithCache .&&&. theCacheDir .&&&. theDocumentAge .&&&. theCache404Err .&&&. theCacheRead ) (True, (cachePath, (documentAge, (cache404, readDocCache)))) -- | Disable use of cache withoutCache :: SysConfig withoutCache = setS theWithCache False -- ------------------------------------------------------------ readDocCache :: String -> IOStateArrow s b XmlTree readDocCache src = localSysVar theWithCache $ configSysVar withoutCache >>> ( flip readDocCache' src $< getSysVar (theCacheDir .&&&. theDocumentAge .&&&. theCache404Err ) ) where readDocCache' config src' = applyA $ arrIO0 (lookupCache' config src') -- ------------------------------------------------------------ -- | Predicate arrow for checking if a document is in the cache. -- The arrow fails if document not there or is not longer valid, else the file name is returned. isInCache :: IOStateArrow s String String isInCache = uncurry isInC $< getSysVar (theDocumentAge .&&&. theCacheDir) where isInC age cdir = ( traceValue 2 (\ x -> "isInCache: file=" ++ show x ++ " age=" ++ show age ++ " cache dir=" ++ show cdir) >>> arrIO (isInCache' age cdir) >>> arrL ( \ x -> case x of Just Nothing -> [x] _ -> [] ) ) `guards` this isInCache' age cdir f = cacheHit age cf where cf = uncurry () $ cacheFile cdir f -- ------------------------------------------------------------ lookupCache' :: (FilePath, (Int, Bool)) -> String -> IO (IOStateArrow s a XmlTree) lookupCache' (dir, (age, e404)) src = do ch <- cacheHit age cf return $ case ch of Nothing -> readAndCacheDocument Just Nothing -> readDocumentFromCache Just (Just mt) -> readDocumentCond mt where cf = uncurry () $ cacheFile dir src is200 | e404 = hasAttrValue transferStatus (`elem` ["200", "404"]) | otherwise = hasAttrValue transferStatus (== "200") is304 = hasAttrValue transferStatus (== "304") readDocumentFromCache = traceMsg 1 ("cache hit for " ++ show src ++ " reading " ++ show cf) >>> ( readCache' cf >>> traceMsg 2 "cache read" ) `orElse` ( clearErrStatus >>> traceMsg 1 "cache file was corrupted, reading original" >>> readAndCacheDocument ) readAndCacheDocument = traceMsg 1 ("cache miss, reading original document " ++ show src) >>> readDocument [] src >>> perform ( choiceA [ is200 :-> ( writeCache src >>> none ) , this :-> traceMsg 1 "transfer status /= 200, page not cached" ] ) readDocumentCond mt = traceMsg 1 ("cache out of date, read original document if modified " ++ show src) >>> readDocument [withInputOption a_if_modified_since (fmtTime mt)] src >>> choiceA [ is304 :-> ( traceMsg 1 ("document not modified, using cache data from " ++ show cf) >>> perform (arrIO0 $ touchFile cf) >>> readDocumentFromCache ) , is200 :-> ( traceMsg 1 "document read, cache will be updated" >>> perform (writeCache src >>> traceMsg 2 "cache is updated" ) ) , this :-> ( traceMsg 1 "document read without caching" >>> perform ( arrIO0 $ remFile cf ) ) ] where fmtTime = formatTime defaultTimeLocale rfc822DateFormat -- ------------------------------------------------------------ lookupCache :: (NFData b, Binary b) => String -> IOStateArrow s a b lookupCache f = uncurry lookupC $< getSysVar (theDocumentAge .&&&. theCacheDir) where lookupC age cdir = isIOA (const $ hit) `guards` readCache' cf where cf = uncurry () $ cacheFile cdir f hit = do ch <- cacheHit age cf return $ case ch of Just Nothing -> True _ -> False -- ------------------------------------------------------------ readCache :: (NFData c, Binary c) => String -> IOStateArrow s b c readCache f = readC $< getSysVar theCacheDir where readC cdir = readCache' $ uncurry () $ cacheFile cdir f readCache' :: (NFData c, Binary c) => String -> IOStateArrow s b c readCache' cf = rnfA $ withLock cf $ readBinaryValue cf writeCache :: (Binary b) => String -> IOStateArrow s b () writeCache f = writeC $< getSysVar theCacheDir where writeC cdir = traceMsg 1 ("writing cache file " ++ show cf ++ " for document " ++ show f) >>> perform (arrIO0 createDir) >>> withLock cf (writeBinaryValue cf) >>> perform (withLock ixf (arrIO0 $ writeIndex ixf f cf)) where cf = dir file ixf = cdir "index" (dir, file) = cacheFile cdir f createDir = createDirectoryIfMissing True dir -- ------------------------------------------------------------ remFile :: FilePath -> IO () remFile f = ( try' $ do ex <- doesFileExist f if ex then removeFile f else return () ) >> return () -- ------------------------------------------------------------ cacheFile :: FilePath -> String -> (FilePath, FilePath) cacheFile dir f = (dir fd, fn) where (fd, fn) = splitAt 2 . sha1HashString $ f -- ------------------------------------------------------------ -- result interpretation for cacheHit -- -- Nothing : cache miss: get document -- Just Nothing : cache hit, cache data valid: use cache data -- Just (Just t) : cache hit, but cache data out of date: get document conditionally with if-modified-since t cacheHit :: Int -> FilePath -> IO (Maybe (Maybe UTCTime)) cacheHit age cf = ( try' $ do e <- doesFileExist cf if not e then return Nothing else do mt <- getModificationTime cf ct <- getCurrentTime return . Just $ if (dt `addUTCTime` mt) >= ct then Nothing else Just mt ) >>= return . either (const Nothing) id where dt = fromInteger . toInteger $ age try' :: IO a -> IO (Either SomeException a) try' = try writeIndex :: String -> String -> FilePath -> IO () writeIndex ixf f cf = ( try' $ do h <- openBinaryFile ixf AppendMode hPutStrLn h $ show (cf, f) hClose h return () ) >> return () -- ------------------------------------------------------------ -- | Compute the SHA1 hash is hexadecimal format for an arbitray serializable value sha1HashValue :: (Arrow a, Binary b) => a b Integer sha1HashValue = arr $ integerDigest . sha1 . encode sha1HashString :: (Arrow a, Binary b) => a b String sha1HashString = arr $ showDigest . sha1 . encode -- ------------------------------------------------------------ -- | the internal table of file locks theLockedFiles :: ResourceTable String theLockedFiles = unsafePerformIO newResourceTable {-# NOINLINE theLockedFiles #-} lockFile, unlockFile :: String -> IO () lockFile = requestResource theLockedFiles unlockFile = releaseResource theLockedFiles withLock :: String -> IOStateArrow s b c -> IOStateArrow s b c withLock l a = ( perform (arrIO0 $ lockFile l) >>> listA a >>> perform (arrIO0 $ unlockFile l) ) >>> unlistA ----------------------------------------------------------------------------- hxt-cache-9.1.0/test/0000755000000000000000000000000012105727654012543 5ustar0000000000000000hxt-cache-9.1.0/test/TestXmlCache.hs0000644000000000000000000000255012105727654015425 0ustar0000000000000000-- ----------------------------------------------------------------------------- module Main -- TestXmlCache where import Data.Maybe import System.Environment import Text.XML.HXT.Arrow hiding ( readDocument ) import Text.XML.HXT.Arrow.XmlCache main' :: String -> IO () main' url = runX ( readDocument [ ( a_trace, v_1 ) , ( a_parse_html, v_1 ) , ( a_issue_warnings, v_0 ) , ( a_cache, "./cache" ) , ( a_document_age, "10" ) -- 10 sec., just for testing , ( a_compress, v_1 ) ] url >>> writeDocument [ ( a_indent, v_1 ) ] "t.xml" ) >> return () main :: IO () main = do as <- getArgs main' . fromMaybe "http://www.haskell.org/" . listToMaybe $ as -- ----------------------------------------------------------------------------- hxt-cache-9.1.0/examples/0000755000000000000000000000000012105727654013402 5ustar0000000000000000hxt-cache-9.1.0/examples/mini/0000755000000000000000000000000012105727654014336 5ustar0000000000000000hxt-cache-9.1.0/examples/mini/Cache.hs0000644000000000000000000000243512105727654015701 0ustar0000000000000000module Main where import Text.XML.HXT.Core import Text.XML.HXT.Curl import Text.XML.HXT.Cache import Codec.Compression.BZip (compress, decompress) import System main = runX ( readDocument [ withParseHTML yes , withWarnings no , withRemoveWS yes , withCache "/tmp" 10 False -- enable /tmp as cache dir -- documents remain valid 10 seconds (for testing) -- no 404 documents are cached , withCompression (compress, decompress) -- the cached files will be BZip compressed , withStrictDeserialize yes -- cache file will be read and closed immediatly , withTrace 2 , withCurl [] -- curl is taken for HTTP access ] "http://www.fh-wedel.de/" >>> -- perform (arrIO0 $ system "/usr/bin/lsof") -- >>> processChildren (hasName "html" /> hasName "body" //> isText) >>> writeDocument [] "" ) >> return ()