patience-0.1.1/0000755000000000000000000000000011622603156011471 5ustar0000000000000000patience-0.1.1/Setup.hs0000644000000000000000000000010611622603156013122 0ustar0000000000000000#! /usr/bin/runhaskell import Distribution.Simple main = defaultMain patience-0.1.1/LICENSE0000644000000000000000000000271411622603156012502 0ustar0000000000000000Copyright (c) Keegan McAllister 2011 All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the author nor the names of his contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. patience-0.1.1/patience.cabal0000644000000000000000000000225211622603156014246 0ustar0000000000000000name: patience version: 0.1.1 license: BSD3 license-file: LICENSE synopsis: Patience diff and longest increasing subsequence category: Algorithms, Text author: Keegan McAllister maintainer: Keegan McAllister build-type: Simple cabal-version: >=1.2 description: This library implements the \"patience diff\" algorithm, as well as the patience algorithm for the longest increasing subsequence problem. . Patience diff computes the difference between two lists, for example the lines of two versions of a source file. It provides a good balance of performance, nice output for humans, and implementation simplicity. For more information, see and . . New in version 0.1.1: relaxed @containers@ dependency, so it should build on GHC 6.10. extra-source-files: test/test.hs library exposed-modules: Data.Algorithm.Patience ghc-options: -Wall build-depends: base >= 3 && < 5 , containers >= 0.2 patience-0.1.1/Data/0000755000000000000000000000000011622603156012342 5ustar0000000000000000patience-0.1.1/Data/Algorithm/0000755000000000000000000000000011622603156014270 5ustar0000000000000000patience-0.1.1/Data/Algorithm/Patience.hs0000644000000000000000000001300411622603156016352 0ustar0000000000000000{-# LANGUAGE DeriveDataTypeable , ViewPatterns , CPP #-} -- | Implements \"patience diff\" and the patience algorithm for the longest -- increasing subsequence problem. module Data.Algorithm.Patience ( -- * Patience diff diff , Item(..), itemChar, itemValue -- * Longest increasing subsequence , longestIncreasing ) where import qualified Data.Sequence as S import Data.Sequence ( (<|), (|>), (><), ViewL(..), ViewR(..) ) import qualified Data.Foldable as F import qualified Data.Map as M import qualified Data.IntMap as IM import Data.List import Data.Ord import Data.Typeable ( Typeable ) import Data.Data ( Data ) -- If key xi is in the map, move it to xf while adjusting the value with f. adjMove :: (a -> a) -> Int -> Int -> IM.IntMap a -> IM.IntMap a adjMove f xi xf m = case IM.updateLookupWithKey (\_ _ -> Nothing) xi m of (Just v, mm) -> IM.insert xf (f v) mm (Nothing, _) -> m -- A "card" is an integer value (with annotation) plus a "backpointer" to -- a card in the previous pile, if any. data Card a = Card Int a (Maybe (Card a)) -- | Given: a list of distinct integers. Picks a subset of the integers -- in the same order, i.e. a subsequence, with the property that -- -- * it is monotonically increasing, and -- -- * it is at least as long as any other such subsequence. -- -- This function uses patience sort: -- . -- For implementation reasons, the actual list returned is the reverse of -- the subsequence. -- -- You can pair each integer with an arbitrary annotation, which will be -- carried through the algorithm. longestIncreasing :: [(Int,a)] -> [(Int,a)] longestIncreasing = extract . foldl' ins IM.empty where -- Insert a card into the proper pile. -- type Pile a = [Card a] -- type Piles a = IM.IntMap (Pile a) -- keyed by smallest element ins m (x,a) = let (lt, gt) = IM.split x m prev = (head . fst) `fmap` IM.maxView lt new = Card x a prev in case IM.minViewWithKey gt of Nothing -> IM.insert x [new] m -- new pile Just ((k,_),_) -> adjMove (new:) k x m -- top of old pile -- Walk the backpointers, starting at the top card of the -- highest-keyed pile. extract (IM.maxView -> Just (c,_)) = walk $ head c extract _ = [] walk (Card x a c) = (x,a) : maybe [] walk c -- Elements whose second component appears exactly once. unique :: (Ord t) => S.Seq (a,t) -> M.Map t a unique = M.mapMaybe id . F.foldr ins M.empty where ins (a,x) = M.insertWith' (\_ _ -> Nothing) x (Just a) -- Given two sequences of numbered "lines", returns a list of points -- where unique lines match up. solveLCS :: (Ord t) => S.Seq (Int,t) -> S.Seq (Int,t) -> [(Int,Int)] solveLCS ma mb = let xs = M.elems $ M.intersectionWith (,) (unique ma) (unique mb) in longestIncreasing $ sortBy (comparing snd) xs -- Type for decomposing a diff problem. We either have two -- lines that match, or a recursive subproblem. data Piece a = Match a a | Diff (S.Seq a) (S.Seq a) deriving (Show) -- Subdivides a diff problem according to the indices of matching lines. chop :: S.Seq t -> S.Seq t -> [(Int,Int)] -> [Piece t] chop xs ys [] | S.null xs && S.null ys = [] | otherwise = [Diff xs ys] chop xs ys ((nx,ny):ns) = let (xsr, S.viewl -> (x :< xse)) = S.splitAt nx xs (ysr, S.viewl -> (y :< yse)) = S.splitAt ny ys in Diff xse yse : Match x y : chop xsr ysr ns -- Zip a list with a Seq. zipLS :: [a] -> S.Seq b -> S.Seq (a, b) #if MIN_VERSION_containers(0,3,0) zipLS = S.zip . S.fromList #else zipLS xs = S.fromList . zip xs . F.toList #endif -- Number the elements of a Seq. number :: S.Seq t -> S.Seq (Int,t) number xs = zipLS [0..S.length xs - 1] xs -- | An element of a computed difference. data Item t = Old t -- ^ Value taken from the \"old\" list, i.e. left argument to 'diff' | New t -- ^ Value taken from the \"new\" list, i.e. right argument to 'diff' | Both t t -- ^ Value taken from both lists. Both values are provided, in case -- your type has a non-structural definition of equality. deriving (Eq, Ord, Show, Read, Typeable, Data) instance Functor Item where fmap f (Old x ) = Old (f x) fmap f (New x ) = New (f x) fmap f (Both x y) = Both (f x) (f y) -- | The difference between two lists, according to the -- \"patience diff\" algorithm. diff :: (Ord t) => [t] -> [t] -> [Item t] diff xsl ysl = F.toList $ go (S.fromList xsl) (S.fromList ysl) where -- Handle common elements at the beginning / end. go (S.viewl -> (x :< xs)) (S.viewl -> (y :< ys)) | x == y = Both x y <| go xs ys go (S.viewr -> (xs :> x)) (S.viewr -> (ys :> y)) | x == y = go xs ys |> Both x y -- Find an increasing sequence of matching unique lines, then -- subdivide at those points and recurse. go xs ys = case chop xs ys $ solveLCS (number xs) (number ys) of -- If we fail to subdivide, just record the chunk as is. [Diff _ _] -> fmap Old xs >< fmap New ys ps -> recur ps -- Apply the algorithm recursively to a decomposed problem. -- The decomposition list is in reversed order. recur [] = S.empty recur (Match x y : ps) = recur ps |> Both x y recur (Diff xs ys : ps) = recur ps >< go xs ys -- | The character @\'-\'@ or @\'+\'@ or @\' \'@ for 'Old' or 'New' or 'Both' respectively. itemChar :: Item t -> Char itemChar (Old _ ) = '-' itemChar (New _ ) = '+' itemChar (Both _ _) = ' ' -- | The value from an 'Item'. For 'Both', returns the \"old\" value. itemValue :: Item t -> t itemValue (Old x ) = x itemValue (New x ) = x itemValue (Both x _) = x patience-0.1.1/test/0000755000000000000000000000000011622603156012450 5ustar0000000000000000patience-0.1.1/test/test.hs0000644000000000000000000000225511622603156013767 0ustar0000000000000000-- Simple test for Data.Algorithm.Patience -- -- Invoke as: ./test r n -- for ints r, n -- -- Reads lines of standard input, then repeats r times: -- - Generate two documents of n lines each, by picking -- randomly from the stdin lines, with replacement -- - Compute their patience diff -- - Check that each document is recovered by keeping the -- respective side of the diff module Main(main) where import Control.Monad import Data.Array import Data.Maybe import System.Environment import System.Random import Data.Algorithm.Patience keepOld :: [Item a] -> [a] keepOld = catMaybes . map f where f (Old x ) = Just x f (New _) = Nothing f (Both x _) = Just x keepNew :: [Item a] -> [a] keepNew = catMaybes . map f where f (Old _ ) = Nothing f (New x) = Just x f (Both _ x) = Just x main :: IO () main = do [r,n] <- map read `fmap` getArgs xs <- lines `fmap` getContents let ar = listArray (0, length xs - 1) xs pick = replicateM n ((ar !) `fmap` randomRIO (bounds ar)) replicateM_ r $ do da <- pick db <- pick let d = diff da db good = (da == keepOld d) && (db == keepNew d) when (not good) $ print (da, db, d)