vector-algorithms-0.8.0.1/0000755000000000000000000000000013357002426013517 5ustar0000000000000000vector-algorithms-0.8.0.1/LICENSE0000644000000000000000000000622113357002426014525 0ustar0000000000000000Copyright (c) 2015 Dan Doel Copyright (c) 2015 Tim Baumann All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the author nor the names of his contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ------------------------------------------------------------------------------ The code in Data.Array.Vector.Algorithms.Mutable.Optimal is adapted from a C algorithm for the same purpose. The folowing is the copyright notice for said C code: Copyright (c) 2004 Paul Hsieh All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. Neither the name of sorttest nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. vector-algorithms-0.8.0.1/vector-algorithms.cabal0000644000000000000000000000657613357002426020172 0ustar0000000000000000name: vector-algorithms version: 0.8.0.1 license: BSD3 license-file: LICENSE author: Dan Doel maintainer: Dan Doel Erik de Castro Lopo copyright: (c) 2008,2009,2010,2011,2012,2013,2014,2015 Dan Doel (c) 2015 Tim Baumann homepage: https://github.com/erikd/vector-algorithms/ category: Data synopsis: Efficient algorithms for vector arrays description: Efficient algorithms for sorting vector arrays. At some stage other vector algorithms may be added. build-type: Simple cabal-version: >= 1.9.2 flag BoundsChecks description: Enable bounds checking default: True flag UnsafeChecks description: Enable bounds checking in unsafe operations at the cost of a significant performance penalty. default: False flag InternalChecks description: Enable internal consistency checks at the cost of a significant performance penalty. default: False flag bench description: Build a benchmarking program to test vector-algorithms performance default: True flag properties description: Enable the quickcheck tests default: True -- flag dump-simpl -- description: Dumps the simplified core during compilation -- default: False flag llvm description: Build using llvm default: False source-repository head type: git location: https://github.com/erikd/vector-algorithms/ library hs-source-dirs: src build-depends: base >= 4.5 && < 5, vector >= 0.6 && < 0.13, primitive >=0.3 && <0.7, bytestring >= 0.9 && < 1.0 if ! impl (ghc >= 7.8) build-depends: tagged >= 0.4 && < 0.9 exposed-modules: Data.Vector.Algorithms.Optimal Data.Vector.Algorithms.Insertion Data.Vector.Algorithms.Intro Data.Vector.Algorithms.Merge Data.Vector.Algorithms.Radix Data.Vector.Algorithms.Search Data.Vector.Algorithms.Heap Data.Vector.Algorithms.AmericanFlag Data.Vector.Algorithms.Tim other-modules: Data.Vector.Algorithms.Common ghc-options: -funbox-strict-fields -- Cabal/Hackage complains about these -- if flag(dump-simpl) -- ghc-options: -ddump-simpl -ddump-to-file if flag(llvm) ghc-options: -fllvm include-dirs: include install-includes: vector.h if flag(BoundsChecks) cpp-options: -DVECTOR_BOUNDS_CHECKS if flag(UnsafeChecks) cpp-options: -DVECTOR_UNSAFE_CHECKS if flag(InternalChecks) cpp-options: -DVECTOR_INTERNAL_CHECKS benchmark simple-bench hs-source-dirs: bench/simple type: exitcode-stdio-1.0 if !flag(bench) buildable: False main-is: Main.hs other-modules: Blocks build-depends: base, mwc-random, vector, vector-algorithms ghc-options: -Wall -- Cabal/Hackage complains about these -- if flag(dump-simpl) -- ghc-options: -ddump-simpl -ddump-to-file if flag(llvm) ghc-options: -fllvm test-suite properties hs-source-dirs: tests/properties type: exitcode-stdio-1.0 main-is: Tests.hs other-modules: Optimal Properties Util if !flag(properties) buildable: False else build-depends: base, bytestring, containers, QuickCheck > 2.9 && < 2.13, vector, vector-algorithms if flag(llvm) ghc-options: -fllvm vector-algorithms-0.8.0.1/Setup.lhs0000644000000000000000000000011413357002426015323 0ustar0000000000000000#!/usr/bin/env runhaskell > import Distribution.Simple > main = defaultMain vector-algorithms-0.8.0.1/include/0000755000000000000000000000000013357002426015142 5ustar0000000000000000vector-algorithms-0.8.0.1/include/vector.h0000644000000000000000000000163613357002426016623 0ustar0000000000000000#define PHASE_STREAM [1] #define PHASE_INNER [0] #define INLINE_STREAM INLINE PHASE_STREAM #define INLINE_INNER INLINE PHASE_INNER #ifndef NOT_VECTOR_MODULE import qualified Data.Vector.Internal.Check as Ck #endif #define ERROR(f) (Ck.f __FILE__ __LINE__) #define ASSERT (Ck.assert __FILE__ __LINE__) #define ENSURE (Ck.f __FILE__ __LINE__) #define CHECK(f) (Ck.f __FILE__ __LINE__) #define BOUNDS_ERROR(f) (ERROR(f) Ck.Bounds) #define BOUNDS_ASSERT (ASSERT Ck.Bounds) #define BOUNDS_ENSURE (ENSURE Ck.Bounds) #define BOUNDS_CHECK(f) (CHECK(f) Ck.Bounds) #define UNSAFE_ERROR(f) (ERROR(f) Ck.Unsafe) #define UNSAFE_ASSERT (ASSERT Ck.Unsafe) #define UNSAFE_ENSURE (ENSURE Ck.Unsafe) #define UNSAFE_CHECK(f) (CHECK(f) Ck.Unsafe) #define INTERNAL_ERROR(f) (ERROR(f) Ck.Internal) #define INTERNAL_ASSERT (ASSERT Ck.Internal) #define INTERNAL_ENSURE (ENSURE Ck.Internal) #define INTERNAL_CHECK(f) (CHECK(f) Ck.Internal) vector-algorithms-0.8.0.1/src/0000755000000000000000000000000013357002426014306 5ustar0000000000000000vector-algorithms-0.8.0.1/src/Data/0000755000000000000000000000000013357002426015157 5ustar0000000000000000vector-algorithms-0.8.0.1/src/Data/Vector/0000755000000000000000000000000013357002426016421 5ustar0000000000000000vector-algorithms-0.8.0.1/src/Data/Vector/Algorithms/0000755000000000000000000000000013357002426020532 5ustar0000000000000000vector-algorithms-0.8.0.1/src/Data/Vector/Algorithms/Search.hs0000644000000000000000000002156413357002426022303 0ustar0000000000000000{-# LANGUAGE BangPatterns #-} {-# LANGUAGE TypeFamilies #-} -- --------------------------------------------------------------------------- -- | -- Module : Data.Vector.Algorithms.Search -- Copyright : (c) 2009-2015 Dan Doel, 2015 Tim Baumann -- Maintainer : Dan Doel -- Stability : Experimental -- Portability : Non-portable (bang patterns) -- -- This module implements several methods of searching for indicies to insert -- elements into a sorted vector. module Data.Vector.Algorithms.Search ( binarySearch , binarySearchBy , binarySearchByBounds , binarySearchL , binarySearchLBy , binarySearchLByBounds , binarySearchR , binarySearchRBy , binarySearchRByBounds , binarySearchP , binarySearchPBounds , gallopingSearchLeftP , gallopingSearchLeftPBounds , gallopingSearchRightP , gallopingSearchRightPBounds , Comparison ) where import Prelude hiding (read, length) import Control.Monad.Primitive import Data.Bits import Data.Vector.Generic.Mutable import Data.Vector.Algorithms.Common (Comparison, midPoint) -- | Finds an index in a given sorted vector at which the given element could -- be inserted while maintaining the sortedness of the vector. binarySearch :: (PrimMonad m, MVector v e, Ord e) => v (PrimState m) e -> e -> m Int binarySearch = binarySearchBy compare {-# INLINE binarySearch #-} -- | Finds an index in a given vector, which must be sorted with respect to the -- given comparison function, at which the given element could be inserted while -- preserving the vector's sortedness. binarySearchBy :: (PrimMonad m, MVector v e) => Comparison e -> v (PrimState m) e -> e -> m Int binarySearchBy cmp vec e = binarySearchByBounds cmp vec e 0 (length vec) {-# INLINE binarySearchBy #-} -- | Given a vector sorted with respect to a given comparison function in indices -- in [l,u), finds an index in [l,u] at which the given element could be inserted -- while preserving sortedness. binarySearchByBounds :: (PrimMonad m, MVector v e) => Comparison e -> v (PrimState m) e -> e -> Int -> Int -> m Int binarySearchByBounds cmp vec e = loop where loop !l !u | u <= l = return l | otherwise = do e' <- unsafeRead vec k case cmp e' e of LT -> loop (k+1) u EQ -> return k GT -> loop l k where k = midPoint u l {-# INLINE binarySearchByBounds #-} -- | Finds the lowest index in a given sorted vector at which the given element -- could be inserted while maintaining the sortedness. binarySearchL :: (PrimMonad m, MVector v e, Ord e) => v (PrimState m) e -> e -> m Int binarySearchL = binarySearchLBy compare {-# INLINE binarySearchL #-} -- | Finds the lowest index in a given vector, which must be sorted with respect to -- the given comparison function, at which the given element could be inserted -- while preserving the sortedness. binarySearchLBy :: (PrimMonad m, MVector v e) => Comparison e -> v (PrimState m) e -> e -> m Int binarySearchLBy cmp vec e = binarySearchLByBounds cmp vec e 0 (length vec) {-# INLINE binarySearchLBy #-} -- | Given a vector sorted with respect to a given comparison function on indices -- in [l,u), finds the lowest index in [l,u] at which the given element could be -- inserted while preserving sortedness. binarySearchLByBounds :: (PrimMonad m, MVector v e) => Comparison e -> v (PrimState m) e -> e -> Int -> Int -> m Int binarySearchLByBounds cmp vec e = binarySearchPBounds p vec where p e' = case cmp e' e of LT -> False ; _ -> True {-# INLINE binarySearchLByBounds #-} -- | Finds the greatest index in a given sorted vector at which the given element -- could be inserted while maintaining sortedness. binarySearchR :: (PrimMonad m, MVector v e, Ord e) => v (PrimState m) e -> e -> m Int binarySearchR = binarySearchRBy compare {-# INLINE binarySearchR #-} -- | Finds the greatest index in a given vector, which must be sorted with respect to -- the given comparison function, at which the given element could be inserted -- while preserving the sortedness. binarySearchRBy :: (PrimMonad m, MVector v e) => Comparison e -> v (PrimState m) e -> e -> m Int binarySearchRBy cmp vec e = binarySearchRByBounds cmp vec e 0 (length vec) {-# INLINE binarySearchRBy #-} -- | Given a vector sorted with respect to the given comparison function on indices -- in [l,u), finds the greatest index in [l,u] at which the given element could be -- inserted while preserving sortedness. binarySearchRByBounds :: (PrimMonad m, MVector v e) => Comparison e -> v (PrimState m) e -> e -> Int -> Int -> m Int binarySearchRByBounds cmp vec e = binarySearchPBounds p vec where p e' = case cmp e' e of GT -> True ; _ -> False {-# INLINE binarySearchRByBounds #-} -- | Given a predicate that is guaraneteed to be monotone on the given vector, -- finds the first index at which the predicate returns True, or the length of -- the array if the predicate is false for the entire array. binarySearchP :: (PrimMonad m, MVector v e) => (e -> Bool) -> v (PrimState m) e -> m Int binarySearchP p vec = binarySearchPBounds p vec 0 (length vec) {-# INLINE binarySearchP #-} -- | Given a predicate that is guaranteed to be monotone on the indices [l,u) in -- a given vector, finds the index in [l,u] at which the predicate turns from -- False to True (yielding u if the entire interval is False). binarySearchPBounds :: (PrimMonad m, MVector v e) => (e -> Bool) -> v (PrimState m) e -> Int -> Int -> m Int binarySearchPBounds p vec = loop where loop !l !u | u <= l = return l | otherwise = unsafeRead vec k >>= \e -> if p e then loop l k else loop (k+1) u where k = midPoint u l {-# INLINE binarySearchPBounds #-} -- | Given a predicate that is guaranteed to be monotone on the vector elements -- in order, finds the index at which the predicate turns from False to True. -- The length of the vector is returned if the predicate is False for the entire -- vector. -- -- Begins searching at the start of the vector, in increasing steps of size 2^n. gallopingSearchLeftP :: (PrimMonad m, MVector v e) => (e -> Bool) -> v (PrimState m) e -> m Int gallopingSearchLeftP p vec = gallopingSearchLeftPBounds p vec 0 (length vec) {-# INLINE gallopingSearchLeftP #-} -- | Given a predicate that is guaranteed to be monotone on the vector elements -- in order, finds the index at which the predicate turns from False to True. -- The length of the vector is returned if the predicate is False for the entire -- vector. -- -- Begins searching at the end of the vector, in increasing steps of size 2^n. gallopingSearchRightP :: (PrimMonad m, MVector v e) => (e -> Bool) -> v (PrimState m) e -> m Int gallopingSearchRightP p vec = gallopingSearchRightPBounds p vec 0 (length vec) {-# INLINE gallopingSearchRightP #-} -- | Given a predicate that is guaranteed to be monotone on the indices [l,u) in -- a given vector, finds the index in [l,u] at which the predicate turns from -- False to True (yielding u if the entire interval is False). -- Begins searching at l, going right in increasing (2^n)-steps. gallopingSearchLeftPBounds :: (PrimMonad m, MVector v e) => (e -> Bool) -> v (PrimState m) e -> Int -- ^ l -> Int -- ^ u -> m Int gallopingSearchLeftPBounds p vec l u | u <= l = return l | otherwise = do x <- unsafeRead vec l if p x then return l else iter (l+1) l 2 where binSearch = binarySearchPBounds p vec iter !i !j !_stepSize | i >= u - 1 = do x <- unsafeRead vec (u-1) if p x then binSearch (j+1) (u-1) else return u iter !i !j !stepSize = do x <- unsafeRead vec i if p x then binSearch (j+1) i else iter (i+stepSize) i (2*stepSize) {-# INLINE gallopingSearchLeftPBounds #-} -- | Given a predicate that is guaranteed to be monotone on the indices [l,u) in -- a given vector, finds the index in [l,u] at which the predicate turns from -- False to True (yielding u if the entire interval is False). -- Begins searching at u, going left in increasing (2^n)-steps. gallopingSearchRightPBounds :: (PrimMonad m, MVector v e) => (e -> Bool) -> v (PrimState m) e -> Int -- ^ l -> Int -- ^ u -> m Int gallopingSearchRightPBounds p vec l u | u <= l = return l | otherwise = iter (u-1) (u-1) (-1) where binSearch = binarySearchPBounds p vec iter !i !j !_stepSize | i <= l = do x <- unsafeRead vec l if p x then return l else binSearch (l+1) j iter !i !j !stepSize = do x <- unsafeRead vec i if p x then iter (i+stepSize) i (2*stepSize) else binSearch (i+1) j {-# INLINE gallopingSearchRightPBounds #-} vector-algorithms-0.8.0.1/src/Data/Vector/Algorithms/AmericanFlag.hs0000644000000000000000000003136513357002426023407 0ustar0000000000000000{-# LANGUAGE BangPatterns #-} {-# LANGUAGE TypeFamilies #-} {-# LANGUAGE FlexibleContexts #-} {-# lANGUAGE ScopedTypeVariables #-} -- --------------------------------------------------------------------------- -- | -- Module : Data.Vector.Algorithms.AmericanFlag -- Copyright : (c) 2011 Dan Doel -- Maintainer : Dan Doel -- Stability : Experimental -- Portability : Non-portable (FlexibleContexts, ScopedTypeVariables) -- -- This module implements American flag sort: an in-place, unstable, bucket -- sort. Also in contrast to radix sort, the values are inspected in a big -- endian order, and buckets are sorted via recursive splitting. This, -- however, makes it sensible for sorting strings in lexicographic order -- (provided indexing is fast). -- -- The algorithm works as follows: at each stage, the array is looped over, -- counting the number of elements for each bucket. Then, starting at the -- beginning of the array, elements are permuted in place to reside in the -- proper bucket, following chains until they reach back to the current -- base index. Finally, each bucket is sorted recursively. This lends itself -- well to the aforementioned variable-length strings, and so the algorithm -- takes a stopping predicate, which is given a representative of the stripe, -- rather than running for a set number of iterations. module Data.Vector.Algorithms.AmericanFlag ( sort , sortBy , Lexicographic(..) ) where import Prelude hiding (read, length) import Control.Monad import Control.Monad.Primitive import Data.Proxy import Data.Word import Data.Int import Data.Bits import qualified Data.ByteString as B import Data.Vector.Generic.Mutable import qualified Data.Vector.Primitive.Mutable as PV import qualified Data.Vector.Unboxed.Mutable as U import Data.Vector.Algorithms.Common import qualified Data.Vector.Algorithms.Insertion as I import Foreign.Storable -- | The methods of this class specify the information necessary to sort -- arrays using the default ordering. The name 'Lexicographic' is meant -- to convey that index should return results in a similar way to indexing -- into a string. class Lexicographic e where -- | Computes the length of a representative of a stripe. It should take 'n' -- passes to sort values of extent 'n'. The extent may not be uniform across -- all values of the type. extent :: e -> Int -- | The size of the bucket array necessary for sorting es size :: Proxy e -> Int -- | Determines which bucket a given element should inhabit for a -- particular iteration. index :: Int -> e -> Int instance Lexicographic Word8 where extent _ = 1 {-# INLINE extent #-} size _ = 256 {-# INLINE size #-} index _ n = fromIntegral n {-# INLINE index #-} instance Lexicographic Word16 where extent _ = 2 {-# INLINE extent #-} size _ = 256 {-# INLINE size #-} index 0 n = fromIntegral $ (n `shiftR` 8) .&. 255 index 1 n = fromIntegral $ n .&. 255 index _ _ = 0 {-# INLINE index #-} instance Lexicographic Word32 where extent _ = 4 {-# INLINE extent #-} size _ = 256 {-# INLINE size #-} index 0 n = fromIntegral $ (n `shiftR` 24) .&. 255 index 1 n = fromIntegral $ (n `shiftR` 16) .&. 255 index 2 n = fromIntegral $ (n `shiftR` 8) .&. 255 index 3 n = fromIntegral $ n .&. 255 index _ _ = 0 {-# INLINE index #-} instance Lexicographic Word64 where extent _ = 8 {-# INLINE extent #-} size _ = 256 {-# INLINE size #-} index 0 n = fromIntegral $ (n `shiftR` 56) .&. 255 index 1 n = fromIntegral $ (n `shiftR` 48) .&. 255 index 2 n = fromIntegral $ (n `shiftR` 40) .&. 255 index 3 n = fromIntegral $ (n `shiftR` 32) .&. 255 index 4 n = fromIntegral $ (n `shiftR` 24) .&. 255 index 5 n = fromIntegral $ (n `shiftR` 16) .&. 255 index 6 n = fromIntegral $ (n `shiftR` 8) .&. 255 index 7 n = fromIntegral $ n .&. 255 index _ _ = 0 {-# INLINE index #-} instance Lexicographic Word where extent _ = sizeOf (0 :: Word) {-# INLINE extent #-} size _ = 256 {-# INLINE size #-} index 0 n = fromIntegral $ (n `shiftR` 56) .&. 255 index 1 n = fromIntegral $ (n `shiftR` 48) .&. 255 index 2 n = fromIntegral $ (n `shiftR` 40) .&. 255 index 3 n = fromIntegral $ (n `shiftR` 32) .&. 255 index 4 n = fromIntegral $ (n `shiftR` 24) .&. 255 index 5 n = fromIntegral $ (n `shiftR` 16) .&. 255 index 6 n = fromIntegral $ (n `shiftR` 8) .&. 255 index 7 n = fromIntegral $ n .&. 255 index _ _ = 0 {-# INLINE index #-} instance Lexicographic Int8 where extent _ = 1 {-# INLINE extent #-} size _ = 256 {-# INLINE size #-} index _ n = 255 .&. fromIntegral n `xor` 128 {-# INLINE index #-} instance Lexicographic Int16 where extent _ = 2 {-# INLINE extent #-} size _ = 256 {-# INLINE size #-} index 0 n = fromIntegral $ ((n `xor` minBound) `shiftR` 8) .&. 255 index 1 n = fromIntegral $ n .&. 255 index _ _ = 0 {-# INLINE index #-} instance Lexicographic Int32 where extent _ = 4 {-# INLINE extent #-} size _ = 256 {-# INLINE size #-} index 0 n = fromIntegral $ ((n `xor` minBound) `shiftR` 24) .&. 255 index 1 n = fromIntegral $ (n `shiftR` 16) .&. 255 index 2 n = fromIntegral $ (n `shiftR` 8) .&. 255 index 3 n = fromIntegral $ n .&. 255 index _ _ = 0 {-# INLINE index #-} instance Lexicographic Int64 where extent _ = 8 {-# INLINE extent #-} size _ = 256 {-# INLINE size #-} index 0 n = fromIntegral $ ((n `xor` minBound) `shiftR` 56) .&. 255 index 1 n = fromIntegral $ (n `shiftR` 48) .&. 255 index 2 n = fromIntegral $ (n `shiftR` 40) .&. 255 index 3 n = fromIntegral $ (n `shiftR` 32) .&. 255 index 4 n = fromIntegral $ (n `shiftR` 24) .&. 255 index 5 n = fromIntegral $ (n `shiftR` 16) .&. 255 index 6 n = fromIntegral $ (n `shiftR` 8) .&. 255 index 7 n = fromIntegral $ n .&. 255 index _ _ = 0 {-# INLINE index #-} instance Lexicographic Int where extent _ = sizeOf (0 :: Int) {-# INLINE extent #-} size _ = 256 {-# INLINE size #-} index 0 n = ((n `xor` minBound) `shiftR` 56) .&. 255 index 1 n = (n `shiftR` 48) .&. 255 index 2 n = (n `shiftR` 40) .&. 255 index 3 n = (n `shiftR` 32) .&. 255 index 4 n = (n `shiftR` 24) .&. 255 index 5 n = (n `shiftR` 16) .&. 255 index 6 n = (n `shiftR` 8) .&. 255 index 7 n = n .&. 255 index _ _ = 0 {-# INLINE index #-} instance Lexicographic B.ByteString where extent = B.length {-# INLINE extent #-} size _ = 257 {-# INLINE size #-} index i b | i >= B.length b = 0 | otherwise = fromIntegral (B.index b i) + 1 {-# INLINE index #-} instance (Lexicographic a, Lexicographic b) => Lexicographic (a, b) where extent (a,b) = extent a + extent b {-# INLINE extent #-} size _ = size (Proxy :: Proxy a) `max` size (Proxy :: Proxy b) {-# INLINE size #-} index i (a,b) | i >= extent a = index i b | otherwise = index i a {-# INLINE index #-} instance (Lexicographic a, Lexicographic b) => Lexicographic (Either a b) where extent (Left a) = 1 + extent a extent (Right b) = 1 + extent b {-# INLINE extent #-} size _ = size (Proxy :: Proxy a) `max` size (Proxy :: Proxy b) {-# INLINE size #-} index 0 (Left _) = 0 index 0 (Right _) = 1 index n (Left a) = index (n-1) a index n (Right b) = index (n-1) b {-# INLINE index #-} -- | Given a representative of a stripe and an index number, this -- function determines whether to stop sorting. terminate :: Lexicographic e => e -> Int -> Bool terminate e i = i >= extent e {-# INLINE terminate #-} -- | Sorts an array using the default ordering. Both Lexicographic and -- Ord are necessary because the algorithm falls back to insertion sort -- for sufficiently small arrays. sort :: forall e m v. (PrimMonad m, MVector v e, Lexicographic e, Ord e) => v (PrimState m) e -> m () sort v = sortBy compare terminate (size p) index v where p :: Proxy e p = Proxy {-# INLINABLE sort #-} -- | A fully parameterized version of the sorting algorithm. Again, this -- function takes both radix information and a comparison, because the -- algorithms falls back to insertion sort for small arrays. sortBy :: (PrimMonad m, MVector v e) => Comparison e -- ^ a comparison for the insertion sort flalback -> (e -> Int -> Bool) -- ^ determines whether a stripe is complete -> Int -- ^ the number of buckets necessary -> (Int -> e -> Int) -- ^ the big-endian radix function -> v (PrimState m) e -- ^ the array to be sorted -> m () sortBy cmp stop buckets radix v | length v == 0 = return () | otherwise = do count <- new buckets pile <- new buckets countLoop (radix 0) v count flagLoop cmp stop radix count pile v {-# INLINE sortBy #-} flagLoop :: (PrimMonad m, MVector v e) => Comparison e -> (e -> Int -> Bool) -- number of passes -> (Int -> e -> Int) -- radix function -> PV.MVector (PrimState m) Int -- auxiliary count array -> PV.MVector (PrimState m) Int -- auxiliary pile array -> v (PrimState m) e -- source array -> m () flagLoop cmp stop radix count pile v = go 0 v where go pass v = do e <- unsafeRead v 0 unless (stop e $ pass - 1) $ go' pass v go' pass v | len < threshold = I.sortByBounds cmp v 0 len | otherwise = do accumulate count pile permute (radix pass) count pile v recurse 0 where len = length v ppass = pass + 1 recurse i | i < len = do j <- countStripe (radix ppass) (radix pass) count v i go ppass (unsafeSlice i (j - i) v) recurse j | otherwise = return () {-# INLINE flagLoop #-} accumulate :: (PrimMonad m) => PV.MVector (PrimState m) Int -> PV.MVector (PrimState m) Int -> m () accumulate count pile = loop 0 0 where len = length count loop i acc | i < len = do ci <- unsafeRead count i let acc' = acc + ci unsafeWrite pile i acc unsafeWrite count i acc' loop (i+1) acc' | otherwise = return () {-# INLINE accumulate #-} permute :: (PrimMonad m, MVector v e) => (e -> Int) -- radix function -> PV.MVector (PrimState m) Int -- count array -> PV.MVector (PrimState m) Int -- pile array -> v (PrimState m) e -- source array -> m () permute rdx count pile v = go 0 where len = length v go i | i < len = do e <- unsafeRead v i let r = rdx e p <- unsafeRead pile r m <- if r > 0 then unsafeRead count (r-1) else return 0 case () of -- if the current element is already in the right pile, -- go to the end of the pile _ | m <= i && i < p -> go p -- if the current element happens to be in the right -- pile, bump the pile counter and go to the next element | i == p -> unsafeWrite pile r (p+1) >> go (i+1) -- otherwise follow the chain | otherwise -> follow i e p >> go (i+1) | otherwise = return () follow i e j = do en <- unsafeRead v j let r = rdx en p <- inc pile r if p == j -- if the target happens to be in the right pile, don't move it. then follow i e (j+1) else unsafeWrite v j e >> if i == p then unsafeWrite v i en else follow i en p {-# INLINE permute #-} countStripe :: (PrimMonad m, MVector v e) => (e -> Int) -- radix function -> (e -> Int) -- stripe function -> PV.MVector (PrimState m) Int -- count array -> v (PrimState m) e -- source array -> Int -- starting position -> m Int -- end of stripe: [lo,hi) countStripe rdx str count v lo = do set count 0 e <- unsafeRead v lo go (str e) e (lo+1) where len = length v go !s e i = inc count (rdx e) >> if i < len then do en <- unsafeRead v i if str en == s then go s en (i+1) else return i else return len {-# INLINE countStripe #-} threshold :: Int threshold = 25 vector-algorithms-0.8.0.1/src/Data/Vector/Algorithms/Merge.hs0000644000000000000000000000761013357002426022131 0ustar0000000000000000{-# LANGUAGE BangPatterns #-} {-# LANGUAGE TypeFamilies #-} -- --------------------------------------------------------------------------- -- | -- Module : Data.Vector.Algorithms.Merge -- Copyright : (c) 2008-2011 Dan Doel -- Maintainer : Dan Doel -- Stability : Experimental -- Portability : Portable -- -- This module implements a simple top-down merge sort. The temporary buffer -- is preallocated to 1/2 the size of the input array, and shared through -- the entire sorting process to ease the amount of allocation performed in -- total. This is a stable sort. module Data.Vector.Algorithms.Merge ( sort , sortBy , Comparison ) where import Prelude hiding (read, length) import Control.Monad.Primitive import Data.Bits import Data.Vector.Generic.Mutable import Data.Vector.Algorithms.Common (Comparison, copyOffset, midPoint) import qualified Data.Vector.Algorithms.Optimal as O import qualified Data.Vector.Algorithms.Insertion as I -- | Sorts an array using the default comparison. sort :: (PrimMonad m, MVector v e, Ord e) => v (PrimState m) e -> m () sort = sortBy compare {-# INLINABLE sort #-} -- | Sorts an array using a custom comparison. sortBy :: (PrimMonad m, MVector v e) => Comparison e -> v (PrimState m) e -> m () sortBy cmp vec = if len <= 4 then if len <= 2 then if len /= 2 then return () else O.sort2ByOffset cmp vec 0 else if len == 3 then O.sort3ByOffset cmp vec 0 else O.sort4ByOffset cmp vec 0 else if len < threshold then I.sortByBounds cmp vec 0 len else do buf <- new halfLen mergeSortWithBuf cmp vec buf where len = length vec -- odd lengths have a larger half that needs to fit, so use ceiling, not floor halfLen = (len + 1) `div` 2 {-# INLINE sortBy #-} mergeSortWithBuf :: (PrimMonad m, MVector v e) => Comparison e -> v (PrimState m) e -> v (PrimState m) e -> m () mergeSortWithBuf cmp src buf = loop 0 (length src) where loop l u | len < threshold = I.sortByBounds cmp src l u | otherwise = do loop l mid loop mid u merge cmp (unsafeSlice l len src) buf (mid - l) where len = u - l mid = midPoint u l {-# INLINE mergeSortWithBuf #-} merge :: (PrimMonad m, MVector v e) => Comparison e -> v (PrimState m) e -> v (PrimState m) e -> Int -> m () merge cmp src buf mid = do unsafeCopy tmp lower eTmp <- unsafeRead tmp 0 eUpp <- unsafeRead upper 0 loop tmp 0 eTmp upper 0 eUpp 0 where lower = unsafeSlice 0 mid src upper = unsafeSlice mid (length src - mid) src tmp = unsafeSlice 0 mid buf wroteHigh low iLow eLow high iHigh iIns | iHigh >= length high = unsafeCopy (unsafeSlice iIns (length low - iLow) src) (unsafeSlice iLow (length low - iLow) low) | otherwise = do eHigh <- unsafeRead high iHigh loop low iLow eLow high iHigh eHigh iIns wroteLow low iLow high iHigh eHigh iIns | iLow >= length low = return () | otherwise = do eLow <- unsafeRead low iLow loop low iLow eLow high iHigh eHigh iIns loop !low !iLow !eLow !high !iHigh !eHigh !iIns = case cmp eHigh eLow of LT -> do unsafeWrite src iIns eHigh wroteHigh low iLow eLow high (iHigh + 1) (iIns + 1) _ -> do unsafeWrite src iIns eLow wroteLow low (iLow + 1) high iHigh eHigh (iIns + 1) {-# INLINE merge #-} threshold :: Int threshold = 25 {-# INLINE threshold #-} vector-algorithms-0.8.0.1/src/Data/Vector/Algorithms/Optimal.hs0000644000000000000000000002731613357002426022504 0ustar0000000000000000{-# LANGUAGE CPP #-} -- --------------------------------------------------------------------------- -- | -- Module : Data.Vector.Algorithms.Optimal -- Copyright : (c) 2008-2010 Dan Doel -- Maintainer : Dan Doel -- Stability : Experimental -- Portability : Portable -- -- Optimal sorts for very small array sizes, or for small numbers of -- particular indices in a larger array (to be used, for instance, for -- sorting a median of 3 values into the lowest position in an array -- for a median-of-3 quicksort). -- The code herein was adapted from a C algorithm for optimal sorts -- of small arrays. The original code was produced for the article -- /Sorting Revisited/ by Paul Hsieh, available here: -- -- http://www.azillionmonkeys.com/qed/sort.html -- -- The LICENSE file contains the relevant copyright information for -- the reference C code. module Data.Vector.Algorithms.Optimal ( sort2ByIndex , sort2ByOffset , sort3ByIndex , sort3ByOffset , sort4ByIndex , sort4ByOffset , Comparison ) where import Prelude hiding (read, length) import Control.Monad.Primitive import Data.Vector.Generic.Mutable import Data.Vector.Algorithms.Common (Comparison) #include "vector.h" -- | Sorts the elements at the positions 'off' and 'off + 1' in the given -- array using the comparison. sort2ByOffset :: (PrimMonad m, MVector v e) => Comparison e -> v (PrimState m) e -> Int -> m () sort2ByOffset cmp a off = sort2ByIndex cmp a off (off + 1) {-# INLINABLE sort2ByOffset #-} -- | Sorts the elements at the two given indices using the comparison. This -- is essentially a compare-and-swap, although the first index is assumed to -- be the 'lower' of the two. sort2ByIndex :: (PrimMonad m, MVector v e) => Comparison e -> v (PrimState m) e -> Int -> Int -> m () sort2ByIndex cmp a i j = UNSAFE_CHECK(checkIndex) "sort2ByIndex" i (length a) $ UNSAFE_CHECK(checkIndex) "sort2ByIndex" j (length a) $ do a0 <- unsafeRead a i a1 <- unsafeRead a j case cmp a0 a1 of GT -> unsafeWrite a i a1 >> unsafeWrite a j a0 _ -> return () {-# INLINABLE sort2ByIndex #-} -- | Sorts the three elements starting at the given offset in the array. sort3ByOffset :: (PrimMonad m, MVector v e) => Comparison e -> v (PrimState m) e -> Int -> m () sort3ByOffset cmp a off = sort3ByIndex cmp a off (off + 1) (off + 2) {-# INLINABLE sort3ByOffset #-} -- | Sorts the elements at the three given indices. The indices are assumed -- to be given from lowest to highest, so if 'l < m < u' then -- 'sort3ByIndex cmp a m l u' essentially sorts the median of three into the -- lowest position in the array. sort3ByIndex :: (PrimMonad m, MVector v e) => Comparison e -> v (PrimState m) e -> Int -> Int -> Int -> m () sort3ByIndex cmp a i j k = UNSAFE_CHECK(checkIndex) "sort3ByIndex" i (length a) $ UNSAFE_CHECK(checkIndex) "sort3ByIndex" j (length a) $ UNSAFE_CHECK(checkIndex) "sort3ByIndex" k (length a) $ do a0 <- unsafeRead a i a1 <- unsafeRead a j a2 <- unsafeRead a k case cmp a0 a1 of GT -> case cmp a0 a2 of GT -> case cmp a2 a1 of LT -> do unsafeWrite a i a2 unsafeWrite a k a0 _ -> do unsafeWrite a i a1 unsafeWrite a j a2 unsafeWrite a k a0 _ -> do unsafeWrite a i a1 unsafeWrite a j a0 _ -> case cmp a1 a2 of GT -> case cmp a0 a2 of GT -> do unsafeWrite a i a2 unsafeWrite a j a0 unsafeWrite a k a1 _ -> do unsafeWrite a j a2 unsafeWrite a k a1 _ -> return () {-# INLINABLE sort3ByIndex #-} -- | Sorts the four elements beginning at the offset. sort4ByOffset :: (PrimMonad m, MVector v e) => Comparison e -> v (PrimState m) e -> Int -> m () sort4ByOffset cmp a off = sort4ByIndex cmp a off (off + 1) (off + 2) (off + 3) {-# INLINABLE sort4ByOffset #-} -- The horror... -- | Sorts the elements at the four given indices. Like the 2 and 3 element -- versions, this assumes that the indices are given in increasing order, so -- it can be used to sort medians into particular positions and so on. sort4ByIndex :: (PrimMonad m, MVector v e) => Comparison e -> v (PrimState m) e -> Int -> Int -> Int -> Int -> m () sort4ByIndex cmp a i j k l = UNSAFE_CHECK(checkIndex) "sort4ByIndex" i (length a) $ UNSAFE_CHECK(checkIndex) "sort4ByIndex" j (length a) $ UNSAFE_CHECK(checkIndex) "sort4ByIndex" k (length a) $ UNSAFE_CHECK(checkIndex) "sort4ByIndex" l (length a) $ do a0 <- unsafeRead a i a1 <- unsafeRead a j a2 <- unsafeRead a k a3 <- unsafeRead a l case cmp a0 a1 of GT -> case cmp a0 a2 of GT -> case cmp a1 a2 of GT -> case cmp a1 a3 of GT -> case cmp a2 a3 of GT -> do unsafeWrite a i a3 unsafeWrite a j a2 unsafeWrite a k a1 unsafeWrite a l a0 _ -> do unsafeWrite a i a2 unsafeWrite a j a3 unsafeWrite a k a1 unsafeWrite a l a0 _ -> case cmp a0 a3 of GT -> do unsafeWrite a i a2 unsafeWrite a j a1 unsafeWrite a k a3 unsafeWrite a l a0 _ -> do unsafeWrite a i a2 unsafeWrite a j a1 unsafeWrite a k a0 unsafeWrite a l a3 _ -> case cmp a2 a3 of GT -> case cmp a1 a3 of GT -> do unsafeWrite a i a3 unsafeWrite a j a1 unsafeWrite a k a2 unsafeWrite a l a0 _ -> do unsafeWrite a i a1 unsafeWrite a j a3 unsafeWrite a k a2 unsafeWrite a l a0 _ -> case cmp a0 a3 of GT -> do unsafeWrite a i a1 unsafeWrite a j a2 unsafeWrite a k a3 unsafeWrite a l a0 _ -> do unsafeWrite a i a1 unsafeWrite a j a2 unsafeWrite a k a0 -- unsafeWrite a l a3 _ -> case cmp a0 a3 of GT -> case cmp a1 a3 of GT -> do unsafeWrite a i a3 -- unsafeWrite a j a1 unsafeWrite a k a0 unsafeWrite a l a2 _ -> do unsafeWrite a i a1 unsafeWrite a j a3 unsafeWrite a k a0 unsafeWrite a l a2 _ -> case cmp a2 a3 of GT -> do unsafeWrite a i a1 unsafeWrite a j a0 unsafeWrite a k a3 unsafeWrite a l a2 _ -> do unsafeWrite a i a1 unsafeWrite a j a0 -- unsafeWrite a k a2 -- unsafeWrite a l a3 _ -> case cmp a1 a2 of GT -> case cmp a0 a2 of GT -> case cmp a0 a3 of GT -> case cmp a2 a3 of GT -> do unsafeWrite a i a3 unsafeWrite a j a2 unsafeWrite a k a0 unsafeWrite a l a1 _ -> do unsafeWrite a i a2 unsafeWrite a j a3 unsafeWrite a k a0 unsafeWrite a l a1 _ -> case cmp a1 a3 of GT -> do unsafeWrite a i a2 unsafeWrite a j a0 unsafeWrite a k a3 unsafeWrite a l a1 _ -> do unsafeWrite a i a2 unsafeWrite a j a0 unsafeWrite a k a1 -- unsafeWrite a l a3 _ -> case cmp a2 a3 of GT -> case cmp a0 a3 of GT -> do unsafeWrite a i a3 unsafeWrite a j a0 -- unsafeWrite a k a2 unsafeWrite a l a1 _ -> do -- unsafeWrite a i a0 unsafeWrite a j a3 -- unsafeWrite a k a2 unsafeWrite a l a1 _ -> case cmp a1 a3 of GT -> do -- unsafeWrite a i a0 unsafeWrite a j a2 unsafeWrite a k a3 unsafeWrite a l a1 _ -> do -- unsafeWrite a i a0 unsafeWrite a j a2 unsafeWrite a k a1 -- unsafeWrite a l a3 _ -> case cmp a1 a3 of GT -> case cmp a0 a3 of GT -> do unsafeWrite a i a3 unsafeWrite a j a0 unsafeWrite a k a1 unsafeWrite a l a2 _ -> do -- unsafeWrite a i a0 unsafeWrite a j a3 unsafeWrite a k a1 unsafeWrite a l a2 _ -> case cmp a2 a3 of GT -> do -- unsafeWrite a i a0 -- unsafeWrite a j a1 unsafeWrite a k a3 unsafeWrite a l a2 _ -> do -- unsafeWrite a i a0 -- unsafeWrite a j a1 -- unsafeWrite a k a2 -- unsafeWrite a l a3 return () {-# INLINABLE sort4ByIndex #-} vector-algorithms-0.8.0.1/src/Data/Vector/Algorithms/Radix.hs0000644000000000000000000002021513357002426022135 0ustar0000000000000000{-# LANGUAGE BangPatterns #-} {-# LANGUAGE TypeFamilies #-} {-# LANGUAGE TypeOperators #-} {-# LANGUAGE ScopedTypeVariables #-} -- --------------------------------------------------------------------------- -- | -- Module : Data.Vector.Algorithms.Radix -- Copyright : (c) 2008-2011 Dan Doel -- Maintainer : Dan Doel -- Stability : Experimental -- Portability : Non-portable (scoped type variables, bang patterns) -- -- This module provides a radix sort for a subclass of unboxed arrays. The -- radix class gives information on -- * the number of passes needed for the data type -- -- * the size of the auxiliary arrays -- -- * how to compute the pass-k radix of a value -- -- Radix sort is not a comparison sort, so it is able to achieve O(n) run -- time, though it also uses O(n) auxiliary space. In addition, there is a -- constant space overhead of 2*size*sizeOf(Int) for the sort, so it is not -- advisable to use this sort for large numbers of very small arrays. -- -- A standard example (upon which one could base their own Radix instance) -- is Word32: -- -- * We choose to sort on r = 8 bits at a time -- -- * A Word32 has b = 32 bits total -- -- Thus, b/r = 4 passes are required, 2^r = 256 elements are needed in an -- auxiliary array, and the radix function is: -- -- > radix k e = (e `shiftR` (k*8)) .&. 255 module Data.Vector.Algorithms.Radix (sort, sortBy, Radix(..)) where import Prelude hiding (read, length) import Control.Monad import Control.Monad.Primitive import qualified Data.Vector.Primitive.Mutable as PV import Data.Vector.Generic.Mutable import Data.Vector.Algorithms.Common import Data.Bits import Data.Int import Data.Word import Foreign.Storable class Radix e where -- | The number of passes necessary to sort an array of es passes :: e -> Int -- | The size of an auxiliary array size :: e -> Int -- | The radix function parameterized by the current pass radix :: Int -> e -> Int instance Radix Int where passes _ = sizeOf (undefined :: Int) {-# INLINE passes #-} size _ = 256 {-# INLINE size #-} radix 0 e = e .&. 255 radix i e | i == passes e - 1 = radix' (e `xor` minBound) | otherwise = radix' e where radix' e = (e `shiftR` (i `shiftL` 3)) .&. 255 {-# INLINE radix #-} instance Radix Int8 where passes _ = 1 {-# INLINE passes #-} size _ = 256 {-# INLINE size #-} radix _ e = 255 .&. fromIntegral e `xor` 128 {-# INLINE radix #-} instance Radix Int16 where passes _ = 2 {-# INLINE passes #-} size _ = 256 {-# INLINE size #-} radix 0 e = fromIntegral (e .&. 255) radix 1 e = fromIntegral (((e `xor` minBound) `shiftR` 8) .&. 255) {-# INLINE radix #-} instance Radix Int32 where passes _ = 4 {-# INLINE passes #-} size _ = 256 {-# INLINE size #-} radix 0 e = fromIntegral (e .&. 255) radix 1 e = fromIntegral ((e `shiftR` 8) .&. 255) radix 2 e = fromIntegral ((e `shiftR` 16) .&. 255) radix 3 e = fromIntegral (((e `xor` minBound) `shiftR` 24) .&. 255) {-# INLINE radix #-} instance Radix Int64 where passes _ = 8 {-# INLINE passes #-} size _ = 256 {-# INLINE size #-} radix 0 e = fromIntegral (e .&. 255) radix 1 e = fromIntegral ((e `shiftR` 8) .&. 255) radix 2 e = fromIntegral ((e `shiftR` 16) .&. 255) radix 3 e = fromIntegral ((e `shiftR` 24) .&. 255) radix 4 e = fromIntegral ((e `shiftR` 32) .&. 255) radix 5 e = fromIntegral ((e `shiftR` 40) .&. 255) radix 6 e = fromIntegral ((e `shiftR` 48) .&. 255) radix 7 e = fromIntegral (((e `xor` minBound) `shiftR` 56) .&. 255) {-# INLINE radix #-} instance Radix Word where passes _ = sizeOf (undefined :: Word) {-# INLINE passes #-} size _ = 256 {-# INLINE size #-} radix 0 e = fromIntegral (e .&. 255) radix i e = fromIntegral ((e `shiftR` (i `shiftL` 3)) .&. 255) {-# INLINE radix #-} instance Radix Word8 where passes _ = 1 {-# INLINE passes #-} size _ = 256 {-# INLINE size #-} radix _ = fromIntegral {-# INLINE radix #-} instance Radix Word16 where passes _ = 2 {-# INLINE passes #-} size _ = 256 {-# INLINE size #-} radix 0 e = fromIntegral (e .&. 255) radix 1 e = fromIntegral ((e `shiftR` 8) .&. 255) {-# INLINE radix #-} instance Radix Word32 where passes _ = 4 {-# INLINE passes #-} size _ = 256 {-# INLINE size #-} radix 0 e = fromIntegral (e .&. 255) radix 1 e = fromIntegral ((e `shiftR` 8) .&. 255) radix 2 e = fromIntegral ((e `shiftR` 16) .&. 255) radix 3 e = fromIntegral ((e `shiftR` 24) .&. 255) {-# INLINE radix #-} instance Radix Word64 where passes _ = 8 {-# INLINE passes #-} size _ = 256 {-# INLINE size #-} radix 0 e = fromIntegral (e .&. 255) radix 1 e = fromIntegral ((e `shiftR` 8) .&. 255) radix 2 e = fromIntegral ((e `shiftR` 16) .&. 255) radix 3 e = fromIntegral ((e `shiftR` 24) .&. 255) radix 4 e = fromIntegral ((e `shiftR` 32) .&. 255) radix 5 e = fromIntegral ((e `shiftR` 40) .&. 255) radix 6 e = fromIntegral ((e `shiftR` 48) .&. 255) radix 7 e = fromIntegral ((e `shiftR` 56) .&. 255) {-# INLINE radix #-} instance (Radix i, Radix j) => Radix (i, j) where passes ~(i, j) = passes i + passes j {-# INLINE passes #-} size ~(i, j) = size i `max` size j {-# INLINE size #-} radix k ~(i, j) | k < passes j = radix k j | otherwise = radix (k - passes j) i {-# INLINE radix #-} -- | Sorts an array based on the Radix instance. sort :: forall e m v. (PrimMonad m, MVector v e, Radix e) => v (PrimState m) e -> m () sort arr = sortBy (passes e) (size e) radix arr where e :: e e = undefined {-# INLINABLE sort #-} -- | Radix sorts an array using custom radix information -- requires the number of passes to fully sort the array, -- the size of of auxiliary arrays necessary (should be -- one greater than the maximum value returned by the radix -- function), and a radix function, which takes the pass -- and an element, and returns the relevant radix. sortBy :: (PrimMonad m, MVector v e) => Int -- ^ the number of passes -> Int -- ^ the size of auxiliary arrays -> (Int -> e -> Int) -- ^ the radix function -> v (PrimState m) e -- ^ the array to be sorted -> m () sortBy passes size rdx arr = do tmp <- new (length arr) count <- new size radixLoop passes rdx arr tmp count {-# INLINE sortBy #-} radixLoop :: (PrimMonad m, MVector v e) => Int -- passes -> (Int -> e -> Int) -- radix function -> v (PrimState m) e -- array to sort -> v (PrimState m) e -- temporary array -> PV.MVector (PrimState m) Int -- radix count array -> m () radixLoop passes rdx src dst count = go False 0 where len = length src go swap k | k < passes = if swap then body rdx dst src count k >> go (not swap) (k+1) else body rdx src dst count k >> go (not swap) (k+1) | otherwise = when swap (unsafeCopy src dst) {-# INLINE radixLoop #-} body :: (PrimMonad m, MVector v e) => (Int -> e -> Int) -- radix function -> v (PrimState m) e -- source array -> v (PrimState m) e -- destination array -> PV.MVector (PrimState m) Int -- radix count -> Int -- current pass -> m () body rdx src dst count k = do countLoop (rdx k) src count accumulate count moveLoop k rdx src dst count {-# INLINE body #-} accumulate :: (PrimMonad m) => PV.MVector (PrimState m) Int -> m () accumulate count = go 0 0 where len = length count go i acc | i < len = do ci <- unsafeRead count i unsafeWrite count i acc go (i+1) (acc + ci) | otherwise = return () {-# INLINE accumulate #-} moveLoop :: (PrimMonad m, MVector v e) => Int -> (Int -> e -> Int) -> v (PrimState m) e -> v (PrimState m) e -> PV.MVector (PrimState m) Int -> m () moveLoop k rdx src dst prefix = go 0 where len = length src go i | i < len = do srci <- unsafeRead src i pf <- inc prefix (rdx k srci) unsafeWrite dst pf srci go (i+1) | otherwise = return () {-# INLINE moveLoop #-} vector-algorithms-0.8.0.1/src/Data/Vector/Algorithms/Tim.hs0000644000000000000000000003353613357002426021631 0ustar0000000000000000{-# LANGUAGE BangPatterns #-} -- --------------------------------------------------------------------------- -- | -- Module : Data.Vector.Algorithms.Tim -- Copyright : (c) 2013-2015 Dan Doel, 2015 Tim Baumann -- Maintainer : Dan Doel -- Stability : Experimental -- Portability : Non-portable (bang patterns) -- -- Timsort is a complex, adaptive, bottom-up merge sort. It is designed to -- minimize comparisons as much as possible, even at some cost in overhead. -- Thus, it may not be ideal for sorting simple primitive types, for which -- comparison is cheap. It may, however, be significantly faster for sorting -- arrays of complex values (strings would be an example, though an algorithm -- not based on comparison would probably be superior in that particular -- case). -- -- For more information on the details of the algorithm, read on. -- -- The first step of the algorithm is to identify runs of elements. These can -- either be non-decreasing or strictly decreasing sequences of elements in -- the input. Strictly decreasing sequences are used rather than -- non-increasing so that they can be easily reversed in place without the -- sort becoming unstable. -- -- If the natural runs are too short, they are padded to a minimum value. The -- minimum is chosen based on the length of the array, and padded runs are put -- in order using insertion sort. The length of the minimum run size is -- determined as follows: -- -- * If the length of the array is less than 64, the minimum size is the -- length of the array, and insertion sort is used for the entirety -- -- * Otherwise, a value between 32 and 64 is chosen such that N/min is -- either equal to or just below a power of two. This avoids having a -- small chunk left over to merge into much larger chunks at the end. -- -- This is accomplished by taking the the mininum to be the lowest six bits -- containing the highest set bit, and adding one if any other bits are set. -- For instance: -- -- length: 00000000 00000000 00000000 00011011 = 25 -- min: 00000000 00000000 00000000 00011011 = 25 -- -- length: 00000000 11111100 00000000 00000000 = 63 * 2^18 -- min: 00000000 00000000 00000000 00111111 = 63 -- -- length: 00000000 11111100 00000000 00000001 = 63 * 2^18 + 1 -- min: 00000000 00000000 00000000 01000000 = 64 -- -- Once chunks can be produced, the next step is merging them. The indices of -- all runs are stored in a stack. When we identify a new run, we push it onto -- the stack. However, certain invariants are maintained of the stack entries. -- Namely: -- -- if stk = _ :> z :> y :> x -- length x + length y < length z -- -- if stk = _ :> y :> x -- length x < length y -- -- This ensures that the chunks stored are decreasing, and that the chunk -- sizes follow something like the fibonacci sequence, ensuring there at most -- log-many chunks at any time. If pushing a new chunk on the stack would -- violate either of the invariants, we first perform a merge. -- -- If length x + length y >= length z, then y is merged with the smaller of x -- and z (if they are tied, x is chosen, because it is more likely to be -- cached). If, further, length x >= length y then they are merged. These steps -- are repeated until the invariants are established. -- -- The last important piece of the algorithm is the merging. At first, two -- chunks are merged element-wise. However, while doing so, counts are kept of -- the number of elements taken from one chunk without any from its partner. If -- this count exceeds a threshold, the merge switches to searching for elements -- from one chunk in the other, and copying chunks at a time. If these chunks -- start falling below the threshold, the merge switches back to element-wise. -- -- The search used in the merge is also special. It uses a galloping strategy, -- where exponentially increasing indices are tested, and once two such indices -- are determined to bracket the desired value, binary search is used to find -- the exact index within that range. This is asymptotically the same as simply -- using binary search, but is likely to do fewer comparisons than binary search -- would. -- -- One aspect that is not yet implemented from the original Tim sort is the -- adjustment of the above threshold. When galloping saves time, the threshold -- is lowered, and when it doesn't, it is raised. This may be implemented in the -- future. module Data.Vector.Algorithms.Tim ( sort , sortBy ) where import Prelude hiding (length, reverse) import Control.Monad.Primitive import Control.Monad (when) import Data.Bits import Data.Vector.Generic.Mutable import Data.Vector.Algorithms.Search ( gallopingSearchRightPBounds , gallopingSearchLeftPBounds ) import Data.Vector.Algorithms.Insertion (sortByBounds', Comparison) -- | Sorts an array using the default comparison. sort :: (PrimMonad m, MVector v e, Ord e) => v (PrimState m) e -> m () sort = sortBy compare {-# INLINABLE sort #-} -- | Sorts an array using a custom comparison. sortBy :: (PrimMonad m, MVector v e) => Comparison e -> v (PrimState m) e -> m () sortBy cmp vec | mr == len = iter [0] 0 (error "no merge buffer needed!") | otherwise = new 256 >>= iter [] 0 where len = length vec mr = minrun len iter s i tmpBuf | i >= len = performRemainingMerges s tmpBuf | otherwise = do (order, runLen) <- nextRun cmp vec i len when (order == Descending) $ reverse $ unsafeSlice i runLen vec let runEnd = min len (i + max runLen mr) sortByBounds' cmp vec i (i+runLen) runEnd (s', tmpBuf') <- performMerges (i : s) runEnd tmpBuf iter s' runEnd tmpBuf' runLengthInvariantBroken a b c i = (b - a <= i - b) || (c - b <= i - c) performMerges [b,a] i tmpBuf | i - b >= b - a = merge cmp vec a b i tmpBuf >>= performMerges [a] i performMerges (c:b:a:ss) i tmpBuf | runLengthInvariantBroken a b c i = if i - c <= b - a then merge cmp vec b c i tmpBuf >>= performMerges (b:a:ss) i else do tmpBuf' <- merge cmp vec a b c tmpBuf (ass', tmpBuf'') <- performMerges (a:ss) c tmpBuf' performMerges (c:ass') i tmpBuf'' performMerges s _ tmpBuf = return (s, tmpBuf) performRemainingMerges (b:a:ss) tmpBuf = merge cmp vec a b len tmpBuf >>= performRemainingMerges (a:ss) performRemainingMerges _ _ = return () {-# INLINE sortBy #-} -- | Computes the minimum run size for the sort. The goal is to choose a size -- such that there are almost if not exactly 2^n chunks of that size in the -- array. minrun :: Int -> Int minrun n0 = (n0 `unsafeShiftR` extra) + if (lowMask .&. n0) > 0 then 1 else 0 where -- smear the bits down from the most significant bit !n1 = n0 .|. unsafeShiftR n0 1 !n2 = n1 .|. unsafeShiftR n1 2 !n3 = n2 .|. unsafeShiftR n2 4 !n4 = n3 .|. unsafeShiftR n3 8 !n5 = n4 .|. unsafeShiftR n4 16 !n6 = n5 .|. unsafeShiftR n5 32 -- mask for the bits lower than the 6 highest bits !lowMask = n6 `unsafeShiftR` 6 !extra = popCount lowMask {-# INLINE minrun #-} data Order = Ascending | Descending deriving (Eq, Show) -- | Identify the next run (that is a monotonically increasing or strictly -- decreasing sequence) in the slice [l,u) in vec. Returns the order and length -- of the run. nextRun :: (PrimMonad m, MVector v e) => Comparison e -> v (PrimState m) e -> Int -- ^ l -> Int -- ^ u -> m (Order, Int) nextRun _ _ i len | i+1 >= len = return (Ascending, 1) nextRun cmp vec i len = do x <- unsafeRead vec i y <- unsafeRead vec (i+1) if x `gt` y then desc y 2 else asc y 2 where gt a b = cmp a b == GT desc _ !k | i + k >= len = return (Descending, k) desc x !k = do y <- unsafeRead vec (i+k) if x `gt` y then desc y (k+1) else return (Descending, k) asc _ !k | i + k >= len = return (Ascending, k) asc x !k = do y <- unsafeRead vec (i+k) if x `gt` y then return (Ascending, k) else asc y (k+1) {-# INLINE nextRun #-} -- | Tests if a temporary buffer has a given size. If not, allocates a new -- buffer and returns it instead of the old temporary buffer. ensureCapacity :: (PrimMonad m, MVector v e) => Int -> v (PrimState m) e -> m (v (PrimState m) e) ensureCapacity l tmpBuf | l <= length tmpBuf = return tmpBuf | otherwise = new (2*l) {-# INLINE ensureCapacity #-} -- | Copy the slice [i,i+len) from vec to tmpBuf. If tmpBuf is not large enough, -- a new buffer is allocated and used. Returns the buffer. cloneSlice :: (PrimMonad m, MVector v e) => Int -- ^ i -> Int -- ^ len -> v (PrimState m) e -- ^ vec -> v (PrimState m) e -- ^ tmpBuf -> m (v (PrimState m) e) cloneSlice i len vec tmpBuf = do tmpBuf' <- ensureCapacity len tmpBuf unsafeCopy (unsafeSlice 0 len tmpBuf') (unsafeSlice i len vec) return tmpBuf' {-# INLINE cloneSlice #-} -- | Number of consecutive times merge chooses the element from the same run -- before galloping mode is activated. minGallop :: Int minGallop = 7 {-# INLINE minGallop #-} -- | Merge the adjacent sorted slices [l,m) and [m,u) in vec. This is done by -- copying the slice [l,m) to a temporary buffer. Returns the (enlarged) -- temporary buffer. mergeLo :: (PrimMonad m, MVector v e) => Comparison e -> v (PrimState m) e -- ^ vec -> Int -- ^ l -> Int -- ^ m -> Int -- ^ u -> v (PrimState m) e -- ^ tmpBuf -> m (v (PrimState m) e) mergeLo cmp vec l m u tempBuf' = do tmpBuf <- cloneSlice l tmpBufLen vec tempBuf' vi <- unsafeRead tmpBuf 0 vj <- unsafeRead vec m iter tmpBuf 0 m l vi vj minGallop minGallop return tmpBuf where gt a b = cmp a b == GT gte a b = cmp a b /= LT tmpBufLen = m - l iter _ i _ _ _ _ _ _ | i >= tmpBufLen = return () iter tmpBuf i j k _ _ _ _ | j >= u = do let from = unsafeSlice i (tmpBufLen-i) tmpBuf to = unsafeSlice k (tmpBufLen-i) vec unsafeCopy to from iter tmpBuf i j k _ vj 0 _ = do i' <- gallopingSearchLeftPBounds (`gt` vj) tmpBuf i tmpBufLen let gallopLen = i' - i from = unsafeSlice i gallopLen tmpBuf to = unsafeSlice k gallopLen vec unsafeCopy to from vi' <- unsafeRead tmpBuf i' iter tmpBuf i' j (k+gallopLen) vi' vj minGallop minGallop iter tmpBuf i j k vi _ _ 0 = do j' <- gallopingSearchLeftPBounds (`gte` vi) vec j u let gallopLen = j' - j from = slice j gallopLen vec to = slice k gallopLen vec unsafeMove to from vj' <- unsafeRead vec j' iter tmpBuf i j' (k+gallopLen) vi vj' minGallop minGallop iter tmpBuf i j k vi vj ga gb | vj `gte` vi = do unsafeWrite vec k vi vi' <- unsafeRead tmpBuf (i+1) iter tmpBuf (i+1) j (k+1) vi' vj (ga-1) minGallop | otherwise = do unsafeWrite vec k vj vj' <- unsafeRead vec (j+1) iter tmpBuf i (j+1) (k+1) vi vj' minGallop (gb-1) {-# INLINE mergeLo #-} -- | Merge the adjacent sorted slices [l,m) and [m,u) in vec. This is done by -- copying the slice [j,k) to a temporary buffer. Returns the (enlarged) -- temporary buffer. mergeHi :: (PrimMonad m, MVector v e) => Comparison e -> v (PrimState m) e -- ^ vec -> Int -- ^ l -> Int -- ^ m -> Int -- ^ u -> v (PrimState m) e -- ^ tmpBuf -> m (v (PrimState m) e) mergeHi cmp vec l m u tmpBuf' = do tmpBuf <- cloneSlice m tmpBufLen vec tmpBuf' vi <- unsafeRead vec (m-1) vj <- unsafeRead tmpBuf (tmpBufLen-1) iter tmpBuf (m-1) (tmpBufLen-1) (u-1) vi vj minGallop minGallop return tmpBuf where gt a b = cmp a b == GT gte a b = cmp a b /= LT tmpBufLen = u - m iter _ _ j _ _ _ _ _ | j < 0 = return () iter tmpBuf i j _ _ _ _ _ | i < l = do let from = unsafeSlice 0 (j+1) tmpBuf to = unsafeSlice l (j+1) vec unsafeCopy to from iter tmpBuf i j k _ vj 0 _ = do i' <- gallopingSearchRightPBounds (`gt` vj) vec l i let gallopLen = i - i' from = slice (i'+1) gallopLen vec to = slice (k-gallopLen+1) gallopLen vec unsafeMove to from vi' <- unsafeRead vec i' iter tmpBuf i' j (k-gallopLen) vi' vj minGallop minGallop iter tmpBuf i j k vi _ _ 0 = do j' <- gallopingSearchRightPBounds (`gte` vi) tmpBuf 0 j let gallopLen = j - j' from = slice (j'+1) gallopLen tmpBuf to = slice (k-gallopLen+1) gallopLen vec unsafeCopy to from vj' <- unsafeRead tmpBuf j' iter tmpBuf i j' (k-gallopLen) vi vj' minGallop minGallop iter tmpBuf i j k vi vj ga gb | vi `gt` vj = do unsafeWrite vec k vi vi' <- unsafeRead vec (i-1) iter tmpBuf (i-1) j (k-1) vi' vj (ga-1) minGallop | otherwise = do unsafeWrite vec k vj vj' <- unsafeRead tmpBuf (j-1) iter tmpBuf i (j-1) (k-1) vi vj' minGallop (gb-1) {-# INLINE mergeHi #-} -- | Merge the adjacent sorted slices A=[l,m) and B=[m,u) in vec. This begins -- with galloping searches to find the index of vec[m] in A and the index of -- vec[m-1] in B to reduce the sizes of A and B. Then it uses `mergeHi` or -- `mergeLo` depending on whether A or B is larger. Returns the (enlarged) -- temporary buffer. merge :: (PrimMonad m, MVector v e) => Comparison e -> v (PrimState m) e -- ^ vec -> Int -- ^ l -> Int -- ^ m -> Int -- ^ u -> v (PrimState m) e -- ^ tmpBuf -> m (v (PrimState m) e) merge cmp vec l m u tmpBuf = do vm <- unsafeRead vec m l' <- gallopingSearchLeftPBounds (`gt` vm) vec l m if l' >= m then return tmpBuf else do vn <- unsafeRead vec (m-1) u' <- gallopingSearchRightPBounds (`gte` vn) vec m u if u' <= m then return tmpBuf else (if (m-l') <= (u'-m) then mergeLo else mergeHi) cmp vec l' m u' tmpBuf where gt a b = cmp a b == GT gte a b = cmp a b /= LT {-# INLINE merge #-} vector-algorithms-0.8.0.1/src/Data/Vector/Algorithms/Common.hs0000644000000000000000000000326413357002426022323 0ustar0000000000000000{-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE TypeFamilies #-} -- --------------------------------------------------------------------------- -- | -- Module : Data.Vector.Algorithms.Common -- Copyright : (c) 2008-2011 Dan Doel -- Maintainer : Dan Doel -- Stability : Experimental -- Portability : Portable -- -- Common operations and utility functions for all sorts module Data.Vector.Algorithms.Common where import Prelude hiding (read, length) import Control.Monad.Primitive import Data.Vector.Generic.Mutable import Data.Word (Word) import qualified Data.Vector.Primitive.Mutable as PV -- | A type of comparisons between two values of a given type. type Comparison e = e -> e -> Ordering copyOffset :: (PrimMonad m, MVector v e) => v (PrimState m) e -> v (PrimState m) e -> Int -> Int -> Int -> m () copyOffset from to iFrom iTo len = unsafeCopy (unsafeSlice iTo len to) (unsafeSlice iFrom len from) {-# INLINE copyOffset #-} inc :: (PrimMonad m, MVector v Int) => v (PrimState m) Int -> Int -> m Int inc arr i = unsafeRead arr i >>= \e -> unsafeWrite arr i (e+1) >> return e {-# INLINE inc #-} -- shared bucket sorting stuff countLoop :: (PrimMonad m, MVector v e) => (e -> Int) -> v (PrimState m) e -> PV.MVector (PrimState m) Int -> m () countLoop rdx src count = set count 0 >> go 0 where len = length src go i | i < len = unsafeRead src i >>= inc count . rdx >> go (i+1) | otherwise = return () {-# INLINE countLoop #-} midPoint :: Int -> Int -> Int midPoint a b = toInt $ (toWord a + toWord b) `div` 2 where toWord :: Int -> Word toWord = fromIntegral toInt :: Word -> Int toInt = fromIntegral {-# INLINE midPoint #-} vector-algorithms-0.8.0.1/src/Data/Vector/Algorithms/Intro.hs0000644000000000000000000001757513357002426022200 0ustar0000000000000000{-# LANGUAGE BangPatterns #-} {-# LANGUAGE TypeFamilies #-} {-# LANGUAGE TypeOperators #-} {-# LANGUAGE ScopedTypeVariables #-} -- --------------------------------------------------------------------------- -- | -- Module : Data.Vector.Algorithms.Intro -- Copyright : (c) 2008-2015 Dan Doel -- Maintainer : Dan Doel -- Stability : Experimental -- Portability : Non-portable (type operators, bang patterns) -- -- This module implements various algorithms based on the introsort algorithm, -- originally described by David R. Musser in the paper /Introspective Sorting -- and Selection Algorithms/. It is also in widespread practical use, as the -- standard unstable sort used in the C++ Standard Template Library. -- -- Introsort is at its core a quicksort. The version implemented here has the -- following optimizations that make it perform better in practice: -- -- * Small segments of the array are left unsorted until a final insertion -- sort pass. This is faster than recursing all the way down to -- one-element arrays. -- -- * The pivot for segment [l,u) is chosen as the median of the elements at -- l, u-1 and (u+l)/2. This yields good behavior on mostly sorted (or -- reverse-sorted) arrays. -- -- * The algorithm tracks its recursion depth, and if it decides it is -- taking too long (depth greater than 2 * lg n), it switches to a heap -- sort to maintain O(n lg n) worst case behavior. (This is what makes the -- algorithm introsort). module Data.Vector.Algorithms.Intro ( -- * Sorting sort , sortBy , sortByBounds -- * Selecting , select , selectBy , selectByBounds -- * Partial sorting , partialSort , partialSortBy , partialSortByBounds , Comparison ) where import Prelude hiding (read, length) import Control.Monad import Control.Monad.Primitive import Data.Bits import Data.Vector.Generic.Mutable import Data.Vector.Algorithms.Common (Comparison, midPoint) import qualified Data.Vector.Algorithms.Insertion as I import qualified Data.Vector.Algorithms.Optimal as O import qualified Data.Vector.Algorithms.Heap as H -- | Sorts an entire array using the default ordering. sort :: (PrimMonad m, MVector v e, Ord e) => v (PrimState m) e -> m () sort = sortBy compare {-# INLINABLE sort #-} -- | Sorts an entire array using a custom ordering. sortBy :: (PrimMonad m, MVector v e) => Comparison e -> v (PrimState m) e -> m () sortBy cmp a = sortByBounds cmp a 0 (length a) {-# INLINE sortBy #-} -- | Sorts a portion of an array [l,u) using a custom ordering sortByBounds :: (PrimMonad m, MVector v e) => Comparison e -> v (PrimState m) e -> Int -- ^ lower index, l -> Int -- ^ upper index, u -> m () sortByBounds cmp a l u | len < 2 = return () | len == 2 = O.sort2ByOffset cmp a l | len == 3 = O.sort3ByOffset cmp a l | len == 4 = O.sort4ByOffset cmp a l | otherwise = introsort cmp a (ilg len) l u where len = u - l {-# INLINE sortByBounds #-} -- Internal version of the introsort loop which allows partial -- sort functions to call with a specified bound on iterations. introsort :: (PrimMonad m, MVector v e) => Comparison e -> v (PrimState m) e -> Int -> Int -> Int -> m () introsort cmp a i l u = sort i l u >> I.sortByBounds cmp a l u where sort 0 l u = H.sortByBounds cmp a l u sort d l u | len < threshold = return () | otherwise = do O.sort3ByIndex cmp a c l (u-1) -- sort the median into the lowest position p <- unsafeRead a l mid <- partitionBy cmp a p (l+1) u unsafeSwap a l (mid - 1) sort (d-1) mid u sort (d-1) l (mid - 1) where len = u - l c = midPoint u l {-# INLINE introsort #-} -- | Moves the least k elements to the front of the array in -- no particular order. select :: (PrimMonad m, MVector v e, Ord e) => v (PrimState m) e -> Int -- ^ number of elements to select, k -> m () select = selectBy compare {-# INLINE select #-} -- | Moves the least k elements (as defined by the comparison) to -- the front of the array in no particular order. selectBy :: (PrimMonad m, MVector v e) => Comparison e -> v (PrimState m) e -> Int -- ^ number of elements to select, k -> m () selectBy cmp a k = selectByBounds cmp a k 0 (length a) {-# INLINE selectBy #-} -- | Moves the least k elements in the interval [l,u) to the positions -- [l,k+l) in no particular order. selectByBounds :: (PrimMonad m, MVector v e) => Comparison e -> v (PrimState m) e -> Int -- ^ number of elements to select, k -> Int -- ^ lower bound, l -> Int -- ^ upper bound, u -> m () selectByBounds cmp a k l u | l >= u = return () | otherwise = go (ilg len) l (l + k) u where len = u - l go 0 l m u = H.selectByBounds cmp a (m - l) l u go n l m u = do O.sort3ByIndex cmp a c l (u-1) p <- unsafeRead a l mid <- partitionBy cmp a p (l+1) u unsafeSwap a l (mid - 1) if m > mid then go (n-1) mid m u else if m < mid - 1 then go (n-1) l m (mid - 1) else return () where c = midPoint u l {-# INLINE selectByBounds #-} -- | Moves the least k elements to the front of the array, sorted. partialSort :: (PrimMonad m, MVector v e, Ord e) => v (PrimState m) e -> Int -- ^ number of elements to sort, k -> m () partialSort = partialSortBy compare {-# INLINE partialSort #-} -- | Moves the least k elements (as defined by the comparison) to -- the front of the array, sorted. partialSortBy :: (PrimMonad m, MVector v e) => Comparison e -> v (PrimState m) e -> Int -- ^ number of elements to sort, k -> m () partialSortBy cmp a k = partialSortByBounds cmp a k 0 (length a) {-# INLINE partialSortBy #-} -- | Moves the least k elements in the interval [l,u) to the positions -- [l,k+l), sorted. partialSortByBounds :: (PrimMonad m, MVector v e) => Comparison e -> v (PrimState m) e -> Int -- ^ number of elements to sort, k -> Int -- ^ lower index, l -> Int -- ^ upper index, u -> m () partialSortByBounds cmp a k l u | l >= u = return () | otherwise = go (ilg len) l (l + k) u where isort = introsort cmp a {-# INLINE [1] isort #-} len = u - l go 0 l m n = H.partialSortByBounds cmp a (m - l) l u go n l m u | l == m = return () | otherwise = do O.sort3ByIndex cmp a c l (u-1) p <- unsafeRead a l mid <- partitionBy cmp a p (l+1) u unsafeSwap a l (mid - 1) case compare m mid of GT -> do isort (n-1) l (mid - 1) go (n-1) mid m u EQ -> isort (n-1) l m LT -> go n l m (mid - 1) where c = midPoint u l {-# INLINE partialSortByBounds #-} partitionBy :: forall m v e. (PrimMonad m, MVector v e) => Comparison e -> v (PrimState m) e -> e -> Int -> Int -> m Int partitionBy cmp a = partUp where partUp :: e -> Int -> Int -> m Int partUp p l u | l < u = do e <- unsafeRead a l case cmp e p of LT -> partUp p (l+1) u _ -> partDown p l (u-1) | otherwise = return l partDown :: e -> Int -> Int -> m Int partDown p l u | l < u = do e <- unsafeRead a u case cmp p e of LT -> partDown p l (u-1) _ -> unsafeSwap a l u >> partUp p (l+1) u | otherwise = return l {-# INLINE partitionBy #-} -- computes the number of recursive calls after which heapsort should -- be invoked given the lower and upper indices of the array to be sorted ilg :: Int -> Int ilg m = 2 * loop m 0 where loop 0 !k = k - 1 loop n !k = loop (n `shiftR` 1) (k+1) -- the size of array at which the introsort algorithm switches to insertion sort threshold :: Int threshold = 18 {-# INLINE threshold #-} vector-algorithms-0.8.0.1/src/Data/Vector/Algorithms/Insertion.hs0000644000000000000000000000516013357002426023042 0ustar0000000000000000{-# LANGUAGE TypeFamilies #-} -- --------------------------------------------------------------------------- -- | -- Module : Data.Vector.Algorithms.Insertion -- Copyright : (c) 2008-2010 Dan Doel -- Maintainer : Dan Doel -- Stability : Experimental -- Portability : Portable -- -- A simple insertion sort. Though it's O(n^2), its iterative nature can be -- beneficial for small arrays. It is used to sort small segments of an array -- by some of the more heavy-duty, recursive algorithms. module Data.Vector.Algorithms.Insertion ( sort , sortBy , sortByBounds , sortByBounds' , Comparison ) where import Prelude hiding (read, length) import Control.Monad.Primitive import Data.Vector.Generic.Mutable import Data.Vector.Algorithms.Common (Comparison) import qualified Data.Vector.Algorithms.Optimal as O -- | Sorts an entire array using the default comparison for the type sort :: (PrimMonad m, MVector v e, Ord e) => v (PrimState m) e -> m () sort = sortBy compare {-# INLINABLE sort #-} -- | Sorts an entire array using a given comparison sortBy :: (PrimMonad m, MVector v e) => Comparison e -> v (PrimState m) e -> m () sortBy cmp a = sortByBounds cmp a 0 (length a) {-# INLINE sortBy #-} -- | Sorts the portion of an array delimited by [l,u) sortByBounds :: (PrimMonad m, MVector v e) => Comparison e -> v (PrimState m) e -> Int -> Int -> m () sortByBounds cmp a l u | len < 2 = return () | len == 2 = O.sort2ByOffset cmp a l | len == 3 = O.sort3ByOffset cmp a l | len == 4 = O.sort4ByOffset cmp a l | otherwise = O.sort4ByOffset cmp a l >> sortByBounds' cmp a l (l + 4) u where len = u - l {-# INLINE sortByBounds #-} -- | Sorts the portion of the array delimited by [l,u) under the assumption -- that [l,m) is already sorted. sortByBounds' :: (PrimMonad m, MVector v e) => Comparison e -> v (PrimState m) e -> Int -> Int -> Int -> m () sortByBounds' cmp a l m u = sort m where sort i | i < u = do v <- unsafeRead a i insert cmp a l v i sort (i+1) | otherwise = return () {-# INLINE sortByBounds' #-} -- Given a sorted array in [l,u), inserts val into its proper position, -- yielding a sorted [l,u] insert :: (PrimMonad m, MVector v e) => Comparison e -> v (PrimState m) e -> Int -> e -> Int -> m () insert cmp a l = loop where loop val j | j <= l = unsafeWrite a l val | otherwise = do e <- unsafeRead a (j - 1) case cmp val e of LT -> unsafeWrite a j e >> loop val (j - 1) _ -> unsafeWrite a j val {-# INLINE insert #-} vector-algorithms-0.8.0.1/src/Data/Vector/Algorithms/Heap.hs0000644000000000000000000002722713357002426021755 0ustar0000000000000000{-# LANGUAGE TypeFamilies #-} {-# LANGUAGE TypeOperators #-} -- --------------------------------------------------------------------------- -- | -- Module : Data.Vector.Algorithms.Heap -- Copyright : (c) 2008-2015 Dan Doel -- Maintainer : Dan Doel -- Stability : Experimental -- Portability : Non-portable (type operators) -- -- This module implements operations for working with a quaternary heap stored -- in an unboxed array. Most heapsorts are defined in terms of a binary heap, -- in which each internal node has at most two children. By contrast, a -- quaternary heap has internal nodes with up to four children. This reduces -- the number of comparisons in a heapsort slightly, and improves locality -- (again, slightly) by flattening out the heap. module Data.Vector.Algorithms.Heap ( -- * Sorting sort , sortBy , sortByBounds -- * Selection , select , selectBy , selectByBounds -- * Partial sorts , partialSort , partialSortBy , partialSortByBounds -- * Heap operations , heapify , pop , popTo , sortHeap , heapInsert , Comparison ) where import Prelude hiding (read, length) import Control.Monad import Control.Monad.Primitive import Data.Bits import Data.Vector.Generic.Mutable import Data.Vector.Algorithms.Common (Comparison) import qualified Data.Vector.Algorithms.Optimal as O -- | Sorts an entire array using the default ordering. sort :: (PrimMonad m, MVector v e, Ord e) => v (PrimState m) e -> m () sort = sortBy compare {-# INLINABLE sort #-} -- | Sorts an entire array using a custom ordering. sortBy :: (PrimMonad m, MVector v e) => Comparison e -> v (PrimState m) e -> m () sortBy cmp a = sortByBounds cmp a 0 (length a) {-# INLINE sortBy #-} -- | Sorts a portion of an array [l,u) using a custom ordering sortByBounds :: (PrimMonad m, MVector v e) => Comparison e -> v (PrimState m) e -> Int -- ^ lower index, l -> Int -- ^ upper index, u -> m () sortByBounds cmp a l u | len < 2 = return () | len == 2 = O.sort2ByOffset cmp a l | len == 3 = O.sort3ByOffset cmp a l | len == 4 = O.sort4ByOffset cmp a l | otherwise = heapify cmp a l u >> sortHeap cmp a l (l+4) u >> O.sort4ByOffset cmp a l where len = u - l {-# INLINE sortByBounds #-} -- | Moves the lowest k elements to the front of the array. -- The elements will be in no particular order. select :: (PrimMonad m, MVector v e, Ord e) => v (PrimState m) e -> Int -- ^ number of elements to select, k -> m () select = selectBy compare {-# INLINE select #-} -- | Moves the lowest (as defined by the comparison) k elements -- to the front of the array. The elements will be in no particular -- order. selectBy :: (PrimMonad m, MVector v e) => Comparison e -> v (PrimState m) e -> Int -- ^ number of elements to select, k -> m () selectBy cmp a k = selectByBounds cmp a k 0 (length a) {-# INLINE selectBy #-} -- | Moves the 'lowest' k elements in the portion [l,u) of the -- array into the positions [l,k+l). The elements will be in -- no particular order. selectByBounds :: (PrimMonad m, MVector v e) => Comparison e -> v (PrimState m) e -> Int -- ^ number of elements to select, k -> Int -- ^ lower index, l -> Int -- ^ upper index, u -> m () selectByBounds cmp a k l u | l + k <= u = heapify cmp a l (l + k) >> go l (l + k) (u - 1) | otherwise = return () where go l m u | u < m = return () | otherwise = do el <- unsafeRead a l eu <- unsafeRead a u case cmp eu el of LT -> popTo cmp a l m u _ -> return () go l m (u - 1) {-# INLINE selectByBounds #-} -- | Moves the lowest k elements to the front of the array, sorted. -- -- The remaining values of the array will be in no particular order. partialSort :: (PrimMonad m, MVector v e, Ord e) => v (PrimState m) e -> Int -- ^ number of elements to sort, k -> m () partialSort = partialSortBy compare {-# INLINE partialSort #-} -- | Moves the lowest k elements (as defined by the comparison) to -- the front of the array, sorted. -- -- The remaining values of the array will be in no particular order. partialSortBy :: (PrimMonad m, MVector v e) => Comparison e -> v (PrimState m) e -> Int -- ^ number of elements to sort, k -> m () partialSortBy cmp a k = partialSortByBounds cmp a k 0 (length a) {-# INLINE partialSortBy #-} -- | Moves the lowest k elements in the portion [l,u) of the array -- into positions [l,k+l), sorted. -- -- The remaining values in [l,u) will be in no particular order. Values outside -- the range [l,u) will be unaffected. partialSortByBounds :: (PrimMonad m, MVector v e) => Comparison e -> v (PrimState m) e -> Int -- ^ number of elements to sort, k -> Int -- ^ lower index, l -> Int -- ^ upper index, u -> m () partialSortByBounds cmp a k l u -- this potentially does more work than absolutely required, -- but using a heap to find the least 2 of 4 elements -- seems unlikely to be better than just sorting all of them -- with an optimal sort, and the latter is obviously index -- correct. | len < 2 = return () | len == 2 = O.sort2ByOffset cmp a l | len == 3 = O.sort3ByOffset cmp a l | len == 4 = O.sort4ByOffset cmp a l | u <= l + k = sortByBounds cmp a l u | otherwise = do selectByBounds cmp a k l u sortHeap cmp a l (l + 4) (l + k) O.sort4ByOffset cmp a l where len = u - l {-# INLINE partialSortByBounds #-} -- | Constructs a heap in a portion of an array [l, u), using the values therein. -- -- Note: 'heapify' is more efficient than constructing a heap by repeated -- insertion. Repeated insertion has complexity O(n*log n) while 'heapify' is able -- to construct a heap in O(n), where n is the number of elements in the heap. heapify :: (PrimMonad m, MVector v e) => Comparison e -> v (PrimState m) e -> Int -- ^ lower index, l -> Int -- ^ upper index, u -> m () heapify cmp a l u = loop $ (len - 1) `shiftR` 2 where len = u - l loop k | k < 0 = return () | otherwise = unsafeRead a (l+k) >>= \e -> siftByOffset cmp a e l k len >> loop (k - 1) {-# INLINE heapify #-} -- | Given a heap stored in a portion of an array [l,u), swaps the -- top of the heap with the element at u and rebuilds the heap. pop :: (PrimMonad m, MVector v e) => Comparison e -> v (PrimState m) e -> Int -- ^ lower heap index, l -> Int -- ^ upper heap index, u -> m () pop cmp a l u = popTo cmp a l u u {-# INLINE pop #-} -- | Given a heap stored in a portion of an array [l,u) swaps the top -- of the heap with the element at position t, and rebuilds the heap. popTo :: (PrimMonad m, MVector v e) => Comparison e -> v (PrimState m) e -> Int -- ^ lower heap index, l -> Int -- ^ upper heap index, u -> Int -- ^ index to pop to, t -> m () popTo cmp a l u t = do al <- unsafeRead a l at <- unsafeRead a t unsafeWrite a t al siftByOffset cmp a at l 0 (u - l) {-# INLINE popTo #-} -- | Given a heap stored in a portion of an array [l,u), sorts the -- highest values into [m,u). The elements in [l,m) are not in any -- particular order. sortHeap :: (PrimMonad m, MVector v e) => Comparison e -> v (PrimState m) e -> Int -- ^ lower heap index, l -> Int -- ^ lower bound of final sorted portion, m -> Int -- ^ upper heap index, u -> m () sortHeap cmp a l m u = loop (u-1) >> unsafeSwap a l m where loop k | m < k = pop cmp a l k >> loop (k-1) | otherwise = return () {-# INLINE sortHeap #-} -- | Given a heap stored in a portion of an array [l,u) and an element e, -- inserts the element into the heap, resulting in a heap in [l,u]. -- -- Note: it is best to only use this operation when incremental construction of -- a heap is required. 'heapify' is capable of building a heap in O(n) time, -- while repeated insertion takes O(n*log n) time. heapInsert :: (PrimMonad m, MVector v e) => Comparison e -> v (PrimState m) e -> Int -- ^ lower heap index, l -> Int -- ^ upper heap index, u -> e -- ^ element to be inserted, e -> m () heapInsert cmp v l u e = sift (u - l) where sift k | k <= 0 = unsafeWrite v l e | otherwise = let pi = l + shiftR (k-1) 2 in unsafeRead v pi >>= \p -> case cmp p e of LT -> unsafeWrite v (l + k) p >> sift pi _ -> unsafeWrite v (l + k) e {-# INLINE heapInsert #-} -- Rebuilds a heap with a hole in it from start downwards. Afterward, -- the heap property should apply for [start + off, len + off). val -- is the new value to be put in the hole. siftByOffset :: (PrimMonad m, MVector v e) => Comparison e -> v (PrimState m) e -> e -> Int -> Int -> Int -> m () siftByOffset cmp a val off start len = sift val start len where sift val root len | child < len = do (child', ac) <- maximumChild cmp a off child len case cmp val ac of LT -> unsafeWrite a (root + off) ac >> sift val child' len _ -> unsafeWrite a (root + off) val | otherwise = unsafeWrite a (root + off) val where child = root `shiftL` 2 + 1 {-# INLINE siftByOffset #-} -- Finds the maximum child of a heap node, given the indx of the first child. maximumChild :: (PrimMonad m, MVector v e) => Comparison e -> v (PrimState m) e -> Int -> Int -> Int -> m (Int, e) maximumChild cmp a off child1 len | child4 < len = do ac1 <- unsafeRead a (child1 + off) ac2 <- unsafeRead a (child2 + off) ac3 <- unsafeRead a (child3 + off) ac4 <- unsafeRead a (child4 + off) return $ case cmp ac1 ac2 of LT -> case cmp ac2 ac3 of LT -> case cmp ac3 ac4 of LT -> (child4, ac4) _ -> (child3, ac3) _ -> case cmp ac2 ac4 of LT -> (child4, ac4) _ -> (child2, ac2) _ -> case cmp ac1 ac3 of LT -> case cmp ac3 ac4 of LT -> (child4, ac4) _ -> (child3, ac3) _ -> case cmp ac1 ac4 of LT -> (child4, ac4) _ -> (child1, ac1) | child3 < len = do ac1 <- unsafeRead a (child1 + off) ac2 <- unsafeRead a (child2 + off) ac3 <- unsafeRead a (child3 + off) return $ case cmp ac1 ac2 of LT -> case cmp ac2 ac3 of LT -> (child3, ac3) _ -> (child2, ac2) _ -> case cmp ac1 ac3 of LT -> (child3, ac3) _ -> (child1, ac1) | child2 < len = do ac1 <- unsafeRead a (child1 + off) ac2 <- unsafeRead a (child2 + off) return $ case cmp ac1 ac2 of LT -> (child2, ac2) _ -> (child1, ac1) | otherwise = do ac1 <- unsafeRead a (child1 + off) ; return (child1, ac1) where child2 = child1 + 1 child3 = child1 + 2 child4 = child1 + 3 {-# INLINE maximumChild #-} vector-algorithms-0.8.0.1/tests/0000755000000000000000000000000013357002426014661 5ustar0000000000000000vector-algorithms-0.8.0.1/tests/properties/0000755000000000000000000000000013357002426017055 5ustar0000000000000000vector-algorithms-0.8.0.1/tests/properties/Optimal.hs0000644000000000000000000000345413357002426021024 0ustar0000000000000000{-# LANGUAGE TypeOperators, FlexibleContexts #-} -- Exhaustive test sets for proper sorting and stability of -- optimal sorts module Optimal where import Control.Arrow import Control.Monad import Data.List import Data.Function import Data.Vector.Generic hiding (map, zip, concatMap, (++), replicate, foldM) interleavings :: [a] -> [a] -> [[a]] interleavings [ ] ys = [ys] interleavings xs [ ] = [xs] interleavings xs@(x:xt) ys@(y:yt) = map (x:) (interleavings xt ys) ++ map (y:) (interleavings xs yt) monotones :: Int -> Int -> [[Int]] monotones k = atLeastOne 0 where atLeastOne i 0 = [[]] atLeastOne i n = map (i:) $ picks i (n-1) picks _ 0 = [[]] picks i n | i >= k = [replicate n k] | otherwise = map (i:) (picks i (n-1)) ++ atLeastOne (i+1) n stability :: (Vector v (Int,Int)) => Int -> [v (Int, Int)] stability n = concatMap ( map fromList . foldM interleavings [] . groupBy ((==) `on` fst) . flip zip [0..]) $ monotones (n-2) n sort2 :: (Vector v Int) => [v Int] sort2 = map fromList $ permutations [0,1] stability2 :: (Vector v (Int,Int)) => [v (Int, Int)] stability2 = [fromList [(0, 0), (0, 1)]] sort3 :: (Vector v Int) => [v Int] sort3 = map fromList $ permutations [0..2] {- stability3 :: [UArr (Int :*: Int)] stability3 = map toU [ [0:*:0, 0:*:1, 0:*:2] , [0:*:0, 0:*:1, 1:*:2] , [0:*:0, 1:*:2, 0:*:1] , [1:*:2, 0:*:0, 0:*:1] , [0:*:0, 1:*:1, 1:*:2] , [1:*:1, 0:*:0, 1:*:2] , [1:*:1, 1:*:2, 0:*:0] ] -} sort4 :: (Vector v Int) => [v Int] sort4 = map fromList $ permutations [0..3] vector-algorithms-0.8.0.1/tests/properties/Util.hs0000644000000000000000000000122313357002426020324 0ustar0000000000000000{-# LANGUAGE TypeOperators #-} module Util where import Control.Monad import Control.Monad.ST import Data.Word import Data.Int import qualified Data.ByteString as B import qualified Data.Vector as V import Data.Vector.Mutable hiding (length) import Test.QuickCheck mfromList :: [e] -> ST s (MVector s e) mfromList l = do v <- new (length l) fill l 0 v where fill [] _ v = return v fill (x:xs) i v = do write v i x fill xs (i+1) v instance (Arbitrary e) => Arbitrary (V.Vector e) where arbitrary = fmap V.fromList arbitrary instance Arbitrary B.ByteString where arbitrary = B.pack `fmap` arbitrary vector-algorithms-0.8.0.1/tests/properties/Properties.hs0000644000000000000000000001456413357002426021557 0ustar0000000000000000{-# LANGUAGE RankNTypes, FlexibleContexts #-} module Properties where import Prelude import Optimal import Control.Monad import Control.Monad.ST import Data.List import Data.Ord import Data.Vector (Vector) import qualified Data.Vector as V import Data.Vector.Mutable (MVector) import qualified Data.Vector.Mutable as MV import Data.Vector.Generic (modify) import qualified Data.Vector.Generic.Mutable as G import Data.Vector.Algorithms.Optimal (Comparison) import Data.Vector.Algorithms.Radix (radix, passes, size) import qualified Data.Map as M import Test.QuickCheck hiding (Sorted) import Util prop_sorted :: (Ord e) => Vector e -> Property prop_sorted arr | V.length arr < 2 = property True | otherwise = check (V.head arr) (V.tail arr) where check e arr | V.null arr = property True | otherwise = e <= V.head arr .&. check (V.head arr) (V.tail arr) prop_empty :: (Ord e) => (forall s. MV.MVector s e -> ST s ()) -> Property prop_empty algo = prop_sorted (modify algo $ V.fromList []) prop_fullsort :: (Ord e) => (forall s mv. G.MVector mv e => mv s e -> ST s ()) -> Vector e -> Property prop_fullsort algo arr = prop_sorted $ modify algo arr {- prop_schwartzian :: (UA e, UA k, Ord k) => (e -> k) -> (forall e s. (UA e) => (e -> e -> Ordering) -> MUArr e s -> ST s ()) -> UArr e -> Property prop_schwartzian f algo arr | lengthU arr < 2 = property True | otherwise = let srt = modify (algo `usingKeys` f) arr in check (headU srt) (tailU srt) where check e arr | nullU arr = property True | otherwise = f e <= f (headU arr) .&. check (headU arr) (tailU arr) -} longGen :: (Arbitrary e) => Int -> Gen (Vector e) longGen k = liftM2 (\l r -> V.fromList (l ++ r)) (vectorOf k arbitrary) arbitrary sanity :: Int sanity = 100 prop_partialsort :: (Ord e, Arbitrary e, Show e) => (forall s mv. G.MVector mv e => mv s e -> Int -> ST s ()) -> Positive Int -> Property prop_partialsort = prop_sized $ \algo k -> prop_sorted . V.take k . modify algo prop_sized_empty :: (Ord e) => (forall s. MV.MVector s e -> Int -> ST s ()) -> Property prop_sized_empty algo = prop_empty (flip algo 0) .&&. prop_empty (flip algo 10) prop_select :: (Ord e, Arbitrary e, Show e) => (forall s mv. G.MVector mv e => mv s e -> Int -> ST s ()) -> Positive Int -> Property prop_select = prop_sized $ \algo k arr -> let vec' = modify algo arr l = V.slice 0 k vec' r = V.slice k (V.length vec' - k) vec' in V.all (\e -> V.all (e <=) r) l prop_sized :: (Arbitrary e, Show e, Testable prop) => ((forall s mv. G.MVector mv e => mv s e -> ST s ()) -> Int -> Vector e -> prop) -> (forall s mv. G.MVector mv e => mv s e -> Int -> ST s ()) -> Positive Int -> Property prop_sized prop algo (Positive k) = let k' = k `mod` sanity in forAll (longGen k') $ prop (\marr -> algo marr k') k' prop_stable :: (forall e s mv. G.MVector mv e => Comparison e -> mv s e -> ST s ()) -> Vector Int -> Property -- prop_stable algo arr = property $ modify algo arr == arr prop_stable algo arr = stable $ modify (algo (comparing fst)) $ V.zip arr ix where ix = V.fromList [1 .. V.length arr] stable arr | V.null arr = property True | otherwise = let (e, i) = V.head arr in V.all (\(e', i') -> e < e' || i < i') (V.tail arr) .&. stable (V.tail arr) prop_stable_radix :: (forall e s mv. G.MVector mv e => Int -> Int -> (Int -> e -> Int) -> mv s e -> ST s ()) -> Vector Int -> Property prop_stable_radix algo arr = stable . modify (algo (passes e) (size e) (\k (e, _) -> radix k e)) $ V.zip arr ix where ix = V.fromList [1 .. V.length arr] e = V.head arr prop_optimal :: Int -> (forall e s mv. G.MVector mv e => Comparison e -> mv s e -> Int -> ST s ()) -> Property prop_optimal n algo = label "sorting" sortn .&. label "stability" stabn where arrn = V.fromList [0..n-1] sortn = all ( (== arrn) . modify (\a -> algo compare a 0) . V.fromList) $ permutations [0..n-1] stabn = all ( (== arrn) . snd . V.unzip . modify (\a -> algo (comparing fst) a 0)) $ stability n type Bag e = M.Map e Int toBag :: (Ord e) => Vector e -> Bag e toBag = M.fromListWith (+) . flip zip (repeat 1) . V.toList prop_permutation :: (Ord e) => (forall s mv. G.MVector mv e => mv s e -> ST s ()) -> Vector e -> Property prop_permutation algo arr = property $ toBag arr == toBag (modify algo arr) newtype SortedVec e = Sorted (Vector e) instance (Show e) => Show (SortedVec e) where show (Sorted a) = show a instance (Arbitrary e, Ord e) => Arbitrary (SortedVec e) where arbitrary = fmap (Sorted . V.fromList . sort) $ liftM2 (++) (vectorOf 20 arbitrary) arbitrary ixRanges :: Vector e -> Gen (Int, Int) ixRanges vec = do i <- fmap (`mod` len) arbitrary j <- fmap (`mod` len) arbitrary return $ if i < j then (i, j) else (j, i) where len = V.length vec prop_search_inrange :: (Ord e) => (forall s. MVector s e -> e -> Int -> Int -> ST s Int) -> SortedVec e -> e -> Property prop_search_inrange algo (Sorted arr) e = forAll (ixRanges arr) $ \(i, j) -> let k = runST (mfromList (V.toList arr) >>= \marr -> algo marr e i j) in property $ i <= k && k <= j where len = V.length arr prop_search_insert :: (e -> e -> Bool) -> (e -> e -> Bool) -> (forall s. MVector s e -> e -> ST s Int) -> SortedVec e -> e -> Property prop_search_insert lo hi algo (Sorted arr) e = property $ (k == 0 || (arr V.! (k-1)) `lo` e) && (k == len || (arr V.! k) `hi` e) where len = V.length arr k = runST (mfromList (V.toList arr) >>= \marr -> algo marr e) prop_search_lowbound :: (Ord e) => (forall s. MVector s e -> e -> ST s Int) -> SortedVec e -> e -> Property prop_search_lowbound = prop_search_insert (<) (>=) prop_search_upbound :: (Ord e) => (forall s. MVector s e -> e -> ST s Int) -> SortedVec e -> e -> Property prop_search_upbound = prop_search_insert (<=) (>) vector-algorithms-0.8.0.1/tests/properties/Tests.hs0000644000000000000000000002272413357002426020522 0ustar0000000000000000{-# LANGUAGE RankNTypes, TypeOperators, FlexibleContexts #-} module Main (main) where import Properties import Util import Test.QuickCheck import Control.Monad import Control.Monad.ST import Data.Int import Data.Word import qualified Data.ByteString as B import Data.Vector (Vector) import qualified Data.Vector as V import Data.Vector.Generic.Mutable (MVector) import qualified Data.Vector.Generic.Mutable as MV import qualified Data.Vector.Algorithms.Insertion as INS import qualified Data.Vector.Algorithms.Intro as INT import qualified Data.Vector.Algorithms.Merge as M import qualified Data.Vector.Algorithms.Radix as R import qualified Data.Vector.Algorithms.Heap as H import qualified Data.Vector.Algorithms.Optimal as O import qualified Data.Vector.Algorithms.AmericanFlag as AF import qualified Data.Vector.Algorithms.Tim as T import qualified Data.Vector.Algorithms.Search as SR type Algo e r = forall s mv. MVector mv e => mv s e -> ST s r type SizeAlgo e r = forall s mv. MVector mv e => mv s e -> Int -> ST s r type BoundAlgo e r = forall s mv. MVector mv e => mv s e -> Int -> Int -> ST s r newtype WrappedAlgo e r = WrapAlgo { unWrapAlgo :: Algo e r } newtype WrappedSizeAlgo e r = WrapSizeAlgo { unWrapSizeAlgo :: SizeAlgo e r } newtype WrappedBoundAlgo e r = WrapBoundAlgo { unWrapBoundAlgo :: BoundAlgo e r } args = stdArgs { maxSuccess = 1000 , maxDiscardRatio = 2 } check_Int_sort = forM_ algos $ \(name,algo) -> quickCheckWith args (label name . prop_fullsort (unWrapAlgo algo)) where algos :: [(String, WrappedAlgo Int ())] algos = [ ("introsort", WrapAlgo INT.sort) , ("insertion sort", WrapAlgo INS.sort) , ("merge sort", WrapAlgo M.sort) , ("heapsort", WrapAlgo H.sort) , ("timsort", WrapAlgo T.sort) ] check_Int_partialsort = forM_ algos $ \(name,algo) -> quickCheckWith args (label name . prop_partialsort (unWrapSizeAlgo algo)) where algos :: [(String, WrappedSizeAlgo Int ())] algos = [ ("intro-partialsort", WrapSizeAlgo INT.partialSort) , ("heap partialsort", WrapSizeAlgo H.partialSort) ] check_Int_select = forM_ algos $ \(name,algo) -> quickCheckWith args (label name . prop_select (unWrapSizeAlgo algo)) where algos :: [(String, WrappedSizeAlgo Int ())] algos = [ ("intro-select", WrapSizeAlgo INT.select) , ("heap select", WrapSizeAlgo H.select) ] check_radix_sorts = do qc (label "radix Word8" . prop_fullsort (R.sort :: Algo Word8 ())) qc (label "radix Word16" . prop_fullsort (R.sort :: Algo Word16 ())) qc (label "radix Word32" . prop_fullsort (R.sort :: Algo Word32 ())) qc (label "radix Word64" . prop_fullsort (R.sort :: Algo Word64 ())) qc (label "radix Word" . prop_fullsort (R.sort :: Algo Word ())) qc (label "radix Int8" . prop_fullsort (R.sort :: Algo Int8 ())) qc (label "radix Int16" . prop_fullsort (R.sort :: Algo Int16 ())) qc (label "radix Int32" . prop_fullsort (R.sort :: Algo Int32 ())) qc (label "radix Int64" . prop_fullsort (R.sort :: Algo Int64 ())) qc (label "radix Int" . prop_fullsort (R.sort :: Algo Int ())) qc (label "radix (Int, Int)" . prop_fullsort (R.sort :: Algo (Int, Int) ())) qc (label "flag Word8" . prop_fullsort (AF.sort :: Algo Word8 ())) qc (label "flag Word16" . prop_fullsort (AF.sort :: Algo Word16 ())) qc (label "flag Word32" . prop_fullsort (AF.sort :: Algo Word32 ())) qc (label "flag Word64" . prop_fullsort (AF.sort :: Algo Word64 ())) qc (label "flag Word" . prop_fullsort (AF.sort :: Algo Word ())) qc (label "flag Int8" . prop_fullsort (AF.sort :: Algo Int8 ())) qc (label "flag Int16" . prop_fullsort (AF.sort :: Algo Int16 ())) qc (label "flag Int32" . prop_fullsort (AF.sort :: Algo Int32 ())) qc (label "flag Int64" . prop_fullsort (AF.sort :: Algo Int64 ())) qc (label "flag Int" . prop_fullsort (AF.sort :: Algo Int ())) qc (label "flag ByteString" . prop_fullsort (AF.sort :: Algo B.ByteString ())) where qc algo = quickCheckWith args algo {- check_schwartzian = do quickCheckWith args (prop_schwartzian i2w INS.sortBy) where i2w :: Int -> Word i2w = fromIntegral -} check_stable = do quickCheckWith args (label "merge sort" . prop_stable M.sortBy) quickCheckWith args (label "radix sort" . prop_stable_radix R.sortBy) quickCheckWith args (label "tim sort" . prop_stable T.sortBy) check_optimal = do qc . label "size 2" $ prop_optimal 2 O.sort2ByOffset qc . label "size 3" $ prop_optimal 3 O.sort3ByOffset qc . label "size 4" $ prop_optimal 4 O.sort4ByOffset where qc = quickCheck check_permutation = do qc $ label "introsort" . prop_permutation (INT.sort :: Algo Int ()) qc $ label "heapsort" . prop_permutation (H.sort :: Algo Int ()) qc $ label "mergesort" . prop_permutation (M.sort :: Algo Int ()) qc $ label "timsort" . prop_permutation (T.sort :: Algo Int ()) qc $ label "radix I8" . prop_permutation (R.sort :: Algo Int8 ()) qc $ label "radix I16" . prop_permutation (R.sort :: Algo Int16 ()) qc $ label "radix I32" . prop_permutation (R.sort :: Algo Int32 ()) qc $ label "radix I64" . prop_permutation (R.sort :: Algo Int64 ()) qc $ label "radix Int" . prop_permutation (R.sort :: Algo Int ()) qc $ label "radix W8" . prop_permutation (R.sort :: Algo Word8 ()) qc $ label "radix W16" . prop_permutation (R.sort :: Algo Word16 ()) qc $ label "radix W32" . prop_permutation (R.sort :: Algo Word32 ()) qc $ label "radix W64" . prop_permutation (R.sort :: Algo Word64 ()) qc $ label "radix Word" . prop_permutation (R.sort :: Algo Word ()) qc $ label "flag I8" . prop_permutation (AF.sort :: Algo Int8 ()) qc $ label "flag I16" . prop_permutation (AF.sort :: Algo Int16 ()) qc $ label "flag I32" . prop_permutation (AF.sort :: Algo Int32 ()) qc $ label "flag I64" . prop_permutation (AF.sort :: Algo Int64 ()) qc $ label "flag Int" . prop_permutation (AF.sort :: Algo Int ()) qc $ label "flag W8" . prop_permutation (AF.sort :: Algo Word8 ()) qc $ label "flag W16" . prop_permutation (AF.sort :: Algo Word16 ()) qc $ label "flag W32" . prop_permutation (AF.sort :: Algo Word32 ()) qc $ label "flag W64" . prop_permutation (AF.sort :: Algo Word64 ()) qc $ label "flag Word" . prop_permutation (AF.sort :: Algo Word ()) qc $ label "flag ByteString" . prop_permutation (AF.sort :: Algo B.ByteString ()) qc $ label "intropartial" . prop_sized (\x -> const (prop_permutation x)) (INT.partialSort :: SizeAlgo Int ()) qc $ label "introselect" . prop_sized (\x -> const (prop_permutation x)) (INT.select :: SizeAlgo Int ()) qc $ label "heappartial" . prop_sized (\x -> const (prop_permutation x)) (H.partialSort :: SizeAlgo Int ()) qc $ label "heapselect" . prop_sized (\x -> const (prop_permutation x)) (H.select :: SizeAlgo Int ()) where qc prop = quickCheckWith args prop check_corners = do qc "introsort empty" $ prop_empty (INT.sort :: Algo Int ()) qc "intropartial empty" $ prop_sized_empty (INT.partialSort :: SizeAlgo Int ()) qc "introselect empty" $ prop_sized_empty (INT.select :: SizeAlgo Int ()) qc "heapsort empty" $ prop_empty (H.sort :: Algo Int ()) qc "heappartial empty" $ prop_sized_empty (H.partialSort :: SizeAlgo Int ()) qc "heapselect empty" $ prop_sized_empty (H.select :: SizeAlgo Int ()) qc "mergesort empty" $ prop_empty (M.sort :: Algo Int ()) qc "timsort empty" $ prop_empty (T.sort :: Algo Int ()) qc "radixsort empty" $ prop_empty (R.sort :: Algo Int ()) qc "flagsort empty" $ prop_empty (AF.sort :: Algo Int ()) where qc s prop = quickCheckWith (stdArgs { maxSuccess = 2 }) (label s prop) type SAlgo e r = forall s mv. MVector mv e => mv s e -> e -> ST s r type BoundSAlgo e r = forall s mv. MVector mv e => mv s e -> e -> Int -> Int -> ST s r check_search_range = do qc $ (label "binarySearchL" .) . prop_search_inrange (SR.binarySearchLByBounds compare :: BoundSAlgo Int Int) qc $ (label "binarySearchL lo-bound" .) . prop_search_lowbound (SR.binarySearchL :: SAlgo Int Int) qc $ (label "binarySearch" .) . prop_search_inrange (SR.binarySearchByBounds compare :: BoundSAlgo Int Int) qc $ (label "binarySearchR" .) . prop_search_inrange (SR.binarySearchRByBounds compare :: BoundSAlgo Int Int) qc $ (label "binarySearchR hi-bound" .) . prop_search_upbound (SR.binarySearchR :: SAlgo Int Int) where qc prop = quickCheckWith args prop main = do putStrLn "Int tests:" check_Int_sort check_Int_partialsort check_Int_select putStrLn "Radix sort tests:" check_radix_sorts -- putStrLn "Schwartzian transform (Int -> Word):" -- check_schwartzian putStrLn "Stability:" check_stable putStrLn "Optimals:" check_optimal putStrLn "Permutation:" check_permutation putStrLn "Search in range:" check_search_range putStrLn "Corner cases:" check_corners vector-algorithms-0.8.0.1/bench/0000755000000000000000000000000013357002426014576 5ustar0000000000000000vector-algorithms-0.8.0.1/bench/simple/0000755000000000000000000000000013357002426016067 5ustar0000000000000000vector-algorithms-0.8.0.1/bench/simple/Main.hs0000644000000000000000000001647213357002426017321 0ustar0000000000000000{-# LANGUAGE Rank2Types #-} module Main (main) where import Prelude hiding (read, length) import qualified Prelude as P import Control.Monad import Control.Monad.ST import Data.Char import Data.Ord (comparing) import Data.List (maximumBy) import Data.Vector.Unboxed.Mutable import qualified Data.Vector.Algorithms.Insertion as INS import qualified Data.Vector.Algorithms.Intro as INT import qualified Data.Vector.Algorithms.Heap as H import qualified Data.Vector.Algorithms.Merge as M import qualified Data.Vector.Algorithms.Radix as R import qualified Data.Vector.Algorithms.AmericanFlag as AF import qualified Data.Vector.Algorithms.Tim as T import System.Environment import System.Console.GetOpt import System.Random.MWC import Blocks -- Does nothing. For testing the speed/heap allocation of the building blocks. noalgo :: (Unbox e) => MVector RealWorld e -> IO () noalgo _ = return () -- Allocates a temporary buffer, like mergesort for similar purposes as noalgo. alloc :: (Unbox e) => MVector RealWorld e -> IO () alloc arr | len <= 4 = arr `seq` return () | otherwise = (new (len `div` 2) :: IO (MVector RealWorld Int)) >> return () where len = length arr displayTime :: String -> Integer -> IO () displayTime s elapsed = putStrLn $ s ++ " : " ++ show (fromIntegral elapsed / (1e12 :: Double)) ++ " seconds" run :: String -> IO Integer -> IO () run s t = t >>= displayTime s sortSuite :: String -> GenIO -> Int -> (MVector RealWorld Int -> IO ()) -> IO () sortSuite str g n sort = do arr <- new n putStrLn $ "Testing: " ++ str run "Random " $ speedTest arr n (rand g >=> modulo n) sort run "Sorted " $ speedTest arr n ascend sort run "Reverse-sorted " $ speedTest arr n (descend n) sort run "Random duplicates " $ speedTest arr n (rand g >=> modulo 1000) sort let m = 4 * (n `div` 4) run "Median killer " $ speedTest arr m (medianKiller m) sort partialSortSuite :: String -> GenIO -> Int -> Int -> (MVector RealWorld Int -> Int -> IO ()) -> IO () partialSortSuite str g n k sort = sortSuite str g n (\a -> sort a k) -- ----------------- -- Argument handling -- ----------------- data Algorithm = DoNothing | Allocate | InsertionSort | IntroSort | IntroPartialSort | IntroSelect | HeapSort | HeapPartialSort | HeapSelect | MergeSort | RadixSort | AmericanFlagSort | TimSort deriving (Show, Read, Enum, Bounded) data Options = O { algos :: [Algorithm], elems :: Int, portion :: Int, usage :: Bool } deriving (Show) defaultOptions :: Options defaultOptions = O [] 10000 1000 False type OptionsT = Options -> Either String Options options :: [OptDescr OptionsT] options = [ Option ['A'] ["algorithm"] (ReqArg parseAlgo "ALGO") ("Specify an algorithm to be run. Options:\n" ++ algoOpts) , Option ['n'] ["num-elems"] (ReqArg parseN "INT") "Specify the size of arrays in algorithms." , Option ['k'] ["portion"] (ReqArg parseK "INT") "Specify the number of elements to partial sort/select in\nrelevant algorithms." , Option ['?','v'] ["help"] (NoArg $ \o -> Right $ o { usage = True }) "Show options." ] where allAlgos :: [Algorithm] allAlgos = [minBound .. maxBound] algoOpts = fmt allAlgos fmt (x:y:zs) = '\t' : pad (show x) ++ show y ++ "\n" ++ fmt zs fmt [x] = '\t' : show x ++ "\n" fmt [] = "" size = (" " ++) . maximumBy (comparing P.length) . map show $ allAlgos pad str = zipWith const (str ++ repeat ' ') size parseAlgo :: String -> Options -> Either String Options parseAlgo "None" o = Right $ o { algos = [] } parseAlgo "All" o = Right $ o { algos = [DoNothing .. AmericanFlagSort] } parseAlgo s o = leftMap (\e -> "Unrecognized algorithm `" ++ e ++ "'") . fmap (\v -> o { algos = v : algos o }) $ readEither s leftMap :: (a -> b) -> Either a c -> Either b c leftMap f (Left a) = Left (f a) leftMap _ (Right c) = Right c parseNum :: (Int -> Options) -> String -> Either String Options parseNum f = leftMap (\e -> "Invalid numeric argument `" ++ e ++ "'") . fmap f . readEither parseN, parseK :: String -> Options -> Either String Options parseN s o = parseNum (\n -> o { elems = n }) s parseK s o = parseNum (\k -> o { portion = k }) s readEither :: Read a => String -> Either String a readEither s = case reads s of [(x,t)] | all isSpace t -> Right x _ -> Left s runTest :: GenIO -> Int -> Int -> Algorithm -> IO () runTest g n k alg = case alg of DoNothing -> sortSuite "no algorithm" g n noalgo Allocate -> sortSuite "allocate" g n alloc InsertionSort -> sortSuite "insertion sort" g n insertionSort IntroSort -> sortSuite "introsort" g n introSort IntroPartialSort -> partialSortSuite "partial introsort" g n k introPSort IntroSelect -> partialSortSuite "introselect" g n k introSelect HeapSort -> sortSuite "heap sort" g n heapSort HeapPartialSort -> partialSortSuite "partial heap sort" g n k heapPSort HeapSelect -> partialSortSuite "heap select" g n k heapSelect MergeSort -> sortSuite "merge sort" g n mergeSort RadixSort -> sortSuite "radix sort" g n radixSort AmericanFlagSort -> sortSuite "flag sort" g n flagSort TimSort -> sortSuite "tim sort" g n timSort mergeSort :: MVector RealWorld Int -> IO () mergeSort v = M.sort v {-# NOINLINE mergeSort #-} introSort :: MVector RealWorld Int -> IO () introSort v = INT.sort v {-# NOINLINE introSort #-} introPSort :: MVector RealWorld Int -> Int -> IO () introPSort v k = INT.partialSort v k {-# NOINLINE introPSort #-} introSelect :: MVector RealWorld Int -> Int -> IO () introSelect v k = INT.select v k {-# NOINLINE introSelect #-} heapSort :: MVector RealWorld Int -> IO () heapSort v = H.sort v {-# NOINLINE heapSort #-} heapPSort :: MVector RealWorld Int -> Int -> IO () heapPSort v k = H.partialSort v k {-# NOINLINE heapPSort #-} heapSelect :: MVector RealWorld Int -> Int -> IO () heapSelect v k = H.select v k {-# NOINLINE heapSelect #-} insertionSort :: MVector RealWorld Int -> IO () insertionSort v = INS.sort v {-# NOINLINE insertionSort #-} radixSort :: MVector RealWorld Int -> IO () radixSort v = R.sort v {-# NOINLINE radixSort #-} flagSort :: MVector RealWorld Int -> IO () flagSort v = AF.sort v {-# NOINLINE flagSort #-} timSort :: MVector RealWorld Int -> IO () timSort v = T.sort v {-# NOINLINE timSort #-} main :: IO () main = getArgs >>= \args -> withSystemRandom $ \gen -> case getOpt Permute options args of (fs, _, []) -> case foldl (>>=) (Right defaultOptions) fs of Left err -> putStrLn $ usageInfo err options Right opts | not (usage opts) -> mapM_ (runTest gen (elems opts) (portion opts)) (algos opts) | otherwise -> putStrLn $ usageInfo "vector-algorithms-bench" options (_, _, errs) -> putStrLn $ usageInfo (concat errs) options vector-algorithms-0.8.0.1/bench/simple/Blocks.hs0000644000000000000000000000316513357002426017645 0ustar0000000000000000{-# LANGUAGE Rank2Types #-} module Blocks where import Control.Monad import Control.Monad.ST import Data.Vector.Unboxed.Mutable import System.CPUTime import System.Random.MWC (GenIO, Variate(..)) -- Some conveniences for doing evil stuff in the ST monad. -- All the tests get run in IO, but uvector stuff happens -- in ST, so we temporarily coerce. clock :: IO Integer clock = getCPUTime -- Strategies for filling the initial arrays rand :: Variate e => GenIO -> Int -> IO e rand g _ = uniform g ascend :: Num e => Int -> IO e ascend = return . fromIntegral descend :: Num e => e -> Int -> IO e descend m n = return $ m - fromIntegral n modulo :: Integral e => e -> Int -> IO e modulo m n = return $ fromIntegral n `mod` m -- This is the worst case for the median-of-three quicksort -- used in the introsort implementation. medianKiller :: Integral e => e -> Int -> IO e medianKiller m n' | n < k = return $ if even n then n + 1 else n + k | otherwise = return $ (n - k + 1) * 2 where n = fromIntegral n' k = m `div` 2 {-# INLINE medianKiller #-} initialize :: (Unbox e) => MVector RealWorld e -> Int -> (Int -> IO e) -> IO () initialize arr len fill = initial $ len - 1 where initial n = fill n >>= unsafeWrite arr n >> when (n > 0) (initial $ n - 1) {-# INLINE initialize #-} speedTest :: (Unbox e) => MVector RealWorld e -> Int -> (Int -> IO e) -> (MVector RealWorld e -> IO ()) -> IO Integer speedTest arr n fill algo = do initialize arr n fill t0 <- clock algo arr t1 <- clock return $ t1 - t0 {-# INLINE speedTest #-}