repa-3.4.1.4/Data/0000755000000000000000000000000012556111213011644 5ustar0000000000000000repa-3.4.1.4/Data/Array/0000755000000000000000000000000012556111213012722 5ustar0000000000000000repa-3.4.1.4/Data/Array/Repa/0000755000000000000000000000000013354717731013627 5ustar0000000000000000repa-3.4.1.4/Data/Array/Repa/Eval/0000755000000000000000000000000013053313255014503 5ustar0000000000000000repa-3.4.1.4/Data/Array/Repa/Operators/0000755000000000000000000000000013146443554015603 5ustar0000000000000000repa-3.4.1.4/Data/Array/Repa/Repr/0000755000000000000000000000000013053313255014524 5ustar0000000000000000repa-3.4.1.4/Data/Array/Repa/Specialised/0000755000000000000000000000000013252414734016046 5ustar0000000000000000repa-3.4.1.4/Data/Array/Repa/Stencil/0000755000000000000000000000000013053313255015215 5ustar0000000000000000repa-3.4.1.4/Data/Array/Repa/Eval/Gang.hs0000644000000000000000000001622112556111213015712 0ustar0000000000000000{-# LANGUAGE CPP #-} -- | Gang Primitives. module Data.Array.Repa.Eval.Gang ( theGang , Gang, forkGang, gangSize, gangIO, gangST) where import GHC.IO import GHC.ST import GHC.Conc (forkOn) import Control.Concurrent.MVar import Control.Exception (assert) import Control.Monad import GHC.Conc (numCapabilities) import System.IO -- TheGang -------------------------------------------------------------------- -- | This globally shared gang is auto-initialised at startup and shared by all -- Repa computations. -- -- In a data parallel setting, it does not help to have multiple gangs -- running at the same time. This is because a single data parallel -- computation should already be able to keep all threads busy. If we had -- multiple gangs running at the same time, then the system as a whole would -- run slower as the gangs would contend for cache and thrash the scheduler. -- -- If, due to laziness or otherwise, you try to start multiple parallel -- Repa computations at the same time, then you will get the following -- warning on stderr at runtime: -- -- @Data.Array.Repa: Performing nested parallel computation sequentially. -- You've probably called the 'compute' or 'copy' function while another -- instance was already running. This can happen if the second version -- was suspended due to lazy evaluation. Use 'deepSeqArray' to ensure that -- each array is fully evaluated before you 'compute' the next one. -- @ -- theGang :: Gang {-# NOINLINE theGang #-} theGang = unsafePerformIO $ do let caps = numCapabilities forkGang caps -- Requests ------------------------------------------------------------------- -- | The 'Req' type encapsulates work requests for individual members of a gang. data Req -- | Instruct the worker to run the given action. = ReqDo (Int -> IO ()) -- | Tell the worker that we're shutting the gang down. -- The worker should signal that it's receieved the request by -- writing to its result var before returning to the caller (forkGang). | ReqShutdown -- Gang ----------------------------------------------------------------------- -- | A 'Gang' is a group of threads that execute arbitrary work requests. data Gang = Gang { -- | Number of threads in the gang. _gangThreads :: !Int -- | Workers listen for requests on these vars. , _gangRequestVars :: [MVar Req] -- | Workers put their results in these vars. , _gangResultVars :: [MVar ()] -- | Indicates that the gang is busy. , _gangBusy :: MVar Bool } instance Show Gang where showsPrec p (Gang n _ _ _) = showString "<<" . showsPrec p n . showString " threads>>" -- | O(1). Yield the number of threads in the 'Gang'. gangSize :: Gang -> Int gangSize (Gang n _ _ _) = n -- | Fork a 'Gang' with the given number of threads (at least 1). forkGang :: Int -> IO Gang forkGang n = assert (n > 0) $ do -- Create the vars we'll use to issue work requests. mvsRequest <- sequence $ replicate n $ newEmptyMVar -- Create the vars we'll use to signal that threads are done. mvsDone <- sequence $ replicate n $ newEmptyMVar -- Add finalisers so we can shut the workers down cleanly if they -- become unreachable. zipWithM_ (\varReq varDone -> mkWeakMVar varReq (finaliseWorker varReq varDone)) mvsRequest mvsDone -- Create all the worker threads zipWithM_ forkOn [0..] $ zipWith3 gangWorker [0 .. n-1] mvsRequest mvsDone -- The gang is currently idle. busy <- newMVar False return $ Gang n mvsRequest mvsDone busy -- | The worker thread of a 'Gang'. -- The threads blocks on the MVar waiting for a work request. gangWorker :: Int -> MVar Req -> MVar () -> IO () gangWorker threadId varRequest varDone = do -- Wait for a request req <- takeMVar varRequest case req of ReqDo action -> do -- Run the action we were given. action threadId -- Signal that the action is complete. putMVar varDone () -- Wait for more requests. gangWorker threadId varRequest varDone ReqShutdown -> putMVar varDone () -- | Finaliser for worker threads. -- We want to shutdown the corresponding thread when it's MVar becomes -- unreachable. -- Without this Repa programs can complain about "Blocked indefinitely -- on an MVar" because worker threads are still blocked on the request -- MVars when the program ends. Whether the finalizer is called or not -- is very racey. It happens about 1 in 10 runs when for the -- repa-edgedetect benchmark, and less often with the others. -- -- We're relying on the comment in System.Mem.Weak that says -- "If there are no other threads to run, the runtime system will -- check for runnablefinalizers before declaring the system to be -- deadlocked." -- -- If we were creating and destroying the gang cleanly we wouldn't need -- this, but theGang is created with a top-level unsafePerformIO. -- Hacks beget hacks beget hacks... -- finaliseWorker :: MVar Req -> MVar () -> IO () finaliseWorker varReq varDone = do putMVar varReq ReqShutdown takeMVar varDone return () -- | Issue work requests for the 'Gang' and wait until they complete. -- -- If the gang is already busy then print a warning to `stderr` and just -- run the actions sequentially in the requesting thread. gangIO :: Gang -> (Int -> IO ()) -> IO () {-# NOINLINE gangIO #-} gangIO gang@(Gang _ _ _ busy) action = do b <- swapMVar busy True if b then do seqIO gang action else do parIO gang action _ <- swapMVar busy False return () -- | Run an action on the gang sequentially. seqIO :: Gang -> (Int -> IO ()) -> IO () seqIO (Gang n _ _ _) action = do hPutStr stderr $ unlines [ "Data.Array.Repa: Performing nested parallel computation sequentially." , " You've probably called the 'compute' or 'copy' function while another" , " instance was already running. This can happen if the second version" , " was suspended due to lazy evaluation. Use 'deepSeqArray' to ensure" , " that each array is fully evaluated before you 'compute' the next one." , "" ] mapM_ action [0 .. n-1] -- | Run an action on the gang in parallel. parIO :: Gang -> (Int -> IO ()) -> IO () parIO (Gang _ mvsRequest mvsResult _) action = do -- Send requests to all the threads. mapM_ (\v -> putMVar v (ReqDo action)) mvsRequest -- Wait for all the requests to complete. mapM_ takeMVar mvsResult -- | Same as 'gangIO' but in the 'ST' monad. gangST :: Gang -> (Int -> ST s ()) -> ST s () gangST g p = unsafeIOToST . gangIO g $ unsafeSTToIO . p repa-3.4.1.4/Data/Array/Repa/Operators/IndexSpace.hs0000644000000000000000000001414713146443554020171 0ustar0000000000000000{-# LANGUAGE TypeOperators, ExplicitForAll, FlexibleContexts #-} module Data.Array.Repa.Operators.IndexSpace ( reshape , append, (++) , transpose , extract , backpermute, unsafeBackpermute , backpermuteDft, unsafeBackpermuteDft , extend, unsafeExtend , slice, unsafeSlice) where import Data.Array.Repa.Index import Data.Array.Repa.Slice import Data.Array.Repa.Base import Data.Array.Repa.Repr.Delayed import Data.Array.Repa.Operators.Traversal import Data.Array.Repa.Shape as S import Prelude hiding ((++), traverse) import qualified Prelude as P stage = "Data.Array.Repa.Operators.IndexSpace" -- Index space transformations ------------------------------------------------ -- | Impose a new shape on the elements of an array. -- The new extent must be the same size as the original, else `error`. reshape :: ( Shape sh1, Shape sh2 , Source r1 e) => sh2 -> Array r1 sh1 e -> Array D sh2 e reshape sh2 arr | not $ S.size sh2 == S.size (extent arr) = error $ stage P.++ ".reshape: reshaped array will not match size of the original" reshape sh2 arr = fromFunction sh2 $ unsafeIndex arr . fromIndex (extent arr) . toIndex sh2 {-# INLINE [2] reshape #-} -- | Append two arrays. append, (++) :: ( Shape sh , Source r1 e, Source r2 e) => Array r1 (sh :. Int) e -> Array r2 (sh :. Int) e -> Array D (sh :. Int) e append arr1 arr2 = unsafeTraverse2 arr1 arr2 fnExtent fnElem where (_ :. n) = extent arr1 fnExtent (sh1 :. i) (sh2 :. j) = intersectDim sh1 sh2 :. (i + j) fnElem f1 f2 (sh :. i) | i < n = f1 (sh :. i) | otherwise = f2 (sh :. (i - n)) {-# INLINE [2] append #-} (++) arr1 arr2 = append arr1 arr2 {-# INLINE (++) #-} -- | Transpose the lowest two dimensions of an array. -- Transposing an array twice yields the original. transpose :: (Shape sh, Source r e) => Array r (sh :. Int :. Int) e -> Array D (sh :. Int :. Int) e transpose arr = unsafeTraverse arr (\(sh :. m :. n) -> (sh :. n :.m)) (\f -> \(sh :. i :. j) -> f (sh :. j :. i)) {-# INLINE [2] transpose #-} -- | Extract a sub-range of elements from an array. extract :: (Shape sh, Source r e) => sh -- ^ Starting index. -> sh -- ^ Size of result. -> Array r sh e -> Array D sh e extract start sz arr = fromFunction sz (\ix -> arr `unsafeIndex` (addDim start ix)) {-# INLINE [2] extract #-} -- | Backwards permutation of an array's elements. backpermute, unsafeBackpermute :: forall r sh1 sh2 e . ( Shape sh1 , Source r e) => sh2 -- ^ Extent of result array. -> (sh2 -> sh1) -- ^ Function mapping each index in the result array -- to an index of the source array. -> Array r sh1 e -- ^ Source array. -> Array D sh2 e backpermute newExtent perm arr = traverse arr (const newExtent) (. perm) {-# INLINE [2] backpermute #-} unsafeBackpermute newExtent perm arr = unsafeTraverse arr (const newExtent) (. perm) {-# INLINE [2] unsafeBackpermute #-} -- | Default backwards permutation of an array's elements. -- If the function returns `Nothing` then the value at that index is taken -- from the default array (@arrDft@) backpermuteDft, unsafeBackpermuteDft :: forall r1 r2 sh1 sh2 e . ( Shape sh1, Shape sh2 , Source r1 e, Source r2 e) => Array r2 sh2 e -- ^ Default values (@arrDft@) -> (sh2 -> Maybe sh1) -- ^ Function mapping each index in the result array -- to an index in the source array. -> Array r1 sh1 e -- ^ Source array. -> Array D sh2 e backpermuteDft arrDft fnIndex arrSrc = fromFunction (extent arrDft) fnElem where fnElem ix = case fnIndex ix of Just ix' -> arrSrc `index` ix' Nothing -> arrDft `index` ix {-# INLINE [2] backpermuteDft #-} unsafeBackpermuteDft arrDft fnIndex arrSrc = fromFunction (extent arrDft) fnElem where fnElem ix = case fnIndex ix of Just ix' -> arrSrc `unsafeIndex` ix' Nothing -> arrDft `unsafeIndex` ix {-# INLINE [2] unsafeBackpermuteDft #-} -- | Extend an array, according to a given slice specification. -- -- For example, to replicate the rows of an array use the following: -- -- @extend (Any :. (5::Int) :. All) arr@ -- extend, unsafeExtend :: ( Slice sl , Shape (SliceShape sl) , Source r e) => sl -> Array r (SliceShape sl) e -> Array D (FullShape sl) e extend sl arr = backpermute (fullOfSlice sl (extent arr)) (sliceOfFull sl) arr {-# INLINE [2] extend #-} unsafeExtend sl arr = unsafeBackpermute (fullOfSlice sl (extent arr)) (sliceOfFull sl) arr {-# INLINE [2] unsafeExtend #-} -- | Take a slice from an array, according to a given specification. -- -- For example, to take a row from a matrix use the following: -- -- @slice arr (Any :. (5::Int) :. All)@ -- -- To take a column use: -- -- @slice arr (Any :. (5::Int))@ -- slice, unsafeSlice :: ( Slice sl , Shape (FullShape sl) , Source r e) => Array r (FullShape sl) e -> sl -> Array D (SliceShape sl) e slice arr sl = backpermute (sliceOfFull sl (extent arr)) (fullOfSlice sl) arr {-# INLINE [2] slice #-} unsafeSlice arr sl = unsafeBackpermute (sliceOfFull sl (extent arr)) (fullOfSlice sl) arr {-# INLINE [2] unsafeSlice #-} repa-3.4.1.4/Data/Array/Repa/Operators/Interleave.hs0000644000000000000000000000672312556111213020231 0ustar0000000000000000{-# LANGUAGE TypeOperators, ExplicitForAll, FlexibleContexts #-} module Data.Array.Repa.Operators.Interleave ( interleave2 , interleave3 , interleave4) where import Data.Array.Repa.Shape import Data.Array.Repa.Index import Data.Array.Repa.Base import Data.Array.Repa.Repr.Delayed import Data.Array.Repa.Operators.Traversal import Prelude hiding ((++)) -- Interleave ----------------------------------------------------------------- -- | Interleave the elements of two arrays. -- All the input arrays must have the same extent, else `error`. -- The lowest dimension of the result array is twice the size of the inputs. -- -- @ -- interleave2 a1 a2 b1 b2 => a1 b1 a2 b2 -- a3 a4 b3 b4 a3 b3 a4 b4 -- @ -- interleave2 :: ( Shape sh , Source r1 a, Source r2 a) => Array r1 (sh :. Int) a -> Array r2 (sh :. Int) a -> Array D (sh :. Int) a {-# INLINE [2] interleave2 #-} interleave2 arr1 arr2 = unsafeTraverse2 arr1 arr2 shapeFn elemFn where shapeFn dim1 dim2 | dim1 == dim2 , sh :. len <- dim1 = sh :. (len * 2) | otherwise = error "Data.Array.Repa.interleave2: arrays must have same extent" elemFn get1 get2 (sh :. ix) = case ix `mod` 2 of 0 -> get1 (sh :. ix `div` 2) 1 -> get2 (sh :. ix `div` 2) _ -> error "Data.Array.Repa.interleave2: this never happens :-P" -- | Interleave the elements of three arrays. interleave3 :: ( Shape sh , Source r1 a, Source r2 a, Source r3 a) => Array r1 (sh :. Int) a -> Array r2 (sh :. Int) a -> Array r3 (sh :. Int) a -> Array D (sh :. Int) a {-# INLINE [2] interleave3 #-} interleave3 arr1 arr2 arr3 = unsafeTraverse3 arr1 arr2 arr3 shapeFn elemFn where shapeFn dim1 dim2 dim3 | dim1 == dim2 , dim1 == dim3 , sh :. len <- dim1 = sh :. (len * 3) | otherwise = error "Data.Array.Repa.interleave3: arrays must have same extent" elemFn get1 get2 get3 (sh :. ix) = case ix `mod` 3 of 0 -> get1 (sh :. ix `div` 3) 1 -> get2 (sh :. ix `div` 3) 2 -> get3 (sh :. ix `div` 3) _ -> error "Data.Array.Repa.interleave3: this never happens :-P" -- | Interleave the elements of four arrays. interleave4 :: ( Shape sh , Source r1 a, Source r2 a, Source r3 a, Source r4 a) => Array r1 (sh :. Int) a -> Array r2 (sh :. Int) a -> Array r3 (sh :. Int) a -> Array r4 (sh :. Int) a -> Array D (sh :. Int) a {-# INLINE [2] interleave4 #-} interleave4 arr1 arr2 arr3 arr4 = unsafeTraverse4 arr1 arr2 arr3 arr4 shapeFn elemFn where shapeFn dim1 dim2 dim3 dim4 | dim1 == dim2 , dim1 == dim3 , dim1 == dim4 , sh :. len <- dim1 = sh :. (len * 4) | otherwise = error "Data.Array.Repa.interleave4: arrays must have same extent" elemFn get1 get2 get3 get4 (sh :. ix) = case ix `mod` 4 of 0 -> get1 (sh :. ix `div` 4) 1 -> get2 (sh :. ix `div` 4) 2 -> get3 (sh :. ix `div` 4) 3 -> get4 (sh :. ix `div` 4) _ -> error "Data.Array.Repa.interleave4: this never happens :-P" repa-3.4.1.4/Data/Array/Repa/Operators/Mapping.hs0000644000000000000000000001330512556111213017520 0ustar0000000000000000{-# LANGUAGE FunctionalDependencies, UndecidableInstances #-} module Data.Array.Repa.Operators.Mapping ( -- * Generic maps map , zipWith , (+^), (-^), (*^), (/^) -- * Structured maps , Structured(..)) where import Data.Array.Repa.Shape import Data.Array.Repa.Base import Data.Array.Repa.Repr.ByteString import Data.Array.Repa.Repr.Cursored import Data.Array.Repa.Repr.Delayed import Data.Array.Repa.Repr.ForeignPtr import Data.Array.Repa.Repr.HintSmall import Data.Array.Repa.Repr.HintInterleave import Data.Array.Repa.Repr.Partitioned import Data.Array.Repa.Repr.Unboxed import Data.Array.Repa.Repr.Undefined import Prelude hiding (map, zipWith) import Foreign.Storable import Data.Word -- | Apply a worker function to each element of an array, -- yielding a new array with the same extent. -- map :: (Shape sh, Source r a) => (a -> b) -> Array r sh a -> Array D sh b map f arr = case delay arr of ADelayed sh g -> ADelayed sh (f . g) {-# INLINE [3] map #-} -- ZipWith -------------------------------------------------------------------- -- | Combine two arrays, element-wise, with a binary operator. -- If the extent of the two array arguments differ, -- then the resulting array's extent is their intersection. -- zipWith :: (Shape sh, Source r1 a, Source r2 b) => (a -> b -> c) -> Array r1 sh a -> Array r2 sh b -> Array D sh c zipWith f arr1 arr2 = let get ix = f (arr1 `unsafeIndex` ix) (arr2 `unsafeIndex` ix) {-# INLINE get #-} in fromFunction (intersectDim (extent arr1) (extent arr2)) get {-# INLINE [2] zipWith #-} infixl 7 *^, /^ infixl 6 +^, -^ (+^) = zipWith (+) {-# INLINE (+^) #-} (-^) = zipWith (-) {-# INLINE (-^) #-} (*^) = zipWith (*) {-# INLINE (*^) #-} (/^) = zipWith (/) {-# INLINE (/^) #-} -- Structured ------------------------------------------------------------------- -- | Structured versions of @map@ and @zipWith@ that preserve the representation -- of cursored and partitioned arrays. -- -- For cursored (@C@) arrays, the cursoring of the source array is preserved. -- -- For partitioned (@P@) arrays, the worker function is fused with each array -- partition separately, instead of treating the whole array as a single -- bulk object. -- -- Preserving the cursored and\/or paritioned representation of an array -- is will make follow-on computation more efficient than if the array was -- converted to a vanilla Delayed (@D@) array as with plain `map` and `zipWith`. -- -- If the source array is not cursored or partitioned then `smap` and -- `szipWith` are identical to the plain functions. -- class Structured r1 a b where -- | The target result representation. type TR r1 -- | Structured @map@. smap :: Shape sh => (a -> b) -> Array r1 sh a -> Array (TR r1) sh b -- | Structured @zipWith@. -- If you have a cursored or partitioned source array then use that as -- the third argument (corresponding to @r1@ here) szipWith :: (Shape sh, Source r c) => (c -> a -> b) -> Array r sh c -> Array r1 sh a -> Array (TR r1) sh b -- ByteString ------------------------- instance Structured B Word8 b where type TR B = D smap = map szipWith = zipWith -- Cursored --------------------------- instance Structured C a b where type TR C = C smap f (ACursored sh makec shiftc loadc) = ACursored sh makec shiftc (f . loadc) {-# INLINE [3] smap #-} szipWith f arr1 (ACursored sh makec shiftc loadc) = let makec' ix = (ix, makec ix) {-# INLINE makec' #-} shiftc' off (ix, cur) = (addDim off ix, shiftc off cur) {-# INLINE shiftc' #-} load' (ix, cur) = f (arr1 `unsafeIndex` ix) (loadc cur) {-# INLINE load' #-} in ACursored (intersectDim (extent arr1) sh) makec' shiftc' load' {-# INLINE [2] szipWith #-} -- Delayed ---------------------------- instance Structured D a b where type TR D = D smap = map szipWith = zipWith -- ForeignPtr ------------------------- instance Storable a => Structured F a b where type TR F = D smap = map szipWith = zipWith -- Partitioned ------------------------ instance (Structured r1 a b , Structured r2 a b) => Structured (P r1 r2) a b where type TR (P r1 r2) = P (TR r1) (TR r2) smap f (APart sh range arr1 arr2) = APart sh range (smap f arr1) (smap f arr2) {-# INLINE [3] smap #-} szipWith f arr1 (APart sh range arr21 arr22) = APart sh range (szipWith f arr1 arr21) (szipWith f arr1 arr22) {-# INLINE [2] szipWith #-} -- Small ------------------------------ instance Structured r1 a b => Structured (S r1) a b where type TR (S r1) = S (TR r1) smap f (ASmall arr1) = ASmall (smap f arr1) {-# INLINE [3] smap #-} szipWith f arr1 (ASmall arr2) = ASmall (szipWith f arr1 arr2) {-# INLINE [3] szipWith #-} -- Interleaved ------------------------ instance Structured r1 a b => Structured (I r1) a b where type TR (I r1) = I (TR r1) smap f (AInterleave arr1) = AInterleave (smap f arr1) {-# INLINE [3] smap #-} szipWith f arr1 (AInterleave arr2) = AInterleave (szipWith f arr1 arr2) {-# INLINE [3] szipWith #-} -- Unboxed ---------------------------- instance Unbox a => Structured U a b where type TR U = D smap = map szipWith = zipWith -- Undefined -------------------------- instance Structured X a b where type TR X = X smap _ (AUndefined sh) = AUndefined sh szipWith _ _ (AUndefined sh) = AUndefined sh repa-3.4.1.4/Data/Array/Repa/Operators/Reduction.hs0000644000000000000000000001556713053313255020100 0ustar0000000000000000{-# LANGUAGE BangPatterns, ExplicitForAll, TypeOperators, MagicHash #-} {-# OPTIONS -fno-warn-orphans #-} module Data.Array.Repa.Operators.Reduction ( foldS, foldP , foldAllS, foldAllP , sumS, sumP , sumAllS, sumAllP , equalsS, equalsP) where import Data.Array.Repa.Base import Data.Array.Repa.Index import Data.Array.Repa.Eval import Data.Array.Repa.Repr.Unboxed import Data.Array.Repa.Operators.Mapping as R import Data.Array.Repa.Shape as S import qualified Data.Vector.Unboxed as V import qualified Data.Vector.Unboxed.Mutable as M import Prelude hiding (sum) import qualified Data.Array.Repa.Eval.Reduction as E import System.IO.Unsafe import GHC.Exts -- fold ---------------------------------------------------------------------- -- | Sequential reduction of the innermost dimension of an arbitrary rank array. -- -- Combine this with `transpose` to fold any other dimension. -- -- Elements are reduced in the order of their indices, from lowest to highest. -- Applications of the operator are associatied arbitrarily. -- -- >>> let c 0 x = x; c x 0 = x; c x y = y -- >>> let a = fromListUnboxed (Z :. 2 :. 2) [1,2,3,4] :: Array U (Z :. Int :. Int) Int -- >>> foldS c 0 a -- AUnboxed (Z :. 2) (fromList [2,4]) -- foldS :: (Shape sh, Source r a, Unbox a) => (a -> a -> a) -> a -> Array r (sh :. Int) a -> Array U sh a foldS f z arr = arr `deepSeqArray` let sh@(sz :. n') = extent arr !(I# n) = n' in unsafePerformIO $ do mvec <- M.unsafeNew (S.size sz) E.foldS mvec (\ix -> arr `unsafeIndex` fromIndex sh (I# ix)) f z n !vec <- V.unsafeFreeze mvec now $ fromUnboxed sz vec {-# INLINE [1] foldS #-} -- | Parallel reduction of the innermost dimension of an arbitray rank array. -- -- The first argument needs to be an associative sequential operator. -- The starting element must be neutral with respect to the operator, for -- example @0@ is neutral with respect to @(+)@ as @0 + a = a@. -- These restrictions are required to support parallel evaluation, as the -- starting element may be used multiple times depending on the number of threads. -- -- Elements are reduced in the order of their indices, from lowest to highest. -- Applications of the operator are associatied arbitrarily. -- -- >>> let c 0 x = x; c x 0 = x; c x y = y -- >>> let a = fromListUnboxed (Z :. 2 :. 2) [1,2,3,4] :: Array U (Z :. Int :. Int) Int -- >>> foldP c 0 a -- AUnboxed (Z :. 2) (fromList [2,4]) -- foldP :: (Shape sh, Source r a, Unbox a, Monad m) => (a -> a -> a) -> a -> Array r (sh :. Int) a -> m (Array U sh a) foldP f z arr = arr `deepSeqArray` let sh@(sz :. n) = extent arr in case rank sh of -- specialise rank-1 arrays, else one thread does all the work. -- We can't match against the shape constructor, -- otherwise type error: (sz ~ Z) -- 1 -> do x <- foldAllP f z arr now $ fromUnboxed sz $ V.singleton x _ -> now $ unsafePerformIO $ do mvec <- M.unsafeNew (S.size sz) E.foldP mvec (\ix -> arr `unsafeIndex` fromIndex sh ix) f z n !vec <- V.unsafeFreeze mvec now $ fromUnboxed sz vec {-# INLINE [1] foldP #-} -- foldAll -------------------------------------------------------------------- -- | Sequential reduction of an array of arbitrary rank to a single scalar value. -- -- Elements are reduced in row-major order. Applications of the operator are -- associated arbitrarily. -- foldAllS :: (Shape sh, Source r a) => (a -> a -> a) -> a -> Array r sh a -> a foldAllS f z arr = arr `deepSeqArray` let !ex = extent arr !(I# n) = size ex in E.foldAllS (\ix -> arr `unsafeIndex` fromIndex ex (I# ix)) f z n {-# INLINE [1] foldAllS #-} -- | Parallel reduction of an array of arbitrary rank to a single scalar value. -- -- The first argument needs to be an associative sequential operator. -- The starting element must be neutral with respect to the operator, -- for example @0@ is neutral with respect to @(+)@ as @0 + a = a@. -- These restrictions are required to support parallel evaluation, as the -- starting element may be used multiple times depending on the number of threads. -- -- Elements are reduced in row-major order. Applications of the operator are -- associated arbitrarily. -- foldAllP :: (Shape sh, Source r a, Unbox a, Monad m) => (a -> a -> a) -> a -> Array r sh a -> m a foldAllP f z arr = arr `deepSeqArray` let sh = extent arr n = size sh in return $ unsafePerformIO $ E.foldAllP (\ix -> arr `unsafeIndex` fromIndex sh ix) f z n {-# INLINE [1] foldAllP #-} -- sum ------------------------------------------------------------------------ -- | Sequential sum the innermost dimension of an array. sumS :: (Shape sh, Source r a, Num a, Unbox a) => Array r (sh :. Int) a -> Array U sh a sumS = foldS (+) 0 {-# INLINE [3] sumS #-} -- | Parallel sum the innermost dimension of an array. sumP :: (Shape sh, Source r a, Num a, Unbox a, Monad m) => Array r (sh :. Int) a -> m (Array U sh a) sumP = foldP (+) 0 {-# INLINE [3] sumP #-} -- sumAll --------------------------------------------------------------------- -- | Sequential sum of all the elements of an array. sumAllS :: (Shape sh, Source r a, Num a) => Array r sh a -> a sumAllS = foldAllS (+) 0 {-# INLINE [3] sumAllS #-} -- | Parallel sum all the elements of an array. sumAllP :: (Shape sh, Source r a, Unbox a, Num a, Monad m) => Array r sh a -> m a sumAllP = foldAllP (+) 0 {-# INLINE [3] sumAllP #-} -- Equality ------------------------------------------------------------------ instance (Shape sh, Eq sh, Source r a, Eq a) => Eq (Array r sh a) where (==) arr1 arr2 = extent arr1 == extent arr2 && (foldAllS (&&) True (R.zipWith (==) arr1 arr2)) -- | Check whether two arrays have the same shape and contain equal elements, -- in parallel. equalsP :: (Shape sh, Source r1 a, Source r2 a, Eq a, Monad m) => Array r1 sh a -> Array r2 sh a -> m Bool equalsP arr1 arr2 = do same <- foldAllP (&&) True (R.zipWith (==) arr1 arr2) return $ (extent arr1 == extent arr2) && same -- | Check whether two arrays have the same shape and contain equal elements, -- sequentially. equalsS :: (Shape sh, Source r1 a, Source r2 a, Eq a) => Array r1 sh a -> Array r2 sh a -> Bool equalsS arr1 arr2 = extent arr1 == extent arr2 && (foldAllS (&&) True (R.zipWith (==) arr1 arr2)) repa-3.4.1.4/Data/Array/Repa/Operators/Selection.hs0000644000000000000000000000274212556111213020055 0ustar0000000000000000{-# LANGUAGE BangPatterns #-} module Data.Array.Repa.Operators.Selection (selectP) where import Data.Array.Repa.Index import Data.Array.Repa.Base import Data.Array.Repa.Eval.Selection import Data.Array.Repa.Repr.Unboxed as U import qualified Data.Vector.Unboxed as V import System.IO.Unsafe -- | Produce an array by applying a predicate to a range of integers. -- If the predicate matches, then use the second function to generate -- the element. -- -- * This is a low-level function helpful for writing filtering -- operations on arrays. -- -- * Use the integer as the index into the array you're filtering. -- selectP :: (Unbox a, Monad m) => (Int -> Bool) -- ^ If the Int matches this predicate, -> (Int -> a) -- ^ ... then pass it to this fn to produce a value -> Int -- ^ Range between 0 and this maximum. -> m (Array U DIM1 a) -- ^ Array containing produced values. selectP match produce len = return $ unsafePerformIO $ do (sh, vec) <- selectIO return $ sh `seq` vec `seq` fromUnboxed sh vec where {-# INLINE selectIO #-} selectIO = do vecs <- selectChunkedP match produce len vecs' <- mapM V.unsafeFreeze vecs -- TODO: avoid copy somehow. let result = V.concat vecs' return (Z :. V.length result, result) {-# INLINE [1] selectP #-} repa-3.4.1.4/Data/Array/Repa/Operators/Traversal.hs0000644000000000000000000001040013053313255020064 0ustar0000000000000000-- Generic Traversal module Data.Array.Repa.Operators.Traversal ( traverse, unsafeTraverse , traverse2, unsafeTraverse2 , traverse3, unsafeTraverse3 , traverse4, unsafeTraverse4) where import Data.Array.Repa.Base import Data.Array.Repa.Shape import Data.Array.Repa.Repr.Delayed import Prelude hiding (traverse) -- | Unstructured traversal. traverse, unsafeTraverse :: forall r sh sh' a b . ( Source r a , Shape sh) => Array r sh a -- ^ Source array. -> (sh -> sh') -- ^ Function to produce the extent of the result. -> ((sh -> a) -> sh' -> b) -- ^ Function to produce elements of the result. -- It is passed a lookup function to get elements of the source. -> Array D sh' b traverse arr transExtent newElem = fromFunction (transExtent (extent arr)) (newElem (index arr)) {-# INLINE [3] traverse #-} unsafeTraverse arr transExtent newElem = fromFunction (transExtent (extent arr)) (newElem (unsafeIndex arr)) {-# INLINE [3] unsafeTraverse #-} -- | Unstructured traversal over two arrays at once. traverse2, unsafeTraverse2 :: forall r1 r2 sh sh' sh'' a b c . ( Source r1 a, Source r2 b , Shape sh, Shape sh') => Array r1 sh a -- ^ First source array. -> Array r2 sh' b -- ^ Second source array. -> (sh -> sh' -> sh'') -- ^ Function to produce the extent of the result. -> ((sh -> a) -> (sh' -> b) -> (sh'' -> c)) -- ^ Function to produce elements of the result. -- It is passed lookup functions to get elements of the -- source arrays. -> Array D sh'' c traverse2 arrA arrB transExtent newElem = fromFunction (transExtent (extent arrA) (extent arrB)) (newElem (index arrA) (index arrB)) {-# INLINE [3] traverse2 #-} unsafeTraverse2 arrA arrB transExtent newElem = fromFunction (transExtent (extent arrA) (extent arrB)) (newElem (unsafeIndex arrA) (unsafeIndex arrB)) {-# INLINE [3] unsafeTraverse2 #-} -- | Unstructured traversal over three arrays at once. traverse3, unsafeTraverse3 :: forall r1 r2 r3 sh1 sh2 sh3 sh4 a b c d . ( Source r1 a, Source r2 b, Source r3 c , Shape sh1, Shape sh2, Shape sh3) => Array r1 sh1 a -> Array r2 sh2 b -> Array r3 sh3 c -> (sh1 -> sh2 -> sh3 -> sh4) -> ( (sh1 -> a) -> (sh2 -> b) -> (sh3 -> c) -> sh4 -> d ) -> Array D sh4 d traverse3 arrA arrB arrC transExtent newElem = fromFunction (transExtent (extent arrA) (extent arrB) (extent arrC)) (newElem (index arrA) (index arrB) (index arrC)) {-# INLINE [3] traverse3 #-} unsafeTraverse3 arrA arrB arrC transExtent newElem = fromFunction (transExtent (extent arrA) (extent arrB) (extent arrC)) (newElem (unsafeIndex arrA) (unsafeIndex arrB) (unsafeIndex arrC)) {-# INLINE [3] unsafeTraverse3 #-} -- | Unstructured traversal over four arrays at once. traverse4, unsafeTraverse4 :: forall r1 r2 r3 r4 sh1 sh2 sh3 sh4 sh5 a b c d e . ( Source r1 a, Source r2 b, Source r3 c, Source r4 d , Shape sh1, Shape sh2, Shape sh3, Shape sh4) => Array r1 sh1 a -> Array r2 sh2 b -> Array r3 sh3 c -> Array r4 sh4 d -> (sh1 -> sh2 -> sh3 -> sh4 -> sh5 ) -> ( (sh1 -> a) -> (sh2 -> b) -> (sh3 -> c) -> (sh4 -> d) -> sh5 -> e ) -> Array D sh5 e traverse4 arrA arrB arrC arrD transExtent newElem = fromFunction (transExtent (extent arrA) (extent arrB) (extent arrC) (extent arrD)) (newElem (index arrA) (index arrB) (index arrC) (index arrD)) {-# INLINE [3] traverse4 #-} unsafeTraverse4 arrA arrB arrC arrD transExtent newElem = fromFunction (transExtent (extent arrA) (extent arrB) (extent arrC) (extent arrD)) (newElem (unsafeIndex arrA) (unsafeIndex arrB) (unsafeIndex arrC) (unsafeIndex arrD)) {-# INLINE [3] unsafeTraverse4 #-} repa-3.4.1.4/Data/Array/Repa/Repr/ByteString.hs0000644000000000000000000000274013053313255017155 0ustar0000000000000000 module Data.Array.Repa.Repr.ByteString ( B, Array (..) , fromByteString, toByteString) where import Data.Array.Repa.Shape import Data.Array.Repa.Base import Data.Array.Repa.Repr.Delayed import Data.Word import qualified Data.ByteString as B import qualified Data.ByteString.Unsafe as BU import Data.ByteString (ByteString) -- | Strict ByteStrings arrays are represented as ForeignPtr buffers of Word8 data B -- | Read elements from a `ByteString`. instance Source B Word8 where data Array B sh Word8 = AByteString !sh !ByteString linearIndex (AByteString _ bs) ix = bs `B.index` ix {-# INLINE linearIndex #-} unsafeLinearIndex (AByteString _ bs) ix = bs `BU.unsafeIndex` ix {-# INLINE unsafeLinearIndex #-} extent (AByteString sh _) = sh {-# INLINE extent #-} deepSeqArray (AByteString sh bs) x = sh `deepSeq` bs `seq` x {-# INLINE deepSeqArray #-} deriving instance Show sh => Show (Array B sh Word8) deriving instance Read sh => Read (Array B sh Word8) -- Conversions ---------------------------------------------------------------- -- | O(1). Wrap a `ByteString` as an array. fromByteString :: sh -> ByteString -> Array B sh Word8 fromByteString sh bs = AByteString sh bs {-# INLINE fromByteString #-} -- | O(1). Unpack a `ByteString` from an array. toByteString :: Array B sh Word8 -> ByteString toByteString (AByteString _ bs) = bs {-# INLINE toByteString #-} repa-3.4.1.4/Data/Array/Repa/Repr/Cursored.hs0000644000000000000000000001002412556111213016640 0ustar0000000000000000{-# LANGUAGE MagicHash #-} module Data.Array.Repa.Repr.Cursored ( C, Array (..) , makeCursored) where import Data.Array.Repa.Base import Data.Array.Repa.Shape import Data.Array.Repa.Index import Data.Array.Repa.Repr.Delayed import Data.Array.Repa.Repr.Undefined import Data.Array.Repa.Eval.Load import Data.Array.Repa.Eval.Elt import Data.Array.Repa.Eval.Cursored import Data.Array.Repa.Eval.Target import GHC.Exts import Debug.Trace -- | Cursored Arrays. -- These are produced by Repa's stencil functions, and help the fusion -- framework to share index compuations between array elements. -- -- The basic idea is described in ``Efficient Parallel Stencil Convolution'', -- Ben Lippmeier and Gabriele Keller, Haskell 2011 -- though the underlying -- array representation has changed since this paper was published. data C -- | Compute elements of a cursored array. instance Source C a where data Array C sh a = forall cursor. ACursored { cursoredExtent :: !sh -- | Make a cursor to a particular element. , makeCursor :: sh -> cursor -- | Shift the cursor by an offset, to get to another element. , shiftCursor :: sh -> cursor -> cursor -- | Load\/compute the element at the given cursor. , loadCursor :: cursor -> a } index (ACursored _ makec _ loadc) = loadc . makec {-# INLINE index #-} unsafeIndex = index {-# INLINE unsafeIndex #-} linearIndex (ACursored sh makec _ loadc) = loadc . makec . fromIndex sh {-# INLINE linearIndex #-} extent (ACursored sh _ _ _) = sh {-# INLINE extent #-} deepSeqArray (ACursored sh makec shiftc loadc) y = sh `deepSeq` makec `seq` shiftc `seq` loadc `seq` y {-# INLINE deepSeqArray #-} -- Fill ----------------------------------------------------------------------- -- | Compute all elements in an rank-2 array. instance Elt e => Load C DIM2 e where loadP (ACursored (Z :. (I# h) :. (I# w)) makec shiftc loadc) marr = do traceEventIO "Repa.loadP[Cursored]: start" fillCursoredBlock2P (unsafeWriteMVec marr) makec shiftc loadc w 0# 0# w h touchMVec marr traceEventIO "Repa.loadP[Cursored]: end" {-# INLINE loadP #-} loadS (ACursored (Z :. (I# h) :. (I# w)) makec shiftc loadc) marr = do traceEventIO "Repa.loadS[Cursored]: start" fillCursoredBlock2S (unsafeWriteMVec marr) makec shiftc loadc w 0# 0# w h touchMVec marr traceEventIO "Repa.loadS[Cursored]: end" {-# INLINE loadS #-} -- | Compute a range of elements in a rank-2 array. instance Elt e => LoadRange C DIM2 e where loadRangeP (ACursored (Z :. _h :. (I# w)) makec shiftc loadc) marr (Z :. (I# y0) :. (I# x0)) (Z :. (I# h0) :. (I# w0)) = do traceEventIO "Repa.loadRangeP[Cursored]: start" fillCursoredBlock2P (unsafeWriteMVec marr) makec shiftc loadc w x0 y0 w0 h0 touchMVec marr traceEventIO "Repa.loadRangeP[Cursored]: end" {-# INLINE loadRangeP #-} loadRangeS (ACursored (Z :. _h :. (I# w)) makec shiftc loadc) marr (Z :. (I# y0) :. (I# x0)) (Z :. (I# h0) :. (I# w0)) = do traceEventIO "Repa.loadRangeS[Cursored]: start" fillCursoredBlock2S (unsafeWriteMVec marr) makec shiftc loadc w x0 y0 w0 h0 touchMVec marr traceEventIO "Repa.loadRangeS[Cursored]: end" {-# INLINE loadRangeS #-} -- Conversions ---------------------------------------------------------------- -- | Define a new cursored array. makeCursored :: sh -> (sh -> cursor) -- ^ Create a cursor for an index. -> (sh -> cursor -> cursor) -- ^ Shift a cursor by an offset. -> (cursor -> e) -- ^ Compute the element at the cursor. -> Array C sh e makeCursored = ACursored {-# INLINE makeCursored #-} repa-3.4.1.4/Data/Array/Repa/Repr/Delayed.hs0000644000000000000000000000705512556111213016433 0ustar0000000000000000{-# LANGUAGE MagicHash #-} module Data.Array.Repa.Repr.Delayed ( D, Array(..) , fromFunction, toFunction , delay) where import Data.Array.Repa.Eval.Load import Data.Array.Repa.Eval.Target import Data.Array.Repa.Eval.Chunked import Data.Array.Repa.Eval.Cursored import Data.Array.Repa.Eval.Elt import Data.Array.Repa.Index import Data.Array.Repa.Shape import Data.Array.Repa.Base import Debug.Trace import GHC.Exts -- | Delayed arrays are represented as functions from the index to element value. -- -- Every time you index into a delayed array the element at that position -- is recomputed. data D -- | Compute elements of a delayed array. instance Source D a where data Array D sh a = ADelayed !sh (sh -> a) index (ADelayed _ f) ix = f ix {-# INLINE index #-} linearIndex (ADelayed sh f) ix = f (fromIndex sh ix) {-# INLINE linearIndex #-} extent (ADelayed sh _) = sh {-# INLINE extent #-} deepSeqArray (ADelayed sh f) y = sh `deepSeq` f `seq` y {-# INLINE deepSeqArray #-} -- Load ----------------------------------------------------------------------- -- | Compute all elements in an array. instance Shape sh => Load D sh e where loadP (ADelayed sh getElem) mvec = mvec `deepSeqMVec` do traceEventIO "Repa.loadP[Delayed]: start" fillChunkedP (size sh) (unsafeWriteMVec mvec) (getElem . fromIndex sh) touchMVec mvec traceEventIO "Repa.loadP[Delayed]: end" {-# INLINE [4] loadP #-} loadS (ADelayed sh getElem) mvec = mvec `deepSeqMVec` do traceEventIO "Repa.loadS[Delayed]: start" fillLinearS (size sh) (unsafeWriteMVec mvec) (getElem . fromIndex sh) touchMVec mvec traceEventIO "Repa.loadS[Delayed]: end" {-# INLINE [4] loadS #-} -- | Compute a range of elements in a rank-2 array. instance Elt e => LoadRange D DIM2 e where loadRangeP (ADelayed (Z :. _h :. (I# w)) getElem) mvec (Z :. (I# y0) :. (I# x0)) (Z :. (I# h0) :. (I# w0)) = mvec `deepSeqMVec` do traceEventIO "Repa.loadRangeP[Delayed]: start" fillBlock2P (unsafeWriteMVec mvec) getElem w x0 y0 w0 h0 touchMVec mvec traceEventIO "Repa.loadRangeP[Delayed]: end" {-# INLINE [1] loadRangeP #-} loadRangeS (ADelayed (Z :. _h :. (I# w)) getElem) mvec (Z :. (I# y0) :. (I# x0)) (Z :. (I# h0) :. (I# w0)) = mvec `deepSeqMVec` do traceEventIO "Repa.loadRangeS[Delayed]: start" fillBlock2S (unsafeWriteMVec mvec) getElem w x0 y0 w0 h0 touchMVec mvec traceEventIO "Repa.loadRangeS[Delayed]: end" {-# INLINE [1] loadRangeS #-} -- Conversions ---------------------------------------------------------------- -- | O(1). Wrap a function as a delayed array. fromFunction :: sh -> (sh -> a) -> Array D sh a fromFunction sh f = ADelayed sh f {-# INLINE fromFunction #-} -- | O(1). Produce the extent of an array, and a function to retrieve an -- arbitrary element. toFunction :: (Shape sh, Source r1 a) => Array r1 sh a -> (sh, sh -> a) toFunction arr = case delay arr of ADelayed sh f -> (sh, f) {-# INLINE toFunction #-} -- | O(1). Delay an array. -- This wraps the internal representation to be a function from -- indices to elements, so consumers don't need to worry about -- what the previous representation was. -- delay :: Shape sh => Source r e => Array r sh e -> Array D sh e delay arr = ADelayed (extent arr) (unsafeIndex arr) {-# INLINE delay #-} repa-3.4.1.4/Data/Array/Repa/Repr/ForeignPtr.hs0000644000000000000000000000707512556111213017145 0ustar0000000000000000 module Data.Array.Repa.Repr.ForeignPtr ( F, Array (..) , fromForeignPtr, toForeignPtr , computeIntoS, computeIntoP) where import Data.Array.Repa.Shape import Data.Array.Repa.Base import Data.Array.Repa.Eval.Load import Data.Array.Repa.Eval.Target import Data.Array.Repa.Repr.Delayed import Foreign.Storable import Foreign.ForeignPtr import Foreign.Marshal.Alloc import System.IO.Unsafe import qualified Foreign.ForeignPtr.Unsafe as Unsafe -- | Arrays represented as foreign buffers in the C heap. data F -- | Read elements from a foreign buffer. instance Storable a => Source F a where data Array F sh a = AForeignPtr !sh !Int !(ForeignPtr a) linearIndex (AForeignPtr _ len fptr) ix | ix < len = unsafePerformIO $ withForeignPtr fptr $ \ptr -> peekElemOff ptr ix | otherwise = error "Repa: foreign array index out of bounds" {-# INLINE linearIndex #-} unsafeLinearIndex (AForeignPtr _ _ fptr) ix = unsafePerformIO $ withForeignPtr fptr $ \ptr -> peekElemOff ptr ix {-# INLINE unsafeLinearIndex #-} extent (AForeignPtr sh _ _) = sh {-# INLINE extent #-} deepSeqArray (AForeignPtr sh len fptr) x = sh `deepSeq` len `seq` fptr `seq` x {-# INLINE deepSeqArray #-} -- Load ----------------------------------------------------------------------- -- | Filling foreign buffers. instance Storable e => Target F e where data MVec F e = FPVec !Int !(ForeignPtr e) newMVec n = do let (proxy :: e) = undefined ptr <- mallocBytes (sizeOf proxy * n) _ <- peek ptr `asTypeOf` return proxy fptr <- newForeignPtr finalizerFree ptr return $ FPVec n fptr {-# INLINE newMVec #-} -- CAREFUL: Unwrapping the foreignPtr like this means we need to be careful -- to touch it after the last use, otherwise the finaliser might run too early. unsafeWriteMVec (FPVec _ fptr) !ix !x = pokeElemOff (Unsafe.unsafeForeignPtrToPtr fptr) ix x {-# INLINE unsafeWriteMVec #-} unsafeFreezeMVec !sh (FPVec len fptr) = return $ AForeignPtr sh len fptr {-# INLINE unsafeFreezeMVec #-} deepSeqMVec !(FPVec _ fptr) x = Unsafe.unsafeForeignPtrToPtr fptr `seq` x {-# INLINE deepSeqMVec #-} touchMVec (FPVec _ fptr) = touchForeignPtr fptr {-# INLINE touchMVec #-} -- Conversions ---------------------------------------------------------------- -- | O(1). Wrap a `ForeignPtr` as an array. fromForeignPtr :: Shape sh => sh -> ForeignPtr e -> Array F sh e fromForeignPtr !sh !fptr = AForeignPtr sh (size sh) fptr {-# INLINE fromForeignPtr #-} -- | O(1). Unpack a `ForeignPtr` from an array. toForeignPtr :: Array F sh e -> ForeignPtr e toForeignPtr (AForeignPtr _ _ fptr) = fptr {-# INLINE toForeignPtr #-} -- | Compute an array sequentially and write the elements into a foreign -- buffer without intermediate copying. If you want to copy a -- pre-existing manifest array to a foreign buffer then `delay` it first. computeIntoS :: (Load r1 sh e, Storable e) => ForeignPtr e -> Array r1 sh e -> IO () computeIntoS !fptr !arr = loadS arr (FPVec 0 fptr) {-# INLINE computeIntoS #-} -- | Compute an array in parallel and write the elements into a foreign -- buffer without intermediate copying. If you want to copy a -- pre-existing manifest array to a foreign buffer then `delay` it first. computeIntoP :: (Load r1 sh e, Storable e) => ForeignPtr e -> Array r1 sh e -> IO () computeIntoP !fptr !arr = loadP arr (FPVec 0 fptr) {-# INLINE computeIntoP #-} repa-3.4.1.4/Data/Array/Repa/Repr/HintSmall.hs0000644000000000000000000000357312556111213016760 0ustar0000000000000000 module Data.Array.Repa.Repr.HintSmall (S, Array (..), hintSmall) where import Data.Array.Repa.Eval.Load import Data.Array.Repa.Base import Data.Array.Repa.Shape -- | Hints that evaluating this array is only a small amount of work. -- It will be evaluated sequentially in the main thread, instead of -- in parallel on the gang. This avoids the associated scheduling overhead. data S r1 instance Source r1 a => Source (S r1) a where data Array (S r1) sh a = ASmall !(Array r1 sh a) extent (ASmall arr) = extent arr {-# INLINE extent #-} index (ASmall arr) ix = index arr ix {-# INLINE index #-} unsafeIndex (ASmall arr) ix = unsafeIndex arr ix {-# INLINE unsafeIndex #-} linearIndex (ASmall arr) ix = linearIndex arr ix {-# INLINE linearIndex #-} unsafeLinearIndex (ASmall arr) ix = unsafeLinearIndex arr ix {-# INLINE unsafeLinearIndex #-} deepSeqArray (ASmall arr) x = deepSeqArray arr x {-# INLINE deepSeqArray #-} -- | Wrap an array with a smallness hint. hintSmall :: Array r1 sh e -> Array (S r1) sh e hintSmall = ASmall deriving instance Show (Array r1 sh e) => Show (Array (S r1) sh e) deriving instance Read (Array r1 sh e) => Read (Array (S r1) sh e) -- Load ---------------------------------------------------------------------- instance ( Shape sh, Load r1 sh e) => Load (S r1) sh e where loadP (ASmall arr) marr = loadS arr marr {-# INLINE loadP #-} loadS (ASmall arr) marr = loadS arr marr {-# INLINE loadS #-} -- LoadRange ------------------------------------------------------------------ instance ( Shape sh, LoadRange r1 sh e) => LoadRange (S r1) sh e where loadRangeP (ASmall arr) marr ix1 ix2 = loadRangeS arr marr ix1 ix2 {-# INLINE loadRangeP #-} loadRangeS (ASmall arr) marr ix1 ix2 = loadRangeS arr marr ix1 ix2 {-# INLINE loadRangeS #-} repa-3.4.1.4/Data/Array/Repa/Repr/HintInterleave.hs0000644000000000000000000000404712556111213020003 0ustar0000000000000000 module Data.Array.Repa.Repr.HintInterleave (I, Array (..), hintInterleave) where import Data.Array.Repa.Eval.Load import Data.Array.Repa.Eval.Target import Data.Array.Repa.Eval.Interleaved import Data.Array.Repa.Repr.Delayed import Data.Array.Repa.Shape import Data.Array.Repa.Base import Debug.Trace -- | Hints that computing this array will be an unbalanced workload -- and evaluation should be interleaved between the processors. data I r1 instance Source r1 a => Source (I r1) a where data Array (I r1) sh a = AInterleave !(Array r1 sh a) extent (AInterleave arr) = extent arr {-# INLINE extent #-} index (AInterleave arr) ix = index arr ix {-# INLINE index #-} unsafeIndex (AInterleave arr) ix = unsafeIndex arr ix {-# INLINE unsafeIndex #-} linearIndex (AInterleave arr) ix = linearIndex arr ix {-# INLINE linearIndex #-} unsafeLinearIndex (AInterleave arr) ix = unsafeLinearIndex arr ix {-# INLINE unsafeLinearIndex #-} deepSeqArray (AInterleave arr) x = deepSeqArray arr x {-# INLINE deepSeqArray #-} deriving instance Show (Array r1 sh e) => Show (Array (I r1) sh e) deriving instance Read (Array r1 sh e) => Read (Array (I r1) sh e) -- | Wrap an array with a unbalanced-ness hint. hintInterleave :: Array r1 sh e -> Array (I r1) sh e hintInterleave = AInterleave -- Load ----------------------------------------------------------------------- instance (Shape sh, Load D sh e) => Load (I D) sh e where loadP (AInterleave (ADelayed sh getElem)) marr = marr `deepSeqMVec` do traceEventIO "Repa.loadP[Interleaved]: start" fillInterleavedP (size sh) (unsafeWriteMVec marr) (getElem . fromIndex sh) touchMVec marr traceEventIO "Repa.loadP[Interleaved]: end" {-# INLINE [4] loadP #-} -- The fact that the workload is unbalanced doesn't affect us when the -- program is run sequentially, so just use the filling method of the inner -- representation loadS (AInterleave arr) marr = loadS arr marr {-# INLINE loadS #-} repa-3.4.1.4/Data/Array/Repa/Repr/Partitioned.hs0000644000000000000000000000470312556111213017343 0ustar0000000000000000 module Data.Array.Repa.Repr.Partitioned ( P, Array (..) , Range(..) , inRange) where import Data.Array.Repa.Base import Data.Array.Repa.Shape import Data.Array.Repa.Eval import Data.Array.Repa.Repr.Delayed -- | Partitioned arrays. -- The last partition takes priority -- -- These are produced by Repa's support functions and allow arrays to be defined -- using a different element function for each partition. -- -- The basic idea is described in ``Efficient Parallel Stencil Convolution'', -- Ben Lippmeier and Gabriele Keller, Haskell 2011 -- though the underlying -- array representation has changed since this paper was published. -- data P r1 r2 data Range sh = Range !sh !sh -- indices defining the range (sh -> Bool) -- predicate to check whether were in range -- | Check whether an index is within the given range. inRange :: Range sh -> sh -> Bool inRange (Range _ _ p) ix = p ix {-# INLINE inRange #-} -- Repr ----------------------------------------------------------------------- -- | Read elements from a partitioned array. instance (Source r1 e, Source r2 e) => Source (P r1 r2) e where data Array (P r1 r2) sh e = APart !sh -- size of the whole array !(Range sh) !(Array r1 sh e) -- if in range use this array !(Array r2 sh e) -- otherwise use this array index (APart _ range arr1 arr2) ix | inRange range ix = index arr1 ix | otherwise = index arr2 ix {-# INLINE index #-} linearIndex arr@(APart sh _ _ _) ix = index arr $ fromIndex sh ix {-# INLINE linearIndex #-} extent (APart sh _ _ _) = sh {-# INLINE extent #-} deepSeqArray (APart sh range arr1 arr2) y = sh `deepSeq` range `deepSeqRange` arr1 `deepSeqArray` arr2 `deepSeqArray` y {-# INLINE deepSeqArray #-} deepSeqRange :: Shape sh => Range sh -> b -> b deepSeqRange (Range ix sz f) y = ix `deepSeq` sz `deepSeq` f `seq` y {-# INLINE deepSeqRange #-} -- Load ----------------------------------------------------------------------- instance (LoadRange r1 sh e, Load r2 sh e) => Load (P r1 r2) sh e where loadP (APart _ (Range ix sz _) arr1 arr2) marr = do loadRangeP arr1 marr ix sz loadP arr2 marr {-# INLINE loadP #-} loadS (APart _ (Range ix sz _) arr1 arr2) marr = do loadRangeS arr1 marr ix sz loadS arr2 marr {-# INLINE loadS #-} repa-3.4.1.4/Data/Array/Repa/Repr/Unboxed.hs0000644000000000000000000001764413053313255016500 0ustar0000000000000000 module Data.Array.Repa.Repr.Unboxed ( U, U.Unbox, Array (..) , computeUnboxedS, computeUnboxedP , fromListUnboxed , fromUnboxed, toUnboxed , zip, zip3, zip4, zip5, zip6 , unzip, unzip3, unzip4, unzip5, unzip6) where import Data.Array.Repa.Shape as R import Data.Array.Repa.Base as R import Data.Array.Repa.Eval as R import Data.Array.Repa.Repr.Delayed as R import qualified Data.Vector.Unboxed as U import qualified Data.Vector.Unboxed.Mutable as UM import Control.Monad import Prelude hiding (zip, zip3, unzip, unzip3) -- | Unboxed arrays are represented as unboxed vectors. -- -- The implementation uses @Data.Vector.Unboxed@ which is based on type -- families and picks an efficient, specialised representation for every -- element type. In particular, unboxed vectors of pairs are represented -- as pairs of unboxed vectors. -- This is the most efficient representation for numerical data. -- data U -- | Read elements from an unboxed vector array. instance U.Unbox a => Source U a where data Array U sh a = AUnboxed !sh !(U.Vector a) linearIndex (AUnboxed _ vec) ix = vec U.! ix {-# INLINE linearIndex #-} unsafeLinearIndex (AUnboxed _ vec) ix = vec `U.unsafeIndex` ix {-# INLINE unsafeLinearIndex #-} extent (AUnboxed sh _) = sh {-# INLINE extent #-} deepSeqArray (AUnboxed sh vec) x = sh `deepSeq` vec `seq` x {-# INLINE deepSeqArray #-} deriving instance (Show sh, Show e, U.Unbox e) => Show (Array U sh e) deriving instance (Read sh, Read e, U.Unbox e) => Read (Array U sh e) -- Fill ----------------------------------------------------------------------- -- | Filling of unboxed vector arrays. instance U.Unbox e => Target U e where data MVec U e = UMVec (UM.IOVector e) newMVec n = liftM UMVec (UM.new n) {-# INLINE newMVec #-} unsafeWriteMVec (UMVec v) ix = UM.unsafeWrite v ix {-# INLINE unsafeWriteMVec #-} unsafeFreezeMVec sh (UMVec mvec) = do vec <- U.unsafeFreeze mvec return $ AUnboxed sh vec {-# INLINE unsafeFreezeMVec #-} deepSeqMVec (UMVec vec) x = vec `seq` x {-# INLINE deepSeqMVec #-} touchMVec _ = return () {-# INLINE touchMVec #-} -- Conversions ---------------------------------------------------------------- -- | Sequential computation of array elements.. -- -- * This is an alias for `computeS` with a more specific type. -- computeUnboxedS :: (Load r1 sh e, U.Unbox e) => Array r1 sh e -> Array U sh e computeUnboxedS = computeS {-# INLINE computeUnboxedS #-} -- | Parallel computation of array elements. -- -- * This is an alias for `computeP` with a more specific type. -- computeUnboxedP :: (Load r1 sh e, Monad m, U.Unbox e) => Array r1 sh e -> m (Array U sh e) computeUnboxedP = computeP {-# INLINE computeUnboxedP #-} -- | O(n). Convert a list to an unboxed vector array. -- -- * This is an alias for `fromList` with a more specific type. -- fromListUnboxed :: (Shape sh, U.Unbox a) => sh -> [a] -> Array U sh a fromListUnboxed = R.fromList {-# INLINE fromListUnboxed #-} -- | O(1). Wrap an unboxed vector as an array. fromUnboxed :: sh -> U.Vector e -> Array U sh e fromUnboxed sh vec = AUnboxed sh vec {-# INLINE fromUnboxed #-} -- | O(1). Unpack an unboxed vector from an array. toUnboxed :: Array U sh e -> U.Vector e toUnboxed (AUnboxed _ vec) = vec {-# INLINE toUnboxed #-} -- Zip ------------------------------------------------------------------------ -- | O(1). Zip some unboxed arrays. -- The shapes must be identical else `error`. zip :: (Shape sh, U.Unbox a, U.Unbox b) => Array U sh a -> Array U sh b -> Array U sh (a, b) zip (AUnboxed sh1 vec1) (AUnboxed sh2 vec2) | sh1 /= sh2 = error "Repa: zip array shapes not identical" | otherwise = AUnboxed sh1 (U.zip vec1 vec2) {-# INLINE zip #-} -- | O(1). Zip some unboxed arrays. -- The shapes must be identical else `error`. zip3 :: (Shape sh, U.Unbox a, U.Unbox b, U.Unbox c) => Array U sh a -> Array U sh b -> Array U sh c -> Array U sh (a, b, c) zip3 (AUnboxed sh1 vec1) (AUnboxed sh2 vec2) (AUnboxed sh3 vec3) | sh1 /= sh2 || sh1 /= sh3 = error "Repa: zip array shapes not identical" | otherwise = AUnboxed sh1 (U.zip3 vec1 vec2 vec3) {-# INLINE zip3 #-} -- | O(1). Zip some unboxed arrays. -- The shapes must be identical else `error`. zip4 :: (Shape sh, U.Unbox a, U.Unbox b, U.Unbox c, U.Unbox d) => Array U sh a -> Array U sh b -> Array U sh c -> Array U sh d -> Array U sh (a, b, c, d) zip4 (AUnboxed sh1 vec1) (AUnboxed sh2 vec2) (AUnboxed sh3 vec3) (AUnboxed sh4 vec4) | sh1 /= sh2 || sh1 /= sh3 || sh1 /= sh4 = error "Repa: zip array shapes not identical" | otherwise = AUnboxed sh1 (U.zip4 vec1 vec2 vec3 vec4) {-# INLINE zip4 #-} -- | O(1). Zip some unboxed arrays. -- The shapes must be identical else `error`. zip5 :: (Shape sh, U.Unbox a, U.Unbox b, U.Unbox c, U.Unbox d, U.Unbox e) => Array U sh a -> Array U sh b -> Array U sh c -> Array U sh d -> Array U sh e -> Array U sh (a, b, c, d, e) zip5 (AUnboxed sh1 vec1) (AUnboxed sh2 vec2) (AUnboxed sh3 vec3) (AUnboxed sh4 vec4) (AUnboxed sh5 vec5) | sh1 /= sh2 || sh1 /= sh3 || sh1 /= sh4 || sh1 /= sh5 = error "Repa: zip array shapes not identical" | otherwise = AUnboxed sh1 (U.zip5 vec1 vec2 vec3 vec4 vec5) {-# INLINE zip5 #-} -- | O(1). Zip some unboxed arrays. -- The shapes must be identical else `error`. zip6 :: (Shape sh, U.Unbox a, U.Unbox b, U.Unbox c, U.Unbox d, U.Unbox e, U.Unbox f) => Array U sh a -> Array U sh b -> Array U sh c -> Array U sh d -> Array U sh e -> Array U sh f -> Array U sh (a, b, c, d, e, f) zip6 (AUnboxed sh1 vec1) (AUnboxed sh2 vec2) (AUnboxed sh3 vec3) (AUnboxed sh4 vec4) (AUnboxed sh5 vec5) (AUnboxed sh6 vec6) | sh1 /= sh2 || sh1 /= sh3 || sh1 /= sh4 || sh1 /= sh5 || sh1 /= sh6 = error "Repa: zip array shapes not identical" | otherwise = AUnboxed sh1 (U.zip6 vec1 vec2 vec3 vec4 vec5 vec6) {-# INLINE zip6 #-} -- Unzip ---------------------------------------------------------------------- -- | O(1). Unzip an unboxed array. unzip :: (U.Unbox a, U.Unbox b) => Array U sh (a, b) -> (Array U sh a, Array U sh b) unzip (AUnboxed sh vec) = let (as, bs) = U.unzip vec in (AUnboxed sh as, AUnboxed sh bs) {-# INLINE unzip #-} -- | O(1). Unzip an unboxed array. unzip3 :: (U.Unbox a, U.Unbox b, U.Unbox c) => Array U sh (a, b, c) -> (Array U sh a, Array U sh b, Array U sh c) unzip3 (AUnboxed sh vec) = let (as, bs, cs) = U.unzip3 vec in (AUnboxed sh as, AUnboxed sh bs, AUnboxed sh cs) {-# INLINE unzip3 #-} -- | O(1). Unzip an unboxed array. unzip4 :: (U.Unbox a, U.Unbox b, U.Unbox c, U.Unbox d) => Array U sh (a, b, c, d) -> (Array U sh a, Array U sh b, Array U sh c, Array U sh d) unzip4 (AUnboxed sh vec) = let (as, bs, cs, ds) = U.unzip4 vec in (AUnboxed sh as, AUnboxed sh bs, AUnboxed sh cs, AUnboxed sh ds) {-# INLINE unzip4 #-} -- | O(1). Unzip an unboxed array. unzip5 :: (U.Unbox a, U.Unbox b, U.Unbox c, U.Unbox d, U.Unbox e) => Array U sh (a, b, c, d, e) -> (Array U sh a, Array U sh b, Array U sh c, Array U sh d, Array U sh e) unzip5 (AUnboxed sh vec) = let (as, bs, cs, ds, es) = U.unzip5 vec in (AUnboxed sh as, AUnboxed sh bs, AUnboxed sh cs, AUnboxed sh ds, AUnboxed sh es) {-# INLINE unzip5 #-} -- | O(1). Unzip an unboxed array. unzip6 :: (U.Unbox a, U.Unbox b, U.Unbox c, U.Unbox d, U.Unbox e, U.Unbox f) => Array U sh (a, b, c, d, e, f) -> (Array U sh a, Array U sh b, Array U sh c, Array U sh d, Array U sh e, Array U sh f) unzip6 (AUnboxed sh vec) = let (as, bs, cs, ds, es, fs) = U.unzip6 vec in (AUnboxed sh as, AUnboxed sh bs, AUnboxed sh cs, AUnboxed sh ds, AUnboxed sh es, AUnboxed sh fs) {-# INLINE unzip6 #-} repa-3.4.1.4/Data/Array/Repa/Repr/Undefined.hs0000644000000000000000000000206213053313255016761 0ustar0000000000000000 module Data.Array.Repa.Repr.Undefined ( X, Array (..)) where import Data.Array.Repa.Base import Data.Array.Repa.Shape import Data.Array.Repa.Eval -- | An array with undefined elements. -- -- * This is normally used as the last representation in a partitioned array, -- as the previous partitions are expected to provide full coverage. data X -- | Undefined array elements. Inspecting them yields `error`. -- instance Source X e where data Array X sh e = AUndefined !sh deepSeqArray _ x = x {-# INLINE deepSeqArray #-} extent (AUndefined sh) = sh {-# INLINE extent #-} index (AUndefined _) _ = error $ "Repa: array element is undefined." {-# INLINE index #-} linearIndex (AUndefined _) ix = error $ "Repa: array element at " ++ show ix ++ " is undefined." {-# INLINE linearIndex #-} deriving instance Show sh => Show (Array X sh e) deriving instance Read sh => Read (Array X sh e) instance Shape sh => Load X sh e where loadS _ _ = return () loadP _ _ = return () repa-3.4.1.4/Data/Array/Repa/Repr/Vector.hs0000644000000000000000000000554313053313255016331 0ustar0000000000000000 module Data.Array.Repa.Repr.Vector ( V, Array (..) , computeVectorS, computeVectorP , fromListVector , fromVector , toVector) where import Data.Array.Repa.Shape import Data.Array.Repa.Base import Data.Array.Repa.Eval import qualified Data.Vector as V import qualified Data.Vector.Mutable as VM import Control.Monad -- | Arrays represented as boxed vectors. -- -- This representation should only be used when your element type doesn't -- have an `Unbox` instsance. If it does, then use the Unboxed `U` -- representation will be faster. data V -- | Read elements from a boxed vector array. instance Source V a where data Array V sh a = AVector !sh !(V.Vector a) linearIndex (AVector _ vec) ix = vec V.! ix {-# INLINE linearIndex #-} unsafeLinearIndex (AVector _ vec) ix = vec `V.unsafeIndex` ix {-# INLINE unsafeLinearIndex #-} extent (AVector sh _) = sh {-# INLINE extent #-} deepSeqArray (AVector sh vec) x = sh `deepSeq` vec `seq` x {-# INLINE deepSeqArray #-} deriving instance (Show sh, Show e) => Show (Array V sh e) deriving instance (Read sh, Read e) => Read (Array V sh e) -- Fill ----------------------------------------------------------------------- -- | Filling of boxed vector arrays. instance Target V e where data MVec V e = MVector (VM.IOVector e) newMVec n = liftM MVector (VM.new n) {-# INLINE newMVec #-} unsafeWriteMVec (MVector v) ix = VM.unsafeWrite v ix {-# INLINE unsafeWriteMVec #-} unsafeFreezeMVec sh (MVector mvec) = do vec <- V.unsafeFreeze mvec return $ AVector sh vec {-# INLINE unsafeFreezeMVec #-} deepSeqMVec !_vec x = x {-# INLINE deepSeqMVec #-} touchMVec _ = return () {-# INLINE touchMVec #-} -- Conversions ---------------------------------------------------------------- -- | Sequential computation of array elements. -- -- * This is an alias for `compute` with a more specific type. -- computeVectorS :: Load r1 sh e => Array r1 sh e -> Array V sh e computeVectorS = computeS {-# INLINE computeVectorS #-} -- | Parallel computation of array elements. computeVectorP :: (Load r1 sh e, Monad m) => Array r1 sh e -> m (Array V sh e) computeVectorP = computeP {-# INLINE computeVectorP #-} -- | O(n). Convert a list to a boxed vector array. -- -- * This is an alias for `fromList` with a more specific type. -- fromListVector :: Shape sh => sh -> [a] -> Array V sh a fromListVector = fromList {-# INLINE fromListVector #-} -- | O(1). Wrap a boxed vector as an array. fromVector :: sh -> V.Vector e -> Array V sh e fromVector sh vec = AVector sh vec {-# INLINE fromVector #-} -- | O(1). Unpack a boxed vector from an array. toVector :: Array V sh e -> V.Vector e toVector (AVector _ vec) = vec {-# INLINE toVector #-} repa-3.4.1.4/Data/Array/Repa/Specialised/Dim2.hs0000644000000000000000000001005713252414734017200 0ustar0000000000000000{-# LANGUAGE BangPatterns #-} -- | Functions specialised for arrays of dimension 2. module Data.Array.Repa.Specialised.Dim2 ( isInside2 , isOutside2 , clampToBorder2 , makeBordered2) where import Data.Array.Repa.Index import Data.Array.Repa.Base import Data.Array.Repa.Repr.Partitioned import Data.Array.Repa.Repr.Undefined -- | Check if an index lies inside the given extent. -- As opposed to `inRange` from "Data.Array.Repa.Index", -- this is a short-circuited test that checks that lowest dimension first. isInside2 :: DIM2 -- ^ Extent of array. -> DIM2 -- ^ Index to check. -> Bool {-# INLINE isInside2 #-} isInside2 ex = not . isOutside2 ex -- | Check if an index lies outside the given extent. -- As opposed to `inRange` from "Data.Array.Repa.Index", -- this is a short-circuited test that checks the lowest dimension first. isOutside2 :: DIM2 -- ^ Extent of array. -> DIM2 -- ^ Index to check. -> Bool {-# INLINE isOutside2 #-} isOutside2 (_ :. yLen :. xLen) (_ :. yy :. xx) | xx < 0 = True | xx >= xLen = True | yy < 0 = True | yy >= yLen = True | otherwise = False -- | Given the extent of an array, clamp the components of an index so they -- lie within the given array. Outlying indices are clamped to the index -- of the nearest border element. clampToBorder2 :: DIM2 -- ^ Extent of array. -> DIM2 -- ^ Index to clamp. -> DIM2 {-# INLINE clampToBorder2 #-} clampToBorder2 (_ :. yLen :. xLen) (sh :. j :. i) = clampX j i where {-# INLINE clampX #-} clampX !y !x | x < 0 = clampY y 0 | x >= xLen = clampY y (xLen - 1) | otherwise = clampY y x {-# INLINE clampY #-} clampY !y !x | y < 0 = sh :. 0 :. x | y >= yLen = sh :. (yLen - 1) :. x | otherwise = sh :. y :. x -- | Make a 2D partitioned array from two others, one to produce the elements -- in the internal region, and one to produce elements in the border region. -- The two arrays must have the same extent. -- The border must be the same width on all sides. -- makeBordered2 :: (Source r1 a, Source r2 a) => DIM2 -- ^ Extent of array. -> Int -- ^ Width of border. -> Array r1 DIM2 a -- ^ Array for internal elements. -> Array r2 DIM2 a -- ^ Array for border elements. -> Array (P r1 (P r2 (P r2 (P r2 (P r2 X))))) DIM2 a {-# INLINE makeBordered2 #-} makeBordered2 sh@(_ :. aHeight :. aWidth) bWidth arrInternal arrBorder = checkDims `seq` let -- minimum and maximum indicies of values in the inner part of the image. !inX = bWidth !inY = bWidth !inW = aWidth - 2 * bWidth !inH = aHeight - 2 * bWidth inInternal (Z :. y :. x) = x >= inX && x < (inX + inW) && y >= inY && y < (inY + inH) {-# INLINE inInternal #-} inBorder = not . inInternal {-# INLINE inBorder #-} in -- internal region APart sh (Range (Z :. inY :. inX) (Z :. inH :. inW ) inInternal) arrInternal -- border regions $ APart sh (Range (Z :. 0 :. 0) (Z :. bWidth :. aWidth) inBorder) arrBorder $ APart sh (Range (Z :. inY + inH :. 0) (Z :. bWidth :. aWidth) inBorder) arrBorder $ APart sh (Range (Z :. inY :. 0) (Z :. inH :. bWidth) inBorder) arrBorder $ APart sh (Range (Z :. inY :. inX + inW) (Z :. inH :. bWidth) inBorder) arrBorder $ AUndefined sh where checkDims = if (extent arrInternal) == (extent arrBorder) then () else error "makeBordered2: internal and border arrays have different extents" {-# NOINLINE checkDims #-} -- NOINLINE because we don't want the branch in the core code. repa-3.4.1.4/Data/Array/Repa/Stencil/Dim2.hs0000644000000000000000000002422312556111213016344 0ustar0000000000000000{-# LANGUAGE MagicHash #-} -- This is specialised for stencils up to 7x7. -- Due to limitations in the GHC optimiser, using larger stencils doesn't -- work, and will yield `error` at runtime. We can probably increase the -- limit if required -- just ask. -- -- The focus of the stencil is in the center of the 7x7 tile, which has -- coordinates (0, 0). All coefficients in the stencil must fit in the tile, -- so they can be given X,Y coordinates up to +/- 3 positions. -- The stencil can be any shape, and need not be symmetric -- provided it -- fits in the 7x7 tile. -- module Data.Array.Repa.Stencil.Dim2 ( -- * Stencil creation makeStencil2, stencil2 -- * Stencil operators , PC5, mapStencil2, forStencil2) where import Data.Array.Repa.Base import Data.Array.Repa.Index import Data.Array.Repa.Shape import Data.Array.Repa.Repr.Delayed import Data.Array.Repa.Repr.Cursored import Data.Array.Repa.Repr.Partitioned import Data.Array.Repa.Repr.HintSmall import Data.Array.Repa.Repr.Undefined import Data.Array.Repa.Stencil.Base import Data.Array.Repa.Stencil.Template import Data.Array.Repa.Stencil.Partition import GHC.Exts -- | A index into the flat array. -- Should be abstract outside the stencil modules. data Cursor = Cursor Int type PC5 = P C (P (S D) (P (S D) (P (S D) (P (S D) X)))) -- Wrappers ------------------------------------------------------------------- -- | Like `mapStencil2` but with the parameters flipped. forStencil2 :: Source r a => Boundary a -> Array r DIM2 a -> Stencil DIM2 a -> Array PC5 DIM2 a {-# INLINE forStencil2 #-} forStencil2 boundary arr stencil = mapStencil2 boundary stencil arr ------------------------------------------------------------------------------- -- | Apply a stencil to every element of a 2D array. mapStencil2 :: Source r a => Boundary a -- ^ How to handle the boundary of the array. -> Stencil DIM2 a -- ^ Stencil to apply. -> Array r DIM2 a -- ^ Array to apply stencil to. -> Array PC5 DIM2 a {-# INLINE mapStencil2 #-} mapStencil2 boundary stencil@(StencilStatic sExtent _zero _load) arr = let sh = extent arr (_ :. aHeight :. aWidth) = sh (_ :. sHeight :. sWidth) = sExtent sHeight2 = sHeight `div` 2 sWidth2 = sWidth `div` 2 -- Partition the array into the internal and border regions. ![ Region inX inY inW inH , Region westX westY westW westH , Region eastX eastY eastW eastH , Region northX northY northW northH , Region southX southY southW southH ] = partitionForStencil (Size aWidth aHeight) (Size sWidth sHeight) (Offset sWidth2 sHeight2) {-# INLINE inInternal #-} inInternal (Z :. y :. x) = x >= inX && x < (inX + inW) && y >= inY && y < (inY + inH) {-# INLINE inBorder #-} inBorder = not . inInternal -- Cursor functions ---------------- {-# INLINE makec #-} makec (Z :. y :. x) = Cursor (x + y * aWidth) {-# INLINE shiftc #-} shiftc ix (Cursor off) = Cursor $ case ix of Z :. y :. x -> off + y * aWidth + x {-# INLINE arrInternal #-} arrInternal = makeCursored (extent arr) makec shiftc getInner' {-# INLINE getInner' #-} getInner' cur = unsafeAppStencilCursor2 shiftc stencil arr cur {-# INLINE arrBorder #-} arrBorder = ASmall (fromFunction (extent arr) getBorder') {-# INLINE getBorder' #-} getBorder' ix = case boundary of BoundFixed c -> c BoundConst c -> unsafeAppStencilCursor2_const addDim stencil c arr ix BoundClamp -> unsafeAppStencilCursor2_clamp addDim stencil arr ix in -- internal region APart sh (Range (Z :. inY :. inX) (Z :. inH :. inW) inInternal) arrInternal -- border regions $ APart sh (Range (Z :. westY :. westX) (Z :. westH :. westW) inBorder) arrBorder $ APart sh (Range (Z :. eastY :. eastX) (Z :. eastH :. eastW) inBorder) arrBorder $ APart sh (Range (Z :. northY :. northX) (Z :. northH :. northW) inBorder) arrBorder $ APart sh (Range (Z :. southY :. southX) (Z :. southH :. southW) inBorder) arrBorder $ AUndefined sh unsafeAppStencilCursor2 :: Source r a => (DIM2 -> Cursor -> Cursor) -> Stencil DIM2 a -> Array r DIM2 a -> Cursor -> a {-# INLINE unsafeAppStencilCursor2 #-} unsafeAppStencilCursor2 shift (StencilStatic sExtent zero loads) arr cur0 | _ :. sHeight :. sWidth <- sExtent , sHeight <= 7, sWidth <= 7 = let -- Get data from the manifest array. {-# INLINE getData #-} getData (Cursor cur) = arr `unsafeLinearIndex` cur -- Build a function to pass data from the array to our stencil. {-# INLINE oload #-} oload oy ox = let !cur' = shift (Z :. oy :. ox) cur0 in loads (Z :. oy :. ox) (getData cur') in template7x7 oload zero | otherwise = error $ unlines [ "mapStencil2: Your stencil is too big for this method." , " It must fit within a 7x7 tile to be compiled statically." ] -- | Like above, but treat elements outside the array has having a constant value. unsafeAppStencilCursor2_const :: forall r a . Source r a => (DIM2 -> DIM2 -> DIM2) -> Stencil DIM2 a -> a -> Array r DIM2 a -> DIM2 -> a {-# INLINE unsafeAppStencilCursor2_const #-} unsafeAppStencilCursor2_const shift (StencilStatic sExtent zero loads) fixed arr cur | _ :. sHeight :. sWidth <- sExtent , _ :. (I# aHeight) :. (I# aWidth) <- extent arr , sHeight <= 7, sWidth <= 7 = let -- Get data from the manifest array. {-# INLINE getData #-} getData :: DIM2 -> a getData (Z :. (I# y) :. (I# x)) = getData' x y {-# NOINLINE getData' #-} getData' :: Int# -> Int# -> a getData' !x !y | 1# <- (x <# 0#) `orI#` (x >=# aWidth) `orI#` (y <# 0#) `orI#` (y >=# aHeight) = fixed | otherwise = arr `unsafeIndex` (Z :. (I# y) :. (I# x)) -- Build a function to pass data from the array to our stencil. {-# INLINE oload #-} oload oy ox = let !cur' = shift (Z :. oy :. ox) cur in loads (Z :. oy :. ox) (getData cur') in template7x7 oload zero | otherwise = error $ unlines [ "mapStencil2: Your stencil is too big for this method." , " It must fit within a 7x7 tile to be compiled statically." ] -- | Like above, but clamp out of bounds array values to the closest real value. unsafeAppStencilCursor2_clamp :: forall r a . Source r a => (DIM2 -> DIM2 -> DIM2) -> Stencil DIM2 a -> Array r DIM2 a -> DIM2 -> a {-# INLINE unsafeAppStencilCursor2_clamp #-} unsafeAppStencilCursor2_clamp shift (StencilStatic sExtent zero loads) arr cur | _ :. sHeight :. sWidth <- sExtent , _ :. (I# aHeight) :. (I# aWidth) <- extent arr , sHeight <= 7, sWidth <= 7 = let -- Get data from the manifest array. {-# INLINE getData #-} getData :: DIM2 -> a getData (Z :. (I# y) :. (I# x)) = wrapLoadX x y {-# NOINLINE wrapLoadX #-} wrapLoadX :: Int# -> Int# -> a wrapLoadX !x !y | 1# <- x <# 0# = wrapLoadY 0# y | 1# <- x >=# aWidth = wrapLoadY (aWidth -# 1#) y | otherwise = wrapLoadY x y {-# NOINLINE wrapLoadY #-} wrapLoadY :: Int# -> Int# -> a wrapLoadY !x !y | 1# <- y <# 0# = loadXY x 0# | 1# <- y >=# aHeight = loadXY x (aHeight -# 1#) | otherwise = loadXY x y {-# INLINE loadXY #-} loadXY :: Int# -> Int# -> a loadXY !x !y = arr `unsafeIndex` (Z :. (I# y) :. (I# x)) -- Build a function to pass data from the array to our stencil. {-# INLINE oload #-} oload oy ox = let !cur' = shift (Z :. oy :. ox) cur in loads (Z :. oy :. ox) (getData cur') in template7x7 oload zero | otherwise = error $ unlines [ "mapStencil2: Your stencil is too big for this method." , " It must fit within a 7x7 tile to be compiled statically." ] -- | Data template for stencils up to 7x7. template7x7 :: (Int -> Int -> a -> a) -> a -> a {-# INLINE template7x7 #-} template7x7 f zero = f (-3) (-3) $ f (-3) (-2) $ f (-3) (-1) $ f (-3) 0 $ f (-3) 1 $ f (-3) 2 $ f (-3) 3 $ f (-2) (-3) $ f (-2) (-2) $ f (-2) (-1) $ f (-2) 0 $ f (-2) 1 $ f (-2) 2 $ f (-2) 3 $ f (-1) (-3) $ f (-1) (-2) $ f (-1) (-1) $ f (-1) 0 $ f (-1) 1 $ f (-1) 2 $ f (-1) 3 $ f 0 (-3) $ f 0 (-2) $ f 0 (-1) $ f 0 0 $ f 0 1 $ f 0 2 $ f 0 3 $ f 1 (-3) $ f 1 (-2) $ f 1 (-1) $ f 1 0 $ f 1 1 $ f 1 2 $ f 1 3 $ f 2 (-3) $ f 2 (-2) $ f 2 (-1) $ f 2 0 $ f 2 1 $ f 2 2 $ f 2 3 $ f 3 (-3) $ f 3 (-2) $ f 3 (-1) $ f 3 0 $ f 3 1 $ f 3 2 $ f 3 3 $ zero repa-3.4.1.4/Data/Array/Repa/Arbitrary.hs0000644000000000000000000001034213354717731016122 0ustar0000000000000000{-# LANGUAGE TypeOperators, FlexibleInstances, MultiParamTypeClasses, ScopedTypeVariables #-} {-# OPTIONS_GHC -fno-warn-orphans #-} module Data.Array.Repa.Arbitrary ( -- * Arbitrary Unboxed Arrays arbitraryUShaped , forAllUShaped , forAll2UShaped , forAll3UShaped , forAll4UShaped , forAll5UShaped -- * Arbitrary Boxed Arrays , arbitraryVShaped , forAllVShaped , forAll2VShaped , forAll3VShaped , forAll4VShaped , forAll5VShaped) where import Data.Array.Repa.Base import Data.Array.Repa.Repr.Unboxed import Data.Array.Repa.Shape import Data.Array.Repa.Index import Test.QuickCheck.Arbitrary import Test.QuickCheck.Gen import Test.QuickCheck.Property (forAll) import Control.Monad import qualified Data.Array.Repa.Repr.Vector as V import qualified Data.Vector.Unboxed as U -- Aribrary ------------------------------------------------------------------- -- | This module exports instances of @Arbitrary@ and @CoArbitrary@ for -- unboxed Repa arrays. instance Arbitrary Z where arbitrary = return Z -- Note: this is a shape that is "sized", and then random array for a given -- shape is generated. instance Arbitrary a => Arbitrary (a :. Int) where arbitrary = sized (\n -> do b <- if n == 0 then return 1 else choose (1, n) a <- resize ((n + b - 1) `div` b) arbitrary -- each dimension should be at least 1-wide return $ a :. b) -- | Generates a random unboxed array of a given shape arbitraryUShaped sh = fromListUnboxed sh `fmap` vector (size sh) -- | Generates a random boxed array of a given shape arbitraryVShaped sh = V.fromListVector sh `fmap` vector (size sh) instance (Arbitrary sh, Arbitrary a, U.Unbox a, Shape sh) => Arbitrary (Array U sh a) where arbitrary = arbitrary >>= arbitraryUShaped instance (Arbitrary sh, Arbitrary a, Shape sh) => Arbitrary (Array V.V sh a) where arbitrary = arbitrary >>= arbitraryVShaped -- CoArbitrary ---------------------------------------------------------------- instance CoArbitrary Z where coarbitrary _ = id instance (CoArbitrary a) => CoArbitrary (a :. Int) where coarbitrary (a :. b) = coarbitrary a . coarbitrary b instance (CoArbitrary sh, CoArbitrary a, Source r a, Shape sh) => CoArbitrary (Array r sh a) where coarbitrary arr = (coarbitrary . extent $ arr) . (coarbitrary . toList $ arr) -- Wrappers ------------------------------------------------------------------- -- | Convenience functions for writing tests on 2-,3-,4-tuples of arrays -- of the same size (or just of a fixed size.) -- | These are helper functions: forAll2 arbf = forAll $ liftM2 (,) arbf arbf forAll3 arbf = forAll $ liftM3 (,,) arbf arbf arbf forAll4 arbf = forAll $ liftM4 (,,,) arbf arbf arbf arbf forAll5 arbf = forAll $ liftM5 (,,,,) arbf arbf arbf arbf arbf -- | Property tested for unboxed random arrays with a given shape. forAllUShaped sh = forAll $ arbitraryUShaped sh -- | Property tested for pair of unboxed random arrays with a given shape. forAll2UShaped sh = forAll2 $ arbitraryUShaped sh -- | Property tested for triple of unboxed random arrays with a given shape. forAll3UShaped sh = forAll3 $ arbitraryUShaped sh -- | Property tested for quadruple of unboxed random arrays with a given shape. forAll4UShaped sh = forAll4 $ arbitraryUShaped sh -- | Property tested for 5-tuple of unboxed random arrays with a given shape. forAll5UShaped sh = forAll5 $ arbitraryUShaped sh -- | Property tested for unboxed random arrays with a given shape. forAllVShaped sh = forAll $ arbitraryVShaped sh -- | Property tested for pair of unboxed random arrays with a given shape. forAll2VShaped sh = forAll2 $ arbitraryVShaped sh -- | Property tested for triple of unboxed random arrays with a given shape. forAll3VShaped sh = forAll3 $ arbitraryVShaped sh -- | Property tested for quadruple of unboxed random arrays with a given shape. forAll4VShaped sh = forAll4 $ arbitraryVShaped sh -- | Property tested for 5-tuple of unboxed random arrays with a given shape. forAll5VShaped sh = forAll5 $ arbitraryVShaped sh repa-3.4.1.4/Data/Array/Repa/Eval.hs0000644000000000000000000001047612556111213015044 0ustar0000000000000000{-# LANGUAGE UndecidableInstances #-} -- | Low level interface to parallel array filling operators. module Data.Array.Repa.Eval ( -- * Element types Elt (..) -- * Parallel array filling , Target (..) , Load (..) , LoadRange (..) , fromList -- * Converting between representations , computeS, computeP, suspendedComputeP , copyS, copyP, suspendedCopyP , now -- * Chunked filling , fillLinearS , fillChunkedP , fillChunkedIOP -- * Interleaved filling , fillInterleavedP -- * Blockwise filling , fillBlock2P , fillBlock2S -- * Cursored blockwise filling , fillCursoredBlock2S , fillCursoredBlock2P -- * Chunked selection , selectChunkedS , selectChunkedP) where import Data.Array.Repa.Eval.Elt import Data.Array.Repa.Eval.Target import Data.Array.Repa.Eval.Load import Data.Array.Repa.Eval.Chunked import Data.Array.Repa.Eval.Interleaved import Data.Array.Repa.Eval.Cursored import Data.Array.Repa.Eval.Selection import Data.Array.Repa.Repr.Delayed import Data.Array.Repa.Base import Data.Array.Repa.Shape import System.IO.Unsafe -- | Parallel computation of array elements. -- -- * The source array must have a delayed representation like `D`, `C` or `P`, -- and the result a manifest representation like `U` or `F`. -- -- * If you want to copy data between manifest representations then use -- `copyP` instead. -- -- * If you want to convert a manifest array back to a delayed representation -- then use `delay` instead. -- computeP :: ( Load r1 sh e , Target r2 e, Source r2 e, Monad m) => Array r1 sh e -> m (Array r2 sh e) computeP arr = now $ suspendedComputeP arr {-# INLINE [4] computeP #-} -- | Sequential computation of array elements. computeS :: (Load r1 sh e, Target r2 e) => Array r1 sh e -> Array r2 sh e computeS arr1 = arr1 `deepSeqArray` unsafePerformIO $ do mvec2 <- newMVec (size $ extent arr1) loadS arr1 mvec2 unsafeFreezeMVec (extent arr1) mvec2 {-# INLINE [4] computeS #-} -- | Suspended parallel computation of array elements. -- -- This version creates a thunk that will evaluate the array on demand. -- If you force it when another parallel computation is already running -- then you will get a runtime warning and evaluation will be sequential. -- Use `deepSeqArray` and `now` to ensure that each array is evaluated -- before proceeding to the next one. -- -- If unsure then just use the monadic version `computeP`. This one ensures -- that each array is fully evaluated before continuing. -- suspendedComputeP :: (Load r1 sh e, Target r2 e) => Array r1 sh e -> Array r2 sh e suspendedComputeP arr1 = arr1 `deepSeqArray` unsafePerformIO $ do mvec2 <- newMVec (size $ extent arr1) loadP arr1 mvec2 unsafeFreezeMVec (extent arr1) mvec2 {-# INLINE [4] suspendedComputeP #-} -- | Parallel copying of arrays. -- -- * This is a wrapper that delays an array before calling `computeP`. -- -- * You can use it to copy manifest arrays between representations. -- copyP :: ( Source r1 e, Source r2 e , Load D sh e, Target r2 e , Monad m) => Array r1 sh e -> m (Array r2 sh e) copyP arr = now $ suspendedCopyP arr {-# INLINE [4] copyP #-} -- | Sequential copying of arrays. copyS :: ( Source r1 e , Load D sh e, Target r2 e) => Array r1 sh e -> Array r2 sh e copyS arr1 = computeS $ delay arr1 {-# INLINE [4] copyS #-} -- | Suspended parallel copy of array elements. suspendedCopyP :: ( Source r1 e , Load D sh e, Target r2 e) => Array r1 sh e -> Array r2 sh e suspendedCopyP arr1 = suspendedComputeP $ delay arr1 {-# INLINE [4] suspendedCopyP #-} -- | Monadic version of `deepSeqArray`. -- -- Forces an suspended array computation to be completed at this point -- in a monadic computation. -- -- @ do let arr2 = suspendedComputeP arr1 -- ... -- arr3 <- now $ arr2 -- ... -- @ -- now :: (Shape sh, Source r e, Monad m) => Array r sh e -> m (Array r sh e) now arr = do arr `deepSeqArray` return () return arr {-# INLINE [4] now #-} repa-3.4.1.4/Data/Array/Repa/Index.hs0000644000000000000000000001111412556111213015212 0ustar0000000000000000{-# LANGUAGE TypeOperators, FlexibleInstances, ScopedTypeVariables #-} -- | Index types. module Data.Array.Repa.Index ( -- * Index types Z (..) , (:.) (..) -- * Common dimensions. , DIM0, DIM1, DIM2, DIM3, DIM4, DIM5 , ix1, ix2, ix3, ix4, ix5) where import Data.Array.Repa.Shape import GHC.Base (quotInt, remInt) stage = "Data.Array.Repa.Index" -- | An index of dimension zero data Z = Z deriving (Show, Read, Eq, Ord) -- | Our index type, used for both shapes and indices. infixl 3 :. data tail :. head = !tail :. !head deriving (Show, Read, Eq, Ord) -- Common dimensions type DIM0 = Z type DIM1 = DIM0 :. Int type DIM2 = DIM1 :. Int type DIM3 = DIM2 :. Int type DIM4 = DIM3 :. Int type DIM5 = DIM4 :. Int -- | Helper for index construction. -- -- Use this instead of explicit constructors like @(Z :. (x :: Int))@. -- The this is sometimes needed to ensure that 'x' is constrained to -- be in @Int@. ix1 :: Int -> DIM1 ix1 x = Z :. x {-# INLINE ix1 #-} ix2 :: Int -> Int -> DIM2 ix2 y x = Z :. y :. x {-# INLINE ix2 #-} ix3 :: Int -> Int -> Int -> DIM3 ix3 z y x = Z :. z :. y :. x {-# INLINE ix3 #-} ix4 :: Int -> Int -> Int -> Int -> DIM4 ix4 a z y x = Z :. a :. z :. y :. x {-# INLINE ix4 #-} ix5 :: Int -> Int -> Int -> Int -> Int -> DIM5 ix5 b a z y x = Z :. b :. a :. z :. y :. x {-# INLINE ix5 #-} -- Shape ---------------------------------------------------------------------- instance Shape Z where {-# INLINE [1] rank #-} rank _ = 0 {-# INLINE [1] zeroDim #-} zeroDim = Z {-# INLINE [1] unitDim #-} unitDim = Z {-# INLINE [1] intersectDim #-} intersectDim _ _ = Z {-# INLINE [1] addDim #-} addDim _ _ = Z {-# INLINE [1] size #-} size _ = 1 {-# INLINE [1] sizeIsValid #-} sizeIsValid _ = True {-# INLINE [1] toIndex #-} toIndex _ _ = 0 {-# INLINE [1] fromIndex #-} fromIndex _ _ = Z {-# INLINE [1] inShapeRange #-} inShapeRange Z Z Z = True {-# NOINLINE listOfShape #-} listOfShape _ = [] {-# NOINLINE shapeOfList #-} shapeOfList [] = Z shapeOfList _ = error $ stage ++ ".fromList: non-empty list when converting to Z." {-# INLINE deepSeq #-} deepSeq Z x = x instance Shape sh => Shape (sh :. Int) where {-# INLINE [1] rank #-} rank (sh :. _) = rank sh + 1 {-# INLINE [1] zeroDim #-} zeroDim = zeroDim :. 0 {-# INLINE [1] unitDim #-} unitDim = unitDim :. 1 {-# INLINE [1] intersectDim #-} intersectDim (sh1 :. n1) (sh2 :. n2) = (intersectDim sh1 sh2 :. (min n1 n2)) {-# INLINE [1] addDim #-} addDim (sh1 :. n1) (sh2 :. n2) = addDim sh1 sh2 :. (n1 + n2) {-# INLINE [1] size #-} size (sh1 :. n) = size sh1 * n {-# INLINE [1] sizeIsValid #-} sizeIsValid (sh1 :. n) | size sh1 > 0 = n <= maxBound `div` size sh1 | otherwise = False {-# INLINE [1] toIndex #-} toIndex (sh1 :. sh2) (sh1' :. sh2') = toIndex sh1 sh1' * sh2 + sh2' {-# INLINE [1] fromIndex #-} fromIndex (ds :. d) n = fromIndex ds (n `quotInt` d) :. r where -- If we assume that the index is in range, there is no point -- in computing the remainder for the highest dimension since -- n < d must hold. This saves one remInt per element access which -- is quite a big deal. r | rank ds == 0 = n | otherwise = n `remInt` d {-# INLINE [1] inShapeRange #-} inShapeRange (zs :. z) (sh1 :. n1) (sh2 :. n2) = (n2 >= z) && (n2 < n1) && (inShapeRange zs sh1 sh2) {-# NOINLINE listOfShape #-} listOfShape (sh :. n) = n : listOfShape sh {-# NOINLINE shapeOfList #-} shapeOfList xx = case xx of [] -> error $ stage ++ ".toList: empty list when converting to (_ :. Int)" x:xs -> shapeOfList xs :. x {-# INLINE deepSeq #-} deepSeq (sh :. n) x = deepSeq sh (n `seq` x) repa-3.4.1.4/Data/Array/Repa/Shape.hs0000644000000000000000000000507412556111213015213 0ustar0000000000000000{-# LANGUAGE RankNTypes #-} -- | Class of types that can be used as array shapes and indices. module Data.Array.Repa.Shape ( Shape(..) , inShape , showShape ) where -- Shape ---------------------------------------------------------------------- -- | Class of types that can be used as array shapes and indices. class Eq sh => Shape sh where -- | Get the number of dimensions in a shape. rank :: sh -> Int -- | The shape of an array of size zero, with a particular dimensionality. zeroDim :: sh -- | The shape of an array with size one, with a particular dimensionality. unitDim :: sh -- | Compute the intersection of two shapes. intersectDim :: sh -> sh -> sh -- | Add the coordinates of two shapes componentwise addDim :: sh -> sh -> sh -- | Get the total number of elements in an array with this shape. size :: sh -> Int -- | Check whether this shape is small enough so that its flat -- indices an be represented as `Int`. If this returns `False` then your -- array is too big. Mostly used for writing QuickCheck tests. sizeIsValid :: sh -> Bool -- | Convert an index into its equivalent flat, linear, row-major version. toIndex :: sh -- ^ Shape of the array. -> sh -- ^ Index into the array. -> Int -- | Inverse of `toIndex`. fromIndex :: sh -- ^ Shape of the array. -> Int -- ^ Index into linear representation. -> sh -- | Check whether an index is within a given shape. inShapeRange :: sh -- ^ Start index for range. -> sh -- ^ Final index for range. -> sh -- ^ Index to check for. -> Bool -- | Convert a shape into its list of dimensions. listOfShape :: sh -> [Int] -- | Convert a list of dimensions to a shape shapeOfList :: [Int] -> sh -- | Ensure that a shape is completely evaluated. infixr 0 `deepSeq` deepSeq :: sh -> a -> a -- | Check whether an index is a part of a given shape. inShape :: forall sh . Shape sh => sh -- ^ Shape of the array. -> sh -- ^ Index. -> Bool {-# INLINE inShape #-} inShape sh ix = inShapeRange zeroDim sh ix -- | Nicely format a shape as a string showShape :: Shape sh => sh -> String showShape = foldr (\sh str -> str ++ " :. " ++ show sh) "Z" . listOfShape repa-3.4.1.4/Data/Array/Repa/Slice.hs0000644000000000000000000000462512556111213015213 0ustar0000000000000000{-# LANGUAGE TypeFamilies, TypeOperators, FlexibleInstances #-} -- | Index space transformation between arrays and slices. module Data.Array.Repa.Slice ( All (..) , Any (..) , FullShape , SliceShape , Slice (..)) where import Data.Array.Repa.Index import Prelude hiding (replicate, drop) -- | Select all indices at a certain position. data All = All -- | Place holder for any possible shape. data Any sh = Any -- | Map a type of the index in the full shape, to the type of the index in the slice. type family FullShape ss type instance FullShape Z = Z type instance FullShape (Any sh) = sh type instance FullShape (sl :. Int) = FullShape sl :. Int type instance FullShape (sl :. All) = FullShape sl :. Int -- | Map the type of an index in the slice, to the type of the index in the full shape. type family SliceShape ss type instance SliceShape Z = Z type instance SliceShape (Any sh) = sh type instance SliceShape (sl :. Int) = SliceShape sl type instance SliceShape (sl :. All) = SliceShape sl :. Int -- | Class of index types that can map to slices. class Slice ss where -- | Map an index of a full shape onto an index of some slice. sliceOfFull :: ss -> FullShape ss -> SliceShape ss -- | Map an index of a slice onto an index of the full shape. fullOfSlice :: ss -> SliceShape ss -> FullShape ss instance Slice Z where {-# INLINE [1] sliceOfFull #-} sliceOfFull _ _ = Z {-# INLINE [1] fullOfSlice #-} fullOfSlice _ _ = Z instance Slice (Any sh) where {-# INLINE [1] sliceOfFull #-} sliceOfFull _ sh = sh {-# INLINE [1] fullOfSlice #-} fullOfSlice _ sh = sh instance Slice sl => Slice (sl :. Int) where {-# INLINE [1] sliceOfFull #-} sliceOfFull (fsl :. _) (ssl :. _) = sliceOfFull fsl ssl {-# INLINE [1] fullOfSlice #-} fullOfSlice (fsl :. n) ssl = fullOfSlice fsl ssl :. n instance Slice sl => Slice (sl :. All) where {-# INLINE [1] sliceOfFull #-} sliceOfFull (fsl :. All) (ssl :. s) = sliceOfFull fsl ssl :. s {-# INLINE [1] fullOfSlice #-} fullOfSlice (fsl :. All) (ssl :. s) = fullOfSlice fsl ssl :. s repa-3.4.1.4/Data/Array/Repa/Stencil.hs0000644000000000000000000000110112556111213015537 0ustar0000000000000000{-# LANGUAGE MagicHash, PatternGuards, BangPatterns, TemplateHaskell, QuasiQuotes, ParallelListComp, TypeOperators, ExplicitForAll, ScopedTypeVariables #-} {-# OPTIONS -Wnot #-} -- | Efficient computation of stencil based convolutions. -- module Data.Array.Repa.Stencil ( Stencil (..) , Boundary (..) -- * Stencil creation. , makeStencil) where import Data.Array.Repa import Data.Array.Repa.Base import Data.Array.Repa.Stencil.Base import Data.Array.Repa.Stencil.Template import Data.Array.Repa.Specialised.Dim2 repa-3.4.1.4/Data/Array/Repa/Unsafe.hs0000644000000000000000000000057412556111213015374 0ustar0000000000000000 -- | Functions without sanity or bounds checks. module Data.Array.Repa.Unsafe ( unsafeBackpermute , unsafeBackpermuteDft , unsafeSlice , unsafeExtend , unsafeTraverse , unsafeTraverse2 , unsafeTraverse3 , unsafeTraverse4) where import Data.Array.Repa.Operators.IndexSpace import Data.Array.Repa.Operators.Traversal repa-3.4.1.4/Data/Array/Repa.hs0000644000000000000000000001657412556111213014162 0ustar0000000000000000{-# OPTIONS -fno-warn-unused-imports #-} -- | Repa arrays are wrappers around a linear structure that holds the element -- data. -- -- The representation tag determines what structure holds the data. -- -- Delayed Representations (functions that compute elements) -- -- * `D` -- Functions from indices to elements. -- -- * `C` -- Cursor functions. -- -- Manifest Representations (real data) -- -- * `U` -- Adaptive unboxed vectors. -- -- * `V` -- Boxed vectors. -- -- * `B` -- Strict ByteStrings. -- -- * `F` -- Foreign memory buffers. -- -- Meta Representations -- -- * `P` -- Arrays that are partitioned into several representations. -- -- * `S` -- Hints that computing this array is a small amount of work, -- so computation should be sequential rather than parallel to avoid -- scheduling overheads. -- -- * `I` -- Hints that computing this array will be an unbalanced workload, -- so computation of successive elements should be interleaved between -- the processors -- -- * `X` -- Arrays whose elements are all undefined. -- -- Array fusion is achieved via the delayed (`D`) and cursored (`C`) -- representations. At compile time, the GHC simplifier combines the functions -- contained within `D` and `C` arrays without needing to create manifest -- intermediate arrays. -- -- -- /Advice for writing fast code:/ -- -- 1. Repa does not support nested parallellism. -- This means that you cannot `map` a parallel worker function across -- an array and then call `computeP` to evaluate it, or pass a parallel -- worker to parallel reductions such as `foldP`. If you do then you will -- get a run-time warning and the code will run very slowly. -- -- 2. Arrays of type @(Array D sh a)@ or @(Array C sh a)@ are /not real arrays/. -- They are represented as functions that compute each element on demand. -- You need to use `computeS`, `computeP`, `computeUnboxedP` -- and so on to actually evaluate the elements. -- -- 3. Add @INLINE@ pragmas to all leaf-functions in your code, expecially ones -- that compute numeric results. Non-inlined lazy function calls can cost -- upwards of 50 cycles each, while each numeric operator only costs one (or less). -- Inlining leaf functions also ensures they are specialised at the appropriate -- numeric types. -- -- 4. Add bang patterns to all function arguments, and all fields of your data -- types. In a high-performance Haskell program, the cost of lazy evaluation -- can easily dominate the run time if not handled correctly. You don't want -- to rely on the strictness analyser in numeric code because if it does not -- return a perfect result then the performance of your program will be awful. -- This is less of a problem for general Haskell code, and in a different -- context relying on strictness analysis is fine. -- -- 5. Scheduling an 8-thread parallel computation can take 50us on a Linux machine. -- You should switch to sequential evaluation functions like `computeS` and -- `foldS` for small arrays in inner loops, and at the bottom of a -- divide-and-conquer algorithm. Consider using a `computeP` that evaluates -- an array defined using `computeS` or `foldS` for each element. -- -- 6. Compile the modules that use Repa with the following flags: -- @-Odph -rtsopts -threaded@ -- @-fno-liberate-case -funfolding-use-threshold1000 -funfolding-keeness-factor1000@ -- @-fllvm -optlo-O3@ -- You don't want the liberate-case transform because it tends to duplicate -- too much intermediate code, and is not needed if you use bang patterns -- as per point 4. The unfolding flags tell the inliner to not to fool around with -- heuristics, and just inline everything. If the binaries become too big then -- split the array part of your program into separate modules and only compile -- those with the unfolding flags. -- -- 7. Repa writes to the GHC eventlog at the start and end of each parallel computation. -- Use threadscope to see what your program is doing. -- -- 8. When you're sure your program works, switch to the unsafe versions -- of functions like `traverse`. These don't do bounds checks. -- -- /Changes for Repa 3.2:/ -- -- 1. Renamed some Repa 3.1 type classes to have more intuitive names: -- `Repr` -> `Source`, `Fill` -> `Load`, `Fillable` -> `Target`, `Combine` -> `Structured`. -- -- 2. Also renamed `MArray` -> `MVec` to emphasise its linear structure. -- -- 3. Made `Array` and `MVec` associated types of `Source` and `Target` respectively. -- -- 4. Added the `S` (Smallness) and `I` (Interleave) hints. -- module Data.Array.Repa ( -- * Abstract array representation module Data.Array.Repa.Shape , module Data.Array.Repa.Index , Array (..) , Source(..), (!), toList , deepSeqArrays -- * Computation , computeP, computeS , copyP, copyS -- * Concrete array representations -- ** Delayed representation , D, fromFunction, toFunction , delay -- ** Unboxed vector representation , U , computeUnboxedP, computeUnboxedS , fromListUnboxed , fromUnboxed , toUnboxed -- from Data.Array.Repa.Operators.IndexSpace ---------------- -- * Operators -- ** Index space transformations , reshape , append, (++) , extract , transpose , backpermute , backpermuteDft -- ** Slice transformations , module Data.Array.Repa.Slice , slice , extend -- from Data.Array.Repa.Operators.Mapping ------------------- -- ** Structure preserving operations , map , zipWith , (+^), (-^), (*^), (/^) , Structured(..) -- from Data.Array.Repa.Operators.Traversal ------------------ -- ** Generic traversal , traverse , traverse2 , traverse3 , traverse4 -- from Data.Array.Repa.Operators.Interleave ----------------- -- ** Interleaving , interleave2 , interleave3 , interleave4 -- from Data.Array.Repa.Operators.Reduction ------------------ -- ** Reduction , foldP, foldS , foldAllP, foldAllS , sumP, sumS , sumAllP, sumAllS , equalsP, equalsS -- from Data.Array.Repa.Operators.Selection ------------------ -- ** Selection , selectP) where import Data.Array.Repa.Base import Data.Array.Repa.Shape import Data.Array.Repa.Index import Data.Array.Repa.Slice import Data.Array.Repa.Eval import Data.Array.Repa.Repr.Delayed import Data.Array.Repa.Repr.Vector import Data.Array.Repa.Repr.Unboxed import Data.Array.Repa.Repr.ByteString import Data.Array.Repa.Repr.ForeignPtr import Data.Array.Repa.Repr.HintSmall import Data.Array.Repa.Repr.HintInterleave import Data.Array.Repa.Repr.Cursored import Data.Array.Repa.Repr.Partitioned import Data.Array.Repa.Repr.Undefined () import Data.Array.Repa.Operators.Mapping import Data.Array.Repa.Operators.Traversal import Data.Array.Repa.Operators.IndexSpace import Data.Array.Repa.Operators.Interleave import Data.Array.Repa.Operators.Reduction import Data.Array.Repa.Operators.Selection import Data.Array.Repa.Arbitrary () import Prelude () repa-3.4.1.4/Data/Array/Repa/Eval/Chunked.hs0000644000000000000000000001441212556111213016417 0ustar0000000000000000{-# LANGUAGE MagicHash #-} -- | Evaluate an array by breaking it up into linear chunks and filling -- each chunk in parallel. module Data.Array.Repa.Eval.Chunked ( fillLinearS , fillBlock2S , fillChunkedP , fillChunkedIOP) where import Data.Array.Repa.Index import Data.Array.Repa.Eval.Gang import GHC.Exts import Prelude as P ------------------------------------------------------------------------------- -- | Fill something sequentially. -- -- * The array is filled linearly from start to finish. -- fillLinearS :: Int -- ^ Number of elements. -> (Int -> a -> IO ()) -- ^ Update function to write into result buffer. -> (Int -> a) -- ^ Fn to get the value at a given index. -> IO () fillLinearS !(I# len) write getElem = fill 0# where fill !ix | 1# <- ix >=# len = return () | otherwise = do write (I# ix) (getElem (I# ix)) fill (ix +# 1#) {-# INLINE [0] fillLinearS #-} ------------------------------------------------------------------------------- -- | Fill a block in a rank-2 array, sequentially. -- -- * Blockwise filling can be more cache-efficient than linear filling for -- rank-2 arrays. -- -- * The block is filled in row major order from top to bottom. -- fillBlock2S :: (Int -> a -> IO ()) -- ^ Update function to write into result buffer. -> (DIM2 -> a) -- ^ Fn to get the value at the given index. -> Int# -- ^ Width of the whole array. -> Int# -- ^ x0 lower left corner of block to fill. -> Int# -- ^ y0 -> Int# -- ^ w0 width of block to fill -> Int# -- ^ h0 height of block to fill -> IO () fillBlock2S write getElem !imageWidth !x0 !y0 !w0 h0 = do fillBlock y0 ix0 where !x1 = x0 +# w0 !y1 = y0 +# h0 !ix0 = x0 +# (y0 *# imageWidth) {-# INLINE fillBlock #-} fillBlock !y !ix | 1# <- y >=# y1 = return () | otherwise = do fillLine1 x0 ix fillBlock (y +# 1#) (ix +# imageWidth) where {-# INLINE fillLine1 #-} fillLine1 !x !ix' | 1# <- x >=# x1 = return () | otherwise = do write (I# ix') (getElem (Z :. (I# y) :. (I# x))) fillLine1 (x +# 1#) (ix' +# 1#) {-# INLINE [0] fillBlock2S #-} ------------------------------------------------------------------------------- -- | Fill something in parallel. -- -- * The array is split into linear chunks, -- and each thread linearly fills one chunk. -- fillChunkedP :: Int -- ^ Number of elements. -> (Int -> a -> IO ()) -- ^ Update function to write into result buffer. -> (Int -> a) -- ^ Fn to get the value at a given index. -> IO () fillChunkedP !(I# len) write getElem = gangIO theGang $ \(I# thread) -> let !start = splitIx thread !end = splitIx (thread +# 1#) in fill start end where -- Decide now to split the work across the threads. -- If the length of the vector doesn't divide evenly among the threads, -- then the first few get an extra element. !(I# threads) = gangSize theGang !chunkLen = len `quotInt#` threads !chunkLeftover = len `remInt#` threads {-# INLINE splitIx #-} splitIx thread | 1# <- thread <# chunkLeftover = thread *# (chunkLen +# 1#) | otherwise = thread *# chunkLen +# chunkLeftover -- Evaluate the elements of a single chunk. {-# INLINE fill #-} fill !ix !end | 1# <- ix >=# end = return () | otherwise = do write (I# ix) (getElem (I# ix)) fill (ix +# 1#) end {-# INLINE [0] fillChunkedP #-} ------------------------------------------------------------------------------- -- | Fill something in parallel, using a separate IO action for each thread. -- -- * The array is split into linear chunks, -- and each thread linearly fills one chunk. -- fillChunkedIOP :: Int -- ^ Number of elements. -> (Int -> a -> IO ()) -- ^ Update fn to write into result buffer. -> (Int -> IO (Int -> IO a)) -- ^ Create a fn to get the value at a given index. -- The first `Int` is the thread number, so you can do some -- per-thread initialisation. -> IO () fillChunkedIOP !(I# len) write mkGetElem = gangIO theGang $ \(I# thread) -> let !start = splitIx thread !end = splitIx (thread +# 1#) in fillChunk thread start end where -- Decide now to split the work across the threads. -- If the length of the vector doesn't divide evenly among the threads, -- then the first few get an extra element. !(I# threads) = gangSize theGang !chunkLen = len `quotInt#` threads !chunkLeftover = len `remInt#` threads {-# INLINE splitIx #-} splitIx thread | 1# <- thread <# chunkLeftover = thread *# (chunkLen +# 1#) | otherwise = thread *# chunkLen +# chunkLeftover -- Given the threadId, starting and ending indices. -- Make a function to get each element for this chunk -- and call it for every index. {-# INLINE fillChunk #-} fillChunk !thread !ixStart !ixEnd = do getElem <- mkGetElem (I# thread) fill getElem ixStart ixEnd -- Call the provided getElem function for every element -- in a chunk, and feed the result to the write function. {-# INLINE fill #-} fill !getElem !ix0 !end = go ix0 where go !ix | 1# <- ix >=# end = return () | otherwise = do x <- getElem (I# ix) write (I# ix) x go (ix +# 1#) {-# INLINE [0] fillChunkedIOP #-} repa-3.4.1.4/Data/Array/Repa/Eval/Cursored.hs0000644000000000000000000002114613053313255016631 0ustar0000000000000000{-# LANGUAGE MagicHash #-} -- | Evaluate an array by dividing it into rectangular blocks and filling -- each block in parallel. module Data.Array.Repa.Eval.Cursored ( fillBlock2P , fillCursoredBlock2P , fillCursoredBlock2S ) where import Data.Array.Repa.Index import Data.Array.Repa.Shape import Data.Array.Repa.Eval.Elt import Data.Array.Repa.Eval.Gang import GHC.Base -- Non-cursored interface ----------------------------------------------------- -- | Fill a block in a rank-2 array in parallel. -- -- * Blockwise filling can be more cache-efficient than linear filling for -- rank-2 arrays. -- -- * Coordinates given are of the filled edges of the block. -- -- * We divide the block into columns, and give one column to each thread. -- -- * Each column is filled in row major order from top to bottom. -- fillBlock2P :: Elt a => (Int -> a -> IO ()) -- ^ Update function to write into result buffer. -> (DIM2 -> a) -- ^ Function to evaluate the element at an index. -> Int# -- ^ Width of the whole array. -> Int# -- ^ x0 lower left corner of block to fill -> Int# -- ^ y0 -> Int# -- ^ w0 width of block to fill. -> Int# -- ^ h0 height of block to fill. -> IO () {-# INLINE [0] fillBlock2P #-} fillBlock2P write getElem !imageWidth !x0 !y0 !w0 h0 = fillCursoredBlock2P write id addDim getElem imageWidth x0 y0 w0 h0 {- -- | Fill a block in a rank-2 array sequentially. -- -- * Blockwise filling can be more cache-efficient than linear filling for -- rank-2 arrays. -- -- * Coordinates given are of the filled edges of the block. -- -- * The block is filled in row major order from top to bottom. -- fillBlock2S :: Elt a => (Int -> a -> IO ()) -- ^ Update function to write into result buffer. -> (DIM2 -> a) -- ^ Function to evaluate the element at an index. -> Int# -- ^ Width of the whole array. -> Int# -- ^ x0 lower left corner of block to fill -> Int# -- ^ y0 -> Int# -- ^ w0 width of block to fill -> Int# -- ^ h0 height of block to filll -> IO () {-# INLINE [0] fillBlock2S #-} fillBlock2S write getElem !imageWidth !x0 !y0 !w0 !h0 = fillCursoredBlock2S write id addDim getElem imageWidth x0 y0 w0 h0 -} -- Block filling ---------------------------------------------------------------------------------- -- | Fill a block in a rank-2 array in parallel. -- -- * Blockwise filling can be more cache-efficient than linear filling for rank-2 arrays. -- -- * Using cursor functions can help to expose inter-element indexing computations to -- the GHC and LLVM optimisers. -- -- * Coordinates given are of the filled edges of the block. -- -- * We divide the block into columns, and give one column to each thread. -- -- * Each column is filled in row major order from top to bottom. -- fillCursoredBlock2P :: Elt a => (Int -> a -> IO ()) -- ^ Update function to write into result buffer. -> (DIM2 -> cursor) -- ^ Make a cursor to a particular element. -> (DIM2 -> cursor -> cursor) -- ^ Shift the cursor by an offset. -> (cursor -> a) -- ^ Function to evaluate the element at an index. -> Int# -- ^ Width of the whole array. -> Int# -- ^ x0 lower left corner of block to fill -> Int# -- ^ y0 -> Int# -- ^ w0 width of block to fill -> Int# -- ^ h0 height of block to fill -> IO () {-# INLINE [0] fillCursoredBlock2P #-} fillCursoredBlock2P write makeCursorFCB shiftCursorFCB getElemFCB !imageWidth !x0 !y0 !w0 !h0 = gangIO theGang fillBlock where !(I# threads) = gangSize theGang -- All columns have at least this many pixels. !colChunkLen = w0 `quotInt#` threads -- Extra pixels that we have to divide between some of the threads. !colChunkSlack = w0 `remInt#` threads -- Get the starting pixel of a column in the image. {-# INLINE colIx #-} colIx !ix | 1# <- ix <# colChunkSlack = x0 +# (ix *# (colChunkLen +# 1#)) | otherwise = x0 +# (ix *# colChunkLen) +# colChunkSlack -- Give one column to each thread {-# INLINE fillBlock #-} fillBlock :: Int -> IO () fillBlock !(I# ix) = let !x0' = colIx ix !w0' = colIx (ix +# 1#) -# x0' !y0' = y0 !h0' = h0 in fillCursoredBlock2S write makeCursorFCB shiftCursorFCB getElemFCB imageWidth x0' y0' w0' h0' -- | Fill a block in a rank-2 array, sequentially. -- -- * Blockwise filling can be more cache-efficient than linear filling for rank-2 arrays. -- -- * Using cursor functions can help to expose inter-element indexing computations to -- the GHC and LLVM optimisers. -- -- * Coordinates given are of the filled edges of the block. -- -- * The block is filled in row major order from top to bottom. -- fillCursoredBlock2S :: Elt a => (Int -> a -> IO ()) -- ^ Update function to write into result buffer. -> (DIM2 -> cursor) -- ^ Make a cursor to a particular element. -> (DIM2 -> cursor -> cursor) -- ^ Shift the cursor by an offset. -> (cursor -> a) -- ^ Function to evaluate an element at the given index. -> Int# -- ^ Width of the whole array. -> Int# -- ^ x0 lower left corner of block to fill. -> Int# -- ^ y0 -> Int# -- ^ w0 width of block to fill -> Int# -- ^ h0 height of block to fill -> IO () {-# INLINE [0] fillCursoredBlock2S #-} fillCursoredBlock2S write makeCursor shiftCursor getElem !imageWidth !x0 !y0 !w0 h0 = do fillBlock y0 where !x1 = x0 +# w0 !y1 = y0 +# h0 {-# INLINE fillBlock #-} fillBlock !y | 1# <- y >=# y1 = return () | otherwise = do fillLine4 x0 fillBlock (y +# 1#) where {-# INLINE fillLine4 #-} fillLine4 !x | 1# <- x +# 4# >=# x1 = fillLine1 x | otherwise = do -- Compute each source cursor based on the previous one so that -- the variable live ranges in the generated code are shorter. let srcCur0 = makeCursor (Z :. (I# y) :. (I# x)) let srcCur1 = shiftCursor (Z :. 0 :. 1) srcCur0 let srcCur2 = shiftCursor (Z :. 0 :. 1) srcCur1 let srcCur3 = shiftCursor (Z :. 0 :. 1) srcCur2 -- Get the result value for each cursor. let val0 = getElem srcCur0 let val1 = getElem srcCur1 let val2 = getElem srcCur2 let val3 = getElem srcCur3 -- Ensure that we've computed each of the result values before we -- write into the array. If the backend code generator can't tell -- our destination array doesn't alias with the source then writing -- to it can prevent the sharing of intermediate computations. touch val0 touch val1 touch val2 touch val3 -- Compute cursor into destination array. let !dstCur0 = x +# (y *# imageWidth) write (I# dstCur0) val0 write (I# (dstCur0 +# 1#)) val1 write (I# (dstCur0 +# 2#)) val2 write (I# (dstCur0 +# 3#)) val3 fillLine4 (x +# 4#) {-# INLINE fillLine1 #-} fillLine1 !x | 1# <- x >=# x1 = return () | otherwise = do let val0 = (getElem $ makeCursor (Z :. (I# y) :. (I# x))) write (I# (x +# (y *# imageWidth))) val0 fillLine1 (x +# 1#) repa-3.4.1.4/Data/Array/Repa/Eval/Interleaved.hs0000644000000000000000000000355612556111213017307 0ustar0000000000000000{-# LANGUAGE MagicHash #-} -- | Evaluate an array in parallel in an interleaved fashion, -- with each by having each processor computing alternate elements. module Data.Array.Repa.Eval.Interleaved ( fillInterleavedP) where import Data.Array.Repa.Eval.Gang import GHC.Exts import Prelude as P -- | Fill something in parallel. -- -- * The array is split into linear chunks and each thread fills one chunk. -- fillInterleavedP :: Int -- ^ Number of elements. -> (Int -> a -> IO ()) -- ^ Update function to write into result buffer. -> (Int -> a) -- ^ Fn to get the value at a given index. -> IO () {-# INLINE [0] fillInterleavedP #-} fillInterleavedP !(I# len) write getElem = gangIO theGang $ \(I# thread) -> let !step = threads !start = thread !count = elemsForThread thread in fill step start count where -- Decide now to split the work across the threads. !(I# threads) = gangSize theGang -- All threads get this many elements. !chunkLenBase = len `quotInt#` threads -- Leftover elements to divide between first few threads. !chunkLenSlack = len `remInt#` threads -- How many elements to compute with this thread. elemsForThread thread | 1# <- thread <# chunkLenSlack = chunkLenBase +# 1# | otherwise = chunkLenBase {-# INLINE elemsForThread #-} -- Evaluate the elements of a single chunk. fill !step !ix0 !count0 = go ix0 count0 where go !ix !count | 1# <- count <=# 0# = return () | otherwise = do write (I# ix) (getElem (I# ix)) go (ix +# step) (count -# 1#) {-# INLINE fill #-} repa-3.4.1.4/Data/Array/Repa/Eval/Elt.hs0000644000000000000000000001631512556111213015566 0ustar0000000000000000-- | Values that can be stored in Repa Arrays. {-# LANGUAGE MagicHash, UnboxedTuples, TypeSynonymInstances, FlexibleInstances #-} {-# LANGUAGE DefaultSignatures, FlexibleContexts, TypeOperators #-} module Data.Array.Repa.Eval.Elt (Elt (..)) where import GHC.Prim import GHC.Exts import GHC.Types import GHC.Word import GHC.Int import GHC.Generics -- Note that the touch# function is special because we can pass it boxed or unboxed -- values. The argument type has kind ?, not just * or #. -- | Element types that can be used with the blockwise filling functions. -- -- This class is mainly used to define the `touch` method. This is used internally -- in the imeplementation of Repa to prevent let-binding from being floated -- inappropriately by the GHC simplifier. Doing a `seq` sometimes isn't enough, -- because the GHC simplifier can erase these, and still move around the bindings. -- class Elt a where -- | Place a demand on a value at a particular point in an IO computation. touch :: a -> IO () default touch :: (Generic a, GElt (Rep a)) => a -> IO () touch = gtouch . from {-# INLINE touch #-} -- | Generic zero value, helpful for debugging. zero :: a default zero :: (Generic a, GElt (Rep a)) => a zero = to gzero {-# INLINE zero #-} -- | Generic one value, helpful for debugging. one :: a default one :: (Generic a, GElt (Rep a)) => a one = to gone {-# INLINE one #-} class GElt f where -- | Generic version of touch gtouch :: f a -> IO () -- | Generic version of zero gzero :: f a -- | Generic version of gone gone :: f a -- Generic Definition ---------------------------------------------------------- instance GElt U1 where gtouch _ = return () {-# INLINE gtouch #-} gzero = U1 {-# INLINE gzero #-} gone = U1 {-# INLINE gone #-} instance (GElt a, GElt b) => GElt (a :*: b) where gtouch (x :*: y) = gtouch x >> gtouch y {-# INLINE gtouch #-} gzero = gzero :*: gzero {-# INLINE gzero #-} gone = gone :*: gone {-# INLINE gone #-} instance (GElt a, GElt b) => GElt (a :+: b) where gtouch (L1 x) = gtouch x gtouch (R1 x) = gtouch x {-# INLINE gtouch #-} gzero = L1 gzero {-# INLINE gzero #-} gone = R1 gone {-# INLINE gone #-} instance (GElt a) => GElt (M1 i c a) where gtouch (M1 x) = gtouch x {-# INLINE gtouch #-} gzero = M1 gzero {-# INLINE gzero #-} gone = M1 gone {-# INLINE gone #-} instance (Elt a) => GElt (K1 i a) where gtouch (K1 x) = touch x {-# INLINE gtouch #-} gzero = K1 zero {-# INLINE gzero #-} gone = K1 one {-# INLINE gone #-} -- Bool ----------------------------------------------------------------------- instance Elt Bool where {-# INLINE touch #-} touch b = IO (\state -> case touch# b state of state' -> (# state', () #)) {-# INLINE zero #-} zero = False {-# INLINE one #-} one = True -- Floating ------------------------------------------------------------------- instance Elt Float where {-# INLINE touch #-} touch (F# f) = IO (\state -> case touch# f state of state' -> (# state', () #)) {-# INLINE zero #-} zero = 0 {-# INLINE one #-} one = 1 instance Elt Double where {-# INLINE touch #-} touch (D# d) = IO (\state -> case touch# d state of state' -> (# state', () #)) {-# INLINE zero #-} zero = 0 {-# INLINE one #-} one = 1 -- Int ------------------------------------------------------------------------ instance Elt Int where {-# INLINE touch #-} touch (I# i) = IO (\state -> case touch# i state of state' -> (# state', () #)) {-# INLINE zero #-} zero = 0 {-# INLINE one #-} one = 1 instance Elt Int8 where {-# INLINE touch #-} touch (I8# w) = IO (\state -> case touch# w state of state' -> (# state', () #)) {-# INLINE zero #-} zero = 0 {-# INLINE one #-} one = 1 instance Elt Int16 where {-# INLINE touch #-} touch (I16# w) = IO (\state -> case touch# w state of state' -> (# state', () #)) {-# INLINE zero #-} zero = 0 {-# INLINE one #-} one = 1 instance Elt Int32 where {-# INLINE touch #-} touch (I32# w) = IO (\state -> case touch# w state of state' -> (# state', () #)) {-# INLINE zero #-} zero = 0 {-# INLINE one #-} one = 1 instance Elt Int64 where {-# INLINE touch #-} touch (I64# w) = IO (\state -> case touch# w state of state' -> (# state', () #)) {-# INLINE zero #-} zero = 0 {-# INLINE one #-} one = 1 -- Word ----------------------------------------------------------------------- instance Elt Word where {-# INLINE touch #-} touch (W# i) = IO (\state -> case touch# i state of state' -> (# state', () #)) {-# INLINE zero #-} zero = 0 {-# INLINE one #-} one = 1 instance Elt Word8 where {-# INLINE touch #-} touch (W8# w) = IO (\state -> case touch# w state of state' -> (# state', () #)) {-# INLINE zero #-} zero = 0 {-# INLINE one #-} one = 1 instance Elt Word16 where {-# INLINE touch #-} touch (W16# w) = IO (\state -> case touch# w state of state' -> (# state', () #)) {-# INLINE zero #-} zero = 0 {-# INLINE one #-} one = 1 instance Elt Word32 where {-# INLINE touch #-} touch (W32# w) = IO (\state -> case touch# w state of state' -> (# state', () #)) {-# INLINE zero #-} zero = 0 {-# INLINE one #-} one = 1 instance Elt Word64 where {-# INLINE touch #-} touch (W64# w) = IO (\state -> case touch# w state of state' -> (# state', () #)) {-# INLINE zero #-} zero = 0 {-# INLINE one #-} one = 1 -- Tuple ---------------------------------------------------------------------- instance (Elt a, Elt b) => Elt (a, b) where {-# INLINE touch #-} touch (a, b) = do touch a touch b {-# INLINE zero #-} zero = (zero, zero) {-# INLINE one #-} one = (one, one) instance (Elt a, Elt b, Elt c) => Elt (a, b, c) where {-# INLINE touch #-} touch (a, b, c) = do touch a touch b touch c {-# INLINE zero #-} zero = (zero, zero, zero) {-# INLINE one #-} one = (one, one, one) instance (Elt a, Elt b, Elt c, Elt d) => Elt (a, b, c, d) where {-# INLINE touch #-} touch (a, b, c, d) = do touch a touch b touch c touch d {-# INLINE zero #-} zero = (zero, zero, zero, zero) {-# INLINE one #-} one = (one, one, one, one) instance (Elt a, Elt b, Elt c, Elt d, Elt e) => Elt (a, b, c, d, e) where {-# INLINE touch #-} touch (a, b, c, d, e) = do touch a touch b touch c touch d touch e {-# INLINE zero #-} zero = (zero, zero, zero, zero, zero) {-# INLINE one #-} one = (one, one, one, one, one) instance (Elt a, Elt b, Elt c, Elt d, Elt e, Elt f) => Elt (a, b, c, d, e, f) where {-# INLINE touch #-} touch (a, b, c, d, e, f) = do touch a touch b touch c touch d touch e touch f {-# INLINE zero #-} zero = (zero, zero, zero, zero, zero, zero) {-# INLINE one #-} one = (one, one, one, one, one, one) repa-3.4.1.4/Data/Array/Repa/Eval/Target.hs0000644000000000000000000000273112556111213016265 0ustar0000000000000000 module Data.Array.Repa.Eval.Target ( Target (..) , fromList) where import Data.Array.Repa.Base import Data.Array.Repa.Shape import Control.Monad import System.IO.Unsafe -- Target --------------------------------------------------------------------- -- | Class of manifest array representations that can be constructed in parallel. class Target r e where -- | Mutable version of the representation. data MVec r e -- | Allocate a new mutable array of the given size. newMVec :: Int -> IO (MVec r e) -- | Write an element into the mutable array. unsafeWriteMVec :: MVec r e -> Int -> e -> IO () -- | Freeze the mutable array into an immutable Repa array. unsafeFreezeMVec :: sh -> MVec r e -> IO (Array r sh e) -- | Ensure the strucure of a mutable array is fully evaluated. deepSeqMVec :: MVec r e -> a -> a -- | Ensure the array is still live at this point. -- Needed when the mutable array is a ForeignPtr with a finalizer. touchMVec :: MVec r e -> IO () -- | O(n). Construct a manifest array from a list. fromList :: (Shape sh, Target r e) => sh -> [e] -> Array r sh e fromList sh xx = unsafePerformIO $ do let len = length xx if len /= size sh then error "Data.Array.Repa.Eval.Fill.fromList: provide array shape does not match list length" else do mvec <- newMVec len zipWithM_ (unsafeWriteMVec mvec) [0..] xx unsafeFreezeMVec sh mvec repa-3.4.1.4/Data/Array/Repa/Eval/Load.hs0000644000000000000000000000253512556111213015720 0ustar0000000000000000 module Data.Array.Repa.Eval.Load ( Load (..) , LoadRange (..)) where import Data.Array.Repa.Eval.Target import Data.Array.Repa.Shape import Data.Array.Repa.Base -- Load ----------------------------------------------------------------------- -- | Compute all elements defined by an array and write them to a manifest -- target representation. -- -- Note that instances require that the source array to have a delayed -- representation such as `D` or `C`. If you want to use a pre-existing -- manifest array as the source then `delay` it first. class (Source r1 e, Shape sh) => Load r1 sh e where -- | Fill an entire array sequentially. loadS :: Target r2 e => Array r1 sh e -> MVec r2 e -> IO () -- | Fill an entire array in parallel. loadP :: Target r2 e => Array r1 sh e -> MVec r2 e -> IO () -- FillRange ------------------------------------------------------------------ -- | Compute a range of elements defined by an array and write them to a fillable -- representation. class (Source r1 e, Shape sh) => LoadRange r1 sh e where -- | Fill a range of an array sequentially. loadRangeS :: Target r2 e => Array r1 sh e -> MVec r2 e -> sh -> sh -> IO () -- | Fill a range of an array in parallel. loadRangeP :: Target r2 e => Array r1 sh e -> MVec r2 e -> sh -> sh -> IO () repa-3.4.1.4/Data/Array/Repa/Eval/Reduction.hs0000644000000000000000000001701113053313255016773 0ustar0000000000000000{-# LANGUAGE BangPatterns, MagicHash #-} module Data.Array.Repa.Eval.Reduction ( foldS, foldP , foldAllS, foldAllP) where import Data.Array.Repa.Eval.Gang import qualified Data.Vector.Unboxed as V import qualified Data.Vector.Unboxed.Mutable as M import GHC.Base ( quotInt, divInt ) import GHC.Exts -- | Sequential reduction of a multidimensional array along the innermost dimension. foldS :: V.Unbox a => M.IOVector a -- ^ vector to write elements into -> (Int# -> a) -- ^ function to get an element from the given index -> (a -> a -> a) -- ^ binary associative combination function -> a -- ^ starting value (typically an identity) -> Int# -- ^ inner dimension (length to fold over) -> IO () {-# INLINE [1] foldS #-} foldS !vec get c !r !n = iter 0# 0# where !(I# end) = M.length vec {-# INLINE iter #-} iter !sh !sz | 1# <- sh >=# end = return () | otherwise = do let !next = sz +# n M.unsafeWrite vec (I# sh) (reduceAny get c r sz next) iter (sh +# 1#) next -- | Parallel reduction of a multidimensional array along the innermost dimension. -- Each output value is computed by a single thread, with the output values -- distributed evenly amongst the available threads. foldP :: V.Unbox a => M.IOVector a -- ^ vector to write elements into -> (Int -> a) -- ^ function to get an element from the given index -> (a -> a -> a) -- ^ binary associative combination operator -> a -- ^ starting value. Must be neutral with respect -- ^ to the operator. eg @0 + a = a@. -> Int -- ^ inner dimension (length to fold over) -> IO () {-# INLINE [1] foldP #-} foldP vec f c !r (I# n) = gangIO theGang $ \(I# tid) -> fill (split tid) (split (tid +# 1#)) where !(I# threads) = gangSize theGang !(I# len) = M.length vec !step = (len +# threads -# 1#) `quotInt#` threads {-# INLINE split #-} split !ix = let !ix' = ix *# step in case len <# ix' of 0# -> ix' _ -> len {-# INLINE fill #-} fill !start !end = iter start (start *# n) where {-# INLINE iter #-} iter !sh !sz | 1# <- sh >=# end = return () | otherwise = do let !next = sz +# n M.unsafeWrite vec (I# sh) (reduce f c r (I# sz) (I# next)) iter (sh +# 1#) next -- | Sequential reduction of all the elements in an array. foldAllS :: (Int# -> a) -- ^ function to get an element from the given index -> (a -> a -> a) -- ^ binary associative combining function -> a -- ^ starting value -> Int# -- ^ number of elements -> a {-# INLINE [1] foldAllS #-} foldAllS f c !r !len = reduceAny (\i -> f i) c r 0# len -- | Parallel tree reduction of an array to a single value. Each thread takes an -- equally sized chunk of the data and computes a partial sum. The main thread -- then reduces the array of partial sums to the final result. -- -- We don't require that the initial value be a neutral element, so each thread -- computes a fold1 on its chunk of the data, and the seed element is only -- applied in the final reduction step. -- foldAllP :: V.Unbox a => (Int -> a) -- ^ function to get an element from the given index -> (a -> a -> a) -- ^ binary associative combining function -> a -- ^ starting value -> Int -- ^ number of elements -> IO a {-# INLINE [1] foldAllP #-} foldAllP f c !r !len | len == 0 = return r | otherwise = do mvec <- M.unsafeNew chunks gangIO theGang $ \tid -> fill mvec tid (split tid) (split (tid+1)) vec <- V.unsafeFreeze mvec return $! V.foldl' c r vec where !threads = gangSize theGang !step = (len + threads - 1) `quotInt` threads chunks = ((len + step - 1) `divInt` step) `min` threads {-# INLINE split #-} split !ix = len `min` (ix * step) {-# INLINE fill #-} fill !mvec !tid !start !end | start >= end = return () | otherwise = M.unsafeWrite mvec tid (reduce f c (f start) (start+1) end) -- Reduce --------------------------------------------------------------------- -- | This is the primitive reduction function. -- We use manual specialisations and rewrite rules to avoid the result -- being boxed up in the final iteration. {-# INLINE [0] reduce #-} reduce :: (Int -> a) -- ^ Get data from the array. -> (a -> a -> a) -- ^ Function to combine elements. -> a -- ^ Starting value. -> Int -- ^ Starting index in array. -> Int -- ^ Ending index in array. -> a -- ^ Result. reduce f c !r (I# start) (I# end) = reduceAny (\i -> f (I# i)) c r start end -- | Sequentially reduce values between the given indices {-# INLINE [0] reduceAny #-} reduceAny :: (Int# -> a) -> (a -> a -> a) -> a -> Int# -> Int# -> a reduceAny f c !r !start !end = iter start r where {-# INLINE iter #-} iter !i !z | 1# <- i >=# end = z | otherwise = iter (i +# 1#) (z `c` f i) {-# INLINE [0] reduceInt #-} reduceInt :: (Int# -> Int#) -> (Int# -> Int# -> Int#) -> Int# -> Int# -> Int# -> Int# reduceInt f c !r !start !end = iter start r where {-# INLINE iter #-} iter !i !z | 1# <- i >=# end = z | otherwise = iter (i +# 1#) (z `c` f i) {-# INLINE [0] reduceFloat #-} reduceFloat :: (Int# -> Float#) -> (Float# -> Float# -> Float#) -> Float# -> Int# -> Int# -> Float# reduceFloat f c !r !start !end = iter start r where {-# INLINE iter #-} iter !i !z | 1# <- i >=# end = z | otherwise = iter (i +# 1#) (z `c` f i) {-# INLINE [0] reduceDouble #-} reduceDouble :: (Int# -> Double#) -> (Double# -> Double# -> Double#) -> Double# -> Int# -> Int# -> Double# reduceDouble f c !r !start !end = iter start r where {-# INLINE iter #-} iter !i !z | 1# <- i >=# end = z | otherwise = iter (i +# 1#) (z `c` f i) {-# INLINE unboxInt #-} unboxInt :: Int -> Int# unboxInt (I# i) = i {-# INLINE unboxFloat #-} unboxFloat :: Float -> Float# unboxFloat (F# f) = f {-# INLINE unboxDouble #-} unboxDouble :: Double -> Double# unboxDouble (D# d) = d {-# RULES "reduceInt" forall (get :: Int# -> Int) f r start end . reduceAny get f r start end = I# (reduceInt (\i -> unboxInt (get i)) (\d1 d2 -> unboxInt (f (I# d1) (I# d2))) (unboxInt r) start end) #-} {-# RULES "reduceFloat" forall (get :: Int# -> Float) f r start end . reduceAny get f r start end = F# (reduceFloat (\i -> unboxFloat (get i)) (\d1 d2 -> unboxFloat (f (F# d1) (F# d2))) (unboxFloat r) start end) #-} {-# RULES "reduceDouble" forall (get :: Int# -> Double) f r start end . reduceAny get f r start end = D# (reduceDouble (\i -> unboxDouble (get i)) (\d1 d2 -> unboxDouble (f (D# d1) (D# d2))) (unboxDouble r) start end) #-} repa-3.4.1.4/Data/Array/Repa/Eval/Selection.hs0000644000000000000000000001164612556111213016771 0ustar0000000000000000{-# LANGUAGE BangPatterns, ExplicitForAll, ScopedTypeVariables, PatternGuards #-} module Data.Array.Repa.Eval.Selection (selectChunkedS, selectChunkedP) where import Data.Array.Repa.Eval.Gang import Data.Array.Repa.Shape import Data.Vector.Unboxed as V import Data.Vector.Unboxed.Mutable as VM import GHC.Base (remInt, quotInt) import Prelude as P import Control.Monad as P import Data.IORef -- | Select indices matching a predicate. -- -- * This primitive can be useful for writing filtering functions. -- selectChunkedS :: Shape sh => (sh -> a -> IO ()) -- ^ Update function to write into result. -> (sh -> Bool) -- ^ See if this predicate matches. -> (sh -> a) -- ^ .. and apply fn to the matching index -> sh -- ^ Extent of indices to apply to predicate. -> IO Int -- ^ Number of elements written to destination array. {-# INLINE selectChunkedS #-} selectChunkedS fnWrite fnMatch fnProduce !shSize = fill 0 0 where lenSrc = size shSize fill !nSrc !nDst | nSrc >= lenSrc = return nDst | ixSrc <- fromIndex shSize nSrc , fnMatch ixSrc = do fnWrite ixSrc (fnProduce ixSrc) fill (nSrc + 1) (nDst + 1) | otherwise = fill (nSrc + 1) nDst -- | Select indices matching a predicate, in parallel. -- -- * This primitive can be useful for writing filtering functions. -- -- * The array is split into linear chunks, with one chunk being given to -- each thread. -- -- * The number of elements in the result array depends on how many threads -- you're running the program with. -- selectChunkedP :: forall a . Unbox a => (Int -> Bool) -- ^ See if this predicate matches. -> (Int -> a) -- .. and apply fn to the matching index -> Int -- Extent of indices to apply to predicate. -> IO [IOVector a] -- Chunks containing array elements. {-# INLINE selectChunkedP #-} selectChunkedP fnMatch fnProduce !len = do -- Make IORefs that the threads will write their result chunks to. -- We start with a chunk size proportial to the number of threads we have, -- but the threads themselves can grow the chunks if they run out of space. refs <- P.replicateM threads $ do vec <- VM.new $ len `div` threads newIORef vec -- Fire off a thread to fill each chunk. gangIO theGang $ \thread -> makeChunk (refs !! thread) (splitIx thread) (splitIx (thread + 1) - 1) -- Read the result chunks back from the IORefs. -- If a thread had to grow a chunk, then these might not be the same ones -- we created back in the first step. P.mapM readIORef refs where -- See how many threads we have available. !threads = gangSize theGang !chunkLen = len `quotInt` threads !chunkLeftover = len `remInt` threads -- Decide where to split the source array. {-# INLINE splitIx #-} splitIx thread | thread < chunkLeftover = thread * (chunkLen + 1) | otherwise = thread * chunkLen + chunkLeftover -- Fill the given chunk with elements selected from this range of indices. makeChunk :: IORef (IOVector a) -> Int -> Int -> IO () makeChunk !ref !ixSrc !ixSrcEnd | ixSrc > ixSrcEnd = do vecDst <- VM.new 0 writeIORef ref vecDst | otherwise = do vecDst <- VM.new (len `div` threads) vecDst' <- fillChunk ixSrc ixSrcEnd vecDst 0 (VM.length vecDst) writeIORef ref vecDst' -- The main filling loop. fillChunk :: Int -> Int -> IOVector a -> Int -> Int -> IO (IOVector a) fillChunk !ixSrc !ixSrcEnd !vecDst !ixDst !ixDstLen -- If we've finished selecting elements, then slice the vector down -- so it doesn't have any empty space at the end. | ixSrc > ixSrcEnd = return $ VM.slice 0 ixDst vecDst -- If we've run out of space in the chunk then grow it some more. | ixDst >= ixDstLen = do let ixDstLen' = (VM.length vecDst + 1) * 2 vecDst' <- VM.grow vecDst ixDstLen' fillChunk ixSrc ixSrcEnd vecDst' ixDst ixDstLen' -- We've got a maching element, so add it to the chunk. | fnMatch ixSrc = do VM.unsafeWrite vecDst ixDst (fnProduce ixSrc) fillChunk (ixSrc + 1) ixSrcEnd vecDst (ixDst + 1) ixDstLen -- The element doesnt match, so keep going. | otherwise = fillChunk (ixSrc + 1) ixSrcEnd vecDst ixDst ixDstLen repa-3.4.1.4/Data/Array/Repa/Stencil/Base.hs0000644000000000000000000000343512556111213016425 0ustar0000000000000000 -- | Basic definitions for stencil handling. module Data.Array.Repa.Stencil.Base ( Boundary (..) , Stencil (..) , makeStencil, makeStencil2) where import Data.Array.Repa.Index -- | How to handle the case when the stencil lies partly outside the array. data Boundary a -- | Use a fixed value for border regions. = BoundFixed !a -- | Treat points outside the array as having a constant value. | BoundConst !a -- | Clamp points outside to the same value as the edge pixel. | BoundClamp deriving (Show) -- | Represents a convolution stencil that we can apply to array. -- Only statically known stencils are supported right now. data Stencil sh a -- | Static stencils are used when the coefficients are fixed, -- and known at compile time. = StencilStatic { stencilExtent :: !sh , stencilZero :: !a , stencilAcc :: !(sh -> a -> a -> a) } -- | Make a stencil from a function yielding coefficients at each index. makeStencil :: Num a => sh -- ^ Extent of stencil. -> (sh -> Maybe a) -- ^ Get the coefficient at this index. -> Stencil sh a {-# INLINE makeStencil #-} makeStencil ex getCoeff = StencilStatic ex 0 $ \ix val acc -> case getCoeff ix of Nothing -> acc Just coeff -> acc + val * coeff -- | Wrapper for `makeStencil` that requires a DIM2 stencil. makeStencil2 :: Num a => Int -> Int -- ^ extent of stencil -> (DIM2 -> Maybe a) -- ^ Get the coefficient at this index. -> Stencil DIM2 a {-# INLINE makeStencil2 #-} makeStencil2 height width getCoeff = makeStencil (Z :. height :. width) getCoeff repa-3.4.1.4/Data/Array/Repa/Stencil/Template.hs0000644000000000000000000000640513053313255017331 0ustar0000000000000000{-# LANGUAGE TemplateHaskell, QuasiQuotes, ParallelListComp #-} -- | Template module Data.Array.Repa.Stencil.Template (stencil2) where import Data.Array.Repa.Index import Language.Haskell.TH import Language.Haskell.TH.Quote import qualified Data.List as List -- | QuasiQuoter for producing a static stencil defintion. -- -- A definition like -- -- @ -- [stencil2| 0 1 0 -- 1 0 1 -- 0 1 0 |] -- @ -- -- Is converted to: -- -- @ -- makeStencil2 (Z:.3:.3) -- (\\ix -> case ix of -- Z :. -1 :. 0 -> Just 1 -- Z :. 0 :. -1 -> Just 1 -- Z :. 0 :. 1 -> Just 1 -- Z :. 1 :. 0 -> Just 1 -- _ -> Nothing) -- @ -- stencil2 :: QuasiQuoter stencil2 = QuasiQuoter { quoteExp = parseStencil2 , quotePat = undefined , quoteType = undefined , quoteDec = undefined } -- | Parse a stencil definition. -- TODO: make this more robust. parseStencil2 :: String -> Q Exp parseStencil2 str = let -- Determine the extent of the stencil based on the layout. -- TODO: make this more robust. In particular, handle blank -- lines at the start of the definition. line1 : _ = lines str sizeX = fromIntegral $ length $ lines str sizeY = fromIntegral $ length $ words line1 -- TODO: this probably doesn't work for stencils who's extents are even. minX = negate (sizeX `div` 2) minY = negate (sizeY `div` 2) maxX = sizeX `div` 2 maxY = sizeY `div` 2 -- List of coefficients for the stencil. coeffs = (List.map read $ words str) :: [Integer] in makeStencil2' sizeX sizeY $ filter (\(_, _, v) -> v /= 0) $ [ (fromIntegral y, fromIntegral x, fromIntegral v) | y <- [minX, minX + (1 :: Integer) .. maxX] , x <- [minY, minY + (1 :: Integer) .. maxY] | v <- coeffs ] makeStencil2' :: Integer -> Integer -> [(Integer, Integer, Integer)] -> Q Exp makeStencil2' sizeX sizeY coeffs = do ix' <- newName "ix" z' <- [p| Z |] coeffs' <- newName "coeffs" let fnCoeffs = LamE [VarP ix'] $ CaseE (VarE (mkName "ix")) $ [ Match (InfixP (InfixP z' (mkName ":.") (LitP (IntegerL oy))) (mkName ":.") (LitP (IntegerL ox))) (NormalB $ ConE (mkName "Just") `AppE` LitE (IntegerL v)) [] | (oy, ox, v) <- coeffs ] ++ [Match WildP (NormalB $ ConE (mkName "Nothing")) []] return $ AppE (VarE (mkName "makeStencil2") `AppE` (LitE (IntegerL sizeX)) `AppE` (LitE (IntegerL sizeY))) $ LetE [ PragmaD (InlineP (mkName "coeffs") Inline FunLike (BeforePhase 0)) , ValD (VarP coeffs') (NormalB fnCoeffs) [] ] (VarE (mkName "coeffs")) repa-3.4.1.4/Data/Array/Repa/Stencil/Partition.hs0000644000000000000000000000432712556111213017525 0ustar0000000000000000 module Data.Array.Repa.Stencil.Partition ( Offset (..) , Size (..) , Region (..) , partitionForStencil) where -- | An offset in the 2d plane. data Offset = Offset !Int !Int -- | Size of a region in the 2d plane. data Size = Size !Int !Int -- | A region in the 2d plane. data Region = Region { regionX :: !Int , regionY :: !Int , regionWidth :: !Int , regionHeight :: !Int } deriving Show -- | Create a new region of the given size. regionOfSize :: Size -> Region regionOfSize (Size w h) = Region 0 0 w h {-# INLINE regionOfSize #-} -- | Offset a region. offsetRegion :: Offset -> Region -> Region offsetRegion (Offset x y) (Region x0 y0 w h) = Region (x0 + x) (y0 + y) w h {-# INLINE offsetRegion #-} -- | Partition a region into inner and border regions for the given stencil. partitionForStencil :: Size -- ^ Size of array -> Size -- ^ Size of stencil -> Offset -- ^ Focus of stencil -> [Region] partitionForStencil (Size arrW arrH) (Size krnW krnH) (Offset focX focY) = let gapNorth = focY gapSouth = krnH - focY - 1 gapWest = focX gapEast = krnW - focX - 1 innerW = arrW - gapWest - gapEast innerH = arrH - gapNorth - gapSouth regionInner = offsetRegion (Offset gapWest gapNorth) $ regionOfSize (Size innerW innerH) regionNorth = regionOfSize (Size arrW gapNorth) regionSouth = offsetRegion (Offset 0 (gapNorth + innerH)) $ regionOfSize (Size arrW gapSouth) regionWest = offsetRegion (Offset 0 gapNorth) $ regionOfSize (Size gapWest innerH) regionEast = offsetRegion (Offset (gapWest + innerW) gapNorth) $ regionOfSize (Size gapEast innerH) in [regionInner, regionNorth, regionSouth, regionWest, regionEast] {-# INLINE partitionForStencil #-} repa-3.4.1.4/Data/Array/Repa/Base.hs0000644000000000000000000000704012556111213015020 0ustar0000000000000000 module Data.Array.Repa.Base ( Source (..), (!), toList , deepSeqArrays) where import Data.Array.Repa.Shape -- Source ----------------------------------------------------------------------- -- | Class of array representations that we can read elements from. class Source r e where -- Arrays with a representation tag, shape, and element type. -- Use one of the type tags like `D`, `U` and so on for @r@, -- one of `DIM1`, `DIM2` ... for @sh@. data Array r sh e -- | O(1). Take the extent (size) of an array. extent :: Shape sh => Array r sh e -> sh -- | O(1). Shape polymorphic indexing. index, unsafeIndex :: Shape sh => Array r sh e -> sh -> e {-# INLINE index #-} index arr ix = arr `linearIndex` toIndex (extent arr) ix {-# INLINE unsafeIndex #-} unsafeIndex arr ix = arr `unsafeLinearIndex` toIndex (extent arr) ix -- | O(1). Linear indexing into underlying, row-major, array representation. linearIndex, unsafeLinearIndex :: Shape sh => Array r sh e -> Int -> e {-# INLINE unsafeLinearIndex #-} unsafeLinearIndex = linearIndex -- | Ensure an array's data structure is fully evaluated. deepSeqArray :: Shape sh =>Array r sh e -> b -> b -- | O(1). Alias for `index` (!) :: Shape sh => Source r e => Array r sh e -> sh -> e (!) = index -- | O(n). Convert an array to a list. toList :: Shape sh => Source r e => Array r sh e -> [e] {-# INLINE toList #-} toList arr = go 0 where len = size (extent arr) go ix | ix == len = [] | otherwise = unsafeLinearIndex arr ix : go (ix + 1) -- | Apply `deepSeqArray` to up to four arrays. --- -- NOTE: this shouldn't be needed anymore, as we've made all the shape fields strict. -- -- The implementation of this function has been hand-unwound to work for up to -- four arrays. Putting more in the list yields `error`. -- -- For functions that are /not/ marked as INLINE, you should apply `deepSeqArrays` -- to argument arrays before using them in a @compute@ or @copy@ expression. -- For example: -- -- @ processArrays -- :: Monad m -- => Array U DIM2 Int -> Array U DIM2 Int -- -> m (Array U DIM2 Int) -- processArrays arr1 arr2 -- = [arr1, arr2] \`deepSeqArrays\` -- do arr3 <- computeP $ map f arr1 -- arr4 <- computeP $ zipWith g arr3 arr2 -- return arr4 -- @ -- -- Applying `deepSeqArrays` tells the GHC simplifier that it's ok to unbox -- size fields and the pointers to the underlying array data at the start -- of the function. Without this, they may be unboxed repeatedly when -- computing elements in the result arrays, which will make your program slow. -- -- If you INLINE @processArrays@ into the function that computes @arr1@ and @arr2@, -- then you don't need to apply `deepSeqArrays`. This is because a pointer -- to the underlying data will be passed directly to the consumers and never boxed. -- -- If you're not sure, then just follow the example code above. -- deepSeqArrays :: Shape sh => Source r e => [Array r sh e] -> b -> b {-# INLINE deepSeqArrays #-} deepSeqArrays arrs x = case arrs of [] -> x [a1] -> a1 `deepSeqArray` x [a1, a2] -> a1 `deepSeqArray` a2 `deepSeqArray` x [a1, a2, a3] -> a1 `deepSeqArray` a2 `deepSeqArray` a3 `deepSeqArray` x [a1, a2, a3, a4] -> a1 `deepSeqArray` a2 `deepSeqArray` a3 `deepSeqArray` a4 `deepSeqArray` x _ -> error "deepSeqArrays: only works for up to four arrays" repa-3.4.1.4/LICENSE0000644000000000000000000000260512556111213012003 0ustar0000000000000000Copyright (c) 2001-2014, The Data Parallel Haskell Team Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - The names of the copyright holders may not be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. repa-3.4.1.4/Setup.hs0000644000000000000000000000005612556111213012430 0ustar0000000000000000import Distribution.Simple main = defaultMain repa-3.4.1.4/repa.cabal0000644000000000000000000000617713354717777012751 0ustar0000000000000000Name: repa Version: 3.4.1.4 License: BSD3 License-file: LICENSE Author: The DPH Team Maintainer: Ben Lippmeier Build-Type: Simple Cabal-Version: >=1.6 Stability: experimental Category: Data Structures Homepage: http://repa.ouroborus.net Bug-reports: http://groups.google.com/d/forum/haskell-repa Description: Repa provides high performance, regular, multi-dimensional, shape polymorphic parallel arrays. All numeric data is stored unboxed. Functions written with the Repa combinators are automatically parallel provided you supply +RTS -Nwhatever on the command line when running the program. Synopsis: High performance, regular, shape polymorphic parallel arrays. Library Build-Depends: base >= 4.8 && < 4.13 , template-haskell , ghc-prim , vector >= 0.11 && < 0.13 , bytestring == 0.10.* , QuickCheck >= 2.8 && < 2.12 ghc-options: -Wall -O2 -fmax-simplifier-iterations=20 -fsimplifier-phases=3 -funbox-strict-fields -fno-warn-missing-signatures if impl(ghc >= 8.0) ghc-options: -fno-cpr-anal else ghc-options: -fcpr-off extensions: NoMonomorphismRestriction ExplicitForAll EmptyDataDecls BangPatterns TypeFamilies MultiParamTypeClasses FlexibleInstances FlexibleContexts StandaloneDeriving ScopedTypeVariables PatternGuards ExistentialQuantification Exposed-modules: Data.Array.Repa.Eval.Gang Data.Array.Repa.Operators.IndexSpace Data.Array.Repa.Operators.Interleave Data.Array.Repa.Operators.Mapping Data.Array.Repa.Operators.Reduction Data.Array.Repa.Operators.Selection Data.Array.Repa.Operators.Traversal Data.Array.Repa.Repr.ByteString Data.Array.Repa.Repr.Cursored Data.Array.Repa.Repr.Delayed Data.Array.Repa.Repr.ForeignPtr Data.Array.Repa.Repr.HintSmall Data.Array.Repa.Repr.HintInterleave Data.Array.Repa.Repr.Partitioned Data.Array.Repa.Repr.Unboxed Data.Array.Repa.Repr.Undefined Data.Array.Repa.Repr.Vector Data.Array.Repa.Specialised.Dim2 Data.Array.Repa.Stencil.Dim2 Data.Array.Repa.Arbitrary Data.Array.Repa.Eval Data.Array.Repa.Index Data.Array.Repa.Shape Data.Array.Repa.Slice Data.Array.Repa.Stencil Data.Array.Repa.Unsafe Data.Array.Repa Other-modules: Data.Array.Repa.Eval.Chunked Data.Array.Repa.Eval.Cursored Data.Array.Repa.Eval.Interleaved Data.Array.Repa.Eval.Elt Data.Array.Repa.Eval.Target Data.Array.Repa.Eval.Load Data.Array.Repa.Eval.Reduction Data.Array.Repa.Eval.Selection Data.Array.Repa.Stencil.Base Data.Array.Repa.Stencil.Template Data.Array.Repa.Stencil.Partition Data.Array.Repa.Base -- vim: nospell