os-string-2.0.6/0000755000000000000000000000000007346545000011635 5ustar0000000000000000os-string-2.0.6/LICENSE0000644000000000000000000000276407346545000012653 0ustar0000000000000000Copyright Neil Mitchell 2005-2020. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of Neil Mitchell nor the names of other contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. os-string-2.0.6/README.md0000644000000000000000000000041707346545000013116 0ustar0000000000000000# OsString [![Hackage version](https://img.shields.io/hackage/v/os-string.svg?label=Hackage)](https://hackage.haskell.org/package/os-string) This package provides functionality for manipulating @OsString@ values, and is shipped with . os-string-2.0.6/Setup.hs0000644000000000000000000000005607346545000013272 0ustar0000000000000000import Distribution.Simple main = defaultMain os-string-2.0.6/System/0000755000000000000000000000000007346545000013121 5ustar0000000000000000os-string-2.0.6/System/OsString.hs0000644000000000000000000001137007346545000015227 0ustar0000000000000000-- | -- Module : OsString -- Copyright : © 2021 Julian Ospald -- License : MIT -- -- Maintainer : Julian Ospald -- Stability : experimental -- Portability : portable -- -- An implementation of platform specific short 'OsString', which is: -- -- 1. on windows wide char bytes (@[Word16]@) -- 2. on unix char bytes (@[Word8]@) -- -- It captures the notion of syscall specific encoding (or the lack thereof) to avoid roundtrip issues -- and memory fragmentation by using unpinned byte arrays. Bytes are not touched or interpreted. module System.OsString ( -- * String types OsString -- * OsString construction , encodeUtf , unsafeEncodeUtf , encodeWith , encodeFS , encodeLE , osstr , empty , singleton , pack -- * OsString deconstruction , decodeUtf , decodeWith , decodeFS , decodeLE , unpack -- * Word types , OsChar -- * Word construction , unsafeFromChar -- * Word deconstruction , toChar -- * Basic interface , snoc , cons , last , tail , uncons , head , init , unsnoc , null , length -- * Transforming OsString , map , reverse , intercalate -- * Reducing OsStrings (folds) , foldl , foldl' , foldl1 , foldl1' , foldr , foldr' , foldr1 , foldr1' -- * Special folds , all , any , concat -- * Generating and unfolding OsStrings , replicate , unfoldr , unfoldrN -- * Substrings -- ** Breaking strings , take , takeEnd , takeWhileEnd , takeWhile , drop , dropEnd , dropWhileEnd , dropWhile , break , breakEnd , span , spanEnd , splitAt , split , splitWith , stripSuffix , stripPrefix -- * Predicates , isInfixOf , isPrefixOf , isSuffixOf -- ** Search for arbitrary susbstrings , breakSubstring -- * Searching OsStrings -- ** Searching by equality , elem , find , filter , partition -- * Indexing OsStrings , index , indexMaybe , (!?) , elemIndex , elemIndices , count , findIndex , findIndices -- * Coercions , coercionToPlatformTypes ) where import System.OsString.Internal ( unsafeFromChar , toChar , encodeUtf , unsafeEncodeUtf , encodeWith , encodeLE , osstr , pack , empty , singleton , decodeUtf , decodeWith , decodeLE , unpack , snoc , cons , last , tail , uncons , head , init , unsnoc , null , length , map , reverse , intercalate , foldl , foldl' , foldl1 , foldl1' , foldr , foldr' , foldr1 , foldr1' , all , any , concat , replicate , unfoldr , unfoldrN , take , takeEnd , takeWhileEnd , takeWhile , drop , dropEnd , dropWhileEnd , dropWhile , break , breakEnd , span , spanEnd , splitAt , split , splitWith , stripSuffix , stripPrefix , isInfixOf , isPrefixOf , isSuffixOf , breakSubstring , elem , find , filter , partition , index , indexMaybe , (!?) , elemIndex , elemIndices , count , findIndex , findIndices ) import qualified System.OsString.Internal as SOI import System.OsString.Internal.Types ( OsString, OsChar, coercionToPlatformTypes ) import Prelude (String, IO) {-# DEPRECATED encodeFS "Use System.OsPath.encodeFS from filepath" #-} -- | Like 'encodeUtf', except this mimics the behavior of the base library when doing filesystem -- operations (usually filepaths), which is: -- -- 1. on unix, uses shady PEP 383 style encoding (based on the current locale, -- but PEP 383 only works properly on UTF-8 encodings, so good luck) -- 2. on windows does permissive UTF-16 encoding, where coding errors generate -- Chars in the surrogate range -- -- Looking up the locale requires IO. If you're not worried about calls -- to 'setFileSystemEncoding', then 'unsafePerformIO' may be feasible (make sure -- to deeply evaluate the result to catch exceptions). encodeFS :: String -> IO OsString encodeFS = SOI.encodeFS {-# DEPRECATED decodeFS "Use System.OsPath.encodeFS from filepath" #-} -- | Like 'decodeUtf', except this mimics the behavior of the base library when doing filesystem -- operations (usually filepaths), which is: -- -- 1. on unix, uses shady PEP 383 style encoding (based on the current locale, -- but PEP 383 only works properly on UTF-8 encodings, so good luck) -- 2. on windows does permissive UTF-16 encoding, where coding errors generate -- Chars in the surrogate range -- -- Looking up the locale requires IO. If you're not worried about calls -- to 'setFileSystemEncoding', then 'unsafePerformIO' may be feasible (make sure -- to deeply evaluate the result to catch exceptions). decodeFS :: OsString -> IO String decodeFS = SOI.decodeFS os-string-2.0.6/System/OsString/0000755000000000000000000000000007346545000014671 5ustar0000000000000000os-string-2.0.6/System/OsString/Common.hs0000644000000000000000000010212207346545000016453 0ustar0000000000000000{- HLINT ignore "Unused LANGUAGE pragma" -} {-# LANGUAGE TypeApplications #-} {-# LANGUAGE PatternSynonyms #-} {-# LANGUAGE RankNTypes #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TemplateHaskellQuotes #-} {-# LANGUAGE ViewPatterns #-} {-# OPTIONS_GHC -Wno-unused-imports #-} -- This template expects CPP definitions for: -- MODULE_NAME = Posix | Windows -- IS_WINDOWS = False | True -- #if defined(WINDOWS) #define WINDOWS_DOC #else #define POSIX_DOC #endif module System.OsString.MODULE_NAME ( -- * Types #ifdef WINDOWS WindowsString , WindowsChar #else PosixString , PosixChar #endif -- * String construction , encodeUtf , unsafeEncodeUtf , encodeWith , encodeFS , encodeLE #ifdef WINDOWS , fromString #endif , fromBytes #ifndef WINDOWS , fromBytestring #endif , pstr , singleton , empty , pack -- * String deconstruction , decodeUtf , decodeWith , decodeFS , decodeLE , unpack -- * Word construction , unsafeFromChar -- * Word deconstruction , toChar -- * Basic interface , snoc , cons , last , tail , uncons , head , init , unsnoc , null , length -- * Transforming OsString , map , reverse , intercalate -- * Reducing OsStrings (folds) , foldl , foldl' , foldl1 , foldl1' , foldr , foldr' , foldr1 , foldr1' -- ** Special folds , all , any , concat -- ** Generating and unfolding OsStrings , replicate , unfoldr , unfoldrN -- * Substrings -- ** Breaking strings , take , takeEnd , takeWhileEnd , takeWhile , drop , dropEnd , dropWhileEnd , dropWhile , break , breakEnd , span , spanEnd , splitAt , split , splitWith , stripSuffix , stripPrefix -- * Predicates , isInfixOf , isPrefixOf , isSuffixOf -- ** Search for arbitrary susbstrings , breakSubstring -- * Searching OsStrings -- ** Searching by equality , elem , find , filter , partition -- * Indexing OsStrings , index , indexMaybe , (!?) , elemIndex , elemIndices , count , findIndex , findIndices ) where import System.OsString.Internal.Types ( #ifdef WINDOWS WindowsString(..), WindowsChar(..) #else PosixString(..), PosixChar(..) #endif ) import Data.Coerce import Data.Char import Control.Monad.Catch ( MonadThrow, throwM ) import Data.ByteString.Internal ( ByteString ) import Control.Exception ( SomeException, try, displayException ) import Control.DeepSeq ( force ) import Data.Bifunctor ( first ) import GHC.IO ( evaluate, unsafePerformIO ) import qualified GHC.Foreign as GHC import Language.Haskell.TH.Quote ( QuasiQuoter (..) ) import Language.Haskell.TH.Syntax ( Lift (..), lift ) import GHC.IO.Encoding.Failure ( CodingFailureMode(..) ) #ifdef WINDOWS import System.OsString.Encoding import System.IO ( TextEncoding, utf16le ) import GHC.IO.Encoding.UTF16 ( mkUTF16le ) import qualified System.OsString.Data.ByteString.Short.Word16 as BSP #else import System.OsString.Encoding import System.IO ( TextEncoding, utf8 ) import GHC.IO.Encoding.UTF8 ( mkUTF8 ) import qualified System.OsString.Data.ByteString.Short as BSP #endif import GHC.Stack (HasCallStack) import Prelude (Bool(..), Int, Maybe(..), IO, String, Either(..), fmap, ($), (.), mconcat, fromEnum, fromInteger, mempty, fromIntegral, fail, (<$>), show, either, pure, const, flip, error, id) import Data.Bifunctor ( bimap ) import qualified System.OsString.Data.ByteString.Short.Word16 as BS16 import qualified System.OsString.Data.ByteString.Short as BS8 #ifdef WINDOWS_DOC -- | Partial unicode friendly encoding. -- -- This encodes as UTF16-LE (strictly), which is a pretty good guess. -- -- Throws an 'EncodingException' if encoding fails. If the input does not -- contain surrogate chars, you can use @unsafeEncodeUtf@. #else -- | Partial unicode friendly encoding. -- -- This encodes as UTF8 (strictly), which is a good guess. -- -- Throws an 'EncodingException' if encoding fails. If the input does not -- contain surrogate chars, you can use 'unsafeEncodeUtf'. #endif encodeUtf :: MonadThrow m => String -> m PLATFORM_STRING #ifdef WINDOWS encodeUtf = either throwM pure . encodeWith utf16le #else encodeUtf = either throwM pure . encodeWith utf8 #endif -- | Unsafe unicode friendly encoding. -- -- Like 'encodeUtf', except it crashes when the input contains -- surrogate chars. For sanitized input, this can be useful. unsafeEncodeUtf :: HasCallStack => String -> PLATFORM_STRING #ifdef WINDOWS unsafeEncodeUtf = either (error . displayException) id . encodeWith utf16le #else unsafeEncodeUtf = either (error . displayException) id . encodeWith utf8 #endif #ifdef WINDOWS -- | Encode a 'String' with the specified encoding. -- -- Note: We expect a "wide char" encoding (e.g. UCS-2 or UTF-16). Anything -- that works with @Word16@ boundaries. Picking an incompatible encoding may crash -- filepath operations. encodeWith :: TextEncoding -- ^ text encoding (wide char) -> String -> Either EncodingException PLATFORM_STRING encodeWith enc str = unsafePerformIO $ do r <- try @SomeException $ GHC.withCStringLen enc str $ \cstr -> WindowsString <$> BS8.packCStringLen cstr evaluate $ force $ first (flip EncodingError Nothing . displayException) r #else -- | Encode a 'String' with the specified encoding. encodeWith :: TextEncoding -> String -> Either EncodingException PLATFORM_STRING encodeWith enc str = unsafePerformIO $ do r <- try @SomeException $ GHC.withCStringLen enc str $ \cstr -> PosixString <$> BSP.packCStringLen cstr evaluate $ force $ first (flip EncodingError Nothing . displayException) r #endif #ifdef WINDOWS_DOC -- | This mimics the behavior of the base library when doing filesystem -- operations (usually filepaths), which does permissive UTF-16 encoding, where coding errors generate -- Chars in the surrogate range. -- -- The reason this is in IO is because it unifies with the Posix counterpart, -- which does require IO. This is safe to 'unsafePerformIO'/'unsafeDupablePerformIO'. #else -- | This mimics the behavior of the base library when doing filesystem -- operations (usually filepaths), which uses shady PEP 383 style encoding (based on the current locale, -- but PEP 383 only works properly on UTF-8 encodings, so good luck). -- -- Looking up the locale requires IO. If you're not worried about calls -- to 'setFileSystemEncoding', then 'unsafePerformIO' may be feasible (make sure -- to deeply evaluate the result to catch exceptions). #endif encodeFS :: String -> IO PLATFORM_STRING #ifdef WINDOWS {-# DEPRECATED encodeFS "Use System.OsPath.Windows.encodeFS from filepath" #-} encodeFS = fmap WindowsString . encodeWithBaseWindows #else {-# DEPRECATED encodeFS "Use System.OsPath.Posix.encodeFS from filepath" #-} encodeFS = fmap PosixString . encodeWithBasePosix #endif #ifdef WINDOWS_DOC -- | This mimics the behavior of the base library when doing string -- operations, which does permissive UTF-16 encoding, where coding errors generate -- Chars in the surrogate range. -- -- The reason this is in IO is because it unifies with the Posix counterpart, -- which does require IO. This is safe to 'unsafePerformIO'/'unsafeDupablePerformIO'. #else -- | This mimics the behavior of the base library when doing string -- operations, which uses 'getLocaleEncoding'. -- -- Looking up the locale requires IO. If you're not worried about calls -- to 'setFileSystemEncoding', then 'unsafePerformIO' may be feasible (make sure -- to deeply evaluate the result to catch exceptions). #endif encodeLE :: String -> IO PLATFORM_STRING #ifdef WINDOWS encodeLE = fmap WindowsString . encodeWithBaseWindows #else encodeLE = fmap PosixString . encodeWithBasePosix' #endif #ifdef WINDOWS -- | Like 'encodeLE but not in IO. -- -- 'encodeLE' was designed to have a symmetric type signature -- on unix and windows, but morally the function has no IO effects on windows, -- so we provide this variant without breaking existing API. -- -- On windows, 'encodeLE' is equivalent to 'encodeFS'. -- -- This function does not exist on unix. -- -- @since 2.0.6 fromString :: String -> WindowsString fromString = unsafePerformIO . fmap WindowsString . encodeWithBaseWindows #endif #ifdef WINDOWS_DOC -- | Partial unicode friendly decoding. -- -- This decodes as UTF16-LE (strictly), which is a pretty good. -- -- Throws a 'EncodingException' if decoding fails. #else -- | Partial unicode friendly decoding. -- -- This decodes as UTF8 (strictly), which is a good guess. Note that -- filenames on unix are encoding agnostic char arrays. -- -- Throws a 'EncodingException' if decoding fails. #endif decodeUtf :: MonadThrow m => PLATFORM_STRING -> m String #ifdef WINDOWS decodeUtf = either throwM pure . decodeWith utf16le #else decodeUtf = either throwM pure . decodeWith utf8 #endif #ifdef WINDOWS -- | Decode a 'WindowsString' with the specified encoding. -- -- The String is forced into memory to catch all exceptions. decodeWith :: TextEncoding -> PLATFORM_STRING -> Either EncodingException String decodeWith winEnc (WindowsString ba) = unsafePerformIO $ do r <- try @SomeException $ BS8.useAsCStringLen ba $ \fp -> GHC.peekCStringLen winEnc fp evaluate $ force $ first (flip EncodingError Nothing . displayException) r #else -- | Decode a 'PosixString' with the specified encoding. -- -- The String is forced into memory to catch all exceptions. decodeWith :: TextEncoding -> PLATFORM_STRING -> Either EncodingException String decodeWith unixEnc (PosixString ba) = unsafePerformIO $ do r <- try @SomeException $ BSP.useAsCStringLen ba $ \fp -> GHC.peekCStringLen unixEnc fp evaluate $ force $ first (flip EncodingError Nothing . displayException) r #endif #ifdef WINDOWS_DOC -- | Like 'decodeUtf', except this mimics the behavior of the base library when doing filesystem -- operations, which does permissive UTF-16 encoding, where coding errors generate -- Chars in the surrogate range. -- -- The reason this is in IO is because it unifies with the Posix counterpart, -- which does require IO. 'unsafePerformIO'/'unsafeDupablePerformIO' are safe, however. #else -- | This mimics the behavior of the base library when doing filesystem -- operations, which uses 'getLocaleEncoding'. -- -- Looking up the locale requires IO. If you're not worried about calls -- to 'setFileSystemEncoding', then 'unsafePerformIO' may be feasible (make sure -- to deeply evaluate the result to catch exceptions). #endif decodeLE :: PLATFORM_STRING -> IO String #ifdef WINDOWS decodeLE (WindowsString ba) = decodeWithBaseWindows ba #else decodeLE (PosixString ba) = decodeWithBasePosix' ba #endif #ifdef WINDOWS_DOC -- | Like 'decodeUtf', except this mimics the behavior of the base library when doing filesystem -- operations (usually filepaths), which does permissive UTF-16 encoding, where coding errors generate -- Chars in the surrogate range. -- -- The reason this is in IO is because it unifies with the Posix counterpart, -- which does require IO. 'unsafePerformIO'/'unsafeDupablePerformIO' are safe, however. #else -- | This mimics the behavior of the base library when doing filesystem -- operations (usually filepaths), which uses shady PEP 383 style encoding (based on the current locale, -- but PEP 383 only works properly on UTF-8 encodings, so good luck). -- -- Looking up the locale requires IO. If you're not worried about calls -- to 'setFileSystemEncoding', then 'unsafePerformIO' may be feasible (make sure -- to deeply evaluate the result to catch exceptions). #endif decodeFS :: PLATFORM_STRING -> IO String #ifdef WINDOWS {-# DEPRECATED decodeFS "Use System.OsPath.Windows.decodeFS from filepath" #-} decodeFS (WindowsString ba) = decodeWithBaseWindows ba #else {-# DEPRECATED decodeFS "Use System.OsPath.Posix.decodeFS from filepath" #-} decodeFS (PosixString ba) = decodeWithBasePosix ba #endif #ifdef WINDOWS_DOC -- | Constructs a platform string from a ByteString. -- -- This ensures valid UCS-2LE. -- Note that this doesn't expand Word8 to Word16 on windows, so you may get invalid UTF-16. -- -- Throws 'EncodingException' on invalid UCS-2LE (although unlikely). #else -- | Constructs a platform string from a ByteString. -- -- This is a no-op. #endif fromBytes :: MonadThrow m => ByteString -> m PLATFORM_STRING #ifdef WINDOWS fromBytes bs = let ws = WindowsString . BS16.toShort $ bs in either throwM (const . pure $ ws) $ decodeWith ucs2le ws #else fromBytes = pure . PosixString . BSP.toShort #endif #ifndef WINDOWS -- | Like 'fromBytes', but not in IO. -- -- 'fromBytes' was designed to have a symmetric type signature -- on unix and windows, but morally the function has no IO effects on unix, -- so we provide this variant without breaking existing API. -- -- This function does not exist on windows. -- -- @since 2.0.6 fromBytestring :: ByteString -> PosixString fromBytestring = PosixString . BSP.toShort #endif #ifdef WINDOWS_DOC -- | QuasiQuote a 'WindowsString'. This accepts Unicode characters -- and encodes as UTF-16LE on windows. #else -- | QuasiQuote a 'PosixString'. This accepts Unicode characters -- and encodes as UTF-8 on unix. #endif pstr :: QuasiQuoter pstr = QuasiQuoter #ifdef WINDOWS { quoteExp = \s -> do ps <- either (fail . show) pure $ encodeWith (mkUTF16le ErrorOnCodingFailure) s lift ps , quotePat = \s -> do osp' <- either (fail . show) pure . encodeWith (mkUTF16le ErrorOnCodingFailure) $ s [p|((==) osp' -> True)|] , quoteType = \_ -> fail "illegal QuasiQuote (allowed as expression or pattern only, used as a type)" , quoteDec = \_ -> fail "illegal QuasiQuote (allowed as expression or pattern only, used as a declaration)" } #else { quoteExp = \s -> do ps <- either (fail . show) pure $ encodeWith (mkUTF8 ErrorOnCodingFailure) s lift ps , quotePat = \s -> do osp' <- either (fail . show) pure . encodeWith (mkUTF8 ErrorOnCodingFailure) $ s [p|((==) osp' -> True)|] , quoteType = \_ -> fail "illegal QuasiQuote (allowed as expression or pattern only, used as a type)" , quoteDec = \_ -> fail "illegal QuasiQuote (allowed as expression or pattern only, used as a declaration)" } #endif -- | Unpack a platform string to a list of platform words. unpack :: PLATFORM_STRING -> [PLATFORM_WORD] unpack = coerce BSP.unpack -- | Pack a list of platform words to a platform string. -- -- Note that using this in conjunction with 'unsafeFromChar' to -- convert from @[Char]@ to platform string is probably not what -- you want, because it will truncate unicode code points. pack :: [PLATFORM_WORD] -> PLATFORM_STRING pack = coerce BSP.pack singleton :: PLATFORM_WORD -> PLATFORM_STRING singleton = coerce BSP.singleton empty :: PLATFORM_STRING empty = mempty #ifdef WINDOWS -- | Truncates to 2 octets. unsafeFromChar :: Char -> PLATFORM_WORD unsafeFromChar = WindowsChar . fromIntegral . fromEnum #else -- | Truncates to 1 octet. unsafeFromChar :: Char -> PLATFORM_WORD unsafeFromChar = PosixChar . fromIntegral . fromEnum #endif -- | Converts back to a unicode codepoint (total). toChar :: PLATFORM_WORD -> Char #ifdef WINDOWS toChar (WindowsChar w) = chr $ fromIntegral w #else toChar (PosixChar w) = chr $ fromIntegral w #endif -- | /O(n)/ Append a byte to the end of a 'OsString' -- -- @since 1.4.200.0 snoc :: PLATFORM_STRING -> PLATFORM_WORD -> PLATFORM_STRING snoc = coerce BSP.snoc -- | /O(n)/ 'cons' is analogous to (:) for lists. -- -- @since 1.4.200.0 cons :: PLATFORM_WORD -> PLATFORM_STRING -> PLATFORM_STRING cons = coerce BSP.cons -- | /O(1)/ Extract the last element of a OsString, which must be finite and non-empty. -- An exception will be thrown in the case of an empty OsString. -- -- This is a partial function, consider using 'unsnoc' instead. -- -- @since 1.4.200.0 last :: HasCallStack => PLATFORM_STRING -> PLATFORM_WORD last = coerce BSP.last -- | /O(n)/ Extract the elements after the head of a OsString, which must be non-empty. -- An exception will be thrown in the case of an empty OsString. -- -- This is a partial function, consider using 'uncons' instead. -- -- @since 1.4.200.0 tail :: HasCallStack => PLATFORM_STRING -> PLATFORM_STRING tail = coerce BSP.tail -- | /O(n)/ Extract the 'head' and 'tail' of a OsString, returning 'Nothing' -- if it is empty. -- -- @since 1.4.200.0 uncons :: PLATFORM_STRING -> Maybe (PLATFORM_WORD, PLATFORM_STRING) uncons = coerce BSP.uncons -- | /O(1)/ Extract the first element of a OsString, which must be non-empty. -- An exception will be thrown in the case of an empty OsString. -- -- This is a partial function, consider using 'uncons' instead. -- -- @since 1.4.200.0 head :: HasCallStack => PLATFORM_STRING -> PLATFORM_WORD head = coerce BSP.head -- | /O(n)/ Return all the elements of a 'OsString' except the last one. -- An exception will be thrown in the case of an empty OsString. -- -- This is a partial function, consider using 'unsnoc' instead. -- -- @since 1.4.200.0 init :: HasCallStack => PLATFORM_STRING -> PLATFORM_STRING init = coerce BSP.init -- | /O(n)/ Extract the 'init' and 'last' of a OsString, returning 'Nothing' -- if it is empty. -- -- @since 1.4.200.0 unsnoc :: PLATFORM_STRING -> Maybe (PLATFORM_STRING, PLATFORM_WORD) unsnoc = coerce BSP.unsnoc -- | /O(1)/. The empty 'OsString'. -- -- @since 1.4.200.0 null :: PLATFORM_STRING -> Bool null = coerce BSP.null -- | /O(1)/ The length of a 'OsString'. -- -- This returns the number of code units -- (@Word8@ on unix and @Word16@ on windows), not -- bytes. -- -- >>> length "abc" -- 3 -- -- @since 1.4.200.0 length :: PLATFORM_STRING -> Int #ifdef WINDOWS length = coerce BSP.numWord16 #else length = coerce BSP.length #endif -- | /O(n)/ 'map' @f xs@ is the OsString obtained by applying @f@ to each -- element of @xs@. -- -- @since 1.4.200.0 map :: (PLATFORM_WORD -> PLATFORM_WORD) -> PLATFORM_STRING -> PLATFORM_STRING map = coerce BSP.map -- | /O(n)/ 'reverse' @xs@ efficiently returns the elements of @xs@ in reverse order. -- -- @since 1.4.200.0 reverse :: PLATFORM_STRING -> PLATFORM_STRING reverse = coerce BSP.reverse -- | /O(n)/ The 'intercalate' function takes a 'OsString' and a list of -- 'OsString's and concatenates the list after interspersing the first -- argument between each element of the list. -- -- @since 1.4.200.0 intercalate :: PLATFORM_STRING -> [PLATFORM_STRING] -> PLATFORM_STRING intercalate = coerce BSP.intercalate -- | 'foldl', applied to a binary operator, a starting value (typically -- the left-identity of the operator), and a OsString, reduces the -- OsString using the binary operator, from left to right. -- -- @since 1.4.200.0 foldl :: forall a. (a -> PLATFORM_WORD -> a) -> a -> PLATFORM_STRING -> a foldl = coerce (BSP.foldl @a) -- | 'foldl'' is like 'foldl', but strict in the accumulator. -- -- @since 1.4.200.0 foldl' :: forall a. (a -> PLATFORM_WORD -> a) -> a -> PLATFORM_STRING -> a foldl' = coerce (BSP.foldl' @a) -- | 'foldl1' is a variant of 'foldl' that has no starting value -- argument, and thus must be applied to non-empty 'OsString's. -- An exception will be thrown in the case of an empty OsString. -- -- @since 1.4.200.0 foldl1 :: (PLATFORM_WORD -> PLATFORM_WORD -> PLATFORM_WORD) -> PLATFORM_STRING -> PLATFORM_WORD foldl1 = coerce BSP.foldl1 -- | 'foldl1'' is like 'foldl1', but strict in the accumulator. -- An exception will be thrown in the case of an empty OsString. -- -- @since 1.4.200.0 foldl1' :: (PLATFORM_WORD -> PLATFORM_WORD -> PLATFORM_WORD) -> PLATFORM_STRING -> PLATFORM_WORD foldl1' = coerce BSP.foldl1' -- | 'foldr', applied to a binary operator, a starting value -- (typically the right-identity of the operator), and a OsString, -- reduces the OsString using the binary operator, from right to left. -- -- @since 1.4.200.0 foldr :: forall a. (PLATFORM_WORD -> a -> a) -> a -> PLATFORM_STRING -> a foldr = coerce (BSP.foldr @a) -- | 'foldr'' is like 'foldr', but strict in the accumulator. -- -- @since 1.4.200.0 foldr' :: forall a. (PLATFORM_WORD -> a -> a) -> a -> PLATFORM_STRING -> a foldr' = coerce (BSP.foldr' @a) -- | 'foldr1' is a variant of 'foldr' that has no starting value argument, -- and thus must be applied to non-empty 'OsString's -- An exception will be thrown in the case of an empty OsString. -- -- @since 1.4.200.0 foldr1 :: (PLATFORM_WORD -> PLATFORM_WORD -> PLATFORM_WORD) -> PLATFORM_STRING -> PLATFORM_WORD foldr1 = coerce BSP.foldr1 -- | 'foldr1'' is a variant of 'foldr1', but is strict in the -- accumulator. -- -- @since 1.4.200.0 foldr1' :: (PLATFORM_WORD -> PLATFORM_WORD -> PLATFORM_WORD) -> PLATFORM_STRING -> PLATFORM_WORD foldr1' = coerce BSP.foldr1' -- | /O(n)/ Applied to a predicate and a 'OsString', 'all' determines -- if all elements of the 'OsString' satisfy the predicate. -- -- @since 1.4.200.0 all :: (PLATFORM_WORD -> Bool) -> PLATFORM_STRING -> Bool all = coerce BSP.all -- | /O(n)/ Applied to a predicate and a 'OsString', 'any' determines if -- any element of the 'OsString' satisfies the predicate. -- -- @since 1.4.200.0 any :: (PLATFORM_WORD -> Bool) -> PLATFORM_STRING -> Bool any = coerce BSP.any -- /O(n)/ Concatenate a list of OsStrings. -- -- @since 1.4.200.0 concat :: [PLATFORM_STRING] -> PLATFORM_STRING concat = mconcat -- | /O(n)/ 'replicate' @n x@ is a OsString of length @n@ with @x@ -- the value of every element. The following holds: -- -- > replicate w c = unfoldr w (\u -> Just (u,u)) c -- -- @since 1.4.200.0 replicate :: Int -> PLATFORM_WORD -> PLATFORM_STRING replicate = coerce BSP.replicate -- | /O(n)/, where /n/ is the length of the result. The 'unfoldr' -- function is analogous to the List \'unfoldr\'. 'unfoldr' builds a -- OsString from a seed value. The function takes the element and -- returns 'Nothing' if it is done producing the OsString or returns -- 'Just' @(a,b)@, in which case, @a@ is the next byte in the string, -- and @b@ is the seed value for further production. -- -- This function is not efficient/safe. It will build a list of @[Word8]@ -- and run the generator until it returns `Nothing`, otherwise recurse infinitely, -- then finally create a 'OsString'. -- -- If you know the maximum length, consider using 'unfoldrN'. -- -- Examples: -- -- > unfoldr (\x -> if x <= 5 then Just (x, x + 1) else Nothing) 0 -- > == pack [0, 1, 2, 3, 4, 5] -- -- @since 1.4.200.0 unfoldr :: forall a. (a -> Maybe (PLATFORM_WORD, a)) -> a -> PLATFORM_STRING unfoldr = coerce (BSP.unfoldr @a) -- | /O(n)/ Like 'unfoldr', 'unfoldrN' builds a OsString from a seed -- value. However, the length of the result is limited by the first -- argument to 'unfoldrN'. This function is more efficient than 'unfoldr' -- when the maximum length of the result is known. -- -- The following equation relates 'unfoldrN' and 'unfoldr': -- -- > fst (unfoldrN n f s) == take n (unfoldr f s) -- -- @since 1.4.200.0 unfoldrN :: forall a. Int -> (a -> Maybe (PLATFORM_WORD, a)) -> a -> (PLATFORM_STRING, Maybe a) unfoldrN = coerce (BSP.unfoldrN @a) -- | /O(n)/ 'take' @n@, applied to a OsString @xs@, returns the prefix -- of @xs@ of length @n@, or @xs@ itself if @n > 'length' xs@. -- -- @since 1.4.200.0 take :: Int -> PLATFORM_STRING -> PLATFORM_STRING take = coerce BSP.take -- | /O(n)/ @'takeEnd' n xs@ is equivalent to @'drop' ('length' xs - n) xs@. -- Takes @n@ elements from end of bytestring. -- -- >>> takeEnd 3 "abcdefg" -- "efg" -- >>> takeEnd 0 "abcdefg" -- "" -- >>> takeEnd 4 "abc" -- "abc" -- -- @since 1.4.200.0 takeEnd :: Int -> PLATFORM_STRING -> PLATFORM_STRING takeEnd = coerce BSP.takeEnd -- | Returns the longest (possibly empty) suffix of elements -- satisfying the predicate. -- -- @'takeWhileEnd' p@ is equivalent to @'reverse' . 'takeWhile' p . 'reverse'@. -- -- @since 1.4.200.0 takeWhileEnd :: (PLATFORM_WORD -> Bool) -> PLATFORM_STRING -> PLATFORM_STRING takeWhileEnd = coerce BSP.takeWhileEnd -- | Similar to 'Prelude.takeWhile', -- returns the longest (possibly empty) prefix of elements -- satisfying the predicate. -- -- @since 1.4.200.0 takeWhile :: (PLATFORM_WORD -> Bool) -> PLATFORM_STRING -> PLATFORM_STRING takeWhile = coerce BSP.takeWhile -- | /O(n)/ 'drop' @n@ @xs@ returns the suffix of @xs@ after the first n elements, or 'empty' if @n > 'length' xs@. -- -- @since 1.4.200.0 drop :: Int -> PLATFORM_STRING -> PLATFORM_STRING drop = coerce BSP.drop -- | /O(n)/ @'dropEnd' n xs@ is equivalent to @'take' ('length' xs - n) xs@. -- Drops @n@ elements from end of bytestring. -- -- >>> dropEnd 3 "abcdefg" -- "abcd" -- >>> dropEnd 0 "abcdefg" -- "abcdefg" -- >>> dropEnd 4 "abc" -- "" -- -- @since 1.4.200.0 dropEnd :: Int -> PLATFORM_STRING -> PLATFORM_STRING dropEnd = coerce BSP.dropEnd -- | Similar to 'Prelude.dropWhile', -- drops the longest (possibly empty) prefix of elements -- satisfying the predicate and returns the remainder. -- -- @since 1.4.200.0 dropWhile :: (PLATFORM_WORD -> Bool) -> PLATFORM_STRING -> PLATFORM_STRING dropWhile = coerce BSP.dropWhile -- | Similar to 'Prelude.dropWhileEnd', -- drops the longest (possibly empty) suffix of elements -- satisfying the predicate and returns the remainder. -- -- @'dropWhileEnd' p@ is equivalent to @'reverse' . 'dropWhile' p . 'reverse'@. -- -- @since 1.4.200.0 dropWhileEnd :: (PLATFORM_WORD -> Bool) -> PLATFORM_STRING -> PLATFORM_STRING dropWhileEnd = coerce BSP.dropWhileEnd -- | Returns the longest (possibly empty) suffix of elements which __do not__ -- satisfy the predicate and the remainder of the string. -- -- 'breakEnd' @p@ is equivalent to @'spanEnd' (not . p)@ and to @('takeWhileEnd' (not . p) &&& 'dropWhileEnd' (not . p))@. -- -- @since 1.4.200.0 breakEnd :: (PLATFORM_WORD -> Bool) -> PLATFORM_STRING -> (PLATFORM_STRING, PLATFORM_STRING) breakEnd = coerce BSP.breakEnd -- | Similar to 'Prelude.break', -- returns the longest (possibly empty) prefix of elements which __do not__ -- satisfy the predicate and the remainder of the string. -- -- 'break' @p@ is equivalent to @'span' (not . p)@ and to @('takeWhile' (not . p) &&& 'dropWhile' (not . p))@. -- -- @since 1.4.200.0 break :: (PLATFORM_WORD -> Bool) -> PLATFORM_STRING -> (PLATFORM_STRING, PLATFORM_STRING) break = coerce BSP.break -- | Similar to 'Prelude.span', -- returns the longest (possibly empty) prefix of elements -- satisfying the predicate and the remainder of the string. -- -- 'span' @p@ is equivalent to @'break' (not . p)@ and to @('takeWhile' p &&& 'dropWhile' p)@. -- -- @since 1.4.200.0 span :: (PLATFORM_WORD -> Bool) -> PLATFORM_STRING -> (PLATFORM_STRING, PLATFORM_STRING) span = coerce BSP.span -- | Returns the longest (possibly empty) suffix of elements -- satisfying the predicate and the remainder of the string. -- -- 'spanEnd' @p@ is equivalent to @'breakEnd' (not . p)@ and to @('takeWhileEnd' p &&& 'dropWhileEnd' p)@. -- -- We have -- -- > spanEnd (not . isSpace) "x y z" == ("x y ", "z") -- -- and -- -- > spanEnd (not . isSpace) sbs -- > == -- > let (x, y) = span (not . isSpace) (reverse sbs) in (reverse y, reverse x) -- -- @since 1.4.200.0 spanEnd :: (PLATFORM_WORD -> Bool) -> PLATFORM_STRING -> (PLATFORM_STRING, PLATFORM_STRING) spanEnd = coerce BSP.spanEnd -- | /O(n)/ 'splitAt' @n sbs@ is equivalent to @('take' n sbs, 'drop' n sbs)@. -- -- @since 1.4.200.0 splitAt :: Int -> PLATFORM_STRING -> (PLATFORM_STRING, PLATFORM_STRING) splitAt = coerce BSP.splitAt -- | /O(n)/ Break a 'OsString' into pieces separated by the byte -- argument, consuming the delimiter. I.e. -- -- > split 10 "a\nb\nd\ne" == ["a","b","d","e"] -- fromEnum '\n' == 10 -- > split 97 "aXaXaXa" == ["","X","X","X",""] -- fromEnum 'a' == 97 -- > split 120 "x" == ["",""] -- fromEnum 'x' == 120 -- > split undefined "" == [] -- and not [""] -- -- and -- -- > intercalate [c] . split c == id -- > split == splitWith . (==) -- -- @since 1.4.200.0 split :: PLATFORM_WORD -> PLATFORM_STRING -> [PLATFORM_STRING] split = coerce BSP.split -- | /O(n)/ Splits a 'OsString' into components delimited by -- separators, where the predicate returns True for a separator element. -- The resulting components do not contain the separators. Two adjacent -- separators result in an empty component in the output. eg. -- -- > splitWith (==97) "aabbaca" == ["","","bb","c",""] -- fromEnum 'a' == 97 -- > splitWith undefined "" == [] -- and not [""] -- -- @since 1.4.200.0 splitWith :: (PLATFORM_WORD -> Bool) -> PLATFORM_STRING -> [PLATFORM_STRING] splitWith = coerce BSP.splitWith -- | /O(n)/ The 'stripSuffix' function takes two OsStrings and returns 'Just' -- the remainder of the second iff the first is its suffix, and otherwise -- 'Nothing'. -- -- @since 1.4.200.0 stripSuffix :: PLATFORM_STRING -> PLATFORM_STRING -> Maybe PLATFORM_STRING stripSuffix = coerce BSP.stripSuffix -- | /O(n)/ The 'stripPrefix' function takes two OsStrings and returns 'Just' -- the remainder of the second iff the first is its prefix, and otherwise -- 'Nothing'. -- -- @since 1.4.200.0 stripPrefix :: PLATFORM_STRING -> PLATFORM_STRING -> Maybe PLATFORM_STRING stripPrefix = coerce BSP.stripPrefix -- | Check whether one string is a substring of another. -- -- @since 1.4.200.0 isInfixOf :: PLATFORM_STRING -> PLATFORM_STRING -> Bool isInfixOf = coerce BSP.isInfixOf -- |/O(n)/ The 'isPrefixOf' function takes two OsStrings and returns 'True' -- -- @since 1.4.200.0 isPrefixOf :: PLATFORM_STRING -> PLATFORM_STRING -> Bool isPrefixOf = coerce BSP.isPrefixOf -- | /O(n)/ The 'isSuffixOf' function takes two OsStrings and returns 'True' -- iff the first is a suffix of the second. -- -- The following holds: -- -- > isSuffixOf x y == reverse x `isPrefixOf` reverse y -- -- @since 1.4.200.0 isSuffixOf :: PLATFORM_STRING -> PLATFORM_STRING -> Bool isSuffixOf = coerce BSP.isSuffixOf -- | Break a string on a substring, returning a pair of the part of the -- string prior to the match, and the rest of the string. -- -- The following relationships hold: -- -- > break (== c) l == breakSubstring (singleton c) l -- -- For example, to tokenise a string, dropping delimiters: -- -- > tokenise x y = h : if null t then [] else tokenise x (drop (length x) t) -- > where (h,t) = breakSubstring x y -- -- To skip to the first occurrence of a string: -- -- > snd (breakSubstring x y) -- -- To take the parts of a string before a delimiter: -- -- > fst (breakSubstring x y) -- -- Note that calling `breakSubstring x` does some preprocessing work, so -- you should avoid unnecessarily duplicating breakSubstring calls with the same -- pattern. -- -- @since 1.4.200.0 breakSubstring :: PLATFORM_STRING -> PLATFORM_STRING -> (PLATFORM_STRING, PLATFORM_STRING) breakSubstring = coerce BSP.breakSubstring -- | /O(n)/ 'elem' is the 'OsString' membership predicate. -- -- @since 1.4.200.0 elem :: PLATFORM_WORD -> PLATFORM_STRING -> Bool elem = coerce BSP.elem -- | /O(n)/ The 'find' function takes a predicate and a OsString, -- and returns the first element in matching the predicate, or 'Nothing' -- if there is no such element. -- -- > find f p = case findIndex f p of Just n -> Just (p ! n) ; _ -> Nothing -- -- @since 1.4.200.0 find :: (PLATFORM_WORD -> Bool) -> PLATFORM_STRING -> Maybe PLATFORM_WORD find = coerce BSP.find -- | /O(n)/ 'filter', applied to a predicate and a OsString, -- returns a OsString containing those characters that satisfy the -- predicate. -- -- @since 1.4.200.0 filter :: (PLATFORM_WORD -> Bool) -> PLATFORM_STRING -> PLATFORM_STRING filter = coerce BSP.filter -- | /O(n)/ The 'partition' function takes a predicate a OsString and returns -- the pair of OsStrings with elements which do and do not satisfy the -- predicate, respectively; i.e., -- -- > partition p bs == (filter p sbs, filter (not . p) sbs) -- -- @since 1.4.200.0 partition :: (PLATFORM_WORD -> Bool) -> PLATFORM_STRING -> (PLATFORM_STRING, PLATFORM_STRING) partition = coerce BSP.partition -- | /O(1)/ 'OsString' index (subscript) operator, starting from 0. -- -- @since 1.4.200.0 index :: HasCallStack => PLATFORM_STRING -> Int -> PLATFORM_WORD index = coerce BSP.index -- | /O(1)/ 'OsString' index, starting from 0, that returns 'Just' if: -- -- > 0 <= n < length bs -- -- @since 1.4.200.0 indexMaybe :: PLATFORM_STRING -> Int -> Maybe PLATFORM_WORD indexMaybe = coerce BSP.indexMaybe -- | /O(1)/ 'OsString' index, starting from 0, that returns 'Just' if: -- -- > 0 <= n < length bs -- -- @since 1.4.200.0 (!?) :: PLATFORM_STRING -> Int -> Maybe PLATFORM_WORD (!?) = indexMaybe -- | /O(n)/ The 'elemIndex' function returns the index of the first -- element in the given 'OsString' which is equal to the query -- element, or 'Nothing' if there is no such element. -- -- @since 1.4.200.0 elemIndex :: PLATFORM_WORD -> PLATFORM_STRING -> Maybe Int elemIndex = coerce BSP.elemIndex -- | /O(n)/ The 'elemIndices' function extends 'elemIndex', by returning -- the indices of all elements equal to the query element, in ascending order. -- -- @since 1.4.200.0 elemIndices :: PLATFORM_WORD -> PLATFORM_STRING -> [Int] elemIndices = coerce BSP.elemIndices -- | count returns the number of times its argument appears in the OsString -- -- @since 1.4.200.0 count :: PLATFORM_WORD -> PLATFORM_STRING -> Int count = coerce BSP.count -- | /O(n)/ The 'findIndex' function takes a predicate and a 'OsString' and -- returns the index of the first element in the OsString -- satisfying the predicate. -- -- @since 1.4.200.0 findIndex :: (PLATFORM_WORD -> Bool) -> PLATFORM_STRING -> Maybe Int findIndex = coerce BSP.findIndex -- | /O(n)/ The 'findIndices' function extends 'findIndex', by returning the -- indices of all elements satisfying the predicate, in ascending order. -- -- @since 1.4.200.0 findIndices :: (PLATFORM_WORD -> Bool) -> PLATFORM_STRING -> [Int] findIndices = coerce BSP.findIndices os-string-2.0.6/System/OsString/Data/ByteString/0000755000000000000000000000000007346545000017634 5ustar0000000000000000os-string-2.0.6/System/OsString/Data/ByteString/Short.hs0000644000000000000000000001161407346545000021272 0ustar0000000000000000{-# LANGUAGE MultiWayIf #-} {-# LANGUAGE NoImplicitPrelude #-} -- | -- Module : System.OsString.Data.ByteString.Short -- Copyright : (c) Duncan Coutts 2012-2013, Julian Ospald 2022 -- License : BSD-style -- -- Maintainer : hasufell@posteo.de -- Stability : stable -- Portability : ghc only -- -- A compact representation suitable for storing short byte strings in memory. -- -- In typical use cases it can be imported alongside "Data.ByteString", e.g. -- -- > import qualified Data.ByteString as B -- > import qualified Data.ByteString.Short as B -- > (ShortByteString, toShort, fromShort) -- -- Other 'ShortByteString' operations clash with "Data.ByteString" or "Prelude" -- functions however, so they should be imported @qualified@ with a different -- alias e.g. -- -- > import qualified Data.ByteString.Short as B.Short -- module System.OsString.Data.ByteString.Short ( -- * The @ShortByteString@ type ShortByteString(..), -- ** Memory overhead -- | With GHC, the memory overheads are as follows, expressed in words and -- in bytes (words are 4 and 8 bytes on 32 or 64bit machines respectively). -- -- * 'B.ByteString' unshared: 8 words; 32 or 64 bytes. -- -- * 'B.ByteString' shared substring: 4 words; 16 or 32 bytes. -- -- * 'ShortByteString': 4 words; 16 or 32 bytes. -- -- For the string data itself, both 'ShortByteString' and 'B.ByteString' use -- one byte per element, rounded up to the nearest word. For example, -- including the overheads, a length 10 'ShortByteString' would take -- @16 + 12 = 28@ bytes on a 32bit platform and @32 + 16 = 48@ bytes on a -- 64bit platform. -- -- These overheads can all be reduced by 1 word (4 or 8 bytes) when the -- 'ShortByteString' or 'B.ByteString' is unpacked into another constructor. -- -- For example: -- -- > data ThingId = ThingId {-# UNPACK #-} !Int -- > {-# UNPACK #-} !ShortByteString -- -- This will take @1 + 1 + 3@ words (the @ThingId@ constructor + -- unpacked @Int@ + unpacked @ShortByteString@), plus the words for the -- string data. -- ** Heap fragmentation -- | With GHC, the 'B.ByteString' representation uses /pinned/ memory, -- meaning it cannot be moved by the GC. This is usually the right thing to -- do for larger strings, but for small strings using pinned memory can -- lead to heap fragmentation which wastes space. The 'ShortByteString' -- type (and the @Text@ type from the @text@ package) use /unpinned/ memory -- so they do not contribute to heap fragmentation. In addition, with GHC, -- small unpinned strings are allocated in the same way as normal heap -- allocations, rather than in a separate pinned area. -- * Introducing and eliminating 'ShortByteString's empty, singleton, pack, unpack, fromShort, toShort, -- * Basic interface snoc, cons, append, last, tail, uncons, uncons2, head, init, unsnoc, null, length, -- * Transforming ShortByteStrings map, reverse, intercalate, -- * Reducing 'ShortByteString's (folds) foldl, foldl', foldl1, foldl1', foldr, foldr', foldr1, foldr1', -- ** Special folds all, any, concat, -- ** Generating and unfolding ByteStrings replicate, unfoldr, unfoldrN, -- * Substrings -- ** Breaking strings take, takeEnd, takeWhileEnd, takeWhile, drop, dropEnd, dropWhile, dropWhileEnd, breakEnd, break, span, spanEnd, splitAt, split, splitWith, stripSuffix, stripPrefix, -- * Predicates isInfixOf, isPrefixOf, isSuffixOf, -- ** Search for arbitrary substrings breakSubstring, -- * Searching ShortByteStrings -- ** Searching by equality elem, -- ** Searching with a predicate find, filter, partition, -- * Indexing ShortByteStrings index, indexMaybe, (!?), elemIndex, elemIndices, count, findIndex, findIndices, -- * Low level conversions -- ** Packing 'Foreign.C.String.CString's and pointers packCString, packCStringLen, -- ** Using ShortByteStrings as 'Foreign.C.String.CString's useAsCString, useAsCStringLen, ) where import Data.ByteString.Short.Internal import System.OsString.Data.ByteString.Short.Internal import Prelude (Maybe(..), Ord(..), Num(..), ($), otherwise) import Data.Word (Word8) uncons2 :: ShortByteString -> Maybe (Word8, Word8, ShortByteString) uncons2 = \sbs -> let l = length sbs nl = l - 2 in if | l <= 1 -> Nothing | otherwise -> let h = indexWord8Array (asBA sbs) 0 h' = indexWord8Array (asBA sbs) 1 t = create nl $ \mba -> copyByteArray (asBA sbs) 1 mba 0 nl in Just (h, h', t) os-string-2.0.6/System/OsString/Data/ByteString/Short/0000755000000000000000000000000007346545000020733 5ustar0000000000000000os-string-2.0.6/System/OsString/Data/ByteString/Short/Internal.hs0000644000000000000000000003567007346545000023056 0ustar0000000000000000{-# LANGUAGE CPP #-} {-# LANGUAGE MagicHash #-} {-# LANGUAGE RankNTypes #-} {-# LANGUAGE UnboxedTuples #-} {-# LANGUAGE BangPatterns #-} {-# LANGUAGE ViewPatterns #-} {-# LANGUAGE UnliftedFFITypes #-} -- Required for WORDS_BIGENDIAN #include -- | -- Module : System.OsString.Data.ByteString.Short.Internal -- Copyright : © 2022 Julian Ospald -- License : MIT -- -- Maintainer : Julian Ospald -- Stability : experimental -- Portability : portable -- -- Internal low-level utilities mostly for 'System.OsPath.Data.ByteString.Short.Word16', -- such as byte-array operations and other stuff not meant to be exported from Word16 module. module System.OsString.Data.ByteString.Short.Internal where import Control.Monad.ST import Control.Exception (assert, throwIO) import Data.ByteString.Short.Internal (ShortByteString(..), length) #if !MIN_VERSION_base(4,11,0) import Data.Semigroup ( Semigroup((<>)) ) import Foreign.C.Types ( CSize(..) , CInt(..) ) import Data.ByteString.Internal ( accursedUnutterablePerformIO ) #endif #if !MIN_VERSION_bytestring(0,10,9) import Foreign.Marshal.Alloc (allocaBytes) import Foreign.C.String ( CString, CStringLen ) import Foreign.C.Types ( CSize(..) ) import Foreign.Storable (pokeByteOff) #endif import Foreign.Marshal.Array (withArray0, peekArray0, newArray0, withArrayLen, peekArray) import GHC.Exts import GHC.Word import GHC.ST ( ST (ST) ) import GHC.Stack ( HasCallStack ) import Prelude hiding ( length ) import qualified Data.ByteString.Short.Internal as BS import qualified Data.Char as C import qualified Data.List as List _nul :: Word16 _nul = 0x00 isSpace :: Word16 -> Bool isSpace = C.isSpace . word16ToChar -- | Total conversion to char. word16ToChar :: Word16 -> Char word16ToChar = C.chr . fromIntegral create :: Int -> (forall s. MBA s -> ST s ()) -> ShortByteString create len fill = runST $ do mba <- newByteArray len fill mba BA# ba# <- unsafeFreezeByteArray mba return (SBS ba#) {-# INLINE create #-} asBA :: ShortByteString -> BA asBA (SBS ba#) = BA# ba# data BA = BA# ByteArray# data MBA s = MBA# (MutableByteArray# s) newPinnedByteArray :: Int -> ST s (MBA s) newPinnedByteArray (I# len#) = ST $ \s -> case newPinnedByteArray# len# s of (# s', mba# #) -> (# s', MBA# mba# #) newByteArray :: Int -> ST s (MBA s) newByteArray (I# len#) = ST $ \s -> case newByteArray# len# s of (# s', mba# #) -> (# s', MBA# mba# #) copyByteArray :: BA -> Int -> MBA s -> Int -> Int -> ST s () copyByteArray (BA# src#) (I# src_off#) (MBA# dst#) (I# dst_off#) (I# len#) = ST $ \s -> case copyByteArray# src# src_off# dst# dst_off# len# s of s' -> (# s', () #) unsafeFreezeByteArray :: MBA s -> ST s BA unsafeFreezeByteArray (MBA# mba#) = ST $ \s -> case unsafeFreezeByteArray# mba# s of (# s', ba# #) -> (# s', BA# ba# #) copyAddrToByteArray :: Ptr a -> MBA RealWorld -> Int -> Int -> ST RealWorld () copyAddrToByteArray (Ptr src#) (MBA# dst#) (I# dst_off#) (I# len#) = ST $ \s -> case copyAddrToByteArray# src# dst# dst_off# len# s of s' -> (# s', () #) -- this is a copy-paste from bytestring #if !MIN_VERSION_bytestring(0,10,9) ------------------------------------------------------------------------ -- Primop replacements -- --------------------------------------------------------------------- -- -- Standard C functions -- foreign import ccall unsafe "string.h strlen" c_strlen :: CString -> IO CSize -- --------------------------------------------------------------------- -- -- Uses our C code -- -- | /O(n)./ Construct a new @ShortByteString@ from a @CString@. The -- resulting @ShortByteString@ is an immutable copy of the original -- @CString@, and is managed on the Haskell heap. The original -- @CString@ must be null terminated. -- -- @since 0.10.10.0 packCString :: CString -> IO ShortByteString packCString cstr = do len <- c_strlen cstr packCStringLen (cstr, fromIntegral len) -- | /O(n)./ Construct a new @ShortByteString@ from a @CStringLen@. The -- resulting @ShortByteString@ is an immutable copy of the original @CStringLen@. -- The @ShortByteString@ is a normal Haskell value and will be managed on the -- Haskell heap. -- -- @since 0.10.10.0 packCStringLen :: CStringLen -> IO ShortByteString packCStringLen (cstr, len) | len >= 0 = BS.createFromPtr cstr len packCStringLen (_, len) = moduleErrorIO "packCStringLen" ("negative length: " ++ show len) -- | /O(n) construction./ Use a @ShortByteString@ with a function requiring a -- null-terminated @CString@. The @CString@ is a copy and will be freed -- automatically; it must not be stored or used after the -- subcomputation finishes. -- -- @since 0.10.10.0 useAsCString :: ShortByteString -> (CString -> IO a) -> IO a useAsCString bs action = allocaBytes (l+1) $ \buf -> do BS.copyToPtr bs 0 buf (fromIntegral l) pokeByteOff buf l (0::Word8) action buf where l = length bs -- | /O(n) construction./ Use a @ShortByteString@ with a function requiring a 'CStringLen'. -- As for 'useAsCString' this function makes a copy of the original @ShortByteString@. -- It must not be stored or used after the subcomputation finishes. -- -- Beware that this function does not add a terminating @\NUL@ byte at the end of 'CStringLen'. -- If you need to construct a pointer to a null-terminated sequence, use 'useAsCString' -- (and measure length independently if desired). -- -- @since 0.10.10.0 useAsCStringLen :: ShortByteString -> (CStringLen -> IO a) -> IO a useAsCStringLen bs action = allocaBytes l $ \buf -> do BS.copyToPtr bs 0 buf (fromIntegral l) action (buf, l) where l = length bs #endif -- | /O(n)./ Construct a new @ShortByteString@ from a @CWString@. The -- resulting @ShortByteString@ is an immutable copy of the original -- @CWString@, and is managed on the Haskell heap. The original -- @CWString@ must be null terminated. -- -- @since 0.10.10.0 packCWString :: Ptr Word16 -> IO ShortByteString packCWString cwstr = do cs <- peekArray0 _nul cwstr return (packWord16 cs) -- | /O(n)./ Construct a new @ShortByteString@ from a @CWStringLen@. The -- resulting @ShortByteString@ is an immutable copy of the original @CWStringLen@. -- The @ShortByteString@ is a normal Haskell value and will be managed on the -- Haskell heap. -- -- @since 0.10.10.0 packCWStringLen :: (Ptr Word16, Int) -> IO ShortByteString packCWStringLen (cp, len) = do cs <- peekArray len cp return (packWord16 cs) -- | /O(n) construction./ Use a @ShortByteString@ with a function requiring a -- null-terminated @CWString@. The @CWString@ is a copy and will be freed -- automatically; it must not be stored or used after the -- subcomputation finishes. -- -- @since 0.10.10.0 useAsCWString :: ShortByteString -> (Ptr Word16 -> IO a) -> IO a useAsCWString = withArray0 _nul . unpackWord16 -- | /O(n) construction./ Use a @ShortByteString@ with a function requiring a @CWStringLen@. -- As for @useAsCWString@ this function makes a copy of the original @ShortByteString@. -- It must not be stored or used after the subcomputation finishes. -- -- @since 0.10.10.0 useAsCWStringLen :: ShortByteString -> ((Ptr Word16, Int) -> IO a) -> IO a useAsCWStringLen bs action = withArrayLen (unpackWord16 bs) $ \ len ptr -> action (ptr, len) -- | /O(n) construction./ Use a @ShortByteString@ with a function requiring a @CWStringLen@. -- As for @useAsCWString@ this function makes a copy of the original @ShortByteString@. -- It must not be stored or used after the subcomputation finishes. -- -- @since 0.10.10.0 newCWString :: ShortByteString -> IO (Ptr Word16) newCWString = newArray0 _nul . unpackWord16 -- --------------------------------------------------------------------- -- Internal utilities moduleErrorIO :: String -> String -> IO a moduleErrorIO fun msg = throwIO . userError $ moduleErrorMsg fun msg {-# NOINLINE moduleErrorIO #-} moduleErrorMsg :: String -> String -> String moduleErrorMsg fun msg = "System.OsPath.Data.ByteString.Short." ++ fun ++ ':':' ':msg packWord16 :: [Word16] -> ShortByteString packWord16 cs = packLenWord16 (List.length cs) cs packLenWord16 :: Int -> [Word16] -> ShortByteString packLenWord16 len ws0 = create (len * 2) (\mba -> go mba 0 ws0) where go :: MBA s -> Int -> [Word16] -> ST s () go !_ !_ [] = return () go !mba !i (w:ws) = do writeWord16Array mba i w go mba (i+2) ws unpackWord16 :: ShortByteString -> [Word16] unpackWord16 sbs = go len [] where len = length sbs go !i !acc | i < 1 = acc | otherwise = let !w = indexWord16Array (asBA sbs) (i - 2) in go (i - 2) (w:acc) packWord16Rev :: [Word16] -> ShortByteString packWord16Rev cs = packLenWord16Rev (List.length cs * 2) cs packLenWord16Rev :: Int -> [Word16] -> ShortByteString packLenWord16Rev len ws0 = create len (\mba -> go mba len ws0) where go :: MBA s -> Int -> [Word16] -> ST s () go !_ !_ [] = return () go !mba !i (w:ws) = do writeWord16Array mba (i - 2) w go mba (i - 2) ws -- | Encode Word16 as little-endian. writeWord16Array :: MBA s -> Int -- ^ Word8 index (not Word16) -> Word16 -> ST s () writeWord16Array (MBA# mba#) (I# i#) (W16# w#) = ST $ \s -> case writeWord8ArrayAsWord16# mba# i# (word16ToLE# w#) s of s' -> (# s', () #) indexWord8Array :: BA -> Int -- ^ Word8 index -> Word8 indexWord8Array (BA# ba#) (I# i#) = W8# (indexWord8Array# ba# i#) -- | Decode Word16 from little-endian. indexWord16Array :: BA -> Int -- ^ Word8 index (not Word16) -> Word16 indexWord16Array (BA# ba#) (I# i#) = W16# (word16FromLE# (indexWord8ArrayAsWord16# ba# i#)) #if MIN_VERSION_base(4,16,0) word16ToLE#, word16FromLE# :: Word16# -> Word16# #else word16ToLE#, word16FromLE# :: Word# -> Word# #endif #ifdef WORDS_BIGENDIAN #if MIN_VERSION_base(4,16,0) word16ToLE# w = wordToWord16# (byteSwap16# (word16ToWord# w)) #else word16ToLE# = byteSwap16# #endif #else word16ToLE# w# = w# #endif word16FromLE# = word16ToLE# setByteArray :: MBA s -> Int -> Int -> Int -> ST s () setByteArray (MBA# dst#) (I# off#) (I# len#) (I# c#) = ST $ \s -> case setByteArray# dst# off# len# c# s of s' -> (# s', () #) copyMutableByteArray :: MBA s -> Int -> MBA s -> Int -> Int -> ST s () copyMutableByteArray (MBA# src#) (I# src_off#) (MBA# dst#) (I# dst_off#) (I# len#) = ST $ \s -> case copyMutableByteArray# src# src_off# dst# dst_off# len# s of s' -> (# s', () #) -- | Given the maximum size needed and a function to make the contents -- of a ShortByteString, createAndTrim makes the 'ShortByteString'. -- The generating function is required to return the actual final size -- (<= the maximum size) and the result value. The resulting byte array -- is realloced to this size. createAndTrim :: Int -> (forall s. MBA s -> ST s (Int, a)) -> (ShortByteString, a) createAndTrim l fill = runST $ do mba <- newByteArray l (l', res) <- fill mba if assert (l' <= l) $ l' >= l then do BA# ba# <- unsafeFreezeByteArray mba return (SBS ba#, res) else do mba2 <- newByteArray l' copyMutableByteArray mba 0 mba2 0 l' BA# ba# <- unsafeFreezeByteArray mba2 return (SBS ba#, res) {-# INLINE createAndTrim #-} createAndTrim' :: Int -> (forall s. MBA s -> ST s Int) -> ShortByteString createAndTrim' l fill = runST $ do mba <- newByteArray l l' <- fill mba if assert (l' <= l) $ l' >= l then do BA# ba# <- unsafeFreezeByteArray mba return (SBS ba#) else do mba2 <- newByteArray l' copyMutableByteArray mba 0 mba2 0 l' BA# ba# <- unsafeFreezeByteArray mba2 return (SBS ba#) {-# INLINE createAndTrim' #-} createAndTrim'' :: Int -> (forall s. MBA s -> MBA s -> ST s (Int, Int)) -> (ShortByteString, ShortByteString) createAndTrim'' l fill = runST $ do mba1 <- newByteArray l mba2 <- newByteArray l (l1, l2) <- fill mba1 mba2 sbs1 <- freeze' l1 mba1 sbs2 <- freeze' l2 mba2 pure (sbs1, sbs2) where freeze' :: Int -> MBA s -> ST s ShortByteString freeze' l' mba = if assert (l' <= l) $ l' >= l then do BA# ba# <- unsafeFreezeByteArray mba return (SBS ba#) else do mba2 <- newByteArray l' copyMutableByteArray mba 0 mba2 0 l' BA# ba# <- unsafeFreezeByteArray mba2 return (SBS ba#) {-# INLINE createAndTrim'' #-} -- Returns the index of the first match or the length of the whole -- bytestring if nothing matched. findIndexOrLength :: (Word16 -> Bool) -> ShortByteString -> Int findIndexOrLength k (assertEven -> sbs) = go 0 where l = BS.length sbs ba = asBA sbs w = indexWord16Array ba go !n | n >= l = l `div` 2 | k (w n) = n `div` 2 | otherwise = go (n + 2) {-# INLINE findIndexOrLength #-} -- | Returns the length of the substring matching, not the index. -- If no match, returns 0. findFromEndUntil :: (Word16 -> Bool) -> ShortByteString -> Int findFromEndUntil k sbs = go (BS.length sbs - 2) where ba = asBA sbs w = indexWord16Array ba go !n | n < 0 = 0 | k (w n) = (n `div` 2) + 1 | otherwise = go (n - 2) {-# INLINE findFromEndUntil #-} assertEven :: ShortByteString -> ShortByteString assertEven sbs@(SBS barr#) | even (I# (sizeofByteArray# barr#)) = sbs | otherwise = error ("Uneven number of bytes: " <> show (BS.length sbs) <> ". This is not a Word16 bytestream.") -- Common up near identical calls to `error' to reduce the number -- constant strings created when compiled: errorEmptySBS :: HasCallStack => String -> a errorEmptySBS fun = moduleError fun "empty ShortByteString" {-# NOINLINE errorEmptySBS #-} moduleError :: HasCallStack => String -> String -> a moduleError fun msg = error (moduleErrorMsg fun msg) {-# NOINLINE moduleError #-} compareByteArraysOff :: BA -- ^ array 1 -> Int -- ^ offset for array 1 -> BA -- ^ array 2 -> Int -- ^ offset for array 2 -> Int -- ^ length to compare -> Int -- ^ like memcmp #if MIN_VERSION_base(4,11,0) compareByteArraysOff (BA# ba1#) (I# ba1off#) (BA# ba2#) (I# ba2off#) (I# len#) = I# (compareByteArrays# ba1# ba1off# ba2# ba2off# len#) #else compareByteArraysOff (BA# ba1#) ba1off (BA# ba2#) ba2off len = assert (ba1off + len <= (I# (sizeofByteArray# ba1#))) $ assert (ba2off + len <= (I# (sizeofByteArray# ba2#))) $ fromIntegral $ accursedUnutterablePerformIO $ c_memcmp_ByteArray ba1# ba1off ba2# ba2off (fromIntegral len) foreign import ccall unsafe "static sbs_memcmp_off" c_memcmp_ByteArray :: ByteArray# -> Int -> ByteArray# -> Int -> CSize -> IO CInt #endif os-string-2.0.6/System/OsString/Data/ByteString/Short/Word16.hs0000644000000000000000000007406107346545000022361 0ustar0000000000000000{-# LANGUAGE BangPatterns #-} {-# LANGUAGE CPP #-} {-# LANGUAGE MultiWayIf #-} {-# LANGUAGE RankNTypes #-} {-# LANGUAGE MagicHash #-} {-# LANGUAGE ViewPatterns #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TupleSections #-} {-# OPTIONS_GHC -fno-warn-name-shadowing -fexpose-all-unfoldings #-} -- | -- Module : System.OsString.Data.ByteString.Short.Word16 -- Copyright : © 2022 Julian Ospald -- License : MIT -- -- Maintainer : Julian Ospald -- Stability : experimental -- Portability : portable -- -- ShortByteStrings encoded as UTF16-LE, suitable for windows FFI calls. -- -- Word16s are *always* in BE encoding (both input and output), so e.g. 'pack' -- takes a list of BE encoded @[Word16]@ and produces a UTF16-LE encoded ShortByteString. -- -- Likewise, 'unpack' takes a UTF16-LE encoded ShortByteString and produces a list of BE encoded @[Word16]@. -- -- Indices and lengths are always in respect to Word16, not Word8. -- -- All functions will error out if the input string is not a valid UTF16 stream (uneven number of bytes). -- So use this module with caution. module System.OsString.Data.ByteString.Short.Word16 ( -- * The @ShortByteString@ type and representation ShortByteString(..), -- * Introducing and eliminating 'ShortByteString's empty, singleton, pack, unpack, fromShort, toShort, -- * Basic interface snoc, cons, append, last, tail, uncons, uncons2, head, init, unsnoc, null, length, numWord16, -- * Transforming ShortByteStrings map, reverse, intercalate, -- * Reducing 'ShortByteString's (folds) foldl, foldl', foldl1, foldl1', foldr, foldr', foldr1, foldr1', -- ** Special folds all, any, concat, -- ** Generating and unfolding ByteStrings replicate, unfoldr, unfoldrN, -- * Substrings -- ** Breaking strings take, takeEnd, takeWhileEnd, takeWhile, drop, dropEnd, dropWhile, dropWhileEnd, breakEnd, break, span, spanEnd, splitAt, split, splitWith, stripSuffix, stripPrefix, -- * Predicates isInfixOf, isPrefixOf, isSuffixOf, -- ** Search for arbitrary substrings breakSubstring, -- * Searching ShortByteStrings -- ** Searching by equality elem, -- ** Searching with a predicate find, filter, partition, -- * Indexing ShortByteStrings index, indexMaybe, (!?), elemIndex, elemIndices, count, findIndex, findIndices, -- ** Encoding validation -- isValidUtf8, -- * Low level conversions -- ** Packing 'CString's and pointers packCWString, packCWStringLen, newCWString, -- ** Using ShortByteStrings as 'CString's useAsCWString, useAsCWStringLen ) where import System.OsString.Data.ByteString.Short ( append, intercalate, concat, stripSuffix, stripPrefix, isPrefixOf, isSuffixOf, length, empty, null, ShortByteString(..), fromShort, toShort ) import System.OsString.Data.ByteString.Short.Internal import Data.Bits ( shiftR ) import Data.Word import Prelude hiding ( Foldable(..) , all , any , reverse , break , concat , drop , dropWhile , filter , head , init , last , map , replicate , span , splitAt , tail , take , takeWhile ) import qualified Data.Foldable as Foldable import GHC.ST ( ST ) import GHC.Stack ( HasCallStack ) import GHC.Exts ( inline ) import qualified Data.ByteString.Short.Internal as BS import qualified Data.List as List -- ----------------------------------------------------------------------------- -- Introducing and eliminating 'ShortByteString's -- | /O(1)/ Convert a 'Word16' into a 'ShortByteString' singleton :: Word16 -> ShortByteString singleton = \w -> create 2 (\mba -> writeWord16Array mba 0 w) -- | /O(n)/. Convert a list into a 'ShortByteString' pack :: [Word16] -> ShortByteString pack = packWord16 -- | /O(n)/. Convert a 'ShortByteString' into a list. unpack :: ShortByteString -> [Word16] unpack = unpackWord16 . assertEven -- --------------------------------------------------------------------- -- Basic interface -- | This is like 'length', but the number of 'Word16', not 'Word8'. numWord16 :: ShortByteString -> Int numWord16 = (`shiftR` 1) . BS.length . assertEven infixr 5 `cons` --same as list (:) infixl 5 `snoc` -- | /O(n)/ Append a Word16 to the end of a 'ShortByteString' -- -- Note: copies the entire byte array snoc :: ShortByteString -> Word16 -> ShortByteString snoc = \(assertEven -> sbs) c -> let l = BS.length sbs nl = l + 2 in create nl $ \mba -> do copyByteArray (asBA sbs) 0 mba 0 l writeWord16Array mba l c -- | /O(n)/ 'cons' is analogous to (:) for lists. -- -- Note: copies the entire byte array cons :: Word16 -> ShortByteString -> ShortByteString cons c = \(assertEven -> sbs) -> let l = BS.length sbs nl = l + 2 in create nl $ \mba -> do writeWord16Array mba 0 c copyByteArray (asBA sbs) 0 mba 2 l -- | /O(1)/ Extract the last element of a ShortByteString, which must be finite and at least one Word16. -- An exception will be thrown in the case of an empty ShortByteString. last :: HasCallStack => ShortByteString -> Word16 last = \(assertEven -> sbs) -> case null sbs of True -> errorEmptySBS "last" False -> indexWord16Array (asBA sbs) (BS.length sbs - 2) -- | /O(n)/ Extract the elements after the head of a ShortByteString, which must at least one Word16. -- An exception will be thrown in the case of an empty ShortByteString. -- -- Note: copies the entire byte array tail :: HasCallStack => ShortByteString -> ShortByteString tail = \(assertEven -> sbs) -> let l = BS.length sbs nl = l - 2 in if | l <= 0 -> errorEmptySBS "tail" | otherwise -> create nl $ \mba -> copyByteArray (asBA sbs) 2 mba 0 nl -- | /O(n)/ Extract the head and tail of a ByteString, returning Nothing -- if it is empty. uncons :: ShortByteString -> Maybe (Word16, ShortByteString) uncons = \(assertEven -> sbs) -> let l = BS.length sbs nl = l - 2 in if | l <= 0 -> Nothing | otherwise -> let h = indexWord16Array (asBA sbs) 0 t = create nl $ \mba -> copyByteArray (asBA sbs) 2 mba 0 nl in Just (h, t) -- | /O(n)/ Extract first two elements and the rest of a ByteString, -- returning Nothing if it is shorter than two elements. uncons2 :: ShortByteString -> Maybe (Word16, Word16, ShortByteString) uncons2 = \(assertEven -> sbs) -> let l = BS.length sbs nl = l - 4 in if | l <= 2 -> Nothing | otherwise -> let h = indexWord16Array (asBA sbs) 0 h' = indexWord16Array (asBA sbs) 2 t = create nl $ \mba -> copyByteArray (asBA sbs) 4 mba 0 nl in Just (h, h', t) -- | /O(1)/ Extract the first element of a ShortByteString, which must be at least one Word16. -- An exception will be thrown in the case of an empty ShortByteString. head :: HasCallStack => ShortByteString -> Word16 head = \(assertEven -> sbs) -> case null sbs of True -> errorEmptySBS "last" False -> indexWord16Array (asBA sbs) 0 -- | /O(n)/ Return all the elements of a 'ShortByteString' except the last one. -- An exception will be thrown in the case of an empty ShortByteString. -- -- Note: copies the entire byte array init :: HasCallStack => ShortByteString -> ShortByteString init = \(assertEven -> sbs) -> let l = BS.length sbs nl = l - 2 in if | l <= 0 -> errorEmptySBS "tail" | otherwise -> create nl $ \mba -> copyByteArray (asBA sbs) 0 mba 0 nl -- | /O(n)/ Extract the 'init' and 'last' of a ByteString, returning Nothing -- if it is empty. unsnoc :: ShortByteString -> Maybe (ShortByteString, Word16) unsnoc = \(assertEven -> sbs) -> let l = BS.length sbs nl = l - 2 in if | l <= 0 -> Nothing | otherwise -> let l' = indexWord16Array (asBA sbs) (l - 2) i = create nl $ \mba -> copyByteArray (asBA sbs) 0 mba 0 nl in Just (i, l') -- --------------------------------------------------------------------- -- Transformations -- | /O(n)/ 'map' @f xs@ is the ShortByteString obtained by applying @f@ to each -- element of @xs@. map :: (Word16 -> Word16) -> ShortByteString -> ShortByteString map f = \(assertEven -> sbs) -> let l = BS.length sbs ba = asBA sbs in create l (\mba -> go ba mba 0 l) where go :: BA -> MBA s -> Int -> Int -> ST s () go !ba !mba !i !l | i >= l = return () | otherwise = do let w = indexWord16Array ba i writeWord16Array mba i (f w) go ba mba (i+2) l -- TODO: implement more efficiently -- | /O(n)/ 'reverse' @xs@ efficiently returns the elements of @xs@ in reverse order. reverse :: ShortByteString -> ShortByteString reverse = \(assertEven -> sbs) -> let l = BS.length sbs ba = asBA sbs in create l (\mba -> go ba mba 0 l) where go :: BA -> MBA s -> Int -> Int -> ST s () go !ba !mba !i !l | i >= l = return () | otherwise = do let w = indexWord16Array ba i writeWord16Array mba (l - 2 - i) w go ba mba (i+2) l -- --------------------------------------------------------------------- -- Special folds -- | /O(n)/ Applied to a predicate and a 'ShortByteString', 'all' determines -- if all elements of the 'ShortByteString' satisfy the predicate. all :: (Word16 -> Bool) -> ShortByteString -> Bool all k = \(assertEven -> sbs) -> let l = BS.length sbs ba = asBA sbs w = indexWord16Array ba go !n | n >= l = True | otherwise = k (w n) && go (n + 2) in go 0 -- | /O(n)/ Applied to a predicate and a ByteString, 'any' determines if -- any element of the 'ByteString' satisfies the predicate. any :: (Word16 -> Bool) -> ShortByteString -> Bool any k = \(assertEven -> sbs) -> let l = BS.length sbs ba = asBA sbs w = indexWord16Array ba go !n | n >= l = False | otherwise = k (w n) || go (n + 2) in go 0 -- --------------------------------------------------------------------- -- Unfolds and replicates -- | /O(n)/ 'replicate' @n x@ is a ByteString of length @n@ with @x@ -- the value of every element. The following holds: -- -- > replicate w c = unfoldr w (\u -> Just (u,u)) c replicate :: Int -> Word16 -> ShortByteString replicate w c | w <= 0 = empty -- can't use setByteArray here, because we write UTF-16LE | otherwise = create (w * 2) (`go` 0) where go mba ix | ix < 0 || ix >= w * 2 = pure () | otherwise = writeWord16Array mba ix c >> go mba (ix + 2) -- | /O(n)/, where /n/ is the length of the result. The 'unfoldr' -- function is analogous to the List \'unfoldr\'. 'unfoldr' builds a -- ShortByteString from a seed value. The function takes the element and -- returns 'Nothing' if it is done producing the ShortByteString or returns -- 'Just' @(a,b)@, in which case, @a@ is the next byte in the string, -- and @b@ is the seed value for further production. -- -- This function is not efficient/safe. It will build a list of @[Word16]@ -- and run the generator until it returns `Nothing`, otherwise recurse infinitely, -- then finally create a 'ShortByteString'. -- -- Examples: -- -- > unfoldr (\x -> if x <= 5 then Just (x, x + 1) else Nothing) 0 -- > == pack [0, 1, 2, 3, 4, 5] -- unfoldr :: (a -> Maybe (Word16, a)) -> a -> ShortByteString unfoldr f x0 = packWord16Rev $ go x0 mempty where go x words' = case f x of Nothing -> words' Just (w, x') -> go x' (w:words') -- | /O(n)/ Like 'unfoldr', 'unfoldrN' builds a ShortByteString from a seed -- value. However, the length of the result is limited by the first -- argument to 'unfoldrN'. This function is more efficient than 'unfoldr' -- when the maximum length of the result is known. -- -- The following equation relates 'unfoldrN' and 'unfoldr': -- -- > fst (unfoldrN n f s) == take n (unfoldr f s) -- unfoldrN :: forall a. Int -- ^ number of 'Word16' -> (a -> Maybe (Word16, a)) -> a -> (ShortByteString, Maybe a) unfoldrN i f = \x0 -> if | i < 0 -> (empty, Just x0) | otherwise -> createAndTrim (i * 2) $ \mba -> go mba x0 0 where go :: forall s. MBA s -> a -> Int -> ST s (Int, Maybe a) go !mba !x !n = go' x n where go' :: a -> Int -> ST s (Int, Maybe a) go' !x' !n' | n' == i * 2 = return (n', Just x') | otherwise = case f x' of Nothing -> return (n', Nothing) Just (w, x'') -> do writeWord16Array mba n' w go' x'' (n'+2) -- -------------------------------------------------------------------- -- Predicates -- --------------------------------------------------------------------- -- Substrings -- | /O(n)/ 'take' @n@, applied to a ShortByteString @xs@, returns the prefix -- of @xs@ of length @n@, or @xs@ itself if @n > 'length' xs@. -- -- Note: copies the entire byte array take :: Int -- ^ number of Word16 -> ShortByteString -> ShortByteString take = \n (assertEven -> sbs) -> let sl = numWord16 sbs len8 = n * 2 in if | n >= sl -> sbs | n <= 0 -> empty | otherwise -> create len8 $ \mba -> copyByteArray (asBA sbs) 0 mba 0 len8 -- | /O(1)/ @'takeEnd' n xs@ is equivalent to @'drop' ('length' xs - n) xs@. -- Takes @n@ elements from end of bytestring. -- -- >>> takeEnd 3 "a\NULb\NULc\NULd\NULe\NULf\NULg\NUL" -- "e\NULf\NULg\NUL" -- >>> takeEnd 0 "a\NULb\NULc\NULd\NULe\NULf\NULg\NUL" -- "" -- >>> takeEnd 4 "a\NULb\NULc\NUL" -- "a\NULb\NULc\NUL" takeEnd :: Int -- ^ number of 'Word16' -> ShortByteString -> ShortByteString takeEnd n = \(assertEven -> sbs) -> let sl = BS.length sbs n2 = n * 2 in if | n2 >= sl -> sbs | n2 <= 0 -> empty | otherwise -> create n2 $ \mba -> copyByteArray (asBA sbs) (max 0 (sl - n2)) mba 0 n2 -- | Similar to 'P.takeWhile', -- returns the longest (possibly empty) prefix of elements -- satisfying the predicate. takeWhile :: (Word16 -> Bool) -> ShortByteString -> ShortByteString takeWhile f ps = take (findIndexOrLength (not . f) ps) ps -- | Returns the longest (possibly empty) suffix of elements -- satisfying the predicate. -- -- @'takeWhileEnd' p@ is equivalent to @'reverse' . 'takeWhile' p . 'reverse'@. takeWhileEnd :: (Word16 -> Bool) -> ShortByteString -> ShortByteString takeWhileEnd f ps = drop (findFromEndUntil (not . f) ps) ps -- | /O(n)/ 'drop' @n@ @xs@ returns the suffix of @xs@ after the first n elements, or @[]@ if @n > 'length' xs@. -- -- Note: copies the entire byte array drop :: Int -- ^ number of 'Word16' -> ShortByteString -> ShortByteString drop = \n' (assertEven -> sbs) -> let len = BS.length sbs n = n' * 2 in if | n <= 0 -> sbs | n >= len -> empty | otherwise -> let newLen = len - n in create newLen $ \mba -> copyByteArray (asBA sbs) n mba 0 newLen -- | /O(1)/ @'dropEnd' n xs@ is equivalent to @'take' ('length' xs - n) xs@. -- Drops @n@ elements from end of bytestring. -- -- >>> dropEnd 3 "a\NULb\NULc\NULd\NULe\NULf\NULg\NUL" -- "a\NULb\NULc\NULd\NUL" -- >>> dropEnd 0 "a\NULb\NULc\NULd\NULe\NULf\NULg\NUL" -- "a\NULb\NULc\NULd\NULe\NULf\NULg\NUL" -- >>> dropEnd 4 "a\NULb\NULc\NUL" -- "" dropEnd :: Int -- ^ number of 'Word16' -> ShortByteString -> ShortByteString dropEnd n' = \(assertEven -> sbs) -> let sl = BS.length sbs nl = sl - n n = n' * 2 in if | n >= sl -> empty | n <= 0 -> sbs | otherwise -> create nl $ \mba -> copyByteArray (asBA sbs) 0 mba 0 nl -- | Similar to 'P.dropWhile', -- drops the longest (possibly empty) prefix of elements -- satisfying the predicate and returns the remainder. -- -- Note: copies the entire byte array dropWhile :: (Word16 -> Bool) -> ShortByteString -> ShortByteString dropWhile f = \(assertEven -> ps) -> drop (findIndexOrLength (not . f) ps) ps -- | Similar to 'P.dropWhileEnd', -- drops the longest (possibly empty) suffix of elements -- satisfying the predicate and returns the remainder. -- -- @'dropWhileEnd' p@ is equivalent to @'reverse' . 'dropWhile' p . 'reverse'@. -- -- @since 0.10.12.0 dropWhileEnd :: (Word16 -> Bool) -> ShortByteString -> ShortByteString dropWhileEnd f = \(assertEven -> ps) -> take (findFromEndUntil (not . f) ps) ps -- | Returns the longest (possibly empty) suffix of elements which __do not__ -- satisfy the predicate and the remainder of the string. -- -- 'breakEnd' @p@ is equivalent to @'spanEnd' (not . p)@ and to @('takeWhileEnd' (not . p) &&& 'dropWhileEnd' (not . p))@. breakEnd :: (Word16 -> Bool) -> ShortByteString -> (ShortByteString, ShortByteString) breakEnd p = \(assertEven -> sbs) -> splitAt (findFromEndUntil p sbs) sbs -- | Similar to 'P.break', -- returns the longest (possibly empty) prefix of elements which __do not__ -- satisfy the predicate and the remainder of the string. -- -- 'break' @p@ is equivalent to @'span' (not . p)@ and to @('takeWhile' (not . p) &&& 'dropWhile' (not . p))@. break :: (Word16 -> Bool) -> ShortByteString -> (ShortByteString, ShortByteString) break = \p (assertEven -> ps) -> case findIndexOrLength p ps of n -> splitAt n ps -- | Similar to 'P.span', -- returns the longest (possibly empty) prefix of elements -- satisfying the predicate and the remainder of the string. -- -- 'span' @p@ is equivalent to @'break' (not . p)@ and to @('takeWhile' p &&& 'dropWhile' p)@. -- span :: (Word16 -> Bool) -> ShortByteString -> (ShortByteString, ShortByteString) {- HLINT ignore "Use span" -} span p = break (not . p) . assertEven -- | Returns the longest (possibly empty) suffix of elements -- satisfying the predicate and the remainder of the string. -- -- 'spanEnd' @p@ is equivalent to @'breakEnd' (not . p)@ and to @('takeWhileEnd' p &&& 'dropWhileEnd' p)@. -- -- We have -- -- > spanEnd (not . isSpace) "x y z" == ("x y ", "z") -- -- and -- -- > spanEnd (not . isSpace) ps -- > == -- > let (x, y) = span (not . isSpace) (reverse ps) in (reverse y, reverse x) -- spanEnd :: (Word16 -> Bool) -> ShortByteString -> (ShortByteString, ShortByteString) spanEnd p = \(assertEven -> ps) -> splitAt (findFromEndUntil (not.p) ps) ps -- | /O(n)/ 'splitAt' @n xs@ is equivalent to @('take' n xs, 'drop' n xs)@. -- -- Note: copies the substrings splitAt :: Int -- ^ number of Word16 -> ShortByteString -> (ShortByteString, ShortByteString) splitAt n' = \(assertEven -> sbs) -> if | n <= 0 -> (empty, sbs) | otherwise -> let slen = BS.length sbs in if | n >= BS.length sbs -> (sbs, empty) | otherwise -> let llen = min slen (max 0 n) rlen = max 0 (slen - max 0 n) lsbs = create llen $ \mba -> copyByteArray (asBA sbs) 0 mba 0 llen rsbs = create rlen $ \mba -> copyByteArray (asBA sbs) n mba 0 rlen in (lsbs, rsbs) where n = n' * 2 -- | /O(n)/ Break a 'ShortByteString' into pieces separated by the byte -- argument, consuming the delimiter. I.e. -- -- > split 10 "a\nb\nd\ne" == ["a","b","d","e"] -- fromEnum '\n' == 10 -- > split 97 "aXaXaXa" == ["","X","X","X",""] -- fromEnum 'a' == 97 -- > split 120 "x" == ["",""] -- fromEnum 'x' == 120 -- > split undefined "" == [] -- and not [""] -- -- and -- -- > intercalate [c] . split c == id -- > split == splitWith . (==) -- -- Note: copies the substrings split :: Word16 -> ShortByteString -> [ShortByteString] split w = splitWith (== w) . assertEven -- | /O(n)/ Splits a 'ShortByteString' into components delimited by -- separators, where the predicate returns True for a separator element. -- The resulting components do not contain the separators. Two adjacent -- separators result in an empty component in the output. eg. -- -- > splitWith (==97) "aabbaca" == ["","","bb","c",""] -- fromEnum 'a' == 97 -- > splitWith undefined "" == [] -- and not [""] -- splitWith :: (Word16 -> Bool) -> ShortByteString -> [ShortByteString] splitWith p = \(assertEven -> sbs) -> if | BS.null sbs -> [] | otherwise -> go sbs where go sbs' | BS.null sbs' = [mempty] | otherwise = case break p sbs' of (a, b) | BS.null b -> [a] | otherwise -> a : go (tail b) -- | Check whether one string is a substring of another. isInfixOf :: ShortByteString -> ShortByteString -> Bool isInfixOf sbs = \s -> null sbs || not (null $ snd $ GHC.Exts.inline breakSubstring sbs s) -- algorithm: https://github.com/haskell/filepath/issues/195#issuecomment-1605633713 breakSubstring :: ShortByteString -- ^ String to search for -> ShortByteString -- ^ String to search in -> (ShortByteString, ShortByteString) -- ^ Head and tail of string broken at substring breakSubstring bPat@(asBA -> pat) bInp@(asBA -> inp) = go 0 where lpat = BS.length bPat linp = BS.length bInp go ix | let ix' = ix * 2 , linp >= ix' + lpat = if | compareByteArraysOff pat 0 inp ix' lpat == 0 -> splitAt ix bInp | otherwise -> go (ix + 1) | otherwise = (bInp, mempty) -- --------------------------------------------------------------------- -- Reducing 'ByteString's -- | 'foldl', applied to a binary operator, a starting value (typically -- the left-identity of the operator), and a ShortByteString, reduces the -- ShortByteString using the binary operator, from left to right. -- foldl :: (a -> Word16 -> a) -> a -> ShortByteString -> a foldl f v = List.foldl f v . unpack . assertEven -- | 'foldl'' is like 'foldl', but strict in the accumulator. -- foldl' :: (a -> Word16 -> a) -> a -> ShortByteString -> a foldl' f v = List.foldl' f v . unpack . assertEven -- | 'foldr', applied to a binary operator, a starting value -- (typically the right-identity of the operator), and a ShortByteString, -- reduces the ShortByteString using the binary operator, from right to left. foldr :: (Word16 -> a -> a) -> a -> ShortByteString -> a foldr f v = List.foldr f v . unpack . assertEven -- | 'foldr'' is like 'foldr', but strict in the accumulator. foldr' :: (Word16 -> a -> a) -> a -> ShortByteString -> a foldr' k v = Foldable.foldr' k v . unpack . assertEven -- | 'foldl1' is a variant of 'foldl' that has no starting value -- argument, and thus must be applied to non-empty 'ShortByteString's. -- An exception will be thrown in the case of an empty ShortByteString. foldl1 :: HasCallStack => (Word16 -> Word16 -> Word16) -> ShortByteString -> Word16 foldl1 k = List.foldl1 k . unpack . assertEven -- | 'foldl1'' is like 'foldl1', but strict in the accumulator. -- An exception will be thrown in the case of an empty ShortByteString. foldl1' :: HasCallStack => (Word16 -> Word16 -> Word16) -> ShortByteString -> Word16 foldl1' k = List.foldl1' k . unpack . assertEven -- | 'foldr1' is a variant of 'foldr' that has no starting value argument, -- and thus must be applied to non-empty 'ShortByteString's -- An exception will be thrown in the case of an empty ShortByteString. foldr1 :: HasCallStack => (Word16 -> Word16 -> Word16) -> ShortByteString -> Word16 foldr1 k = List.foldr1 k . unpack . assertEven -- | 'foldr1'' is a variant of 'foldr1', but is strict in the -- accumulator. foldr1' :: HasCallStack => (Word16 -> Word16 -> Word16) -> ShortByteString -> Word16 foldr1' k = \(assertEven -> sbs) -> if null sbs then errorEmptySBS "foldr1'" else foldr' k (last sbs) (init sbs) -- -------------------------------------------------------------------- -- Searching ShortByteString -- | /O(1)/ 'ShortByteString' index (subscript) operator, starting from 0. index :: HasCallStack => ShortByteString -> Int -- ^ number of 'Word16' -> Word16 index = \(assertEven -> sbs) i -> if | i >= 0 && i < numWord16 sbs -> unsafeIndex sbs i | otherwise -> indexError sbs i -- | /O(1)/ 'ShortByteString' index, starting from 0, that returns 'Just' if: -- -- > 0 <= n < length bs -- -- @since 0.11.0.0 indexMaybe :: ShortByteString -> Int -- ^ number of 'Word16' -> Maybe Word16 indexMaybe = \(assertEven -> sbs) i -> if | i >= 0 && i < numWord16 sbs -> Just $! unsafeIndex sbs i | otherwise -> Nothing {-# INLINE indexMaybe #-} unsafeIndex :: ShortByteString -> Int -- ^ number of 'Word16' -> Word16 unsafeIndex sbs i = indexWord16Array (asBA sbs) (i * 2) indexError :: HasCallStack => ShortByteString -> Int -> a indexError sbs i = moduleError "index" $ "error in array index: " ++ show i ++ " not in range [0.." ++ show (numWord16 sbs) ++ "]" -- | /O(1)/ 'ShortByteString' index, starting from 0, that returns 'Just' if: -- -- > 0 <= n < length bs -- -- @since 0.11.0.0 (!?) :: ShortByteString -> Int -- ^ number of 'Word16' -> Maybe Word16 (!?) = indexMaybe {-# INLINE (!?) #-} -- | /O(n)/ 'elem' is the 'ShortByteString' membership predicate. elem :: Word16 -> ShortByteString -> Bool elem c = \(assertEven -> sbs) -> case elemIndex c sbs of Nothing -> False ; _ -> True -- | /O(n)/ 'filter', applied to a predicate and a ByteString, -- returns a ByteString containing those characters that satisfy the -- predicate. filter :: (Word16 -> Bool) -> ShortByteString -> ShortByteString filter k = \(assertEven -> sbs) -> let l = BS.length sbs in if | l <= 0 -> sbs | otherwise -> createAndTrim' l $ \mba -> go mba (asBA sbs) l where go :: forall s. MBA s -- mutable output bytestring -> BA -- input bytestring -> Int -- length of input bytestring -> ST s Int go !mba ba !l = go' 0 0 where go' :: Int -- bytes read -> Int -- bytes written -> ST s Int go' !br !bw | br >= l = return bw | otherwise = do let w = indexWord16Array ba br if k w then do writeWord16Array mba bw w go' (br+2) (bw+2) else go' (br+2) bw -- | /O(n)/ The 'find' function takes a predicate and a ByteString, -- and returns the first element in matching the predicate, or 'Nothing' -- if there is no such element. -- -- > find f p = case findIndex f p of Just n -> Just (p ! n) ; _ -> Nothing -- find :: (Word16 -> Bool) -> ShortByteString -> Maybe Word16 find f = \(assertEven -> sbs) -> case findIndex f sbs of Just n -> Just (sbs `index` n) _ -> Nothing -- | /O(n)/ The 'partition' function takes a predicate a ByteString and returns -- the pair of ByteStrings with elements which do and do not satisfy the -- predicate, respectively; i.e., -- -- > partition p bs == (filter p xs, filter (not . p) xs) -- partition :: (Word16 -> Bool) -> ShortByteString -> (ShortByteString, ShortByteString) partition k = \(assertEven -> sbs) -> let l = BS.length sbs in if | l <= 0 -> (sbs, sbs) | otherwise -> createAndTrim'' l $ \mba1 mba2 -> go mba1 mba2 (asBA sbs) l where go :: forall s. MBA s -- mutable output bytestring1 -> MBA s -- mutable output bytestring2 -> BA -- input bytestring -> Int -- length of input bytestring -> ST s (Int, Int) -- (length mba1, length mba2) go !mba1 !mba2 ba !l = go' 0 0 where go' :: Int -- bytes read -> Int -- bytes written to bytestring 1 -> ST s (Int, Int) -- (length mba1, length mba2) go' !br !bw1 | br >= l = return (bw1, br - bw1) | otherwise = do let w = indexWord16Array ba br if k w then do writeWord16Array mba1 bw1 w go' (br+2) (bw1+2) else do writeWord16Array mba2 (br - bw1) w go' (br+2) bw1 -- -------------------------------------------------------------------- -- Indexing ShortByteString -- | /O(n)/ The 'elemIndex' function returns the index of the first -- element in the given 'ShortByteString' which is equal to the query -- element, or 'Nothing' if there is no such element. elemIndex :: Word16 -> ShortByteString -> Maybe Int -- ^ number of 'Word16' {- HLINT ignore "Use elemIndex" -} elemIndex k = findIndex (==k) . assertEven -- | /O(n)/ The 'elemIndices' function extends 'elemIndex', by returning -- the indices of all elements equal to the query element, in ascending order. elemIndices :: Word16 -> ShortByteString -> [Int] {- HLINT ignore "Use elemIndices" -} elemIndices k = findIndices (==k) . assertEven -- | count returns the number of times its argument appears in the ShortByteString count :: Word16 -> ShortByteString -> Int count w = List.length . elemIndices w . assertEven -- | /O(n)/ The 'findIndex' function takes a predicate and a 'ShortByteString' and -- returns the index of the first element in the ByteString -- satisfying the predicate. findIndex :: (Word16 -> Bool) -> ShortByteString -> Maybe Int findIndex k = \(assertEven -> sbs) -> let l = BS.length sbs ba = asBA sbs w = indexWord16Array ba go !n | n >= l = Nothing | k (w n) = Just (n `shiftR` 1) | otherwise = go (n + 2) in go 0 -- | /O(n)/ The 'findIndices' function extends 'findIndex', by returning the -- indices of all elements satisfying the predicate, in ascending order. findIndices :: (Word16 -> Bool) -> ShortByteString -> [Int] findIndices k = \(assertEven -> sbs) -> let l = BS.length sbs ba = asBA sbs w = indexWord16Array ba go !n | n >= l = [] | k (w n) = (n `shiftR` 1) : go (n + 2) | otherwise = go (n + 2) in go 0 os-string-2.0.6/System/OsString/Encoding.hs0000644000000000000000000000105207346545000016751 0ustar0000000000000000module System.OsString.Encoding ( -- * Types EncodingException(..) , showEncodingException -- * UCS-2 , ucs2le , mkUcs2le , ucs2le_DF , ucs2le_EF , ucs2le_decode , ucs2le_encode -- * UTF-16LE_b , utf16le_b , mkUTF16le_b , utf16le_b_DF , utf16le_b_EF , utf16le_b_decode , utf16le_b_encode -- * base encoding , encodeWithBasePosix , decodeWithBasePosix , encodeWithBasePosix' , decodeWithBasePosix' , encodeWithBaseWindows , decodeWithBaseWindows ) where import System.OsString.Encoding.Internal os-string-2.0.6/System/OsString/Encoding/0000755000000000000000000000000007346545000016417 5ustar0000000000000000os-string-2.0.6/System/OsString/Encoding/Internal.hs0000644000000000000000000003341207346545000020532 0ustar0000000000000000{-# LANGUAGE NoImplicitPrelude , BangPatterns , TypeApplications , MultiWayIf #-} {-# OPTIONS_GHC -funbox-strict-fields #-} module System.OsString.Encoding.Internal where import qualified System.OsString.Data.ByteString.Short as BS8 import qualified System.OsString.Data.ByteString.Short.Word16 as BS16 import GHC.Base import GHC.Real import GHC.Num -- import GHC.IO import GHC.IO.Buffer import GHC.IO.Encoding.Failure import GHC.IO.Encoding.Types import Data.Bits import Control.Exception (SomeException, try, Exception (displayException), evaluate) import qualified GHC.Foreign as GHC import Data.Either (Either) import GHC.IO (unsafePerformIO) import Control.DeepSeq (force, NFData (rnf)) import Data.Bifunctor (first) import Data.Data (Typeable) import GHC.Show (Show (show)) import Numeric (showHex) import Foreign.C (CStringLen) import Data.Char (chr) import Foreign import GHC.IO.Encoding (getFileSystemEncoding, getLocaleEncoding) -- ----------------------------------------------------------------------------- -- UCS-2 LE -- ucs2le :: TextEncoding ucs2le = mkUcs2le ErrorOnCodingFailure mkUcs2le :: CodingFailureMode -> TextEncoding mkUcs2le cfm = TextEncoding { textEncodingName = "UCS-2LE", mkTextDecoder = ucs2le_DF cfm, mkTextEncoder = ucs2le_EF cfm } ucs2le_DF :: CodingFailureMode -> IO (TextDecoder ()) ucs2le_DF cfm = return (BufferCodec { encode = ucs2le_decode, recover = recoverDecode cfm, close = return (), getState = return (), setState = const $ return () }) ucs2le_EF :: CodingFailureMode -> IO (TextEncoder ()) ucs2le_EF cfm = return (BufferCodec { encode = ucs2le_encode, recover = recoverEncode cfm, close = return (), getState = return (), setState = const $ return () }) ucs2le_decode :: DecodeBuffer ucs2le_decode input@Buffer{ bufRaw=iraw, bufL=ir0, bufR=iw, bufSize=_ } output@Buffer{ bufRaw=oraw, bufL=_, bufR=ow0, bufSize=os } = let loop !ir !ow | ow >= os = done OutputUnderflow ir ow | ir >= iw = done InputUnderflow ir ow | ir + 1 == iw = done InputUnderflow ir ow | otherwise = do c0 <- readWord8Buf iraw ir c1 <- readWord8Buf iraw (ir+1) let x1 = fromIntegral c1 `shiftL` 8 + fromIntegral c0 ow' <- writeCharBuf oraw ow (unsafeChr x1) loop (ir+2) ow' -- lambda-lifted, to avoid thunks being built in the inner-loop: done why !ir !ow = return (why, if ir == iw then input{ bufL=0, bufR=0 } else input{ bufL=ir }, output{ bufR=ow }) in loop ir0 ow0 ucs2le_encode :: EncodeBuffer ucs2le_encode input@Buffer{ bufRaw=iraw, bufL=ir0, bufR=iw, bufSize=_ } output@Buffer{ bufRaw=oraw, bufL=_, bufR=ow0, bufSize=os } = let done why !ir !ow = return (why, if ir == iw then input{ bufL=0, bufR=0 } else input{ bufL=ir }, output{ bufR=ow }) loop !ir !ow | ir >= iw = done InputUnderflow ir ow | os - ow < 2 = done OutputUnderflow ir ow | otherwise = do (c,ir') <- readCharBuf iraw ir case ord c of x | x < 0x10000 -> do writeWord8Buf oraw ow (fromIntegral x) writeWord8Buf oraw (ow+1) (fromIntegral (x `shiftR` 8)) loop ir' (ow+2) | otherwise -> done InvalidSequence ir ow in loop ir0 ow0 -- ----------------------------------------------------------------------------- -- UTF-16b -- -- | Mimics the base encoding for filesystem operations. This should be total on all inputs (word16 byte arrays). -- -- Note that this has a subtle difference to 'encodeWithBaseWindows'/'decodeWithBaseWindows': it doesn't care for -- the @0x0000@ end marker and will as such produce different results. Use @takeWhile (/= '\NUL')@ on the input -- to recover this behavior. utf16le_b :: TextEncoding utf16le_b = mkUTF16le_b ErrorOnCodingFailure mkUTF16le_b :: CodingFailureMode -> TextEncoding mkUTF16le_b cfm = TextEncoding { textEncodingName = "UTF-16LE_b", mkTextDecoder = utf16le_b_DF cfm, mkTextEncoder = utf16le_b_EF cfm } utf16le_b_DF :: CodingFailureMode -> IO (TextDecoder ()) utf16le_b_DF cfm = return (BufferCodec { encode = utf16le_b_decode, recover = recoverDecode cfm, close = return (), getState = return (), setState = const $ return () }) utf16le_b_EF :: CodingFailureMode -> IO (TextEncoder ()) utf16le_b_EF cfm = return (BufferCodec { encode = utf16le_b_encode, recover = recoverEncode cfm, close = return (), getState = return (), setState = const $ return () }) utf16le_b_decode :: DecodeBuffer utf16le_b_decode input@Buffer{ bufRaw=iraw, bufL=ir0, bufR=iw, bufSize=_ } output@Buffer{ bufRaw=oraw, bufL=_, bufR=ow0, bufSize=os } = let loop !ir !ow | ow >= os = done OutputUnderflow ir ow | ir >= iw = done InputUnderflow ir ow | ir + 1 == iw = done InputUnderflow ir ow | otherwise = do c0 <- readWord8Buf iraw ir c1 <- readWord8Buf iraw (ir+1) let x1 = fromIntegral c1 `shiftL` 8 + fromIntegral c0 if | iw - ir >= 4 -> do c2 <- readWord8Buf iraw (ir+2) c3 <- readWord8Buf iraw (ir+3) let x2 = fromIntegral c3 `shiftL` 8 + fromIntegral c2 if | 0xd800 <= x1 && x1 <= 0xdbff , 0xdc00 <= x2 && x2 <= 0xdfff -> do ow' <- writeCharBuf oraw ow (unsafeChr ((x1 - 0xd800)*0x400 + (x2 - 0xdc00) + 0x10000)) loop (ir+4) ow' | otherwise -> do ow' <- writeCharBuf oraw ow (unsafeChr x1) loop (ir+2) ow' | iw - ir >= 2 -> do ow' <- writeCharBuf oraw ow (unsafeChr x1) loop (ir+2) ow' | otherwise -> done InputUnderflow ir ow -- lambda-lifted, to avoid thunks being built in the inner-loop: done why !ir !ow = return (why, if ir == iw then input{ bufL=0, bufR=0 } else input{ bufL=ir }, output{ bufR=ow }) in loop ir0 ow0 utf16le_b_encode :: EncodeBuffer utf16le_b_encode input@Buffer{ bufRaw=iraw, bufL=ir0, bufR=iw, bufSize=_ } output@Buffer{ bufRaw=oraw, bufL=_, bufR=ow0, bufSize=os } = let done why !ir !ow = return (why, if ir == iw then input{ bufL=0, bufR=0 } else input{ bufL=ir }, output{ bufR=ow }) loop !ir !ow | ir >= iw = done InputUnderflow ir ow | os - ow < 2 = done OutputUnderflow ir ow | otherwise = do (c,ir') <- readCharBuf iraw ir case ord c of x | x < 0x10000 -> do writeWord8Buf oraw ow (fromIntegral x) writeWord8Buf oraw (ow+1) (fromIntegral (x `shiftR` 8)) loop ir' (ow+2) | otherwise -> if os - ow < 4 then done OutputUnderflow ir ow else do let x' = x - 0x10000 w1 = x' `div` 0x400 + 0xd800 w2 = x' `mod` 0x400 + 0xdc00 writeWord8Buf oraw ow (fromIntegral w1) writeWord8Buf oraw (ow+1) (fromIntegral (w1 `shiftR` 8)) writeWord8Buf oraw (ow+2) (fromIntegral w2) writeWord8Buf oraw (ow+3) (fromIntegral (w2 `shiftR` 8)) loop ir' (ow+4) in loop ir0 ow0 -- ----------------------------------------------------------------------------- -- Windows encoding (ripped off from base) -- cWcharsToChars_UCS2 :: [Word16] -> [Char] cWcharsToChars_UCS2 = map (chr . fromIntegral) -- On Windows, wchar_t is 16 bits wide and CWString uses the UTF-16 encoding. -- coding errors generate Chars in the surrogate range cWcharsToChars :: [Word16] -> [Char] cWcharsToChars = map chr . fromUTF16 . map fromIntegral where fromUTF16 :: [Int] -> [Int] fromUTF16 (c1:c2:wcs) | 0xd800 <= c1 && c1 <= 0xdbff && 0xdc00 <= c2 && c2 <= 0xdfff = ((c1 - 0xd800)*0x400 + (c2 - 0xdc00) + 0x10000) : fromUTF16 wcs fromUTF16 (c:wcs) = c : fromUTF16 wcs fromUTF16 [] = [] charsToCWchars :: [Char] -> [Word16] charsToCWchars = foldr (utf16Char . ord) [] where utf16Char :: Int -> [Word16] -> [Word16] utf16Char c wcs | c < 0x10000 = fromIntegral c : wcs | otherwise = let c' = c - 0x10000 in fromIntegral (c' `div` 0x400 + 0xd800) : fromIntegral (c' `mod` 0x400 + 0xdc00) : wcs -- ----------------------------------------------------------------------------- -- ----------------------------------------------------------------------------- -- FFI -- withWindowsString :: String -> (Int -> Ptr Word16 -> IO a) -> IO a withWindowsString = withArrayLen . charsToCWchars peekWindowsString :: (Ptr Word16, Int) -> IO String peekWindowsString (cp, l) = do cs <- peekArray l cp return (cWcharsToChars cs) withPosixString :: String -> (CStringLen -> IO a) -> IO a withPosixString fp f = getFileSystemEncoding >>= \enc -> GHC.withCStringLen enc fp f withPosixString' :: String -> (CStringLen -> IO a) -> IO a withPosixString' fp f = getLocaleEncoding >>= \enc -> GHC.withCStringLen enc fp f peekPosixString :: CStringLen -> IO String peekPosixString fp = getFileSystemEncoding >>= \enc -> GHC.peekCStringLen enc fp peekPosixString' :: CStringLen -> IO String peekPosixString' fp = getLocaleEncoding >>= \enc -> GHC.peekCStringLen enc fp -- | Decode with the given 'TextEncoding'. decodeWithTE :: TextEncoding -> BS8.ShortByteString -> Either EncodingException String decodeWithTE enc ba = unsafePerformIO $ do r <- try @SomeException $ BS8.useAsCStringLen ba $ \fp -> GHC.peekCStringLen enc fp evaluate $ force $ first (flip EncodingError Nothing . displayException) r -- | Encode with the given 'TextEncoding'. encodeWithTE :: TextEncoding -> String -> Either EncodingException BS8.ShortByteString encodeWithTE enc str = unsafePerformIO $ do r <- try @SomeException $ GHC.withCStringLen enc str $ \cstr -> BS8.packCStringLen cstr evaluate $ force $ first (flip EncodingError Nothing . displayException) r -- ----------------------------------------------------------------------------- -- Encoders / decoders -- -- | This mimics the filepath decoder base uses on unix (using PEP-383), -- with the small distinction that we're not truncating at NUL bytes (because we're not at -- the outer FFI layer). decodeWithBasePosix :: BS8.ShortByteString -> IO String decodeWithBasePosix ba = BS8.useAsCStringLen ba $ \fp -> peekPosixString fp -- | This mimics the string decoder base uses on unix, -- with the small distinction that we're not truncating at NUL bytes (because we're not at -- the outer FFI layer). decodeWithBasePosix' :: BS8.ShortByteString -> IO String decodeWithBasePosix' ba = BS8.useAsCStringLen ba $ \fp -> peekPosixString' fp -- | This mimics the filepath encoder base uses on unix (using PEP-383), -- with the small distinction that we're not truncating at NUL bytes (because we're not at -- the outer FFI layer). encodeWithBasePosix :: String -> IO BS8.ShortByteString encodeWithBasePosix str = withPosixString str $ \cstr -> BS8.packCStringLen cstr -- | This mimics the string encoder base uses on unix, -- with the small distinction that we're not truncating at NUL bytes (because we're not at -- the outer FFI layer). encodeWithBasePosix' :: String -> IO BS8.ShortByteString encodeWithBasePosix' str = withPosixString' str $ \cstr -> BS8.packCStringLen cstr -- | This mimics the filepath decoder base uses on windows, -- with the small distinction that we're not truncating at NUL bytes (because we're not at -- the outer FFI layer). decodeWithBaseWindows :: BS16.ShortByteString -> IO String decodeWithBaseWindows ba = BS16.useAsCWStringLen ba $ \fp -> peekWindowsString fp -- | This mimics the filepath dencoder base uses on windows, -- with the small distinction that we're not truncating at NUL bytes (because we're not at -- the outer FFI layer). encodeWithBaseWindows :: String -> IO BS16.ShortByteString encodeWithBaseWindows str = withWindowsString str $ \l cstr -> BS16.packCWStringLen (cstr, l) -- ----------------------------------------------------------------------------- -- Types -- data EncodingException = EncodingError String (Maybe Word8) -- ^ Could not decode a byte sequence because it was invalid under -- the given encoding, or ran out of input in mid-decode. deriving (Eq, Typeable) showEncodingException :: EncodingException -> String showEncodingException (EncodingError desc (Just w)) = "Cannot decode byte '\\x" ++ showHex w ("': " ++ desc) showEncodingException (EncodingError desc Nothing) = "Cannot decode input: " ++ desc instance Show EncodingException where show = showEncodingException instance Exception EncodingException instance NFData EncodingException where rnf (EncodingError desc w) = rnf desc `seq` rnf w -- ----------------------------------------------------------------------------- -- Words -- wNUL :: Word16 wNUL = 0x00 os-string-2.0.6/System/OsString/Internal.hs0000644000000000000000000006262007346545000017007 0ustar0000000000000000{-# LANGUAGE CPP #-} {-# LANGUAGE RankNTypes #-} {-# LANGUAGE UnliftedFFITypes #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TypeApplications #-} {-# LANGUAGE TemplateHaskellQuotes #-} {-# LANGUAGE ViewPatterns #-} -- needed to quote a view pattern module System.OsString.Internal where import System.OsString.Internal.Types import Control.Monad.Catch ( MonadThrow ) import Data.ByteString ( ByteString ) import Data.Char import Language.Haskell.TH.Quote ( QuasiQuoter (..) ) import Language.Haskell.TH.Syntax ( Lift (..), lift ) import System.IO ( TextEncoding ) import System.OsString.Encoding ( EncodingException(..) ) import GHC.IO.Encoding.Failure ( CodingFailureMode(..) ) #if defined(mingw32_HOST_OS) || defined(__MINGW32__) import GHC.IO.Encoding.UTF16 ( mkUTF16le ) import System.OsString.Encoding ( encodeWithBaseWindows, decodeWithBaseWindows ) import qualified System.OsString.Windows as PF #else import GHC.IO.Encoding.UTF8 ( mkUTF8 ) import System.OsString.Encoding ( encodeWithBasePosix, decodeWithBasePosix ) import qualified System.OsString.Posix as PF #endif import GHC.Stack (HasCallStack) import Data.Coerce (coerce) import Data.Type.Coercion (coerceWith) -- | Partial unicode friendly encoding. -- -- On windows this encodes as UTF16-LE (strictly), which is a pretty good guess. -- On unix this encodes as UTF8 (strictly), which is a good guess. -- -- Throws an 'EncodingException' if encoding fails. If the input does not -- contain surrogate chars, you can use 'unsafeEncodeUtf'. encodeUtf :: MonadThrow m => String -> m OsString encodeUtf = fmap OsString . PF.encodeUtf -- | Unsafe unicode friendly encoding. -- -- Like 'encodeUtf', except it crashes when the input contains -- surrogate chars. For sanitized input, this can be useful. unsafeEncodeUtf :: HasCallStack => String -> OsString unsafeEncodeUtf = OsString . PF.unsafeEncodeUtf -- | Encode a 'FilePath' with the specified encoding. -- -- Note: on windows, we expect a "wide char" encoding (e.g. UCS-2 or UTF-16). Anything -- that works with @Word16@ boundaries. Picking an incompatible encoding may crash -- filepath operations. encodeWith :: TextEncoding -- ^ unix text encoding -> TextEncoding -- ^ windows text encoding (wide char) -> String -> Either EncodingException OsString #if defined(mingw32_HOST_OS) || defined(__MINGW32__) encodeWith _ winEnc str = OsString <$> PF.encodeWith winEnc str #else encodeWith unixEnc _ str = OsString <$> PF.encodeWith unixEnc str #endif -- | Like 'encodeUtf', except this mimics the behavior of the base library when doing filesystem -- operations (usually filepaths), which is: -- -- 1. on unix, uses shady PEP 383 style encoding (based on the current locale, -- but PEP 383 only works properly on UTF-8 encodings, so good luck) -- 2. on windows does permissive UTF-16 encoding, where coding errors generate -- Chars in the surrogate range -- -- Looking up the locale requires IO. If you're not worried about calls -- to 'setFileSystemEncoding', then 'unsafePerformIO' may be feasible (make sure -- to deeply evaluate the result to catch exceptions). encodeFS :: String -> IO OsString #if defined(mingw32_HOST_OS) || defined(__MINGW32__) encodeFS = fmap (OsString . WindowsString) . encodeWithBaseWindows #else encodeFS = fmap (OsString . PosixString) . encodeWithBasePosix #endif -- | Like 'encodeUtf', except this mimics the behavior of the base library when doing string -- operations, which is: -- -- 1. on unix this uses 'getLocaleEncoding' -- 2. on windows does permissive UTF-16 encoding, where coding errors generate -- Chars in the surrogate range -- -- Looking up the locale requires IO. If you're not worried about calls -- to 'setFileSystemEncoding', then 'unsafePerformIO' may be feasible (make sure -- to deeply evaluate the result to catch exceptions). encodeLE :: String -> IO OsString encodeLE = fmap OsString . PF.encodeLE -- | Partial unicode friendly decoding. -- -- On windows this decodes as UTF16-LE (strictly), which is a pretty good guess. -- On unix this decodes as UTF8 (strictly), which is a good guess. Note that -- filenames on unix are encoding agnostic char arrays. -- -- Throws a 'EncodingException' if decoding fails. decodeUtf :: MonadThrow m => OsString -> m String decodeUtf (OsString x) = PF.decodeUtf x -- | Decode an 'OsString' with the specified encoding. -- -- The String is forced into memory to catch all exceptions. decodeWith :: TextEncoding -- ^ unix text encoding -> TextEncoding -- ^ windows text encoding -> OsString -> Either EncodingException String #if defined(mingw32_HOST_OS) || defined(__MINGW32__) decodeWith _ winEnc (OsString x) = PF.decodeWith winEnc x #else decodeWith unixEnc _ (OsString x) = PF.decodeWith unixEnc x #endif -- | Like 'decodeUtf', except this mimics the behavior of the base library when doing filesystem -- operations (usually filepaths), which is: -- -- 1. on unix, uses shady PEP 383 style encoding (based on the current locale, -- but PEP 383 only works properly on UTF-8 encodings, so good luck) -- 2. on windows does permissive UTF-16 encoding, where coding errors generate -- Chars in the surrogate range -- -- Looking up the locale requires IO. If you're not worried about calls -- to 'setFileSystemEncoding', then 'unsafePerformIO' may be feasible (make sure -- to deeply evaluate the result to catch exceptions). decodeFS :: OsString -> IO String #if defined(mingw32_HOST_OS) || defined(__MINGW32__) decodeFS (OsString (WindowsString x)) = decodeWithBaseWindows x #else decodeFS (OsString (PosixString x)) = decodeWithBasePosix x #endif -- | Like 'decodeUtf', except this mimics the behavior of the base library when doing string operations, -- which is: -- -- 1. on unix this uses 'getLocaleEncoding' -- 2. on windows does permissive UTF-16 encoding, where coding errors generate -- Chars in the surrogate range -- -- Looking up the locale requires IO. If you're not worried about calls -- to 'setFileSystemEncoding', then 'unsafePerformIO' may be feasible (make sure -- to deeply evaluate the result to catch exceptions). decodeLE :: OsString -> IO String decodeLE (OsString x) = PF.decodeLE x -- | Constructs an @OsString@ from a ByteString. -- -- On windows, this ensures valid UCS-2LE, on unix it is passed unchanged/unchecked. -- -- Throws 'EncodingException' on invalid UCS-2LE on windows (although unlikely). fromBytes :: MonadThrow m => ByteString -> m OsString fromBytes = fmap OsString . PF.fromBytes -- | QuasiQuote an 'OsString'. This accepts Unicode characters -- and encodes as UTF-8 on unix and UTF-16 on windows. -- If used as pattern, requires turning on the @ViewPatterns@ extension. osstr :: QuasiQuoter osstr = QuasiQuoter #if defined(mingw32_HOST_OS) || defined(__MINGW32__) { quoteExp = \s -> do osp <- either (fail . show) (pure . OsString) . PF.encodeWith (mkUTF16le ErrorOnCodingFailure) $ s lift osp , quotePat = \s -> do osp' <- either (fail . show) (pure . OsString) . PF.encodeWith (mkUTF16le ErrorOnCodingFailure) $ s [p|((==) osp' -> True)|] , quoteType = \_ -> fail "illegal QuasiQuote (allowed as expression or pattern only, used as a type)" , quoteDec = \_ -> fail "illegal QuasiQuote (allowed as expression or pattern only, used as a declaration)" } #else { quoteExp = \s -> do osp <- either (fail . show) (pure . OsString) . PF.encodeWith (mkUTF8 ErrorOnCodingFailure) $ s lift osp , quotePat = \s -> do osp' <- either (fail . show) (pure . OsString) . PF.encodeWith (mkUTF8 ErrorOnCodingFailure) $ s [p|((==) osp' -> True)|] , quoteType = \_ -> fail "illegal QuasiQuote (allowed as expression or pattern only, used as a type)" , quoteDec = \_ -> fail "illegal QuasiQuote (allowed as expression or pattern only, used as a declaration)" } #endif -- | Unpack an 'OsString' to a list of 'OsChar'. unpack :: OsString -> [OsChar] unpack = coerce PF.unpack -- | Pack a list of 'OsChar' to an 'OsString' -- -- Note that using this in conjunction with 'unsafeFromChar' to -- convert from @[Char]@ to 'OsString' is probably not what -- you want, because it will truncate unicode code points. pack :: [OsChar] -> OsString pack = coerce PF.pack empty :: OsString empty = mempty singleton :: OsChar -> OsString singleton = coerce PF.singleton -- | Truncates on unix to 1 and on Windows to 2 octets. unsafeFromChar :: Char -> OsChar unsafeFromChar = coerce PF.unsafeFromChar -- | Converts back to a unicode codepoint (total). toChar :: OsChar -> Char toChar = case coercionToPlatformTypes of Left (co, _) -> chr . fromIntegral . getWindowsChar . coerceWith co Right (co, _) -> chr . fromIntegral . getPosixChar . coerceWith co -- | /O(n)/ Append a byte to the end of a 'OsString' -- -- @since 1.4.200.0 snoc :: OsString -> OsChar -> OsString snoc = coerce PF.snoc -- | /O(n)/ 'cons' is analogous to (:) for lists. -- -- @since 1.4.200.0 cons :: OsChar -> OsString -> OsString cons = coerce PF.cons -- | /O(1)/ Extract the last element of a OsString, which must be finite and non-empty. -- An exception will be thrown in the case of an empty OsString. -- -- This is a partial function, consider using 'unsnoc' instead. -- -- @since 1.4.200.0 last :: HasCallStack => OsString -> OsChar last = coerce PF.last -- | /O(n)/ Extract the elements after the head of a OsString, which must be non-empty. -- An exception will be thrown in the case of an empty OsString. -- -- This is a partial function, consider using 'uncons' instead. -- -- @since 1.4.200.0 tail :: HasCallStack => OsString -> OsString tail = coerce PF.tail -- | /O(n)/ Extract the 'head' and 'tail' of a OsString, returning 'Nothing' -- if it is empty. -- -- @since 1.4.200.0 uncons :: OsString -> Maybe (OsChar, OsString) uncons = coerce PF.uncons -- | /O(1)/ Extract the first element of a OsString, which must be non-empty. -- An exception will be thrown in the case of an empty OsString. -- -- This is a partial function, consider using 'uncons' instead. -- -- @since 1.4.200.0 head :: HasCallStack => OsString -> OsChar head = coerce PF.head -- | /O(n)/ Return all the elements of a 'OsString' except the last one. -- An exception will be thrown in the case of an empty OsString. -- -- This is a partial function, consider using 'unsnoc' instead. -- -- @since 1.4.200.0 init :: HasCallStack => OsString -> OsString init = coerce PF.init -- | /O(n)/ Extract the 'init' and 'last' of a OsString, returning 'Nothing' -- if it is empty. -- -- @since 1.4.200.0 unsnoc :: OsString -> Maybe (OsString, OsChar) unsnoc = coerce PF.unsnoc -- | /O(1)/ Test whether a 'OsString' is empty. -- -- @since 1.4.200.0 null :: OsString -> Bool null = coerce PF.null -- | /O(1)/ The length of a 'OsString'. -- -- @since 1.4.200.0 length :: OsString -> Int length = coerce PF.length -- | /O(n)/ 'map' @f xs@ is the OsString obtained by applying @f@ to each -- element of @xs@. -- -- @since 1.4.200.0 map :: (OsChar -> OsChar) -> OsString -> OsString map = coerce PF.map -- | /O(n)/ 'reverse' @xs@ efficiently returns the elements of @xs@ in reverse order. -- -- @since 1.4.200.0 reverse :: OsString -> OsString reverse = coerce PF.reverse -- | /O(n)/ The 'intercalate' function takes a 'OsString' and a list of -- 'OsString's and concatenates the list after interspersing the first -- argument between each element of the list. -- -- @since 1.4.200.0 intercalate :: OsString -> [OsString] -> OsString intercalate = coerce PF.intercalate -- | 'foldl', applied to a binary operator, a starting value (typically -- the left-identity of the operator), and a OsString, reduces the -- OsString using the binary operator, from left to right. -- -- @since 1.4.200.0 foldl :: forall a. (a -> OsChar -> a) -> a -> OsString -> a foldl = coerce (PF.foldl @a) -- | 'foldl'' is like 'foldl', but strict in the accumulator. -- -- @since 1.4.200.0 foldl' :: forall a. (a -> OsChar -> a) -> a -> OsString -> a foldl' = coerce (PF.foldl' @a) -- | 'foldl1' is a variant of 'foldl' that has no starting value -- argument, and thus must be applied to non-empty 'OsString's. -- An exception will be thrown in the case of an empty OsString. -- -- @since 1.4.200.0 foldl1 :: (OsChar -> OsChar -> OsChar) -> OsString -> OsChar foldl1 = coerce PF.foldl1 -- | 'foldl1'' is like 'foldl1', but strict in the accumulator. -- An exception will be thrown in the case of an empty OsString. -- -- @since 1.4.200.0 foldl1' :: (OsChar -> OsChar -> OsChar) -> OsString -> OsChar foldl1' = coerce PF.foldl1' -- | 'foldr', applied to a binary operator, a starting value -- (typically the right-identity of the operator), and a OsString, -- reduces the OsString using the binary operator, from right to left. -- -- @since 1.4.200.0 foldr :: forall a. (OsChar -> a -> a) -> a -> OsString -> a foldr = coerce (PF.foldr @a) -- | 'foldr'' is like 'foldr', but strict in the accumulator. -- -- @since 1.4.200.0 foldr' :: forall a. (OsChar -> a -> a) -> a -> OsString -> a foldr' = coerce (PF.foldr' @a) -- | 'foldr1' is a variant of 'foldr' that has no starting value argument, -- and thus must be applied to non-empty 'OsString's -- An exception will be thrown in the case of an empty OsString. -- -- @since 1.4.200.0 foldr1 :: (OsChar -> OsChar -> OsChar) -> OsString -> OsChar foldr1 = coerce PF.foldr1 -- | 'foldr1'' is a variant of 'foldr1', but is strict in the -- accumulator. -- -- @since 1.4.200.0 foldr1' :: (OsChar -> OsChar -> OsChar) -> OsString -> OsChar foldr1' = coerce PF.foldr1' -- | /O(n)/ Applied to a predicate and a 'OsString', 'all' determines -- if all elements of the 'OsString' satisfy the predicate. -- -- @since 1.4.200.0 all :: (OsChar -> Bool) -> OsString -> Bool all = coerce PF.all -- | /O(n)/ Applied to a predicate and a 'OsString', 'any' determines if -- any element of the 'OsString' satisfies the predicate. -- -- @since 1.4.200.0 any :: (OsChar -> Bool) -> OsString -> Bool any = coerce PF.any -- /O(n)/ Concatenate a list of OsStrings. -- -- @since 1.4.200.0 concat :: [OsString] -> OsString concat = mconcat -- | /O(n)/ 'replicate' @n x@ is a OsString of length @n@ with @x@ -- the value of every element. The following holds: -- -- > replicate w c = unfoldr w (\u -> Just (u,u)) c -- -- @since 1.4.200.0 replicate :: Int -> OsChar -> OsString replicate = coerce PF.replicate -- | /O(n)/, where /n/ is the length of the result. The 'unfoldr' -- function is analogous to the List \'unfoldr\'. 'unfoldr' builds a -- OsString from a seed value. The function takes the element and -- returns 'Nothing' if it is done producing the OsString or returns -- 'Just' @(a,b)@, in which case, @a@ is the next byte in the string, -- and @b@ is the seed value for further production. -- -- This function is not efficient/safe. It will build a list of @[Word8]@ -- and run the generator until it returns `Nothing`, otherwise recurse infinitely, -- then finally create a 'OsString'. -- -- If you know the maximum length, consider using 'unfoldrN'. -- -- Examples: -- -- > unfoldr (\x -> if x <= 5 then Just (x, x + 1) else Nothing) 0 -- > == pack [0, 1, 2, 3, 4, 5] -- -- @since 1.4.200.0 unfoldr :: forall a. (a -> Maybe (OsChar, a)) -> a -> OsString unfoldr = coerce (PF.unfoldr @a) -- | /O(n)/ Like 'unfoldr', 'unfoldrN' builds a OsString from a seed -- value. However, the length of the result is limited by the first -- argument to 'unfoldrN'. This function is more efficient than 'unfoldr' -- when the maximum length of the result is known. -- -- The following equation relates 'unfoldrN' and 'unfoldr': -- -- > fst (unfoldrN n f s) == take n (unfoldr f s) -- -- @since 1.4.200.0 unfoldrN :: forall a. Int -> (a -> Maybe (OsChar, a)) -> a -> (OsString, Maybe a) unfoldrN = coerce (PF.unfoldrN @a) -- | /O(n)/ 'take' @n@, applied to a OsString @xs@, returns the prefix -- of @xs@ of length @n@, or @xs@ itself if @n > 'length' xs@. -- -- @since 1.4.200.0 take :: Int -> OsString -> OsString take = coerce PF.take -- | /O(n)/ @'takeEnd' n xs@ is equivalent to @'drop' ('length' xs - n) xs@. -- Takes @n@ elements from end of bytestring. -- -- >>> takeEnd 3 "abcdefg" -- "efg" -- >>> takeEnd 0 "abcdefg" -- "" -- >>> takeEnd 4 "abc" -- "abc" -- -- @since 1.4.200.0 takeEnd :: Int -> OsString -> OsString takeEnd = coerce PF.takeEnd -- | Returns the longest (possibly empty) suffix of elements -- satisfying the predicate. -- -- @'takeWhileEnd' p@ is equivalent to @'reverse' . 'takeWhile' p . 'reverse'@. -- -- @since 1.4.200.0 takeWhileEnd :: (OsChar -> Bool) -> OsString -> OsString takeWhileEnd = coerce PF.takeWhileEnd -- | Similar to 'Prelude.takeWhile', -- returns the longest (possibly empty) prefix of elements -- satisfying the predicate. -- -- @since 1.4.200.0 takeWhile :: (OsChar -> Bool) -> OsString -> OsString takeWhile = coerce PF.takeWhile -- | /O(n)/ 'drop' @n@ @xs@ returns the suffix of @xs@ after the first n elements, or 'empty' if @n > 'length' xs@. -- -- @since 1.4.200.0 drop :: Int -> OsString -> OsString drop = coerce PF.drop -- | /O(n)/ @'dropEnd' n xs@ is equivalent to @'take' ('length' xs - n) xs@. -- Drops @n@ elements from end of bytestring. -- -- >>> dropEnd 3 "abcdefg" -- "abcd" -- >>> dropEnd 0 "abcdefg" -- "abcdefg" -- >>> dropEnd 4 "abc" -- "" -- -- @since 1.4.200.0 dropEnd :: Int -> OsString -> OsString dropEnd = coerce PF.dropEnd -- | Similar to 'Prelude.dropWhile', -- drops the longest (possibly empty) prefix of elements -- satisfying the predicate and returns the remainder. -- -- @since 1.4.200.0 dropWhile :: (OsChar -> Bool) -> OsString -> OsString dropWhile = coerce PF.dropWhile -- | Similar to 'Prelude.dropWhileEnd', -- drops the longest (possibly empty) suffix of elements -- satisfying the predicate and returns the remainder. -- -- @'dropWhileEnd' p@ is equivalent to @'reverse' . 'dropWhile' p . 'reverse'@. -- -- @since 1.4.200.0 dropWhileEnd :: (OsChar -> Bool) -> OsString -> OsString dropWhileEnd = coerce PF.dropWhileEnd -- | Returns the longest (possibly empty) suffix of elements which __do not__ -- satisfy the predicate and the remainder of the string. -- -- 'breakEnd' @p@ is equivalent to @'spanEnd' (not . p)@ and to @('takeWhileEnd' (not . p) &&& 'dropWhileEnd' (not . p))@. -- -- @since 1.4.200.0 breakEnd :: (OsChar -> Bool) -> OsString -> (OsString, OsString) breakEnd = coerce PF.breakEnd -- | Similar to 'Prelude.break', -- returns the longest (possibly empty) prefix of elements which __do not__ -- satisfy the predicate and the remainder of the string. -- -- 'break' @p@ is equivalent to @'span' (not . p)@ and to @('takeWhile' (not . p) &&& 'dropWhile' (not . p))@. -- -- @since 1.4.200.0 break :: (OsChar -> Bool) -> OsString -> (OsString, OsString) break = coerce PF.break -- | Similar to 'Prelude.span', -- returns the longest (possibly empty) prefix of elements -- satisfying the predicate and the remainder of the string. -- -- 'span' @p@ is equivalent to @'break' (not . p)@ and to @('takeWhile' p &&& 'dropWhile' p)@. -- -- @since 1.4.200.0 span :: (OsChar -> Bool) -> OsString -> (OsString, OsString) span = coerce PF.span -- | Returns the longest (possibly empty) suffix of elements -- satisfying the predicate and the remainder of the string. -- -- 'spanEnd' @p@ is equivalent to @'breakEnd' (not . p)@ and to @('takeWhileEnd' p &&& 'dropWhileEnd' p)@. -- -- We have -- -- > spanEnd (not . isSpace) "x y z" == ("x y ", "z") -- -- and -- -- > spanEnd (not . isSpace) sbs -- > == -- > let (x, y) = span (not . isSpace) (reverse sbs) in (reverse y, reverse x) -- -- @since 1.4.200.0 spanEnd :: (OsChar -> Bool) -> OsString -> (OsString, OsString) spanEnd = coerce PF.spanEnd -- | /O(n)/ 'splitAt' @n sbs@ is equivalent to @('take' n sbs, 'drop' n sbs)@. -- -- @since 1.4.200.0 splitAt :: Int -> OsString -> (OsString, OsString) splitAt = coerce PF.splitAt -- | /O(n)/ Break a 'OsString' into pieces separated by the byte -- argument, consuming the delimiter. I.e. -- -- > split 10 "a\nb\nd\ne" == ["a","b","d","e"] -- fromEnum '\n' == 10 -- > split 97 "aXaXaXa" == ["","X","X","X",""] -- fromEnum 'a' == 97 -- > split 120 "x" == ["",""] -- fromEnum 'x' == 120 -- > split undefined "" == [] -- and not [""] -- -- and -- -- > intercalate [c] . split c == id -- > split == splitWith . (==) -- -- @since 1.4.200.0 split :: OsChar -> OsString -> [OsString] split = coerce PF.split -- | /O(n)/ Splits a 'OsString' into components delimited by -- separators, where the predicate returns True for a separator element. -- The resulting components do not contain the separators. Two adjacent -- separators result in an empty component in the output. eg. -- -- > splitWith (==97) "aabbaca" == ["","","bb","c",""] -- fromEnum 'a' == 97 -- > splitWith undefined "" == [] -- and not [""] -- -- @since 1.4.200.0 splitWith :: (OsChar -> Bool) -> OsString -> [OsString] splitWith = coerce PF.splitWith -- | /O(n)/ The 'stripSuffix' function takes two OsStrings and returns 'Just' -- the remainder of the second iff the first is its suffix, and otherwise -- 'Nothing'. -- -- @since 1.4.200.0 stripSuffix :: OsString -> OsString -> Maybe OsString stripSuffix = coerce PF.stripSuffix -- | /O(n)/ The 'stripPrefix' function takes two OsStrings and returns 'Just' -- the remainder of the second iff the first is its prefix, and otherwise -- 'Nothing'. -- -- @since 1.4.200.0 stripPrefix :: OsString -> OsString -> Maybe OsString stripPrefix = coerce PF.stripPrefix -- | Check whether one string is a substring of another. -- -- @since 1.4.200.0 isInfixOf :: OsString -> OsString -> Bool isInfixOf = coerce PF.isInfixOf -- |/O(n)/ The 'isPrefixOf' function takes two OsStrings and returns 'True' -- -- @since 1.4.200.0 isPrefixOf :: OsString -> OsString -> Bool isPrefixOf = coerce PF.isPrefixOf -- | /O(n)/ The 'isSuffixOf' function takes two OsStrings and returns 'True' -- iff the first is a suffix of the second. -- -- The following holds: -- -- > isSuffixOf x y == reverse x `isPrefixOf` reverse y -- -- @since 1.4.200.0 isSuffixOf :: OsString -> OsString -> Bool isSuffixOf = coerce PF.isSuffixOf -- | Break a string on a substring, returning a pair of the part of the -- string prior to the match, and the rest of the string. -- -- The following relationships hold: -- -- > break (== c) l == breakSubstring (singleton c) l -- -- For example, to tokenise a string, dropping delimiters: -- -- > tokenise x y = h : if null t then [] else tokenise x (drop (length x) t) -- > where (h,t) = breakSubstring x y -- -- To skip to the first occurrence of a string: -- -- > snd (breakSubstring x y) -- -- To take the parts of a string before a delimiter: -- -- > fst (breakSubstring x y) -- -- Note that calling `breakSubstring x` does some preprocessing work, so -- you should avoid unnecessarily duplicating breakSubstring calls with the same -- pattern. -- -- @since 1.4.200.0 breakSubstring :: OsString -> OsString -> (OsString, OsString) breakSubstring = coerce PF.breakSubstring -- | /O(n)/ 'elem' is the 'OsString' membership predicate. -- -- @since 1.4.200.0 elem :: OsChar -> OsString -> Bool elem = coerce PF.elem -- | /O(n)/ The 'find' function takes a predicate and a OsString, -- and returns the first element in matching the predicate, or 'Nothing' -- if there is no such element. -- -- > find f p = case findIndex f p of Just n -> Just (p ! n) ; _ -> Nothing -- -- @since 1.4.200.0 find :: (OsChar -> Bool) -> OsString -> Maybe OsChar find = coerce PF.find -- | /O(n)/ 'filter', applied to a predicate and a OsString, -- returns a OsString containing those characters that satisfy the -- predicate. -- -- @since 1.4.200.0 filter :: (OsChar -> Bool) -> OsString -> OsString filter = coerce PF.filter -- | /O(n)/ The 'partition' function takes a predicate a OsString and returns -- the pair of OsStrings with elements which do and do not satisfy the -- predicate, respectively; i.e., -- -- > partition p bs == (filter p sbs, filter (not . p) sbs) -- -- @since 1.4.200.0 partition :: (OsChar -> Bool) -> OsString -> (OsString, OsString) partition = coerce PF.partition -- | /O(1)/ 'OsString' index (subscript) operator, starting from 0. -- -- @since 1.4.200.0 index :: HasCallStack => OsString -> Int -> OsChar index = coerce PF.index -- | /O(1)/ 'OsString' index, starting from 0, that returns 'Just' if: -- -- > 0 <= n < length bs -- -- @since 1.4.200.0 indexMaybe :: OsString -> Int -> Maybe OsChar indexMaybe = coerce PF.indexMaybe -- | /O(1)/ 'OsString' index, starting from 0, that returns 'Just' if: -- -- > 0 <= n < length bs -- -- @since 1.4.200.0 (!?) :: OsString -> Int -> Maybe OsChar (!?) = indexMaybe -- | /O(n)/ The 'elemIndex' function returns the index of the first -- element in the given 'OsString' which is equal to the query -- element, or 'Nothing' if there is no such element. -- -- @since 1.4.200.0 elemIndex :: OsChar -> OsString -> Maybe Int elemIndex = coerce PF.elemIndex -- | /O(n)/ The 'elemIndices' function extends 'elemIndex', by returning -- the indices of all elements equal to the query element, in ascending order. -- -- @since 1.4.200.0 elemIndices :: OsChar -> OsString -> [Int] elemIndices = coerce PF.elemIndices -- | count returns the number of times its argument appears in the OsString -- -- @since 1.4.200.0 count :: OsChar -> OsString -> Int count = coerce PF.count -- | /O(n)/ The 'findIndex' function takes a predicate and a 'OsString' and -- returns the index of the first element in the OsString -- satisfying the predicate. -- -- @since 1.4.200.0 findIndex :: (OsChar -> Bool) -> OsString -> Maybe Int findIndex = coerce PF.findIndex -- | /O(n)/ The 'findIndices' function extends 'findIndex', by returning the -- indices of all elements satisfying the predicate, in ascending order. -- -- @since 1.4.200.0 findIndices :: (OsChar -> Bool) -> OsString -> [Int] findIndices = coerce PF.findIndices os-string-2.0.6/System/OsString/Internal/0000755000000000000000000000000007346545000016445 5ustar0000000000000000os-string-2.0.6/System/OsString/Internal/Types.hs0000644000000000000000000001624507346545000020115 0ustar0000000000000000{-# LANGUAGE CPP #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-} {-# LANGUAGE PackageImports #-} {-# LANGUAGE RankNTypes #-} {-# LANGUAGE TemplateHaskellQuotes #-} {-# LANGUAGE DeriveGeneric #-} {-# LANGUAGE TypeFamilies #-} {-# LANGUAGE TypeApplications #-} {-# LANGUAGE PatternSynonyms #-} module System.OsString.Internal.Types ( WindowsString(..) , pattern WS , unWS , PosixString(..) , unPS , pattern PS , PlatformString , WindowsChar(..) , unWW , pattern WW , PosixChar(..) , unPW , pattern PW , PlatformChar , OsString(..) , OsChar(..) , coercionToPlatformTypes ) where import Control.DeepSeq import Data.Coerce (coerce) import Data.Data import Data.Type.Coercion (Coercion(..), coerceWith) import Data.Word import Language.Haskell.TH.Syntax ( Lift (..), lift ) #if !MIN_VERSION_base(4,11,0) import Data.Semigroup #endif import GHC.Generics (Generic) import System.OsString.Encoding.Internal import qualified System.OsString.Data.ByteString.Short as BS import qualified System.OsString.Data.ByteString.Short.Word16 as BS16 import qualified Language.Haskell.TH.Syntax as TH -- Using unpinned bytearrays to avoid Heap fragmentation and -- which are reasonably cheap to pass to FFI calls -- wrapped with typeclass-friendly types allowing to avoid CPP -- -- Note that, while unpinned bytearrays incur a memcpy on each -- FFI call, this overhead is generally much preferable to -- the memory fragmentation of pinned bytearrays -- | Commonly used windows string as wide character bytes. newtype WindowsString = WindowsString { getWindowsString :: BS.ShortByteString } deriving (Eq, Ord, Semigroup, Monoid, Typeable, Generic, NFData) -- | Decodes as UCS-2. instance Show WindowsString where -- cWcharsToChars_UCS2 is total show = show . cWcharsToChars_UCS2 . BS16.unpack . getWindowsString -- | Just a short bidirectional synonym for 'WindowsString' constructor. pattern WS :: BS.ShortByteString -> WindowsString pattern WS { unWS } <- WindowsString unWS where WS a = WindowsString a #if __GLASGOW_HASKELL__ >= 802 {-# COMPLETE WS #-} #endif instance Lift WindowsString where lift (WindowsString bs) = TH.AppE (TH.ConE 'WindowsString) <$> (lift bs) #if MIN_VERSION_template_haskell(2,17,0) liftTyped = TH.unsafeCodeCoerce . TH.lift #elif MIN_VERSION_template_haskell(2,16,0) liftTyped = TH.unsafeTExpCoerce . TH.lift #endif -- | Commonly used Posix string as uninterpreted @char[]@ -- array. newtype PosixString = PosixString { getPosixString :: BS.ShortByteString } deriving (Eq, Ord, Semigroup, Monoid, Typeable, Generic, NFData) -- | Prints the raw bytes without decoding. instance Show PosixString where show (PosixString ps) = show ps -- | Just a short bidirectional synonym for 'PosixString' constructor. pattern PS :: BS.ShortByteString -> PosixString pattern PS { unPS } <- PosixString unPS where PS a = PosixString a #if __GLASGOW_HASKELL__ >= 802 {-# COMPLETE PS #-} #endif instance Lift PosixString where lift (PosixString bs) = TH.AppE (TH.ConE 'PosixString) <$> (lift bs) #if MIN_VERSION_template_haskell(2,17,0) liftTyped = TH.unsafeCodeCoerce . TH.lift #elif MIN_VERSION_template_haskell(2,16,0) liftTyped = TH.unsafeTExpCoerce . TH.lift #endif #if defined(mingw32_HOST_OS) || defined(__MINGW32__) type PlatformString = WindowsString #else type PlatformString = PosixString #endif newtype WindowsChar = WindowsChar { getWindowsChar :: Word16 } deriving (Eq, Ord, Typeable, Generic, NFData) instance Show WindowsChar where show (WindowsChar wc) = show wc newtype PosixChar = PosixChar { getPosixChar :: Word8 } deriving (Eq, Ord, Typeable, Generic, NFData) instance Show PosixChar where show (PosixChar pc) = show pc -- | Just a short bidirectional synonym for 'WindowsChar' constructor. pattern WW :: Word16 -> WindowsChar pattern WW { unWW } <- WindowsChar unWW where WW a = WindowsChar a #if __GLASGOW_HASKELL__ >= 802 {-# COMPLETE WW #-} #endif -- | Just a short bidirectional synonym for 'PosixChar' constructor. pattern PW :: Word8 -> PosixChar pattern PW { unPW } <- PosixChar unPW where PW a = PosixChar a #if __GLASGOW_HASKELL__ >= 802 {-# COMPLETE PW #-} #endif #if defined(mingw32_HOST_OS) || defined(__MINGW32__) type PlatformChar = WindowsChar #else type PlatformChar = PosixChar #endif -- | Newtype representing short operating system specific strings. -- -- Internally this is either 'WindowsString' or 'PosixString', -- depending on the platform. Both use unpinned -- 'ShortByteString' for efficiency. -- -- The constructor is only exported via "System.OsString.Internal.Types", since -- dealing with the internals isn't generally recommended, but supported -- in case you need to write platform specific code. newtype OsString = OsString { getOsString :: PlatformString } deriving (Typeable, Generic, NFData) -- | On windows, decodes as UCS-2. On unix prints the raw bytes without decoding. instance Show OsString where show (OsString os) = show os -- | Byte equality of the internal representation. instance Eq OsString where (OsString a) == (OsString b) = a == b -- | Byte ordering of the internal representation. instance Ord OsString where compare (OsString a) (OsString b) = compare a b -- | \"String-Concatenation\" for 'OsString'. This is __not__ the same -- as '()'. instance Monoid OsString where mempty = coerce BS.empty #if MIN_VERSION_base(4,11,0) mappend = (<>) #else mappend = coerce (mappend :: BS.ShortByteString -> BS.ShortByteString -> BS.ShortByteString) #endif #if MIN_VERSION_base(4,11,0) instance Semigroup OsString where (<>) = coerce (mappend :: BS.ShortByteString -> BS.ShortByteString -> BS.ShortByteString) #endif instance Lift OsString where lift xs = case coercionToPlatformTypes of Left (_, co) -> TH.AppE (TH.ConE 'OsString) <$> (lift $ coerceWith co xs) Right (_, co) -> do TH.AppE (TH.ConE 'OsString) <$> (lift $ coerceWith co xs) #if MIN_VERSION_template_haskell(2,17,0) liftTyped = TH.unsafeCodeCoerce . TH.lift #elif MIN_VERSION_template_haskell(2,16,0) liftTyped = TH.unsafeTExpCoerce . TH.lift #endif -- | Newtype representing a code unit. -- -- On Windows, this is restricted to two-octet codepoints 'Word16', -- on POSIX one-octet ('Word8'). newtype OsChar = OsChar { getOsChar :: PlatformChar } deriving (Typeable, Generic, NFData) instance Show OsChar where show (OsChar pc) = show pc -- | Byte equality of the internal representation. instance Eq OsChar where (OsChar a) == (OsChar b) = a == b -- | Byte ordering of the internal representation. instance Ord OsChar where compare (OsChar a) (OsChar b) = compare a b -- | This is a type-level evidence that 'OsChar' is a newtype wrapper -- over 'WindowsChar' or 'PosixChar' and 'OsString' is a newtype wrapper -- over 'WindowsString' or 'PosixString'. If you pattern match on -- 'coercionToPlatformTypes', GHC will know that relevant types -- are coercible to each other. This helps to avoid CPP in certain scenarios. coercionToPlatformTypes :: Either (Coercion OsChar WindowsChar, Coercion OsString WindowsString) (Coercion OsChar PosixChar, Coercion OsString PosixString) #if defined(mingw32_HOST_OS) coercionToPlatformTypes = Left (Coercion, Coercion) #else coercionToPlatformTypes = Right (Coercion, Coercion) #endif os-string-2.0.6/System/OsString/Posix.hs0000644000000000000000000000027307346545000016331 0ustar0000000000000000{-# LANGUAGE CPP #-} #undef WINDOWS #define MODULE_NAME Posix #define PLATFORM_STRING PosixString #define PLATFORM_WORD PosixChar #define IS_WINDOWS False #include "Common.hs" os-string-2.0.6/System/OsString/Windows.hs0000644000000000000000000000045407346545000016662 0ustar0000000000000000{-# LANGUAGE CPP #-} #undef POSIX #define MODULE_NAME Windows #define PLATFORM_STRING WindowsString #define PLATFORM_WORD WindowsChar #define IS_WINDOWS True #define WINDOWS #include "Common.hs" #undef MODULE_NAME #undef FILEPATH_NAME #undef OSSTRING_NAME #undef IS_WINDOWS #undef WINDOWS os-string-2.0.6/bench/0000755000000000000000000000000007346545000012714 5ustar0000000000000000os-string-2.0.6/bench/Bench.hs0000644000000000000000000000051407346545000014267 0ustar0000000000000000module Main (main) where import Test.Tasty.Bench import qualified BenchOsString import qualified BenchPosixString import qualified BenchWindowsString main :: IO () main = do defaultMain [ BenchOsString.benchMark , BenchPosixString.benchMark , BenchWindowsString.benchMark ] os-string-2.0.6/bench/BenchOsString.hs0000644000000000000000000000272107346545000015762 0ustar0000000000000000{-# LANGUAGE CPP #-} {-# LANGUAGE BangPatterns #-} {-# LANGUAGE PackageImports #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE MagicHash #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE QuasiQuotes #-} {-# LANGUAGE TypeApplications #-} #define OSSTR osstr #define OS_STRING OsString #define OS_CHAR OsChar module BenchOsString (benchMark) where import Data.Type.Coercion (coerceWith, sym) import System.OsString (osstr) import qualified System.OsString as S import System.OsString.Internal.Types (OsString(..), OsChar(..), PosixChar(..), WindowsChar(..)) #include "Common.hs" benchStr :: String benchStr = "OsString" w :: Int -> OsChar w = case S.coercionToPlatformTypes of Left (co, _) -> coerceWith (sym co) . WindowsChar . fromIntegral Right (co, _) -> coerceWith (sym co) . PosixChar . fromIntegral hashWord8 :: OsChar -> OsChar hashWord8 = case S.coercionToPlatformTypes of Left (co, _) -> coerceWith (sym co) . WindowsChar . fromIntegral . hashInt . fromIntegral . getWindowsChar . coerceWith co Right (co, _) -> coerceWith (sym co) . PosixChar . fromIntegral . hashInt . fromIntegral . getPosixChar . coerceWith co iw :: OsChar -> Int iw = case S.coercionToPlatformTypes of Left (co, _) -> fromIntegral . getWindowsChar . coerceWith co Right (co, _) -> fromIntegral . getPosixChar . coerceWith co os-string-2.0.6/bench/BenchPosixString.hs0000644000000000000000000000161207346545000016501 0ustar0000000000000000{-# LANGUAGE CPP #-} {-# LANGUAGE BangPatterns #-} {-# LANGUAGE PackageImports #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE MagicHash #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE QuasiQuotes #-} {-# LANGUAGE TypeApplications #-} #define OSSTR pstr #define OS_STRING PosixString #define OS_CHAR PosixChar module BenchPosixString (benchMark) where import System.OsString.Posix (PosixString, pstr) import qualified System.OsString.Posix as S import System.OsString.Internal.Types (PosixChar(..)) #include "Common.hs" benchStr :: String benchStr = "PosixString" w :: Int -> PosixChar w i = PosixChar (fromIntegral i) hashWord8 :: PosixChar -> PosixChar hashWord8 (PosixChar w) = PosixChar . fromIntegral . hashInt . fromIntegral $ w iw :: PosixChar -> Int iw (PosixChar w) = fromIntegral w os-string-2.0.6/bench/BenchWindowsString.hs0000644000000000000000000000166307346545000017037 0ustar0000000000000000{-# LANGUAGE CPP #-} {-# LANGUAGE BangPatterns #-} {-# LANGUAGE PackageImports #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE MagicHash #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE QuasiQuotes #-} {-# LANGUAGE TypeApplications #-} #define OSSTR pstr #define OS_STRING WindowsString #define OS_CHAR WindowsChar module BenchWindowsString (benchMark) where import System.OsString.Windows (WindowsString, WindowsChar, pstr) import qualified System.OsString.Windows as S import System.OsString.Internal.Types (WindowsChar(..)) #include "Common.hs" benchStr :: String benchStr = "WindowsString" w :: Int -> WindowsChar w i = WindowsChar (fromIntegral i) hashWord8 :: WindowsChar -> WindowsChar hashWord8 (WindowsChar w) = WindowsChar . fromIntegral . hashInt . fromIntegral $ w iw :: WindowsChar -> Int iw (WindowsChar w) = fromIntegral w os-string-2.0.6/bench/Common.hs0000644000000000000000000002207607346545000014507 0ustar0000000000000000import Control.DeepSeq (force) import Data.Foldable (foldMap) import Data.Maybe (listToMaybe, fromJust) import Data.Monoid import Data.String import Prelude hiding (words, head, tail) import Test.Tasty.Bench import Data.ByteString.Builder import Data.ByteString.Builder.Extra (byteStringCopy, byteStringInsert, intHost) import Data.ByteString.Builder.Internal (ensureFree) import Data.ByteString.Builder.Prim (BoundedPrim, FixedPrim, (>$<)) import qualified Data.ByteString.Builder.Prim as P import qualified Data.ByteString.Builder.Prim.Internal as PI import Foreign import System.Random import Data.Bifunctor (first) ------------------------------------------------------------------------------ -- Benchmark ------------------------------------------------------------------------------ -- input data (NOINLINE to ensure memoization) ---------------------------------------------- -- | Few-enough repetitions to avoid making GC too expensive. nRepl :: Int nRepl = 10000 {-# NOINLINE intData #-} intData :: [Int] intData = [1..nRepl] {-# NOINLINE byteStringData #-} byteStringData :: S.OS_STRING byteStringData = S.pack $ map w intData {-# NOINLINE loremIpsum #-} loremIpsum :: S.OS_STRING loremIpsum = [OSSTR|incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.|] -- benchmark wrappers --------------------- {-# INLINE benchB' #-} benchB' :: String -> a -> (a -> OS_STRING) -> Benchmark benchB' name x b = bench name $ whnf (S.length . b) x -- We use this construction of just looping through @n,n-1,..,1@ to ensure that -- we measure the speed of the encoding and not the speed of generating the -- values to be encoded. {-# INLINE benchIntEncodingB #-} benchIntEncodingB :: Int -- ^ Maximal 'Int' to write -> BoundedPrim Int -- ^ 'BoundedPrim' to execute -> IO () -- ^ 'IO' action to benchmark benchIntEncodingB n0 w | n0 <= 0 = return () | otherwise = do fpbuf <- mallocForeignPtrBytes (n0 * PI.sizeBound w) withForeignPtr fpbuf (loop n0) >> return () where loop !n !op | n <= 0 = return op | otherwise = PI.runB w n op >>= loop (n - 1) -- Helpers ------------- hashInt :: Int -> Int hashInt x = iterate step x !! 10 where step a = e where b = (a `xor` 61) `xor` (a `shiftR` 16) c = b + (b `shiftL` 3) d = c `xor` (c `shiftR` 4) e = d * 0x27d4eb2d f = e `xor` (e `shiftR` 15) foldInputs' :: [[ OS_CHAR ]] foldInputs' = force (S.unpack <$> foldInputs) foldInputs :: [S.OS_STRING] foldInputs = map (\k -> S.pack . map w $ if k <= 6 then take (2 ^ k) [32..95] else concat (replicate (2 ^ (k - 6)) [32..95])) [0..16] largeTraversalInput :: S.OS_STRING largeTraversalInput = S.concat (replicate 10 byteStringData) smallTraversalInput :: S.OS_STRING smallTraversalInput = [OSSTR|The quick brown fox|] zeroes :: S.OS_STRING zeroes = S.replicate 10000 (w 0) partitionStrict p = nf (S.partition p) . randomStrict $ mkStdGen 98423098 where randomStrict = fst . S.unfoldrN 10000 (Just . first S.unsafeFromChar . random) -- ASCII \n to ensure no typos nl :: OS_CHAR nl = w 0xa {-# INLINE nl #-} -- non-inlined equality test nilEq :: OS_CHAR -> OS_CHAR -> Bool {-# NOINLINE nilEq #-} nilEq = (==) -- lines of 200 letters from a to e, followed by repeated letter f absurdlong :: S.OS_STRING absurdlong = (S.replicate 200 (w 0x61) <> S.singleton nl <> S.replicate 200 (w 0x62) <> S.singleton nl <> S.replicate 200 (w 0x63) <> S.singleton nl <> S.replicate 200 (w 0x64) <> S.singleton nl <> S.replicate 200 (w 0x65) <> S.singleton nl) <> S.replicate 999999 (w 0x66) bench_find_index_second :: OS_STRING -> Maybe Int bench_find_index_second bs = let isNl = (== nl) in case S.findIndex isNl bs of Just !i -> S.findIndex isNl (S.drop (i+1) bs) Nothing -> Nothing {-# INLINE bench_find_index_second #-} bench_elem_index_second :: OS_STRING -> Maybe Int bench_elem_index_second bs = case S.elemIndex nl bs of Just !i -> S.elemIndex nl (S.drop (i+1) bs) Nothing -> Nothing {-# INLINE bench_elem_index_second #-} -- benchmarks ------------- benchMark :: Benchmark benchMark = absurdlong `seq` bgroup benchStr [ bgroup "Small payload" [ benchB' "mempty" () (const mempty) , benchB' "UTF-8 String (naive)" "hello world\0" (fromJust . S.encodeUtf) , benchB' "String (naive)" "hello world!" (fromJust . S.encodeUtf) ] , bgroup "intercalate" [ bench "intercalate (large)" $ whnf (S.intercalate $ [OSSTR| and also |]) (replicate 300 [OSSTR|expression|]) , bench "intercalate (small)" $ whnf (S.intercalate [OSSTR|&|]) (replicate 30 [OSSTR|foo|]) , bench "intercalate (tiny)" $ whnf (S.intercalate [OSSTR|&|]) [[OSSTR|foo|], [OSSTR|bar|], [OSSTR|baz|]] ] , bgroup "partition" [ bgroup "strict" [ bench "mostlyTrueFast" $ partitionStrict (< (w 225)) , bench "mostlyFalseFast" $ partitionStrict (< (w 10)) , bench "balancedFast" $ partitionStrict (< (w 128)) , bench "mostlyTrueSlow" $ partitionStrict (\x -> hashWord8 x < w 225) , bench "mostlyFalseSlow" $ partitionStrict (\x -> hashWord8 x < w 10) , bench "balancedSlow" $ partitionStrict (\x -> hashWord8 x < w 128) ] ] , bgroup "folds" [ bgroup "strict" [ bgroup "foldl" $ map (\s -> bench (show $ S.length s) $ nf (S.foldl (\acc x -> acc + iw x) (0 :: Int)) s) foldInputs , bgroup "foldl'" $ map (\s -> bench (show $ S.length s) $ nf (S.foldl' (\acc x -> acc + iw x) (0 :: Int)) s) foldInputs , bgroup "foldr" $ map (\s -> bench (show $ S.length s) $ nf (S.foldr (\x acc -> iw x + acc) (0 :: Int)) s) foldInputs , bgroup "foldr'" $ map (\s -> bench (show $ S.length s) $ nf (S.foldr' (\x acc -> iw x + acc) (0 :: Int)) s) foldInputs , bgroup "foldr1'" $ map (\s -> bench (show $ S.length s) $ nf (S.foldr1' (\x acc -> w $ iw x + iw acc)) s) foldInputs , bgroup "unfoldrN" $ map (\s -> bench (show $ S.length s) $ nf (S.unfoldrN (S.length s) (\a -> Just (w a, a + 1))) 0) foldInputs , bgroup "filter" $ map (\s -> bench (show $ S.length s) $ nf (S.filter (odd . iw)) s) foldInputs ] ] , bgroup "findIndexOrLength" [ bench "takeWhile" $ nf (S.takeWhile (even . iw)) zeroes , bench "dropWhile" $ nf (S.dropWhile (even . iw)) zeroes , bench "break" $ nf (S.break (odd . iw)) zeroes ] , bgroup "findIndex_" [ bench "findIndices" $ nf (sum . S.findIndices (\x -> x == w 129 || x == w 72)) byteStringData , bench "find" $ nf (S.find (>= w 198)) byteStringData ] , bgroup "traversals" [ bench "map (+1) large" $ nf (S.map (w . (+ 1) . iw)) largeTraversalInput , bench "map (+1) small" $ nf (S.map (w . (+ 1) . iw)) smallTraversalInput ] , bgroup (benchStr <> " strict first index") $ [ bench "FindIndices" $ nf (listToMaybe . S.findIndices (== nl)) absurdlong , bench "ElemIndices" $ nf (listToMaybe . S.elemIndices nl) absurdlong , bench "FindIndex" $ nf (S.findIndex (== nl)) absurdlong , bench "ElemIndex" $ nf (S.elemIndex nl) absurdlong ] , bgroup (benchStr <> " strict second index") $ [ bench "FindIndices" $ nf (listToMaybe . drop 1 . S.findIndices (== nl)) absurdlong , bench "ElemIndices" $ nf (listToMaybe . drop 1 . S.elemIndices nl) absurdlong , bench "FindIndex" $ nf bench_find_index_second absurdlong , bench "ElemIndex" $ nf bench_elem_index_second absurdlong ] , bgroup (benchStr <> " index equality inlining") $ [ bench "FindIndices/inlined" $ nf (S.findIndices (== nl)) absurdlong , bench "FindIndices/non-inlined" $ nf (S.findIndices (nilEq nl)) absurdlong , bench "FindIndex/inlined" $ nf (S.findIndex (== nl)) absurdlong , bench "FindIndex/non-inlined" $ nf (S.findIndex (nilEq nl)) absurdlong ] , bgroup (benchStr <> " conversions") $ [ bgroup "unpack" $ map (\s -> bench (show $ S.length s) $ nf (\x -> S.unpack x) s) foldInputs , bgroup "pack" $ map (\s -> bench (show $ length s) $ nf S.pack s) foldInputs' , bench "unpack and get last element" $ nf (\x -> last . S.unpack $ x) absurdlong , bench "unpack and get first 120 elements" $ nf (\x -> take 120 . S.unpack $ x) absurdlong ] ] os-string-2.0.6/changelog.md0000644000000000000000000000207707346545000014114 0ustar0000000000000000# Changelog for [`os-string` package](http://hackage.haskell.org/package/os-string) ## 2.0.6 *Jun 2024* * add `fromString` on windows * add `fromBytestring` on unix ## 2.0.5 *Jun 2024* * Add `decodeLE`/`encodeLE` and deprecate `decodeFS`/`encodeFS` (pointing users to `System.OsPath` instead), fixes [#19](https://github.com/haskell/os-string/issues/19) ## 2.0.4 *Jun 2024* * Use TemplateHaskellQuotes rather than TemplateHaskell extension wrt [#21](https://github.com/haskell/os-string/issues/21) ## 2.0.3 *May 2024* * Fix `length` function wrt [#17](https://github.com/haskell/os-string/issues/17) ## 2.0.2.2 *May 2024* * Fix compilation on big-endian arches, by Andrew Lelechenko ## 2.0.2.1 *Apr 2024* * Fix compabitiliby with GHC 9.10 ## 2.0.2 *Dec 2023* * Implement coercionToPlatformTypes, fixes #4 ## 2.0.1 *Dec 2023* * add `unsafeEncodeUtf`, fixes #5 ## 2.0.0 *Nov 2023* * Split out `OsString` modules from filepath library * add more bytestring like functions (index/search/etc.) ## 1.0.0 *Nov 2023* * dummy release to avoid name clashes with filepath <1.5 os-string-2.0.6/os-string.cabal0000644000000000000000000000627307346545000014556 0ustar0000000000000000cabal-version: 2.2 name: os-string version: 2.0.6 -- NOTE: Don't forget to update ./changelog.md license: BSD-3-Clause license-file: LICENSE author: Julian Ospald maintainer: Julian Ospald copyright: Julain Ospald 2021-2023 bug-reports: https://github.com/haskell/os-string/issues homepage: https://github.com/haskell/os-string/blob/master/README.md category: System build-type: Simple synopsis: Library for manipulating Operating system strings. tested-with: GHC ==8.6.5 || ==8.8.4 || ==8.10.7 || ==9.0.2 || ==9.2.8 || ==9.4.8 || ==9.6.3 || ==9.8.1 description: This package provides functionality for manipulating @OsString@ values, and is shipped with . extra-source-files: System/OsString/Common.hs tests/bytestring-tests/Properties/Common.hs bench/Common.hs extra-doc-files: changelog.md README.md source-repository head type: git location: https://github.com/haskell/os-string library exposed-modules: System.OsString.Data.ByteString.Short System.OsString.Data.ByteString.Short.Internal System.OsString.Data.ByteString.Short.Word16 System.OsString.Encoding System.OsString.Encoding.Internal System.OsString System.OsString.Internal System.OsString.Internal.Types System.OsString.Posix System.OsString.Windows other-extensions: CPP PatternGuards if impl(ghc >=7.2) other-extensions: Safe default-language: Haskell2010 build-depends: , base >=4.12.0.0 && <4.21 , bytestring >=0.11.3.0 , deepseq , exceptions , template-haskell ghc-options: -Wall test-suite bytestring-tests default-language: Haskell2010 ghc-options: -Wall type: exitcode-stdio-1.0 main-is: Main.hs hs-source-dirs: tests tests/bytestring-tests other-modules: Properties.ShortByteString Properties.WindowsString Properties.PosixString Properties.OsString Properties.ShortByteString.Word16 TestUtil build-depends: , base , bytestring >=0.11.3.0 , os-string , QuickCheck >=2.7 && <2.16 test-suite encoding-tests default-language: Haskell2010 ghc-options: -Wall type: exitcode-stdio-1.0 main-is: Main.hs hs-source-dirs: tests tests/encoding other-modules: Arbitrary EncodingSpec TestUtil build-depends: , base , bytestring >=0.11.3.0 , deepseq , os-string , QuickCheck >=2.7 && <2.16 , quickcheck-classes-base ^>=0.6.2 benchmark bench main-is: Bench.hs other-modules: BenchOsString BenchPosixString BenchWindowsString type: exitcode-stdio-1.0 hs-source-dirs: bench default-language: Haskell2010 ghc-options: -O2 "-with-rtsopts=-A32m" if impl(ghc >= 8.6) ghc-options: -fproc-alignment=64 build-depends: base, bytestring, os-string, deepseq, tasty-bench, random os-string-2.0.6/tests/0000755000000000000000000000000007346545000012777 5ustar0000000000000000os-string-2.0.6/tests/Arbitrary.hs0000644000000000000000000000375107346545000015300 0ustar0000000000000000{-# OPTIONS_GHC -Wno-orphans #-} module Arbitrary where import Data.Char import Data.Maybe import System.OsString import System.OsString.Internal.Types import qualified System.OsString.Posix as Posix import qualified System.OsString.Windows as Windows import Data.ByteString ( ByteString ) import qualified Data.ByteString as ByteString import Test.QuickCheck instance Arbitrary OsString where arbitrary = fmap fromJust $ encodeUtf <$> listOf filepathChar instance Arbitrary PosixString where arbitrary = fmap fromJust $ Posix.encodeUtf <$> listOf filepathChar instance Arbitrary WindowsString where arbitrary = fmap fromJust $ Windows.encodeUtf <$> listOf filepathChar newtype NonNullString = NonNullString { nonNullString :: String } deriving Show instance Arbitrary NonNullString where arbitrary = NonNullString <$> listOf filepathChar filepathChar :: Gen Char filepathChar = arbitraryUnicodeChar `suchThat` (\c -> not (isNull c) && isValidUnicode c) where isNull = (== '\NUL') isValidUnicode c = case generalCategory c of Surrogate -> False NotAssigned -> False _ -> True newtype NonNullAsciiString = NonNullAsciiString { nonNullAsciiString :: String } deriving Show instance Arbitrary NonNullAsciiString where arbitrary = NonNullAsciiString <$> listOf filepathAsciiChar filepathAsciiChar :: Gen Char filepathAsciiChar = arbitraryASCIIChar `suchThat` (\c -> not (isNull c)) where isNull = (== '\NUL') newtype NonNullSurrogateString = NonNullSurrogateString { nonNullSurrogateString :: String } deriving Show instance Arbitrary NonNullSurrogateString where arbitrary = NonNullSurrogateString <$> listOf filepathWithSurrogates filepathWithSurrogates :: Gen Char filepathWithSurrogates = frequency [(3, arbitraryASCIIChar), (1, arbitraryUnicodeChar), (1, arbitraryBoundedEnum) ] instance Arbitrary ByteString where arbitrary = ByteString.pack <$> arbitrary instance CoArbitrary ByteString where coarbitrary = coarbitrary . ByteString.unpack os-string-2.0.6/tests/TestUtil.hs0000644000000000000000000000343307346545000015113 0ustar0000000000000000{-# LANGUAGE CPP #-} {-# LANGUAGE TypeApplications #-} {-# OPTIONS_GHC -Wno-orphans #-} module TestUtil( module TestUtil, module Test.QuickCheck, module Data.List, module Data.Maybe ) where import Test.QuickCheck hiding ((==>)) import Data.List import Data.Maybe import Control.Monad import System.Environment infixr 0 ==> (==>) :: Bool -> Bool -> Bool a ==> b = not a || b runTests :: [(String, Property)] -> IO () runTests tests = do args <- getArgs let count = case args of i:_ -> read i; _ -> 10000 let testNum = case args of _:i:_ | let num = read i , num < 0 -> drop (negate num) tests | let num = read i , num > 0 -> take num tests | otherwise -> [] _ -> tests putStrLn $ "Testing with " ++ show count ++ " repetitions" let total' = length testNum let showOutput x = show x{output=""} ++ "\n" ++ output x bad <- fmap catMaybes $ forM (zip @Integer [1..] testNum) $ \(i,(msg,prop)) -> do putStrLn $ "Test " ++ show i ++ " of " ++ show total' ++ ": " ++ msg res <- quickCheckWithResult stdArgs{chatty=False, maxSuccess=count} prop case res of Success{} -> pure Nothing bad -> do putStrLn $ showOutput bad; putStrLn "TEST FAILURE!"; pure $ Just (msg,bad) if null bad then putStrLn $ "Success, " ++ show total' ++ " tests passed" else do putStrLn $ show (length bad) ++ " FAILURES\n" forM_ (zip @Integer [1..] bad) $ \(i,(a,b)) -> putStrLn $ "FAILURE " ++ show i ++ ": " ++ a ++ "\n" ++ showOutput b ++ "\n" fail $ "FAILURE, failed " ++ show (length bad) ++ " of " ++ show total' ++ " tests" os-string-2.0.6/tests/bytestring-tests/0000755000000000000000000000000007346545000016331 5ustar0000000000000000os-string-2.0.6/tests/bytestring-tests/Main.hs0000644000000000000000000000072107346545000017551 0ustar0000000000000000{-# LANGUAGE TypeApplications #-} module Main (main) where import qualified Properties.OsString as PropOs import qualified Properties.PosixString as PropPos import qualified Properties.WindowsString as PropWin import qualified Properties.ShortByteString as PropSBS import qualified Properties.ShortByteString.Word16 as PropSBSW16 import TestUtil main :: IO () main = runTests (PropSBS.tests ++ PropSBSW16.tests ++ PropWin.tests ++ PropPos.tests ++ PropOs.tests) os-string-2.0.6/tests/bytestring-tests/Properties/0000755000000000000000000000000007346545000020465 5ustar0000000000000000os-string-2.0.6/tests/bytestring-tests/Properties/Common.hs0000644000000000000000000005640707346545000022265 0ustar0000000000000000-- | -- Module : Properties.ShortByteString -- Copyright : (c) Andrew Lelechenko 2021 -- License : BSD-style {-# LANGUAGE CPP #-} {-# LANGUAGE ViewPatterns #-} {-# LANGUAGE TypeApplications #-} {-# LANGUAGE DeriveGeneric #-} {-# LANGUAGE StandaloneDeriving #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-} {-# OPTIONS_GHC -Wno-orphans #-} -- We are happy to sacrifice optimizations in exchange for faster compilation, -- but need to test rewrite rules. As one can check using -ddump-rule-firings, -- rewrite rules do not fire in -O0 mode, so we use -O1, but disable almost all -- optimizations. It roughly halves compilation time. {-# OPTIONS_GHC -O1 -fenable-rewrite-rules -fmax-simplifier-iterations=1 -fsimplifier-phases=0 -fno-call-arity -fno-case-merge -fno-cmm-elim-common-blocks -fno-cmm-sink -fno-cpr-anal -fno-cse -fno-do-eta-reduction -fno-float-in -fno-full-laziness -fno-loopification -fno-specialise -fno-strictness -Wno-unused-imports -Wno-unused-top-binds #-} #ifdef OSWORD module Properties.OsString (tests) where import System.OsString.Internal.Types (OsString(..), OsChar(..), getOsChar) import qualified System.OsString as B import qualified System.OsString as BS import qualified System.OsString.Data.ByteString.Short.Internal as BSI (_nul, isSpace) #else #ifdef WORD16 #ifdef WIN module Properties.WindowsString (tests) where import qualified System.OsString.Windows as B import qualified System.OsString.Windows as BS #else module Properties.ShortByteString.Word16 (tests) where import System.OsString.Data.ByteString.Short.Internal (_nul, isSpace) import qualified System.OsString.Data.ByteString.Short.Word16 as B import qualified System.OsString.Data.ByteString.Short as BS #endif #else #ifdef POSIX module Properties.PosixString (tests) where import qualified System.OsString.Posix as B import qualified System.OsString.Posix as BS #else module Properties.ShortByteString (tests) where import qualified System.OsString.Data.ByteString.Short as B #endif #endif #endif import Data.ByteString.Short (ShortByteString) import qualified Data.Char as C import qualified System.OsString.Data.ByteString.Short.Word16 as B16 import qualified System.OsString.Data.ByteString.Short as B8 import Data.Word import Control.Arrow import Data.Coerce (coerce) import Data.Type.Coercion (Coercion(..), coerceWith, sym) import Data.Foldable import Data.List as L import Data.Semigroup import Data.Tuple import Test.QuickCheck import Test.QuickCheck.Monadic ( monadicIO, run ) import Text.Show.Functions () import System.OsString.Internal.Types (WindowsString(..), WindowsChar(..), getWindowsChar, PosixChar(..), PosixString(..), getPosixChar, OsString(..), OsChar(..), getOsChar) import qualified System.OsString.Posix as PBS import qualified System.OsString.Windows as WBS import qualified System.OsString as OBS import qualified System.OsString.Data.ByteString.Short.Internal as BSI (_nul, isSpace) instance Arbitrary PosixString where arbitrary = do bs <- sized sizedByteString' n <- choose (0, 2) return (PBS.drop n bs) -- to give us some with non-0 offset where sizedByteString' :: Int -> Gen PosixString sizedByteString' n = do m <- choose(0, n) fmap (PosixString . B8.pack) $ vectorOf m arbitrary instance Arbitrary PosixChar where arbitrary = fmap PosixChar (arbitrary @Word8) instance CoArbitrary PosixChar where coarbitrary s = coarbitrary (PBS.toChar s) instance CoArbitrary PosixString where coarbitrary s = coarbitrary (PBS.unpack s) deriving instance Num PosixChar deriving instance Bounded PosixChar instance Arbitrary WindowsString where arbitrary = do bs <- sized sizedByteString' n <- choose (0, 2) return (WBS.drop n bs) -- to give us some with non-0 offset where sizedByteString' :: Int -> Gen WindowsString sizedByteString' n = do m <- choose(0, n) fmap (WindowsString . B16.pack) $ vectorOf m arbitrary instance Arbitrary WindowsChar where arbitrary = fmap WindowsChar (arbitrary @Word16) instance CoArbitrary WindowsChar where coarbitrary s = coarbitrary (WBS.toChar s) instance CoArbitrary WindowsString where coarbitrary s = coarbitrary (WBS.unpack s) deriving instance Num WindowsChar deriving instance Bounded WindowsChar isSpaceWin :: WindowsChar -> Bool isSpaceWin = BSI.isSpace . getWindowsChar numWordWin :: WindowsString -> Int numWordWin = B16.numWord16 . getWindowsString swapWWin :: WindowsChar -> WindowsChar swapWWin = WindowsChar . byteSwap16 . getWindowsChar isSpacePosix :: PosixChar -> Bool isSpacePosix = C.isSpace . word8ToChar . getPosixChar numWordPosix :: PosixString -> Int numWordPosix = B8.length . getPosixString swapWPosix :: PosixChar -> PosixChar swapWPosix = id #ifdef OSWORD isSpace :: OsChar -> Bool isSpace = case OBS.coercionToPlatformTypes of Left (co, _) -> isSpaceWin . coerceWith co Right (co, _) -> isSpacePosix . coerceWith co numWord :: OsString -> Int numWord = case OBS.coercionToPlatformTypes of Left (_, co) -> numWordWin . coerceWith co Right (_, co) -> numWordPosix . coerceWith co toElem :: OsChar -> OsChar toElem = id swapW :: OsChar -> OsChar swapW = case OBS.coercionToPlatformTypes of Left (co, _) -> coerceWith (sym co) . swapWWin . coerceWith co Right (co, _) -> coerceWith (sym co) . swapWPosix . coerceWith co instance Arbitrary OsString where arbitrary = OsString <$> arbitrary instance Arbitrary OsChar where arbitrary = OsChar <$> arbitrary instance CoArbitrary OsChar where coarbitrary s = coarbitrary (OBS.toChar s) instance CoArbitrary OsString where coarbitrary s = coarbitrary (OBS.unpack s) deriving instance Num OsChar deriving instance Bounded OsChar instance Arbitrary ShortByteString where arbitrary = case OBS.coercionToPlatformTypes of Left (_, _) -> getWindowsString <$> arbitrary Right (_, _) -> getPosixString <$> arbitrary #else #ifdef WORD16 instance Arbitrary ShortByteString where arbitrary = do bs <- sized sizedByteString n <- choose (0, 2) return (B16.drop n bs) -- to give us some with non-0 offset where sizedByteString :: Int -> Gen ShortByteString sizedByteString n = do m <- choose(0, n) fmap B16.pack $ vectorOf m arbitrary instance CoArbitrary ShortByteString where coarbitrary s = coarbitrary (B16.unpack s) #ifdef WIN isSpace :: WindowsChar -> Bool isSpace = isSpaceWin numWord :: WindowsString -> Int numWord = numWordWin toElem :: WindowsChar -> WindowsChar toElem = id swapW :: WindowsChar -> WindowsChar swapW = swapWWin #else numWord :: ShortByteString -> Int numWord = B.numWord16 toElem :: Word16 -> Word16 toElem = id swapW :: Word16 -> Word16 swapW = byteSwap16 #endif #else #ifdef POSIX isSpace :: PosixChar -> Bool isSpace = isSpacePosix numWord :: PosixString -> Int numWord = numWordPosix toElem :: PosixChar -> PosixChar toElem = id swapW :: PosixChar -> PosixChar swapW = swapWPosix #else _nul :: Word8 _nul = 0x00 isSpace :: Word8 -> Bool isSpace = C.isSpace . word8ToChar numWord :: ShortByteString -> Int numWord = B8.length toElem :: Word8 -> Word8 toElem = id swapW :: Word8 -> Word8 swapW = id #endif instance Arbitrary ShortByteString where arbitrary = do bs <- sized sizedByteString' n <- choose (0, 2) return (B8.drop n bs) -- to give us some with non-0 offset where sizedByteString' :: Int -> Gen ShortByteString sizedByteString' n = do m <- choose(0, n) fmap B8.pack $ vectorOf m arbitrary shrink = map B8.pack . shrink . B8.unpack instance CoArbitrary ShortByteString where coarbitrary s = coarbitrary (B8.unpack s) #endif #endif tests :: [(String, Property)] tests = [ ("pack . unpack", property $ \x -> x === B.pack (B.unpack x)) , ("unpack . pack" , property $ \(map toElem -> xs) -> xs === B.unpack (B.pack xs)) , ("read . show" , property $ \x -> (x :: ShortByteString) === read (show x)) , ("==" , property $ \x y -> (x == y) === (B.unpack x == B.unpack y)) , ("== refl" , property $ \x -> (x :: ShortByteString) == x) , ("== symm", property $ \x y -> ((x :: ShortByteString) == y) === (y == x)) , ("== pack unpack", property $ \x -> x == B.pack (B.unpack x)) , ("compare", property $ \x y -> compare x y === compare (swapW <$> B.unpack x) (swapW <$> B.unpack y)) , ("compare EQ", property $ \x -> compare (x :: ShortByteString) x == EQ) , ("compare GT", property $ \x (toElem -> c) -> compare (B.snoc x c) x == GT) , ("compare LT", property $ \x (toElem -> c) -> compare x (B.snoc x c) == LT) , ("compare GT empty", property $ \x -> not (B.null x) ==> compare x B.empty == GT) , ("compare LT empty", property $ \x -> not (B.null x) ==> compare B.empty x == LT) , ("compare GT concat", property $ \x y -> not (B.null y) ==> compare (x `mappend` y) x == GT) , ("compare char" , property $ \(toElem -> c) (toElem -> d) -> compare (swapW c) (swapW d) == compare (B.singleton c) (B.singleton d)) , ("compare unsigned", once $ compare (B.singleton 255) (B.singleton 127) == GT) , ("null" , property $ \x -> B.null x === null (B.unpack x)) , ("empty 0" , once $ numWord B.empty === 0) , ("empty []", once $ B.unpack B.empty === []) , ("mempty 0", once $ numWord mempty === 0) , ("mempty []", once $ B.unpack mempty === []) #ifdef WORD16 #ifdef WIN , ("isInfixOf works correctly under UTF16", once $ let foo = WindowsString $ B8.pack [0xbb, 0x03] foo' = WindowsString $ B8.pack [0xd2, 0xbb] bar = WindowsString $ B8.pack [0xd2, 0xbb, 0x03, 0xad] bar' = WindowsString $ B8.pack [0xd2, 0xbb, 0x03, 0xad, 0xd2, 0xbb, 0x03, 0xad, 0xbb, 0x03, 0x00, 0x00] in [B.isInfixOf foo bar, B.isInfixOf foo' bar, B.isInfixOf foo bar'] === [False, True, True] ) #else , ("isInfixOf works correctly under UTF16", once $ let foo = BS.pack [0xbb, 0x03] foo' = BS.pack [0xd2, 0xbb] bar = BS.pack [0xd2, 0xbb, 0x03, 0xad] bar' = BS.pack [0xd2, 0xbb, 0x03, 0xad, 0xd2, 0xbb, 0x03, 0xad, 0xbb, 0x03, 0x00, 0x00] in [B.isInfixOf foo bar, B.isInfixOf foo' bar, B.isInfixOf foo bar'] === [False, True, True] ) #endif #endif , ("break breakSubstring", property $ \(toElem -> c) x -> B.break (== c) x === B.breakSubstring (B.singleton c) x ) , ("breakSubstring", property $ \x y -> not (B.null x) ==> B.null (snd (B.breakSubstring x y)) === not (B.isInfixOf x y) ) , ("breakSubstring empty", property $ \x -> B.breakSubstring B.empty x === (B.empty, x) ) , ("isInfixOf", property $ \x y -> B.isInfixOf x y === L.isInfixOf (B.unpack x) (B.unpack y)) , ("mconcat" , property $ \xs -> B.unpack (mconcat xs) === mconcat (map B.unpack xs)) , ("mconcat [x,x]" , property $ \x -> B.unpack (mconcat [x, x]) === mconcat [B.unpack x, B.unpack x]) , ("mconcat [x,[]]" , property $ \x -> B.unpack (mconcat [x, B.empty]) === mconcat [B.unpack x, []]) , ("null" , property $ \x -> B.null x === null (B.unpack x)) , ("reverse" , property $ \x -> B.unpack (B.reverse x) === reverse (B.unpack x)) , ("all" , property $ \f x -> B.all f x === all f (B.unpack x)) , ("all ==" , property $ \(toElem -> c) x -> B.all (== c) x === all (== c) (B.unpack x)) , ("any" , property $ \f x -> B.any f x === any f (B.unpack x)) , ("any ==" , property $ \(toElem -> c) x -> B.any (== c) x === any (== c) (B.unpack x)) , ("mappend" , property $ \x y -> B.unpack (mappend x y) === B.unpack x `mappend` B.unpack y) , ("<>" , property $ \x y -> B.unpack (x `mappend` y) === B.unpack x `mappend` B.unpack y) , ("stimes" , property $ \(Positive n) x -> stimes (n :: Int) (x :: ShortByteString) === mtimesDefault n x) , ("break" , property $ \f x -> (B.unpack *** B.unpack) (B.break f x) === break f (B.unpack x)) , ("break ==" , property $ \(toElem -> c) x -> (B.unpack *** B.unpack) (B.break (== c) x) === break (== c) (B.unpack x)) , ("break /=" , property $ \(toElem -> c) x -> (B.unpack *** B.unpack) (B.break (/= c) x) === break (/= c) (B.unpack x)) , ("break span" , property $ \f x -> B.break f x === B.span (not . f) x) , ("breakEnd" , property $ \f x -> B.breakEnd f x === swap ((B.reverse *** B.reverse) (B.break f (B.reverse x)))) , ("breakEnd" , property $ \f x -> B.breakEnd f x === B.spanEnd (not . f) x) , ("break isSpace" , property $ \x -> (B.unpack *** B.unpack) (B.break isSpace x) === break isSpace (B.unpack x)) , ("singleton" , property $ \(toElem -> c) -> B.unpack (B.singleton c) === [c]) , ("cons" , property $ \(toElem -> c) x -> B.unpack (B.cons c x) === c : B.unpack x) , ("cons []" , property $ \(toElem -> c) -> B.unpack (B.cons c B.empty) === [c]) , ("uncons" , property $ \x -> fmap (second B.unpack) (B.uncons x) === L.uncons (B.unpack x)) , ("snoc" , property $ \(toElem -> c) x -> B.unpack (B.snoc x c) === B.unpack x ++ [c]) , ("snoc []" , property $ \(toElem -> c) -> B.unpack (B.snoc B.empty c) === [c]) , ("unsnoc" , property $ \x -> fmap (first B.unpack) (B.unsnoc x) === unsnoc (B.unpack x)) , ("drop" , property $ \n x -> B.unpack (B.drop n x) === drop (fromIntegral n) (B.unpack x)) , ("drop 10" , property $ \x -> B.unpack (B.drop 10 x) === drop 10 (B.unpack x)) , ("dropWhile" , property $ \f x -> B.unpack (B.dropWhile f x) === dropWhile f (B.unpack x)) , ("dropWhile ==" , property $ \(toElem -> c) x -> B.unpack (B.dropWhile (== c) x) === dropWhile (== c) (B.unpack x)) , ("dropWhile /=" , property $ \(toElem -> c) x -> B.unpack (B.dropWhile (/= c) x) === dropWhile (/= c) (B.unpack x)) , ("dropWhile isSpace" , property $ \x -> B.unpack (B.dropWhile isSpace x) === dropWhile isSpace (B.unpack x)) , ("take" , property $ \n x -> B.unpack (B.take n x) === take (fromIntegral n) (B.unpack x)) , ("take 10" , property $ \x -> B.unpack (B.take 10 x) === take 10 (B.unpack x)) , ("takeWhile" , property $ \f x -> B.unpack (B.takeWhile f x) === takeWhile f (B.unpack x)) , ("takeWhile ==" , property $ \(toElem -> c) x -> B.unpack (B.takeWhile (== c) x) === takeWhile (== c) (B.unpack x)) , ("takeWhile /=" , property $ \(toElem -> c) x -> B.unpack (B.takeWhile (/= c) x) === takeWhile (/= c) (B.unpack x)) , ("takeWhile isSpace" , property $ \x -> B.unpack (B.takeWhile isSpace x) === takeWhile isSpace (B.unpack x)) , ("dropEnd" , property $ \n x -> B.dropEnd n x === B.take (numWord x - n) x) , ("dropWhileEnd" , property $ \f x -> B.dropWhileEnd f x === B.reverse (B.dropWhile f (B.reverse x))) , ("takeEnd" , property $ \n x -> B.takeEnd n x === B.drop (numWord x - n) x) , ("takeWhileEnd" , property $ \f x -> B.takeWhileEnd f x === B.reverse (B.takeWhile f (B.reverse x))) , ("length" , property $ \x -> numWord x === fromIntegral (length (B.unpack x))) #if defined(OSWORD) || defined(WIN) || defined(POSIX) , ("length abc" , once $ B.length (B.pack [0xbb, 0x03]) == 2) #endif , ("count" , property $ \(toElem -> c) x -> B.count c x === fromIntegral (length (elemIndices c (B.unpack x)))) , ("filter" , property $ \f x -> B.unpack (B.filter f x) === filter f (B.unpack x)) , ("filter compose" , property $ \f g x -> B.filter f (B.filter g x) === B.filter (\c -> f c && g c) x) , ("filter ==" , property $ \(toElem -> c) x -> B.unpack (B.filter (== c) x) === filter (== c) (B.unpack x)) , ("filter /=" , property $ \(toElem -> c) x -> B.unpack (B.filter (/= c) x) === filter (/= c) (B.unpack x)) , ("partition" , property $ \f x -> (B.unpack *** B.unpack) (B.partition f x) === partition f (B.unpack x)) , ("find" , property $ \f x -> B.find f x === find f (B.unpack x)) , ("findIndex" , property $ \f x -> B.findIndex f x === fmap fromIntegral (findIndex f (B.unpack x))) , ("findIndices" , property $ \f x -> B.findIndices f x === fmap fromIntegral (findIndices f (B.unpack x))) , ("findIndices ==" , property $ \(toElem -> c) x -> B.findIndices (== c) x === fmap fromIntegral (findIndices (== c) (B.unpack x))) , ("elem" , property $ \(toElem -> c) x -> B.elem c x === elem c (B.unpack x)) , ("not elem" , property $ \(toElem -> c) x -> not (B.elem c x) === notElem c (B.unpack x)) , ("elemIndex" , property $ \(toElem -> c) x -> B.elemIndex c x === fmap fromIntegral (elemIndex c (B.unpack x))) , ("elemIndices" , property $ \(toElem -> c) x -> B.elemIndices c x === fmap fromIntegral (elemIndices c (B.unpack x))) , ("map" , property $ \f x -> B.unpack (B.map (toElem . f) x) === map (toElem . f) (B.unpack x)) , ("map compose" , property $ \f g x -> B.map (toElem . f) (B.map (toElem . g) x) === B.map (toElem . f . toElem . g) x) , ("replicate" , property $ \n (toElem -> c) -> B.unpack (B.replicate (fromIntegral n) c) === replicate n c) , ("replicate 0" , property $ \(toElem -> c) -> B.unpack (B.replicate 0 c) === replicate 0 c) , ("span" , property $ \f x -> (B.unpack *** B.unpack) (B.span f x) === span f (B.unpack x)) , ("span ==" , property $ \(toElem -> c) x -> (B.unpack *** B.unpack) (B.span (== c) x) === span (== c) (B.unpack x)) , ("span /=" , property $ \(toElem -> c) x -> (B.unpack *** B.unpack) (B.span (/= c) x) === span (/= c) (B.unpack x)) , ("spanEnd" , property $ \f x -> B.spanEnd f x === swap ((B.reverse *** B.reverse) (B.span f (B.reverse x)))) , ("split" , property $ \(toElem -> c) x -> map B.unpack (B.split c x) === split c (B.unpack x)) , ("split empty" , property $ \(toElem -> c) -> B.split c B.empty === []) , ("splitWith" , property $ \f x -> map B.unpack (B.splitWith f x) === splitWith f (B.unpack x)) , ("splitWith split" , property $ \(toElem -> c) x -> B.splitWith (== c) x === B.split c x) , ("splitWith empty" , property $ \f -> B.splitWith f B.empty === []) , ("splitWith length" , property $ \f x -> let splits = B.splitWith f x; l1 = fromIntegral (length splits); l2 = numWord (B.filter f x) in (l1 == l2 || l1 == l2 + 1) && sum (map numWord splits) + l2 == numWord x) , ("splitAt" , property $ \n x -> (B.unpack *** B.unpack) (B.splitAt n x) === splitAt (fromIntegral n) (B.unpack x)) , ("head" , property $ \x -> not (B.null x) ==> B.head x == head (B.unpack x)) , ("last" , property $ \x -> not (B.null x) ==> B.last x == last (B.unpack x)) , ("tail" , property $ \x -> not (B.null x) ==> B.unpack (B.tail x) == tail (B.unpack x)) , ("tail length" , property $ \x -> not (B.null x) ==> numWord x == 1 + numWord (B.tail x)) , ("init" , property $ \x -> not (B.null x) ==> B.unpack (B.init x) == init (B.unpack x)) , ("init length" , property $ \x -> not (B.null x) ==> numWord x == 1 + numWord (B.init x)) , ("foldl" , property $ \f (toElem -> c) x -> B.foldl ((toElem .) . f) c x === foldl ((toElem .) . f) c (B.unpack x)) , ("foldl'" , property $ \f (toElem -> c) x -> B.foldl' ((toElem .) . f) c x === foldl' ((toElem .) . f) c (B.unpack x)) , ("foldr" , property $ \f (toElem -> c) x -> B.foldr ((toElem .) . f) c x === foldr ((toElem .) . f) c (B.unpack x)) , ("foldr'" , property $ \f (toElem -> c) x -> B.foldr' ((toElem .) . f) c x === foldr' ((toElem .) . f) c (B.unpack x)) , ("foldl cons" , property $ \x -> B.foldl (flip B.cons) B.empty x === B.reverse x) , ("foldr cons" , property $ \x -> B.foldr B.cons B.empty x === x) , ("foldl special" , property $ \x (toElem -> c) -> B.unpack (B.foldl (\acc t -> if t == c then acc else B.cons t acc) B.empty x) === foldl (\acc t -> if t == c then acc else t : acc) [] (B.unpack x)) , ("foldr special" , property $ \x (toElem -> c) -> B.unpack (B.foldr (\t acc -> if t == c then acc else B.cons t acc) B.empty x) === foldr (\t acc -> if t == c then acc else t : acc) [] (B.unpack x)) , ("foldl1" , property $ \f x -> not (B.null x) ==> B.foldl1 ((toElem .) . f) x == foldl1 ((toElem .) . f) (B.unpack x)) , ("foldl1'" , property $ \f x -> not (B.null x) ==> B.foldl1' ((toElem .) . f) x == foldl1' ((toElem .) . f) (B.unpack x)) , ("foldr1" , property $ \f x -> not (B.null x) ==> B.foldr1 ((toElem .) . f) x == foldr1 ((toElem .) . f) (B.unpack x)) , ("foldr1'", -- there is not Data.List.foldr1' property $ \f x -> not (B.null x) ==> B.foldr1' ((toElem .) . f) x == foldr1 ((toElem .) . f) (B.unpack x)) , ("foldl1 const" , property $ \x -> not (B.null x) ==> B.foldl1 const x == B.head x) , ("foldl1 flip const" , property $ \x -> not (B.null x) ==> B.foldl1 (flip const) x == B.last x) , ("foldr1 const" , property $ \x -> not (B.null x) ==> B.foldr1 const x == B.head x) , ("foldr1 flip const" , property $ \x -> not (B.null x) ==> B.foldr1 (flip const) x == B.last x) , ("foldl1 max" , property $ \x -> not (B.null x) ==> B.foldl1 max x == B.foldl max minBound x) , ("foldr1 max" , property $ \x -> not (B.null x) ==> B.foldr1 max x == B.foldr max minBound x) , ("index" , property $ \(NonNegative n) x -> fromIntegral n < numWord x ==> B.index x (fromIntegral n) == B.unpack x !! n) , ("indexMaybe" , property $ \(NonNegative n) x -> fromIntegral n < numWord x ==> B.indexMaybe x (fromIntegral n) == Just (B.unpack x !! n)) , ("indexMaybe Nothing" , property $ \n x -> (n :: Int) < 0 || fromIntegral n >= numWord x ==> B.indexMaybe x (fromIntegral n) == Nothing) , ("!?" , property $ \n x -> B.indexMaybe x (fromIntegral (n :: Int)) === x B.!? (fromIntegral n)) , ("unfoldrN" , property $ \n f (toElem -> c) -> B.unpack (fst (B.unfoldrN n (fmap (first toElem) . f) c)) === take (fromIntegral n) (unfoldr (fmap (first toElem) . f) c)) , ("unfoldrN replicate" , property $ \n (toElem -> c) -> fst (B.unfoldrN n (\t -> Just (t, t)) c) === B.replicate n c) , ("unfoldr" , property $ \n a (toElem -> c) -> B.unpack (B.unfoldr (\x -> if x <= 100 * n then Just (c, x + 1 :: Int) else Nothing) a) === unfoldr (\x -> if x <= 100 * n then Just (c, x + 1) else Nothing) a) --, ("unfoldr" , -- property $ \n f (toElem -> a) -> B.unpack (B.take (fromIntegral n) (B.unfoldr (fmap (first toElem) . f) a)) === -- take n (unfoldr (fmap (first toElem) . f) a)) -- #if defined(WORD16) && !defined(WIN) && !defined(OSWORD) && !defined(POSIX) , ("useAsCWString str packCWString == str" , property $ \x -> not (B.any (== _nul) x) ==> monadicIO $ run (B.useAsCWString x B.packCWString >>= \x' -> pure (x == x'))) , ("useAsCWStringLen str packCWStringLen == str" , property $ \x -> not (B.any (== _nul) x) ==> monadicIO $ run (B.useAsCWStringLen x B.packCWStringLen >>= \x' -> pure (x == x'))) #endif #if !defined(WORD16) && !defined(WIN) && !defined(OSWORD) && !defined(POSIX) , ("useAsCString str packCString == str" , property $ \x -> not (B.any (== _nul) x) ==> monadicIO $ run (B.useAsCString x B.packCString >>= \x' -> pure (x == x'))) , ("useAsCStringLen str packCStringLen == str" , property $ \x -> not (B.any (== _nul) x) ==> monadicIO $ run (B.useAsCStringLen x B.packCStringLen >>= \x' -> pure (x == x'))) #endif ] split :: Eq a => a -> [a] -> [[a]] split c = splitWith (== c) splitWith :: (a -> Bool) -> [a] -> [[a]] splitWith _ [] = [] splitWith f ys = go [] ys where go acc [] = [reverse acc] go acc (x : xs) | f x = reverse acc : go [] xs | otherwise = go (x : acc) xs #if !MIN_VERSION_base(4, 19, 0) unsnoc :: [a] -> Maybe ([a], a) unsnoc [] = Nothing unsnoc xs = Just (init xs, last xs) #endif -- | Total conversion to char. word8ToChar :: Word8 -> Char word8ToChar = C.chr . fromIntegral os-string-2.0.6/tests/bytestring-tests/Properties/OsString.hs0000644000000000000000000000014007346545000022564 0ustar0000000000000000{-# LANGUAGE CPP #-} #undef WORD16 #undef POSIX #undef WIN #define OSWORD #include "Common.hs" os-string-2.0.6/tests/bytestring-tests/Properties/PosixString.hs0000644000000000000000000000014007346545000023305 0ustar0000000000000000{-# LANGUAGE CPP #-} #undef WORD16 #define POSIX #undef WIN #undef OSWORD #include "Common.hs" os-string-2.0.6/tests/bytestring-tests/Properties/ShortByteString.hs0000644000000000000000000000013707346545000024134 0ustar0000000000000000{-# LANGUAGE CPP #-} #undef WORD16 #undef WIN #undef POSIX #undef OSWORD #include "Common.hs" os-string-2.0.6/tests/bytestring-tests/Properties/ShortByteString/0000755000000000000000000000000007346545000023577 5ustar0000000000000000os-string-2.0.6/tests/bytestring-tests/Properties/ShortByteString/Word16.hs0000644000000000000000000000014207346545000025212 0ustar0000000000000000{-# LANGUAGE CPP #-} #define WORD16 #undef WIN #undef POSIX #undef OSWORD #include "../Common.hs" os-string-2.0.6/tests/bytestring-tests/Properties/WindowsString.hs0000644000000000000000000000014107346545000023636 0ustar0000000000000000{-# LANGUAGE CPP #-} #define WORD16 #define WIN #undef POSIX #undef OSWORD #include "Common.hs" os-string-2.0.6/tests/encoding/0000755000000000000000000000000007346545000014565 5ustar0000000000000000os-string-2.0.6/tests/encoding/EncodingSpec.hs0000644000000000000000000001673607346545000017477 0ustar0000000000000000{-# LANGUAGE CPP #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE ViewPatterns #-} {-# LANGUAGE TypeApplications #-} module EncodingSpec where import Data.ByteString ( ByteString ) import qualified Data.ByteString as BS import Arbitrary import Test.QuickCheck import Data.Either ( isRight ) import qualified System.OsString.Data.ByteString.Short as BS8 import qualified System.OsString.Data.ByteString.Short.Word16 as BS16 import System.OsString.Encoding.Internal import GHC.IO (unsafePerformIO) import GHC.IO.Encoding ( setFileSystemEncoding ) import System.IO ( utf16le ) import Control.Exception import Control.DeepSeq import Data.Bifunctor ( first ) import GHC.IO.Encoding.Failure ( CodingFailureMode(..) ) import GHC.IO.Encoding.UTF16 ( mkUTF16le ) import GHC.IO.Encoding.UTF8 ( mkUTF8 ) tests :: [(String, Property)] tests = [ ("ucs2le_decode . ucs2le_encode == id", property $ \(padEven -> ba) -> let decoded = decodeWithTE ucs2le (BS8.toShort ba) encoded = encodeWithTE ucs2le =<< decoded in (BS8.fromShort <$> encoded) === Right ba) , ("utf16 doesn't handle invalid surrogate pairs", property $ let str = [toEnum 55296, toEnum 55297] encoded = encodeWithTE utf16le str decoded = decodeWithTE utf16le =<< encoded #if __GLASGOW_HASKELL__ >= 910 in decoded === Left (EncodingError ("recoverEncode: invalid argument (cannot encode character " <> show (head str) <> ")\n") Nothing)) #elif __GLASGOW_HASKELL__ >= 904 in decoded === Left (EncodingError ("recoverEncode: invalid argument (cannot encode character " <> show (head str) <> ")") Nothing)) #else in decoded === Left (EncodingError "recoverEncode: invalid argument (invalid character)" Nothing)) #endif , ("ucs2 handles invalid surrogate pairs", property $ let str = [toEnum 55296, toEnum 55297] encoded = encodeWithTE ucs2le str decoded = decodeWithTE ucs2le =<< encoded in decoded === Right str) , ("can roundtrip arbitrary bytes through utf-8 (with RoundtripFailure)", property $ \bs -> let decoded = decodeWithTE (mkUTF8 RoundtripFailure) (BS8.toShort bs) encoded = encodeWithTE (mkUTF8 RoundtripFailure) =<< decoded in (either (const 0) BS8.length encoded, encoded) === (BS8.length (BS8.toShort bs), Right (BS8.toShort bs))) , ("can decode arbitrary strings through utf-8 (with RoundtripFailure)", property $ \(NonNullSurrogateString str) -> let encoded = encodeWithTE (mkUTF8 RoundtripFailure) str decoded = decodeWithTE (mkUTF8 RoundtripFailure) =<< encoded in expectFailure $ (either (const 0) length decoded, decoded) === (length str, Right str)) , ("utf-8 roundtrip encode cannot deal with some surrogates", property $ let str = [toEnum 0xDFF0, toEnum 0xDFF2] encoded = encodeWithTE (mkUTF8 RoundtripFailure) str decoded = decodeWithTE (mkUTF8 RoundtripFailure) =<< encoded #if __GLASGOW_HASKELL__ >= 910 in decoded === Left (EncodingError ("recoverEncode: invalid argument (cannot encode character " <> show (head str) <> ")\n") Nothing)) #elif __GLASGOW_HASKELL__ >= 904 in decoded === Left (EncodingError ("recoverEncode: invalid argument (cannot encode character " <> show (head str) <> ")") Nothing)) #else in decoded === Left (EncodingError "recoverEncode: invalid argument (invalid character)" Nothing)) #endif , ("cannot roundtrip arbitrary bytes through utf-16 (with RoundtripFailure)", property $ \(padEven -> bs) -> let decoded = decodeWithTE (mkUTF16le RoundtripFailure) (BS8.toShort bs) encoded = encodeWithTE (mkUTF16le RoundtripFailure) =<< decoded in expectFailure $ (either (const 0) BS8.length encoded, encoded) === (BS8.length (BS8.toShort bs), Right (BS8.toShort bs))) , ("encodeWithTE/decodeWithTE ErrorOnCodingFailure fails (utf16le)", property $ \(padEven -> bs) -> let decoded = decodeWithTE (mkUTF16le ErrorOnCodingFailure) (BS8.toShort bs) encoded = encodeWithTE (mkUTF16le ErrorOnCodingFailure) =<< decoded in expectFailure $ (isRight encoded, isRight decoded) === (True, True)) , ("encodeWithTE/decodeWithTE ErrorOnCodingFailure fails (utf8)", property $ \bs -> let decoded = decodeWithTE (mkUTF8 ErrorOnCodingFailure) (BS8.toShort bs) encoded = encodeWithTE (mkUTF8 ErrorOnCodingFailure) =<< decoded in expectFailure $ (isRight encoded, isRight decoded) === (True, True)) , ("encodeWithTE/decodeWithTE TransliterateCodingFailure never fails (utf16le)", property $ \(padEven -> bs) -> let decoded = decodeWithTE (mkUTF16le TransliterateCodingFailure) (BS8.toShort bs) encoded = encodeWithTE (mkUTF16le TransliterateCodingFailure) =<< decoded in (isRight encoded, isRight decoded) === (True, True)) , ("encodeWithTE/decodeWithTE TransliterateCodingFailure never fails (utf8)", property $ \bs -> let decoded = decodeWithTE (mkUTF8 TransliterateCodingFailure) (BS8.toShort bs) encoded = encodeWithTE (mkUTF8 TransliterateCodingFailure) =<< decoded in (isRight encoded, isRight decoded) === (True, True)) , ("encodeWithBaseWindows/decodeWithBaseWindows never fails (utf16le)", property $ \(padEven -> bs) -> let decoded = decodeW' (BS8.toShort bs) encoded = encodeW' =<< decoded in (isRight encoded, isRight decoded) === (True, True)) , ("encodeWithBasePosix/decodeWithBasePosix never fails (utf8b)", property $ \bs -> ioProperty $ do setFileSystemEncoding (mkUTF8 TransliterateCodingFailure) let decoded = decodeP' (BS8.toShort bs) encoded = encodeP' =<< decoded pure $ (isRight encoded, isRight decoded) === (True, True)) , ("decodeWithBaseWindows == utf16le_b", property $ \(BS8.toShort . padEven -> bs) -> let decoded = decodeW' bs decoded' = first displayException $ decodeWithTE (mkUTF16le_b ErrorOnCodingFailure) bs in decoded === decoded') , ("encodeWithBaseWindows == utf16le_b", property $ \(NonNullSurrogateString str) -> let decoded = encodeW' str decoded' = first displayException $ encodeWithTE (mkUTF16le_b ErrorOnCodingFailure) str in decoded === decoded') , ("encodeWithTE/decodeWithTE never fails (utf16le_b)", property $ \(padEven -> bs) -> let decoded = decodeWithTE (mkUTF16le_b ErrorOnCodingFailure) (BS8.toShort bs) encoded = encodeWithTE (mkUTF16le_b ErrorOnCodingFailure) =<< decoded in (isRight encoded, isRight decoded) === (True, True)) ] padEven :: ByteString -> ByteString padEven bs | even (BS.length bs) = bs | otherwise = bs `BS.append` BS.pack [70] decodeP' :: BS8.ShortByteString -> Either String String decodeP' ba = unsafePerformIO $ do r <- try @SomeException $ decodeWithBasePosix ba evaluate $ force $ first displayException r encodeP' :: String -> Either String BS8.ShortByteString encodeP' str = unsafePerformIO $ do r <- try @SomeException $ encodeWithBasePosix str evaluate $ force $ first displayException r decodeW' :: BS16.ShortByteString -> Either String String decodeW' ba = unsafePerformIO $ do r <- try @SomeException $ decodeWithBaseWindows ba evaluate $ force $ first displayException r encodeW' :: String -> Either String BS8.ShortByteString encodeW' str = unsafePerformIO $ do r <- try @SomeException $ encodeWithBaseWindows str evaluate $ force $ first displayException r os-string-2.0.6/tests/encoding/Main.hs0000644000000000000000000000024007346545000016001 0ustar0000000000000000{-# LANGUAGE TypeApplications #-} module Main (main) where import qualified EncodingSpec as Spec import TestUtil main :: IO () main = runTests (Spec.tests)