integer-conversion-0.1.0.1/0000755000000000000000000000000007346545000013660 5ustar0000000000000000integer-conversion-0.1.0.1/ChangeLog.md0000644000000000000000000000003007346545000016022 0ustar0000000000000000## 0.1 Initial release integer-conversion-0.1.0.1/LICENSE0000644000000000000000000000276207346545000014674 0ustar0000000000000000Copyright (c) 2023, Oleg Grenrus All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of Oleg Grenrus nor the names of other contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. integer-conversion-0.1.0.1/bench/0000755000000000000000000000000007346545000014737 5ustar0000000000000000integer-conversion-0.1.0.1/bench/integer-conversion-bench.hs0000644000000000000000000000312507346545000022171 0ustar0000000000000000{-# LANGUAGE OverloadedStrings #-} module Main where import Test.Tasty.Bench (Benchmark, bench, bgroup, defaultMain, whnf) import qualified Data.ByteString as BS import qualified Data.Text as T import qualified Alternative import qualified Naive import Data.Integer.Conversion main :: IO () main = defaultMain [ bgroup "text" [ bgroup "naive" $ seriesT Naive.textToInteger , bgroup "alt" $ seriesT Alternative.textToInteger , bgroup "proper" $ seriesT textToInteger ] , bgroup "bs" [ bgroup "naive" $ seriesB Naive.byteStringToInteger , bgroup "alt" $ seriesB Alternative.byteStringToInteger , bgroup "proper" $ seriesB byteStringToInteger ] , bgroup "string" [ bgroup "naive" $ seriesL Naive.stringToInteger , bgroup "alt" $ seriesL Alternative.stringToInteger , bgroup "read" $ seriesL read , bgroup "proper" $ seriesL stringToInteger ] ] where seriesT :: (T.Text -> Integer) -> [Benchmark] seriesT f = [ bench (show n) $ whnf f t | e <- [6 .. 18 :: Int] , let n = 2 ^ e , let t = T.replicate n "9" ] seriesB :: (BS.ByteString -> Integer) -> [Benchmark] seriesB f = [ bench (show n) $ whnf f t | e <- [6 .. 18 :: Int] , let n = 2 ^ e , let t = BS.replicate n (48 + 9) ] seriesL :: (String -> Integer) -> [Benchmark] seriesL f = [ bench (show n) $ whnf f t | e <- [6 .. 18 :: Int] , let n = 2 ^ e , let t = replicate n '9' ] integer-conversion-0.1.0.1/integer-conversion.cabal0000644000000000000000000000425107346545000020466 0ustar0000000000000000cabal-version: 2.2 name: integer-conversion version: 0.1.0.1 synopsis: Conversion from strings to Integer category: Data description: The naive @foldl' (\acc d -> acc * 10 + d) 0@ is expensive (quadratic!) for large @Integer@s. This package provides sub-quadratic implementation. homepage: https://github.com/phadej/integer-conversion bug-reports: https://github.com/phadej/integer-conversion/issues license: BSD-3-Clause license-file: LICENSE author: Oleg Grenrus maintainer: Oleg.Grenrus copyright: (c) 2023 Oleg Grenrus build-type: Simple extra-source-files: ChangeLog.md tested-with: GHC ==8.0.2 || ==8.2.2 || ==8.4.4 || ==8.6.5 || ==8.8.4 || ==8.10.7 || ==9.0.2 || ==9.2.8 || ==9.4.7 || ==9.6.3 || ==9.8.1 source-repository head type: git location: https://github.com/phadej/integer-conversion.git library default-language: Haskell2010 hs-source-dirs: src exposed-modules: Data.Integer.Conversion build-depends: , base >=4.9.0.0 && <4.20 , bytestring ^>=0.10.8.1 || ^>=0.11.4.0 || ^>=0.12.0.2 , primitive ^>=0.8.0.0 , text ^>=1.2.3.0 || >=2.0.1 && <2.1 || ^>=2.1 test-suite integer-conversion-tests default-language: Haskell2010 hs-source-dirs: tests src-other type: exitcode-stdio-1.0 main-is: integer-conversion-tests.hs build-depends: , base , bytestring , integer-conversion , text -- test dependencies build-depends: , QuickCheck ^>=2.14.3 , tasty ^>=1.4.3 , tasty-quickcheck ^>=0.10.2 other-modules: Alternative Naive benchmark integer-conversion-bench default-language: Haskell2010 ghc-options: -threaded -rtsopts type: exitcode-stdio-1.0 main-is: integer-conversion-bench.hs hs-source-dirs: bench src-other build-depends: , base , bytestring , integer-conversion , text -- bench dependencies build-depends: tasty-bench ^>=0.3.4 other-modules: Alternative Naive integer-conversion-0.1.0.1/src-other/0000755000000000000000000000000007346545000015566 5ustar0000000000000000integer-conversion-0.1.0.1/src-other/Alternative.hs0000644000000000000000000000651607346545000020410 0ustar0000000000000000{-# LANGUAGE BangPatterns #-} -- | A sub-quadratic algorithm for conversion of digits into 'Integer'. -- Pairs of adjacent radix @b@ digits are combined into a single radix @b^2@ digit. -- This process is repeated until we are left with a single digit. -- This algorithm performs well only on large inputs, -- so we use the simple algorithm for smaller inputs. -- -- This implementation is taken from aeson-2.1. module Alternative ( byteStringToInteger, textToInteger, stringToInteger, ) where import Data.Char (ord) import Data.Word (Word8) import qualified Data.ByteString as BS import qualified Data.List as L import qualified Data.Text as T byteStringToInteger :: BS.ByteString -> Integer byteStringToInteger bs -- here (and similarly in 'textToInteger') it could make sense -- to do first loop directly on 'ByteString' (or 'Text'), -- but as this is already a slow path, we opt rather for a simpler implementation. | l > 40 = valInteger' 10 l [ fromWord8 w | w <- BS.unpack bs ] | otherwise = byteStringToIntegerSimple bs where !l = BS.length bs byteStringToIntegerSimple :: BS.ByteString -> Integer byteStringToIntegerSimple = BS.foldl' step 0 where step a b = a * 10 + fromWord8 b textToInteger :: T.Text -> Integer textToInteger bs | l > 40 = valInteger' 10 l [ fromChar w | w <- T.unpack bs ] | otherwise = textToIntegerSimple bs where !l = T.length bs textToIntegerSimple :: T.Text -> Integer textToIntegerSimple = T.foldl' step 0 where step a b = a * 10 + fromChar b stringToInteger :: String -> Integer stringToInteger s | l > 40 = valInteger' 10 l (map fromChar s) | otherwise = stringToIntegerSimple s where !l = length s stringToIntegerSimple :: String -> Integer stringToIntegerSimple = L.foldl' step 0 where step a b = a * 10 + fromChar b fromChar :: Char -> Integer fromChar c = toInteger (ord c - 48 :: Int) {-# INLINE fromChar #-} fromWord8 :: Word8 -> Integer fromWord8 w = toInteger (fromIntegral w - 48 :: Int) {-# INLINE fromWord8 #-} -- | A sub-quadratic algorithm. -- -- Call 'valInteger'' directly if you know length of @digits@ in advance. -- valInteger :: Integer -> [Integer] -> Integer -- valInteger base ds = valInteger' base (length ds) ds -- | A sub-quadratic algorithm implementation. valInteger' :: Integer -- ^ base -> Int -- ^ length of digits -> [Integer] -- ^ digits -> Integer valInteger' = go where go :: Integer -> Int -> [Integer] -> Integer go _ _ [] = 0 go _ _ [d] = d go b l ds | l > 40 = b' `seq` go b' l' (combine b ds') | otherwise = valIntegerSimple b ds where -- ensure that we have an even number of digits -- before we call combine: ds' = if even l then ds else 0 : ds b' = b * b l' = (l + 1) `quot` 2 combine b (d1 : d2 : ds) = d `seq` (d : combine b ds) where d = d1 * b + d2 combine _ [] = [] combine _ [_] = errorWithoutStackTrace "this should not happen" -- | The following algorithm is only linear for types whose Num operations -- are in constant time. -- -- We export this (mostly) for testing purposes. -- valIntegerSimple :: Integer -> [Integer] -> Integer valIntegerSimple base = go 0 where go r [] = r go r (d : ds) = r' `seq` go r' ds where r' = r * base + fromIntegral d integer-conversion-0.1.0.1/src-other/Naive.hs0000644000000000000000000000107507346545000017167 0ustar0000000000000000module Naive ( textToInteger, byteStringToInteger, stringToInteger, ) where import Data.Char (ord) import qualified Data.ByteString as BS import qualified Data.List as L import qualified Data.Text as T textToInteger :: T.Text -> Integer textToInteger = T.foldl' (\acc c -> acc * 10 + toInteger (ord c - 48)) 0 byteStringToInteger :: BS.ByteString -> Integer byteStringToInteger = BS.foldl' (\acc c -> acc * 10 + toInteger c - 48) 0 stringToInteger :: String -> Integer stringToInteger = L.foldl' (\acc c -> acc * 10 + toInteger (ord c - 48)) 0 integer-conversion-0.1.0.1/src/Data/Integer/0000755000000000000000000000000007346545000016715 5ustar0000000000000000integer-conversion-0.1.0.1/src/Data/Integer/Conversion.hs0000644000000000000000000002305007346545000021376 0ustar0000000000000000{-# LANGUAGE BangPatterns #-} {-# LANGUAGE ScopedTypeVariables #-} {-# OPTIONS_GHC -ddump-simpl -dsuppress-all -ddump-to-file #-} -- | The naive left fold to convert digits to integer is quadratic -- as multiplying (big) 'Integer's is not a constant time operation. -- -- This module provides sub-quadratic algorithm for conversion of 'Text' -- or 'ByteString' into 'Integer'. -- -- For example for a text of 262144 9 digits, fold implementation -- takes 1.5 seconds, and 'textToInteger' just 26 milliseconds on my machine. -- Difference is already noticeable around 100-200 digits. -- -- In particular 'read' is correct (i.e. faster) than @List.foldl'@ (better complexity), -- 'stringToInteger' is a bit faster than 'read' (same complexity, lower coeffcient). -- module Data.Integer.Conversion ( textToInteger, byteStringToInteger, stringToInteger, stringToIntegerWithLen, ) where import Control.Monad.ST (ST, runST) import Data.ByteString (ByteString) import Data.Char (ord) import Data.Primitive.Array (MutableArray, newArray, readArray, writeArray) import Data.Text.Internal (Text (..)) import Data.Word (Word8) import qualified Data.ByteString as BS import qualified Data.List as L import qualified Data.Text as T -- $setup -- >>> :set -XOverloadedStrings ------------------------------------------------------------------------------- -- Text ------------------------------------------------------------------------------- -- | Convert 'Text' to 'Integer'. -- -- Semantically same as @T.foldl' (\acc c -> acc * 10 + toInteger (ord c - 48)) 0@, -- but this is more efficient. -- -- >>> textToInteger "123456789" -- 123456789 -- -- For non-decimal inputs some nonsense is calculated -- -- >>> textToInteger "foobar" -- 6098556 -- textToInteger :: Text -> Integer textToInteger t@(Text _arr _off len) -- len >= 20000 = algorithmL 10 (T.length t) [ toInteger (ord c - 48) | c <- T.unpack t ] | len >= 40 = complexTextToInteger t | otherwise = simpleTextToInteger t simpleTextToInteger :: Text -> Integer simpleTextToInteger = T.foldl' (\acc c -> acc * 10 + fromChar c) 0 -- Text doesn't have cheap length: -- -- * We can (over)estimate the size of the needed buffer by the length of text's underlying bytearray. -- * As we don't know whether the length is even or odd, we cannot do the first pass, -- so we just copy the contents of given Text as is first. -- complexTextToInteger :: Text -> Integer complexTextToInteger t0@(Text _ _ len) = runST $ do arr <- newArray len integer0 -- we overestimate the size here loop arr t0 0 where loop :: MutableArray s Integer -> Text -> Int -> ST s Integer loop !arr !t !o = case T.uncons t of Just (c, t') -> do writeArray arr o $! fromChar c loop arr t' (o + 1) Nothing -> algorithm arr o 10 fromChar :: Char -> Integer fromChar c = toInteger (ord c - 48 :: Int) {-# INLINE fromChar #-} ------------------------------------------------------------------------------- -- ByteString ------------------------------------------------------------------------------- -- | Convert 'ByteString' to 'Integer'. -- -- Semantically same as @BS.foldl' (\acc c -> acc * 10 + toInteger c - 48) 0@, -- but this is more efficient. -- -- >>> byteStringToInteger "123456789" -- 123456789 -- -- For non-decimal inputs some nonsense is calculated -- -- >>> byteStringToInteger "foobar" -- 6098556 -- byteStringToInteger :: ByteString -> Integer byteStringToInteger bs -- len >= 20000 = algorithmL 10 len [ toInteger w - 48 | w <- BS.unpack bs ] | len >= 40 = complexByteStringToInteger len bs | otherwise = simpleByteStringToInteger bs where !len = BS.length bs simpleByteStringToInteger :: BS.ByteString -> Integer simpleByteStringToInteger = BS.foldl' (\acc w -> acc * 10 + fromWord8 w) 0 complexByteStringToInteger :: Int -> BS.ByteString -> Integer complexByteStringToInteger len bs = runST $ do arr <- newArray len' 0 if even len then do loop arr 0 0 else do writeArray arr 0 $! indexBS bs 0 loop arr 1 1 where len' = (len + 1) `div` 2 loop :: MutableArray s Integer -> Int -> Int -> ST s Integer loop !arr !i !o | i < len = do writeArray arr o $! indexBS bs i * 10 + indexBS bs (i + 1) loop arr (i + 2) (o + 1) loop arr _ _ = algorithm arr len' 100 indexBS :: BS.ByteString -> Int -> Integer indexBS bs i = fromWord8 (BS.index bs i) {-# INLINE indexBS #-} fromWord8 :: Word8 -> Integer fromWord8 w = toInteger (fromIntegral w - 48 :: Int) {-# INLINE fromWord8 #-} ------------------------------------------------------------------------------- -- String ------------------------------------------------------------------------------- -- | Convert 'String' to 'Integer'. -- -- Semantically same as @List.foldl' (\acc c -> acc * 10 + toInteger c - 48) 0@, -- but this is more efficient. -- -- >>> stringToInteger "123456789" -- 123456789 -- -- For non-decimal inputs some nonsense is calculated -- -- >>> stringToInteger "foobar" -- 6098556 -- stringToInteger :: String -> Integer stringToInteger str = stringToIntegerWithLen str (length str) -- | Convert 'String' to 'Integer' when you know the length beforehand. -- -- >>> stringToIntegerWithLen "123" 3 -- 123 -- -- If the length is wrong, you may get wrong results. -- (Simple algorithm is used for short strings). -- -- >>> stringToIntegerWithLen (replicate 40 '0' ++ "123") 45 -- 12300 -- -- >>> stringToIntegerWithLen (replicate 40 '0' ++ "123") 44 -- 1200 -- -- >>> stringToIntegerWithLen (replicate 40 '0' ++ "123") 42 -- 12 -- stringToIntegerWithLen :: String -> Int -> Integer stringToIntegerWithLen str len | len >= 40 = complexStringToInteger len str | otherwise = simpleStringToInteger str simpleStringToInteger :: String -> Integer simpleStringToInteger = L.foldl' step 0 where step a b = a * 10 + fromChar b complexStringToInteger :: Int -> String -> Integer complexStringToInteger len str = runST $ do arr <- newArray len' integer0 if even len then loop arr str 0 else case str of [] -> return integer0 -- cannot happen, length is odd! but could, via stringToIntegerWithLen. a:bs -> do writeArray arr 0 $ fromChar a loop arr bs 1 where len' = (len + 1) `div` 2 loop :: MutableArray s Integer -> String -> Int -> ST s Integer loop !arr (a:b:cs) !o | o < len' = do writeArray arr o $! fromChar a * 10 + fromChar b loop arr cs (o + 1) loop arr _ _ = algorithm arr len' 100 ------------------------------------------------------------------------------- -- Algorithm ------------------------------------------------------------------------------- -- The core of algorithm uses mutable arrays. -- An alternative (found in e.g. @base@) uses lists. -- For very big integers (thousands of decimal digits) the difference -- is small (runtime is dominated by integer multiplication), -- but for medium sized integers this is slightly faster, as we avoid cons cell allocation. -- algorithm :: forall s. MutableArray s Integer -- ^ working buffer -> Int -- ^ buffer size -> Integer -- ^ base -> ST s Integer algorithm !arr !len !base | len <= 40 = finish 0 0 | even len = loop 0 0 | otherwise = loop 1 1 where loop :: Int -> Int -> ST s Integer loop !i !o | i < len = do -- read at i, i +1 a <- readArray arr i b <- readArray arr (i + 1) -- rewrite with constant to release memory writeArray arr i integer0 writeArray arr (i + 1) integer0 -- write at o writeArray arr o $! a * base + b -- continue loop (i + 2) (o + 1) loop _ _ = algorithm arr len' base' where !base' = base * base !len' = (len + 1) `div` 2 finish :: Integer -> Int -> ST s Integer finish !acc !i | i < len = do a <- readArray arr i finish (acc * base + a) (i + 1) finish !acc !_ = return acc ------------------------------------------------------------------------------- -- List variant ------------------------------------------------------------------------------- {- -- | A sub-quadratic algorithm implementation using lists. -- -- Sometimes this is faster, but I fail to quantify when exactly. -- algorithmL :: Integer -- ^ base -> Int -- ^ length of digits -> [Integer] -- ^ digits -> Integer algorithmL = go where go :: Integer -> Int -> [Integer] -> Integer go _ _ [] = 0 go _ _ [d] = d go b l ds | l > 40 = b' `seq` go b' l' (combine b ds') | otherwise = finishAlgorithmL b ds where -- ensure that we have an even number of digits -- before we call combine: ds' = if even l then ds else 0 : ds b' = b * b l' = (l + 1) `quot` 2 combine b (d1 : d2 : ds) = d `seq` (d : combine b ds) where d = d1 * b + d2 combine _ [] = [] combine _ [_] = errorWithoutStackTrace "this should not happen" -- | The following algorithm is only linear for types whose Num operations -- are in constant time. -- -- We export this (mostly) for testing purposes. -- finishAlgorithmL :: Integer -> [Integer] -> Integer finishAlgorithmL base = go 0 where go r [] = r go r (d : ds) = r' `seq` go r' ds where r' = r * base + fromIntegral d -} ------------------------------------------------------------------------------- -- Misc ------------------------------------------------------------------------------- integer0 :: Integer integer0 = 0 integer-conversion-0.1.0.1/tests/0000755000000000000000000000000007346545000015022 5ustar0000000000000000integer-conversion-0.1.0.1/tests/integer-conversion-tests.hs0000644000000000000000000000342707346545000022344 0ustar0000000000000000{-# OPTIONS_GHC -Wno-orphans #-} module Main (main) where import Test.QuickCheck ((===)) import Test.Tasty (defaultMain, testGroup) import Test.Tasty.QuickCheck (Arbitrary (..), label, testProperty) import qualified Data.ByteString as BS import qualified Data.Text as T import Data.Integer.Conversion import qualified Alternative import qualified Naive main :: IO () main = defaultMain $ testGroup "integer-conversion" [ testGroup "text" [ testProperty "naive" $ \t -> labelT t $ textToInteger t === Naive.textToInteger t , testProperty "alt" $ \t -> labelT t $ textToInteger t === Alternative.textToInteger t ] , testGroup "bs" [ testProperty "naive" $ \bs -> labelB bs $ byteStringToInteger bs === Naive.byteStringToInteger bs , testProperty "alt" $ \bs -> labelB bs $ byteStringToInteger bs === Alternative.byteStringToInteger bs ] , testGroup "string" [ testProperty "naive" $ \s -> labelS s $ stringToInteger s === Naive.stringToInteger s , testProperty "alt" $ \s -> labelS s $ stringToInteger s === Alternative.stringToInteger s ] ] where labelT t = label (if T.length t >= 40 then "long" else "short") labelB b = label (if BS.length b >= 40 then "long" else "short") labelS s = label (if length s >= 40 then "long" else "short") ------------------------------------------------------------------------------- -- Orphans ------------------------------------------------------------------------------- -- we could use quickcheck-instances, -- but by defining these instances here we make adopting newer GHC smoother. instance Arbitrary T.Text where arbitrary = fmap T.pack arbitrary instance Arbitrary BS.ByteString where arbitrary = fmap BS.pack arbitrary