doclayout-0.4.0.1/0000755000000000000000000000000007346545000012046 5ustar0000000000000000doclayout-0.4.0.1/LICENSE0000644000000000000000000000277507346545000013066 0ustar0000000000000000Copyright John MacFarlane (c) 2016-2019 All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of Author name here nor the names of other contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. doclayout-0.4.0.1/README.md0000644000000000000000000000155107346545000013327 0ustar0000000000000000# doclayout [![CI tests](https://github.com/jgm/doclayout/workflows/CI%20tests/badge.svg)](https://github.com/jgm/doclayout/actions) This is a prettyprinting library designed for laying out plain-text documents. It originated in the pandoc module Text.Pandoc.Pretty, and its development has been guided by pandoc's needs in rendering wrapped textual documents. In supports wrapping of text on breaking spaces, indentation and other line prefixes, blank lines, and tabular content. Example: ``` haskell Text.DocLayout> mydoc = hang 2 "- " (text "foo" <+> text "bar") Text.DocLayout> putStrLn $ render (Just 20) mydoc - foo bar Text.DocLayout> putStrLn $ render (Just 10) (prefixed "> " (mydoc $+$ mydoc)) > - foo > bar > > - foo > bar ``` The `Doc` type may be parameterized to either `String` or (strict or lazy) `Text`, depending on the desired render target. doclayout-0.4.0.1/Setup.hs0000644000000000000000000000005607346545000013503 0ustar0000000000000000import Distribution.Simple main = defaultMain doclayout-0.4.0.1/bench/0000755000000000000000000000000007346545000013125 5ustar0000000000000000doclayout-0.4.0.1/bench/bench.hs0000644000000000000000000001007107346545000014537 0ustar0000000000000000{-# LANGUAGE CPP #-} {-# LANGUAGE OverloadedStrings #-} import Text.DocLayout import Text.Emoji import Control.DeepSeq (force) import Control.Exception (evaluate) import Data.Text (Text) import qualified Data.Text as T import qualified Data.Text.IO as T (readFile) import Criterion.Main import Criterion.Types (Config (..)) #if MIN_VERSION_base(4,11,0) #else import Data.Semigroup #endif main :: IO () main = do udhrEng <- udhrLang "eng" udhrFrn <- udhrLang "frn" udhrVie <- udhrLang "vie" udhrChn <- udhrLang "chn" udhrArz <- udhrLang "arz" udhrHnd <- udhrLang "hnd" udhrBng <- udhrLang "bng" udhrRus <- udhrLang "rus" udhrJpn <- udhrLang "jpn" udhrKkn <- udhrLang "kkn" udhrTcw <- udhrLang "tcw" udhrTcv <- udhrLang "tcv" udhrThj <- udhrLang "thj" udhrGrk <- udhrLang "grk" emojiTxt <- evaluate . force . T.replicate 1000 $ mconcat baseEmojis <> mconcat zwjEmojis defaultMainWith defaultConfig{ timeLimit = 5.0 } $ [ bench "sample document 2" $ nf (render Nothing :: Doc Text -> Text) (nest 3 $ cblock 20 $ vcat $ replicate 15 $ hsep $ map text $ words bigtext) , bench "reflow English" $ nf (render (Just 20) :: Doc Text -> Text) $ flowedDoc udhrEng , bench "reflow Greek" $ nf (render (Just 20) :: Doc Text -> Text) $ flowedDoc udhrGrk , bench "tabular English" $ nf (render (Just 80) :: Doc Text -> Text) (let blah = flowedDoc udhrEng in cblock 20 blah <> lblock 30 blah <> rblock 10 blah $$ cblock 50 (nest 5 blah) <> rblock 10 blah) , bench "tabular Greek" $ nf (render (Just 80) :: Doc Text -> Text) (let blah = flowedDoc udhrGrk in cblock 20 blah <> lblock 30 blah <> rblock 10 blah $$ cblock 50 (nest 5 blah) <> rblock 10 blah) , bench "soft spaces at end of line" $ nf (render Nothing :: Doc Text -> Text) ("a" <> mconcat (replicate 50 (space <> lblock 1 mempty))) ] ++ -- Benchmarks for languages using all scripts used by more than 50 million people -- https://en.wikipedia.org/wiki/List_of_writing_systems#List_of_writing_systems_by_adoption -- https://www.unicode.org/udhr/translations.html [ bench "UDHR English" $ nf realLengthNarrowContext udhrEng -- Plain ASCII , bench "UDHR French" $ nf realLengthNarrowContext udhrFrn -- Latin with some diacritics , bench "UDHR Vietnamese" $ nf realLengthNarrowContext udhrVie -- Latin with more diacritics , bench "UDHR Mandarin" $ nf realLengthWideContext udhrChn -- Mandarin , bench "UDHR Arabic" $ nf realLengthNarrowContext udhrArz -- Arabic , bench "UDHR Hindi" $ nf realLengthNarrowContext udhrHnd -- Hindi , bench "UDHR Bengali" $ nf realLengthNarrowContext udhrBng -- Bengali , bench "UDHR Russian" $ nf realLengthNarrowContext udhrRus -- Russian , bench "UDHR Japanese" $ nf realLengthWideContext udhrJpn -- Japanese , bench "UDHR Korean" $ nf realLengthWideContext udhrKkn -- Korean , bench "UDHR Telugu" $ nf realLengthNarrowContext udhrTcw -- Telugu , bench "UDHR Tamil" $ nf realLengthNarrowContext udhrTcv -- Tamil -- Benchmarks for other languages , bench "UDHR Thai" $ nf realLengthNarrowContext udhrThj -- Thai , bench "UDHR Greek" $ nf realLengthNarrowContext udhrGrk -- Greek , bench "Emoji" $ nf realLengthNarrowContext emojiTxt -- Emoji , bench "UDHR Mandarin (no shortcuts)" $ nf realLengthWideContextNoShortcut udhrChn -- No shortcuts ] -- | The Universal declaration of human rights in a given language, repeated 1000 times. udhrLang :: String -> IO Text udhrLang lang = do txt <- T.readFile ("udhr/txt/" ++ lang ++ ".txt") evaluate . force $ T.replicate 10000 txt bigtext :: String bigtext = "Hello there. This is a big text." flowedDoc :: Text -> Doc Text flowedDoc txt = hsep $ map literal . T.words . T.take 5000 $ txt doclayout-0.4.0.1/changelog.md0000644000000000000000000001103107346545000014313 0ustar0000000000000000# doclayout ## 0.4.0.1 * Add clause for Empty to renderList (#22). * Remove upper bound for criterion ## 0.4 * Expose `unfoldD` [API change]. * Remove `realLengthNoShortcut`, `isEmojiModifier`, and `isEmojiJoiner` [API change] (Stephen Morgan). * Add new exported functions `realLengthNarrowContext`, `realLengthWideContext`, `realLengthNarrowContextNoShortcut`, `realLengthWideContextNoShortcut`, `isSkinToneModifier`, `isZWJ` [API change] (Stephen Morgan). * Compute `realLength` strictly. * Make `getOffset` stricter. * Drop support for ghc <= 8.4, add test for ghc 9.2. * Don't collapse the CarriageReturn + Newline combination (#20). We want to ensure that a literal starting with a Newline doesn't lose the newline if it occurs after a CarriageReturn. This affects code blocks in pandoc that begin with newlines. * Improve performance of NoShortcut code (Stephen Morgan). * Simplify emoji processing (Stephen Morgan). * Add benchmarking for code with no shortcuts (Stephen Morgan). * Add unicodeWidth.inc to cabal file. * Fix `offset`, `minOffset`, `updateColumn` so they don't re-render. * Get unicode block widths directly from the Unicode specification, rather than writing it out ourselves (Stephen Morgan). * Resolve the width of ambiguous characters based on their context (Stephen Morgan). * Spacing marks should have nonzero width, even though they are combining characters (Stephen Morgan). * Add shortcuts for extended Latin, Arabic, Cyrillic, Greek, Devangari, Bengali, Korean, Telugu, and Tamil (Stephen Morgan). * Fix location of extra-source-files in cabal. * update.hs: require text package * Handle emoji variation modifiers specially, so the keypad emoji can be ignored (Stephen Morgan). This results in a 16% speedup of realLength on ascii text. * Add benchmarks for all scripts used by more than 50 million people, plus a couple more. (#9, Stephen Morgan). ## 0.3.1.1 * Fix the end of the block of zero width characters which contains the zero-width joiners and directional markings (Stephen Morgan, #5). This fixes a regression introduced in 0.3.1, affecting code points 0x2010 to 0x2030. ## 0.3.1 * Improved handling of emojis. Emojis are double-wide, but previously this library did not treat them as such. We now have comprehensive support of emojis, including variation modifiers and zero-width joiners, verified by a test suite. Performance has been confirmed to be no worse for text without emojis. (Stephen Morgan, #1). API changes: export `realLengthNoShortcut`, `isEmojiModifier`, `isEmojiVariation`, `isEmojiJoiner`. ## 0.3.0.2 * NOINLINE `literal` instead of `fromString` (#2, sjakobi). This produces a further reduction in allocations and pandoc compile time. ## 0.3.0.1 * NOINLINE `fromString` (#1). @sjakobi reports that this change reduced total allocations for building pandoc-2.12 with GHC 8.10.4 by 8.5% and reduced peak allocations are reduced from 3854MB to 3389MB. ## 0.3 * Add foldlChar to signature of HasChars [API change]. * Use foldlChar in realLength. This avoids a stack overflow we were getting with long strings in the previous version (with foldrChar). See jgm/pandoc#6031. * Replace isBlank with isBreakable and improved startsWithBlank. Previously isBlank was used in the layout algorithm where what we really wanted was isBreakable. * Avoid unnecessary calculation in updateColumns. * Replace a right fold with a strict left fold. * Add strictness annotations in realLength and updateColumn. ## 0.2.0.1 * Made `realLength` smarter about combining characters. If a string starts with a combining character, that character takes up a width of 1; if the combining character occurs after another character, it takes 0. See jgm/pandoc#5863. * Improve `isBlank`, re-use in rendering code `for BreakingSpace`. * Fixed incorrect `Text` width in renderig blocks. ## 0.2 * Add instances for `Doc`: `Data`, `Typeable`, `Ord`, `Read`, `Generic`. * Add `literal` (like `text`, but polymorphic). * Change some `IsString` constraints to `HasChars`. * Add some default definitions for methods in `HasChars`. * Change `offset` and `minOffset` to be more efficient (in simple cases they no longer render and count line lengths). * Add `updateColumn`. * Fix problem with `lblock`/`cblock`/`rblock` when `chop` is invoked. This caused very strange behavior in which text got reversed in certain circumstances. ## 0.1 * Initial release. doclayout-0.4.0.1/doclayout.cabal0000644000000000000000000000455107346545000015042 0ustar0000000000000000name: doclayout version: 0.4.0.1 synopsis: A prettyprinting library for laying out text documents. description: doclayout is a prettyprinting library for laying out text documents, with several features not present in prettyprinting libraries designed for code. It was designed for use in pandoc. homepage: https://github.com/jgm/doclayout license: BSD3 license-file: LICENSE author: John MacFarlane maintainer: jgm@berkeley.edu copyright: 2016-19 John MacFarlane category: Text build-type: Simple extra-source-files: udhr/txt/*.txt, udhr/README, udhr/languages.txt, src/Text/unicodeWidth.inc data-files: README.md changelog.md cabal-version: >=1.10 library hs-source-dirs: src exposed-modules: Text.DocLayout build-depends: base >= 4.12 && < 5, text, containers, emojis >=0.1.2, mtl, safe if !impl(ghc >= 8.0) build-depends: semigroups == 0.18.* default-language: Haskell2010 ghc-options: -Wall -fno-warn-unused-do-bind test-suite doclayout-test type: exitcode-stdio-1.0 hs-source-dirs: test main-is: test.hs build-depends: base, doclayout, mtl, tasty, tasty-golden, tasty-hunit, tasty-quickcheck, text, emojis >=0.1.2 ghc-options: -threaded -rtsopts -with-rtsopts=-N default-language: Haskell2010 benchmark doclayout-bench Type: exitcode-stdio-1.0 Main-Is: bench.hs Hs-Source-Dirs: bench Build-Depends: doclayout, base >= 4.8 && < 5, criterion >= 1.0, deepseq, text, emojis >= 0.1.2, mtl Ghc-Options: -rtsopts -Wall -fno-warn-unused-do-bind Default-Language: Haskell2010 source-repository head type: git location: https://github.com/jgm/doclayout doclayout-0.4.0.1/src/Text/0000755000000000000000000000000007346545000013561 5ustar0000000000000000doclayout-0.4.0.1/src/Text/DocLayout.hs0000644000000000000000000012634007346545000016026 0ustar0000000000000000{-# LANGUAGE BangPatterns #-} {-# LANGUAGE CPP #-} {-# LANGUAGE DeriveDataTypeable #-} {-# LANGUAGE DeriveFoldable #-} {-# LANGUAGE DeriveFunctor #-} {-# LANGUAGE DeriveGeneric #-} {-# LANGUAGE DeriveTraversable #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE LambdaCase #-} {-# LANGUAGE MultiWayIf #-} {-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} {- | Module : Text.DocLayout Copyright : Copyright (C) 2010-2019 John MacFarlane License : BSD 3 Maintainer : John MacFarlane Stability : alpha Portability : portable A prettyprinting library for the production of text documents, including wrapped text, indentation and other prefixes, and blocks for tables. -} module Text.DocLayout ( -- * Rendering render -- * Doc constructors , cr , blankline , blanklines , space , literal , text , char , prefixed , flush , nest , hang , beforeNonBlank , nowrap , afterBreak , lblock , cblock , rblock , vfill , nestle , chomp , inside , braces , brackets , parens , quotes , doubleQuotes , empty -- * Functions for concatenating documents , (<+>) , ($$) , ($+$) , hcat , hsep , vcat , vsep -- * Functions for querying documents , isEmpty , offset , minOffset , updateColumn , height , charWidth , realLength , realLengthNarrowContext , realLengthWideContext , realLengthNarrowContextNoShortcut , realLengthWideContextNoShortcut -- * Char properties , isSkinToneModifier , isEmojiVariation , isZWJ -- * Utility functions , unfoldD -- * Types , Doc(..) , HasChars(..) ) where import Prelude import Data.Maybe (fromMaybe, isJust, mapMaybe) import Safe (lastMay, initSafe) import Control.Monad import Control.Monad.State.Strict import GHC.Generics import Data.Bifunctor (second) import Data.Char (isSpace, ord) import Data.List (foldl', intersperse) import qualified Data.IntMap.Strict as IM import qualified Data.Map.Strict as M import qualified Data.Map.Internal as MInt import Data.Data (Data, Typeable) import Data.String import qualified Data.Text as T import qualified Data.Text.Lazy as TL import Data.Text (Text) #if MIN_VERSION_base(4,11,0) #else import Data.Semigroup #endif import Text.Emoji (baseEmojis) -- | Class abstracting over various string types that -- can fold over characters. Minimal definition is 'foldrChar' -- and 'foldlChar', but defining the other methods can give better -- performance. class (IsString a, Semigroup a, Monoid a, Show a) => HasChars a where foldrChar :: (Char -> b -> b) -> b -> a -> b foldlChar :: (b -> Char -> b) -> b -> a -> b replicateChar :: Int -> Char -> a replicateChar n c = fromString (replicate n c) isNull :: a -> Bool isNull = foldrChar (\_ _ -> False) True splitLines :: a -> [a] splitLines s = (fromString firstline : otherlines) where (firstline, otherlines) = foldrChar go ([],[]) s go '\n' (cur,lns) = ([], fromString cur : lns) go c (cur,lns) = (c:cur, lns) instance HasChars Text where foldrChar = T.foldr foldlChar = T.foldl' splitLines = T.splitOn "\n" replicateChar n c = T.replicate n (T.singleton c) isNull = T.null instance HasChars String where foldrChar = foldr foldlChar = foldl' splitLines = lines . (++"\n") replicateChar = replicate isNull = null instance HasChars TL.Text where foldrChar = TL.foldr foldlChar = TL.foldl' splitLines = TL.splitOn "\n" replicateChar n c = TL.replicate (fromIntegral n) (TL.singleton c) isNull = TL.null -- | Document, including structure relevant for layout. data Doc a = Text Int a -- ^ Text with specified width. | Block Int [a] -- ^ A block with a width and lines. | VFill Int a -- ^ A vertically expandable block; -- when concatenated with a block, expands to height -- of block, with each line containing the specified text. | Prefixed Text (Doc a) -- ^ Doc with each line prefixed with text. -- Note that trailing blanks are omitted from the prefix -- when the line after it is empty. | BeforeNonBlank (Doc a) -- ^ Doc that renders only before nonblank. | Flush (Doc a) -- ^ Doc laid out flush to left margin. | BreakingSpace -- ^ A space or line break, in context. | AfterBreak Text -- ^ Text printed only at start of line. | CarriageReturn -- ^ Newline unless we're at start of line. | NewLine -- ^ newline. | BlankLines Int -- ^ Ensure a number of blank lines. | Concat (Doc a) (Doc a) -- ^ Two documents concatenated. | Empty deriving (Show, Read, Eq, Ord, Functor, Foldable, Traversable, Data, Typeable, Generic) instance Semigroup (Doc a) where x <> Empty = x Empty <> x = x x <> y = Concat x y instance Monoid (Doc a) where mappend = (<>) mempty = Empty instance HasChars a => IsString (Doc a) where fromString = text -- | Unfold a 'Doc' into a flat list. unfoldD :: Doc a -> [Doc a] unfoldD Empty = [] unfoldD (Concat x@Concat{} y) = unfoldD x <> unfoldD y unfoldD (Concat x y) = x : unfoldD y unfoldD x = [x] -- | True if the document is empty. isEmpty :: Doc a -> Bool isEmpty Empty = True isEmpty _ = False -- | The empty document. empty :: Doc a empty = mempty -- | Concatenate documents horizontally. hcat :: [Doc a] -> Doc a hcat = mconcat -- | Concatenate a list of 'Doc's, putting breakable spaces -- between them. infixr 6 <+> (<+>) :: Doc a -> Doc a -> Doc a (<+>) x y | isEmpty x = y | isEmpty y = x | otherwise = x <> space <> y -- | Same as 'hcat', but putting breakable spaces between the -- 'Doc's. hsep :: [Doc a] -> Doc a hsep = foldr (<+>) empty infixr 5 $$ -- | @a $$ b@ puts @a@ above @b@. ($$) :: Doc a -> Doc a -> Doc a ($$) x y | isEmpty x = y | isEmpty y = x | otherwise = x <> cr <> y infixr 5 $+$ -- | @a $+$ b@ puts @a@ above @b@, with a blank line between. ($+$) :: Doc a -> Doc a -> Doc a ($+$) x y | isEmpty x = y | isEmpty y = x | otherwise = x <> blankline <> y -- | List version of '$$'. vcat :: [Doc a] -> Doc a vcat = foldr ($$) empty -- | List version of '$+$'. vsep :: [Doc a] -> Doc a vsep = foldr ($+$) empty -- | Removes leading blank lines from a 'Doc'. nestle :: Doc a -> Doc a nestle d = case d of BlankLines _ -> Empty NewLine -> Empty Concat (Concat x y) z -> nestle (Concat x (Concat y z)) Concat BlankLines{} x -> nestle x Concat NewLine x -> nestle x _ -> d -- | Chomps trailing blank space off of a 'Doc'. chomp :: Doc a -> Doc a chomp d = case d of BlankLines _ -> Empty NewLine -> Empty CarriageReturn -> Empty BreakingSpace -> Empty Prefixed s d' -> Prefixed s (chomp d') Concat (Concat x y) z -> chomp (Concat x (Concat y z)) Concat x y -> case chomp y of Empty -> chomp x z -> x <> z _ -> d type DocState a = State (RenderState a) () data RenderState a = RenderState{ output :: [a] -- ^ In reverse order , prefix :: Text , usePrefix :: Bool , lineLength :: Maybe Int -- ^ 'Nothing' means no wrapping , column :: Int , newlines :: Int -- ^ Number of preceding newlines } newline :: HasChars a => DocState a newline = do st' <- get let rawpref = prefix st' when (column st' == 0 && usePrefix st' && not (T.null rawpref)) $ do let pref = fromString $ T.unpack $ T.dropWhileEnd isSpace rawpref modify $ \st -> st{ output = pref : output st , column = column st + realLength pref } modify $ \st -> st { output = "\n" : output st , column = 0 , newlines = newlines st + 1 } outp :: HasChars a => Int -> a -> DocState a outp off s = do -- offset >= 0 (0 might be combining char) st' <- get let pref = fromString $ T.unpack $ prefix st' when (column st' == 0 && usePrefix st' && not (isNull pref)) $ modify $ \st -> st{ output = pref : output st , column = column st + realLength pref } modify $ \st -> st{ output = s : output st , column = column st + off , newlines = 0 } -- | Render a 'Doc'. @render (Just n)@ will use -- a line length of @n@ to reflow text on breakable spaces. -- @render Nothing@ will not reflow text. render :: HasChars a => Maybe Int -> Doc a -> a render linelen doc = mconcat . reverse . output $ execState (renderDoc doc) startingState where startingState = RenderState{ output = mempty , prefix = mempty , usePrefix = True , lineLength = linelen , column = 0 , newlines = 2 } renderDoc :: HasChars a => Doc a -> DocState a renderDoc = renderList . normalize . unfoldD normalize :: HasChars a => [Doc a] -> [Doc a] normalize [] = [] normalize (Concat{} : xs) = normalize xs -- should not happen after unfoldD normalize (Empty : xs) = normalize xs -- should not happen after unfoldD normalize [NewLine] = normalize [CarriageReturn] normalize [BlankLines _] = normalize [CarriageReturn] normalize [BreakingSpace] = [] normalize (BlankLines m : BlankLines n : xs) = normalize (BlankLines (max m n) : xs) normalize (BlankLines num : BreakingSpace : xs) = normalize (BlankLines num : xs) normalize (BlankLines m : CarriageReturn : xs) = normalize (BlankLines m : xs) normalize (BlankLines m : NewLine : xs) = normalize (BlankLines m : xs) normalize (NewLine : BlankLines m : xs) = normalize (BlankLines m : xs) normalize (NewLine : BreakingSpace : xs) = normalize (NewLine : xs) normalize (NewLine : CarriageReturn : xs) = normalize (NewLine : xs) normalize (CarriageReturn : CarriageReturn : xs) = normalize (CarriageReturn : xs) normalize (CarriageReturn : BlankLines m : xs) = normalize (BlankLines m : xs) normalize (CarriageReturn : BreakingSpace : xs) = normalize (CarriageReturn : xs) normalize (BreakingSpace : CarriageReturn : xs) = normalize (CarriageReturn:xs) normalize (BreakingSpace : NewLine : xs) = normalize (NewLine:xs) normalize (BreakingSpace : BlankLines n : xs) = normalize (BlankLines n:xs) normalize (BreakingSpace : BreakingSpace : xs) = normalize (BreakingSpace:xs) normalize (x:xs) = x : normalize xs mergeBlocks :: HasChars a => Int -> (Int, [a]) -> (Int, [a]) -> (Int, [a]) mergeBlocks h (w1,lns1) (w2,lns2) = (w, zipWith (\l1 l2 -> pad w1 l1 <> l2) lns1' lns2') where w = w1 + w2 len1 = length $ take h lns1 -- note lns1 might be infinite len2 = length $ take h lns2 lns1' = if len1 < h then lns1 ++ replicate (h - len1) mempty else take h lns1 lns2' = if len2 < h then lns2 ++ replicate (h - len2) mempty else take h lns2 pad n s = s <> replicateChar (n - realLength s) ' ' renderList :: HasChars a => [Doc a] -> DocState a renderList [] = return () renderList (Empty : xs) = renderList xs renderList (Text off s : xs) = do outp off s renderList xs renderList (Prefixed pref d : xs) = do st <- get let oldPref = prefix st put st{ prefix = prefix st <> pref } renderDoc d modify $ \s -> s{ prefix = oldPref } -- renderDoc CarriageReturn renderList xs renderList (Flush d : xs) = do st <- get let oldUsePrefix = usePrefix st put st{ usePrefix = False } renderDoc d modify $ \s -> s{ usePrefix = oldUsePrefix } renderList xs renderList (BeforeNonBlank d : xs) = case xs of (x:_) | startsBlank x -> renderList xs | otherwise -> renderDoc d >> renderList xs [] -> renderList xs renderList (BlankLines num : xs) = do st <- get case output st of _ | newlines st > num -> return () | otherwise -> replicateM_ (1 + num - newlines st) newline renderList xs renderList (CarriageReturn : xs) = do st <- get if newlines st > 0 then renderList xs else do newline renderList xs renderList (NewLine : xs) = do newline renderList xs renderList (BreakingSpace : xs) = do let isBreakingSpace BreakingSpace = True isBreakingSpace _ = False let xs' = dropWhile isBreakingSpace xs let next = takeWhile (not . isBreakable) xs' st <- get let off = foldl' (\tot t -> tot + offsetOf t) 0 next case lineLength st of Just l | column st + 1 + off > l -> newline _ -> when (column st > 0) $ outp 1 " " renderList xs' renderList (AfterBreak t : xs) = do st <- get if newlines st > 0 then renderList (fromString (T.unpack t) : xs) else renderList xs renderList (b : xs) | isBlock b = do let (bs, rest) = span isBlock xs -- ensure we have right padding unless end of line let heightOf (Block _ ls) = length ls heightOf _ = 1 let maxheight = maximum $ map heightOf (b:bs) let toBlockSpec (Block w ls) = (w, ls) toBlockSpec (VFill w t) = (w, take maxheight $ repeat t) toBlockSpec _ = (0, []) let (_, lns') = foldl (mergeBlocks maxheight) (toBlockSpec b) (map toBlockSpec bs) st <- get let oldPref = prefix st case column st - realLength oldPref of n | n > 0 -> modify $ \s -> s{ prefix = oldPref <> T.replicate n " " } _ -> return () renderList $ intersperse CarriageReturn (map literal lns') modify $ \s -> s{ prefix = oldPref } renderList rest renderList (x:_) = error $ "renderList encountered " ++ show x isBreakable :: HasChars a => Doc a -> Bool isBreakable BreakingSpace = True isBreakable CarriageReturn = True isBreakable NewLine = True isBreakable (BlankLines _) = True isBreakable (Concat Empty y) = isBreakable y isBreakable (Concat x _) = isBreakable x isBreakable _ = False startsBlank' :: HasChars a => a -> Bool startsBlank' t = fromMaybe False $ foldlChar go Nothing t where go Nothing c = Just (isSpace c) go (Just b) _ = Just b startsBlank :: HasChars a => Doc a -> Bool startsBlank (Text _ t) = startsBlank' t startsBlank (Block n ls) = n > 0 && all startsBlank' ls startsBlank (VFill n t) = n > 0 && startsBlank' t startsBlank (BeforeNonBlank x) = startsBlank x startsBlank (Prefixed _ x) = startsBlank x startsBlank (Flush x) = startsBlank x startsBlank BreakingSpace = True startsBlank (AfterBreak t) = startsBlank (Text 0 t) startsBlank CarriageReturn = True startsBlank NewLine = True startsBlank (BlankLines _) = True startsBlank (Concat Empty y) = startsBlank y startsBlank (Concat x _) = startsBlank x startsBlank Empty = True isBlock :: Doc a -> Bool isBlock Block{} = True isBlock VFill{} = True isBlock _ = False offsetOf :: Doc a -> Int offsetOf (Text o _) = o offsetOf (Block w _) = w offsetOf (VFill w _) = w offsetOf BreakingSpace = 1 offsetOf _ = 0 -- | Create a 'Doc' from a stringlike value. literal :: HasChars a => a -> Doc a literal x = mconcat $ intersperse NewLine $ map (\s -> if isNull s then Empty else let !len = realLength s in Text len s) $ splitLines x {-# NOINLINE literal #-} -- | A literal string. (Like 'literal', but restricted to String.) text :: HasChars a => String -> Doc a text = literal . fromString -- | A character. char :: HasChars a => Char -> Doc a char c = text $ fromString [c] -- | A breaking (reflowable) space. space :: Doc a space = BreakingSpace -- | A carriage return. Does nothing if we're at the beginning of -- a line; otherwise inserts a newline. cr :: Doc a cr = CarriageReturn -- | Inserts a blank line unless one exists already. -- (@blankline <> blankline@ has the same effect as @blankline@. blankline :: Doc a blankline = BlankLines 1 -- | Inserts blank lines unless they exist already. -- (@blanklines m <> blanklines n@ has the same effect as @blanklines (max m n)@. blanklines :: Int -> Doc a blanklines = BlankLines -- | Uses the specified string as a prefix for every line of -- the inside document (except the first, if not at the beginning -- of the line). prefixed :: IsString a => String -> Doc a -> Doc a prefixed pref doc | isEmpty doc = Empty | otherwise = Prefixed (fromString pref) doc -- | Makes a 'Doc' flush against the left margin. flush :: Doc a -> Doc a flush doc | isEmpty doc = Empty | otherwise = Flush doc -- | Indents a 'Doc' by the specified number of spaces. nest :: IsString a => Int -> Doc a -> Doc a nest ind = prefixed (replicate ind ' ') -- | A hanging indent. @hang ind start doc@ prints @start@, -- then @doc@, leaving an indent of @ind@ spaces on every -- line but the first. hang :: IsString a => Int -> Doc a -> Doc a -> Doc a hang ind start doc = start <> nest ind doc -- | @beforeNonBlank d@ conditionally includes @d@ unless it is -- followed by blank space. beforeNonBlank :: Doc a -> Doc a beforeNonBlank = BeforeNonBlank -- | Makes a 'Doc' non-reflowable. nowrap :: IsString a => Doc a -> Doc a nowrap = mconcat . map replaceSpace . unfoldD where replaceSpace BreakingSpace = Text 1 $ fromString " " replaceSpace x = x -- | Content to print only if it comes at the beginning of a line, -- to be used e.g. for escaping line-initial `.` in roff man. afterBreak :: Text -> Doc a afterBreak = AfterBreak -- | Returns the width of a 'Doc'. offset :: (IsString a, HasChars a) => Doc a -> Int offset = uncurry max . getOffset (const False) (0, 0) -- | Returns the minimal width of a 'Doc' when reflowed at breakable spaces. minOffset :: HasChars a => Doc a -> Int minOffset = uncurry max . getOffset (> 0) (0,0) -- l = longest, c = current getOffset :: (IsString a, HasChars a) => (Int -> Bool) -> (Int, Int) -> Doc a -> (Int, Int) getOffset breakWhen (!l, !c) x = case x of Text n _ -> (l, c + n) Block n _ -> (l, c + n) VFill n _ -> (l, c + n) Empty -> (l, c) CarriageReturn -> (max l c, 0) NewLine -> (max l c, 0) BlankLines _ -> (max l c, 0) Prefixed t d -> let (l',c') = getOffset breakWhen (0, 0) d in (max l (l' + realLength t), c' + realLength t) BeforeNonBlank _ -> (l, c) Flush d -> getOffset breakWhen (l, c) d BreakingSpace | breakWhen c -> (max l c, 0) | otherwise -> (l, c + 1) AfterBreak t -> if c == 0 then (l, c + realLength t) else (l, c) Concat (Concat d y) z -> getOffset breakWhen (l, c) (Concat d (Concat y z)) Concat (BeforeNonBlank d) y -> if isNonBlank y then getOffset breakWhen (l, c) (Concat d y) else getOffset breakWhen (l, c) y Concat d y -> let (l', c') = getOffset breakWhen (l, c) d in getOffset breakWhen (l', c') y isNonBlank :: Doc a -> Bool isNonBlank (Text _ _) = True isNonBlank (BeforeNonBlank d) = isNonBlank d isNonBlank (Flush d) = isNonBlank d isNonBlank (Concat d _) = isNonBlank d isNonBlank _ = False -- | Returns the column that would be occupied by the last -- laid out character (assuming no wrapping). updateColumn :: HasChars a => Doc a -> Int -> Int updateColumn d k = snd . getOffset (const False) (0,k) $ d -- | @lblock n d@ is a block of width @n@ characters, with -- text derived from @d@ and aligned to the left. lblock :: HasChars a => Int -> Doc a -> Doc a lblock = block id -- | Like 'lblock' but aligned to the right. rblock :: HasChars a => Int -> Doc a -> Doc a rblock w = block (\s -> replicateChar (w - realLength s) ' ' <> s) w -- | Like 'lblock' but aligned centered. cblock :: HasChars a => Int -> Doc a -> Doc a cblock w = block (\s -> replicateChar ((w - realLength s) `div` 2) ' ' <> s) w -- | Returns the height of a block or other 'Doc'. height :: HasChars a => Doc a -> Int height = length . splitLines . render Nothing block :: HasChars a => (a -> a) -> Int -> Doc a -> Doc a block filler width d | width < 1 && not (isEmpty d) = block filler 1 d | otherwise = Block width ls where ls = map filler $ chop width $ render (Just width) d -- | An expandable border that, when placed next to a box, -- expands to the height of the box. Strings cycle through the -- list provided. vfill :: HasChars a => a -> Doc a vfill t = VFill (realLength t) t chop :: HasChars a => Int -> a -> [a] chop n = concatMap chopLine . removeFinalEmpty . map addRealLength . splitLines where removeFinalEmpty xs = case lastMay xs of Just (0, _) -> initSafe xs _ -> xs addRealLength l = (realLength l, l) chopLine (len, l) | len <= n = [l] | otherwise = map snd $ foldrChar (\c ls -> let clen = charWidth c cs = replicateChar 1 c in case ls of (len', l'):rest | len' + clen > n -> (clen, cs):(len', l'):rest | otherwise -> (len' + clen, cs <> l'):rest [] -> [(clen, cs)]) [] l -- | Encloses a 'Doc' inside a start and end 'Doc'. inside :: Doc a -> Doc a -> Doc a -> Doc a inside start end contents = start <> contents <> end -- | Puts a 'Doc' in curly braces. braces :: HasChars a => Doc a -> Doc a braces = inside (char '{') (char '}') -- | Puts a 'Doc' in square brackets. brackets :: HasChars a => Doc a -> Doc a brackets = inside (char '[') (char ']') -- | Puts a 'Doc' in parentheses. parens :: HasChars a => Doc a -> Doc a parens = inside (char '(') (char ')') -- | Wraps a 'Doc' in single quotes. quotes :: HasChars a => Doc a -> Doc a quotes = inside (char '\'') (char '\'') -- | Wraps a 'Doc' in double quotes. doubleQuotes :: HasChars a => Doc a -> Doc a doubleQuotes = inside (char '"') (char '"') -- | Returns width of a character in a monospace font: 0 for a combining -- character, 1 for a regular character, 2 for an East Asian wide character. -- Ambiguous characters are treated as width 1. charWidth :: Char -> Int charWidth = extractLength . updateMatchStateNarrow (MatchState False 0 ' ' 0) -- | Get real length of string, taking into account combining and double-wide -- characters. Ambiguous characters are treated as width 1. realLength :: HasChars a => a -> Int realLength = realLengthNarrowContext -- | Get the real length of a string, taking into account combining and -- double-wide characters. Ambiguous characters are treated as width 1. realLengthNarrowContext :: HasChars a => a -> Int realLengthNarrowContext = realLengthWith updateMatchStateNarrow -- | Get the real length of a string, taking into account combining and -- double-wide characters. Ambiguous characters are treated as width 2. realLengthWideContext :: HasChars a => a -> Int realLengthWideContext = realLengthWith updateMatchStateWide -- | Like 'realLengthNarrowContext', but avoids optimizations (shortcuts). -- This is exposed for testing, to ensure that the optimizations are safe. realLengthNarrowContextNoShortcut :: HasChars a => a -> Int realLengthNarrowContextNoShortcut = realLengthWith updateMatchStateNoShortcut -- | Like 'realLengthWideContext', but avoids optimizations (shortcuts). -- This is exposed for testing, to ensure that the optimizations are safe. realLengthWideContextNoShortcut :: HasChars a => a -> Int realLengthWideContextNoShortcut = realLengthWith updateMatchStateNoShortcutWide -- | Get real length of string, taking into account combining and double-wide -- characters, using the given accumulator. This is exposed for testing. realLengthWith :: HasChars a => (MatchState -> Char -> MatchState) -> a -> Int realLengthWith f = extractLength . foldlChar f (MatchState True 0 ' ' 0) -- | Update a 'MatchState' by processing a character. -- For efficiency, we isolate commonly used portions of the basic -- multilingual plane that do not have emoji in them. -- This works in a narrow context. updateMatchStateNarrow :: MatchState -> Char -> MatchState updateMatchStateNarrow (MatchState firstChar tot _ tentative) !c -- Control characters have width 0: friends don't let friends use tabs | c <= '\x001F' = controlState -- ASCII | c <= '\x007E' = narrowState -- More control characters | c <= '\x009F' = controlState -- Extended Latin: Latin 1-supplement, Extended-A, Extended-B, IPA Extensions. -- This block is full of ambiguous characters, so these shortcuts will not -- work in a wide context. | c == '\x00AD' = controlState -- Soft hyphen | c <= '\x02FF' = narrowState -- Combining diacritical marks used in Latin and other scripts | c <= '\x036F' = combiningState -- Han ideographs | c >= '\x3250' && c <= '\xA4CF' = if | c <= '\x4DBF' -> wideState -- Han ideographs | c <= '\x4DFF' -> narrowState -- Hexagrams | otherwise -> wideState -- More Han ideographs -- Arabic | c >= '\x0600' && c <= '\x06FF' = if | c <= '\x0605' -> controlState -- Number marks | c <= '\x060F' -> narrowState -- Punctuation and marks | c <= '\x061A' -> combiningState -- Combining marks | c == '\x061B' -> narrowState -- Arabic semicolon | c <= '\x061C' -> controlState -- Letter mark | c <= '\x064A' -> narrowState -- Main Arabic abjad | c <= '\x065F' -> combiningState -- Arabic vowel markers | c == '\x0670' -> combiningState -- Superscript alef | c <= '\x06D5' -> narrowState -- Arabic digits and letters used in other languages | c <= '\x06DC' -> combiningState -- Small high ligatures | c == '\x06DD' -> controlState -- End of ayah | c == '\x06DE' -> narrowState -- Start of rub el hizb | c <= '\x06E4' -> combiningState -- More small high ligatures | c <= '\x06E6' -> narrowState -- Small vowels | c == '\x06E9' -> narrowState -- Place of sajdah | c <= '\x06ED' -> combiningState -- More combining | otherwise -> narrowState -- All the rest -- Devanagari | c >= '\x0900' && c <= '\x097F' = if | c <= '\x0902' -> combiningState -- Combining characters | c <= '\x0939' -> narrowState -- Main Devanagari abugida | c == '\x093A' -> combiningState | c == '\x093C' -> combiningState | c <= '\x0940' -> narrowState -- Main Devanagari abugida | c <= '\x0948' -> combiningState -- Combining characters | c == '\x094D' -> combiningState -- Combining characters | c <= '\x0950' -> narrowState -- Devanagari om | c <= '\x0957' -> combiningState -- Combining characters | c == '\x0962' -> combiningState -- Combining character | c == '\x0963' -> combiningState -- Combining character | otherwise -> narrowState -- Devanagari digits and up to beginning of Bengali block -- Bengali (plus a couple Gurmukhi characters) | c >= '\x0980' && c <= '\x0A02' = if | c == '\x0981' -> combiningState -- Combining signs | c == '\x09BC' -> combiningState -- Combining signs | c <= '\x09C0' -> narrowState -- Main Bengali abugida | c <= '\x09C4' -> combiningState -- Combining signs | c == '\x09CD' -> combiningState -- Combining signs | c <= '\x09E1' -> narrowState -- Bengali | c <= '\x09E3' -> combiningState -- Combining marks | c == '\x09E2' -> combiningState -- Bengali vocalic vowel signs | c == '\x09E3' -> combiningState -- Bengali vocalic vowel signs | c <= '\x09FD' -> narrowState -- Bengali digits and other symbols | otherwise -> combiningState -- Bengali sandhi mark, plus a few symbols from Gurmukhi -- Cyrillic (plus Greek and Armenian for free) -- This block has many ambiguous characters, and so cannot be used in wide contexts | c >= '\x0370' && c <= '\x058F' = if | c <= '\x0482' -> narrowState -- Main Greek and Cyrillic block | c <= '\x0489' -> combiningState -- Cyrillic combining characters | otherwise -> narrowState -- Extra Cyrillic characters used in Ukrainian and others, plus Armenian -- Japanese | c >= '\x2E80' && c <= '\x324F' = if | c <= '\x3029' -> wideState -- Punctuation and others | c <= '\x302D' -> combiningState -- Tone marks | c == '\x303F' -> narrowState -- Half-fill space | c <= '\x3096' -> wideState -- Hiragana and others | c <= '\x309A' -> combiningState -- Hiragana voiced marks | c <= '\x3247' -> wideState -- Katakana plus compatibility Jamo for Korean | otherwise -> ambiguousState -- Circled numbers -- Korean | c >= '\xAC00' && c <= '\xD7A3' = wideState -- Precomposed Hangul -- Telugu (plus one character of Kannada) | c >= '\x0C00' && c <= '\x0C80' = if | c == '\x0C00' -> combiningState -- Combining characters | c == '\x0C04' -> combiningState -- Combining characters | c <= '\x0C39' -> narrowState -- Main Telugu abugida | c == '\x0C3D' -> narrowState -- Telugu avagraha | c <= '\x0C40' -> combiningState -- Vowel markers | c <= '\x0C44' -> narrowState -- Vowel markers | c <= '\x0C56' -> combiningState -- Vowel markers | c == '\x0C62' -> combiningState -- Combining character | c == '\x0C63' -> combiningState -- Combining character | otherwise -> narrowState -- Telugu digits -- Tamil | c >= '\x0B80' && c <= '\x0BFF' = if | c <= '\x0B82' -> combiningState -- Combining characters | c == '\x0BC0' -> combiningState -- Combining characters | c == '\x0BCD' -> combiningState -- Vowel markers | c <= '\x0BCC' -> narrowState -- Main Tamil abugida | otherwise -> narrowState -- Tamil digits and others where narrowState = MatchState False (tot + tentative) c 1 wideState = MatchState False (tot + tentative) c 2 combiningState = let w = if firstChar then 1 else 0 in MatchState False (tot + tentative) c w controlState = MatchState False (tot + tentative) c 0 ambiguousState = MatchState False (tot + tentative) c 1 updateMatchStateNarrow s c = updateMatchStateNoShortcut s c -- | Update a 'MatchState' by processing a character. -- For efficiency, we isolate commonly used portions of the basic -- multilingual plane that do not have emoji in them. -- This works in a wide context. updateMatchStateWide :: MatchState -> Char -> MatchState updateMatchStateWide (MatchState firstChar tot _ tentative) !c -- Control characters have width 0: friends don't let friends use tabs | c <= '\x001F' = controlState -- ASCII | c <= '\x007E' = narrowState -- Han ideographs | c >= '\x3250' && c <= '\xA4CF' = if | c <= '\x4DBF' -> wideState -- Han ideographs | c <= '\x4DFF' -> narrowState -- Hexagrams | otherwise -> wideState -- More Han ideographs -- Japanese | c >= '\x2E80' && c <= '\x324F' = if | c <= '\x3029' -> wideState -- Punctuation and others | c <= '\x302D' -> combiningState -- Tone marks | c == '\x303F' -> narrowState -- Half-fill space | c <= '\x3096' -> wideState -- Hiragana and others | c <= '\x309A' -> combiningState -- Hiragana voiced marks | c <= '\x3247' -> wideState -- Katakana plus compatibility Jamo for Korean | otherwise -> ambiguousState -- Circled numbers -- Korean | c >= '\xAC00' && c <= '\xD7A3' = wideState -- Precomposed Hangul -- Combining diacritical marks used in Latin and other scripts | c >= '\x0300' && c <= '\x036F' = combiningState -- Arabic | c >= '\x0600' && c <= '\x06FF' = if | c <= '\x0605' -> controlState -- Number marks | c <= '\x060F' -> narrowState -- Punctuation and marks | c <= '\x061A' -> combiningState -- Combining marks | c == '\x061B' -> narrowState -- Arabic semicolon | c <= '\x061C' -> controlState -- Letter mark | c <= '\x064A' -> narrowState -- Main Arabic abjad | c <= '\x065F' -> combiningState -- Arabic vowel markers | c == '\x0670' -> combiningState -- Superscript alef | c <= '\x06D5' -> narrowState -- Arabic digits and letters used in other languages | c <= '\x06DC' -> combiningState -- Small high ligatures | c == '\x06DD' -> controlState -- End of ayah | c == '\x06DE' -> narrowState -- Start of rub el hizb | c <= '\x06E4' -> combiningState -- More small high ligatures | c <= '\x06E6' -> narrowState -- Small vowels | c == '\x06E9' -> narrowState -- Place of sajdah | c <= '\x06ED' -> combiningState -- More combining | otherwise -> narrowState -- All the rest -- Devanagari | c >= '\x0900' && c <= '\x097F' = if | c <= '\x0902' -> combiningState -- Combining characters | c <= '\x0939' -> narrowState -- Main Devanagari abugida | c == '\x093A' -> combiningState | c == '\x093C' -> combiningState | c <= '\x0940' -> narrowState -- Main Devanagari abugida | c <= '\x0948' -> combiningState -- Combining characters | c == '\x094D' -> combiningState -- Combining characters | c <= '\x0950' -> narrowState -- Devanagari om | c <= '\x0957' -> combiningState -- Combining characters | c == '\x0962' -> combiningState -- Combining character | c == '\x0963' -> combiningState -- Combining character | otherwise -> narrowState -- Devanagari digits and up to beginning of Bengali block -- Bengali (plus a couple Gurmukhi characters) | c >= '\x0980' && c <= '\x0A02' = if | c == '\x0981' -> combiningState -- Combining signs | c == '\x09BC' -> combiningState -- Combining signs | c <= '\x09C0' -> narrowState -- Main Bengali abugida | c <= '\x09C4' -> combiningState -- Combining signs | c == '\x09CD' -> combiningState -- Combining signs | c <= '\x09E1' -> narrowState -- Bengali | c <= '\x09E3' -> combiningState -- Combining marks | c == '\x09E2' -> combiningState -- Bengali vocalic vowel signs | c == '\x09E3' -> combiningState -- Bengali vocalic vowel signs | c <= '\x09FD' -> narrowState -- Bengali digits and other symbols | otherwise -> combiningState -- Bengali sandhi mark, plus a few symbols from Gurmukhi -- Telugu (plus one character of Kannada) | c >= '\x0C00' && c <= '\x0C80' = if | c == '\x0C00' -> combiningState -- Combining characters | c == '\x0C04' -> combiningState -- Combining characters | c <= '\x0C39' -> narrowState -- Main Telugu abugida | c == '\x0C3D' -> narrowState -- Telugu avagraha | c <= '\x0C40' -> combiningState -- Vowel markers | c <= '\x0C44' -> narrowState -- Vowel markers | c <= '\x0C56' -> combiningState -- Vowel markers | c == '\x0C62' -> combiningState -- Combining character | c == '\x0C63' -> combiningState -- Combining character | otherwise -> narrowState -- Telugu digits -- Tamil | c >= '\x0B80' && c <= '\x0BFF' = if | c <= '\x0B82' -> combiningState -- Combining characters | c == '\x0BC0' -> combiningState -- Combining characters | c == '\x0BCD' -> combiningState -- Vowel markers | c <= '\x0BCC' -> narrowState -- Main Tamil abugida | otherwise -> narrowState -- Tamil digits and others where narrowState = MatchState False (tot + tentative) c 1 wideState = MatchState False (tot + tentative) c 2 combiningState = let w = if firstChar then 1 else 0 in MatchState False (tot + tentative) c w controlState = MatchState False (tot + tentative) c 0 ambiguousState = MatchState False (tot + tentative) c 2 updateMatchStateWide s c = updateMatchStateNoShortcutWide s c -- | Update a 'MatchState' by processing a character, without taking any -- shortcuts. This should give the same answer as 'updateMatchStateNarrow', but will -- be slower. It is here to test that the shortcuts are implemented correctly. updateMatchStateNoShortcut :: MatchState -> Char -> MatchState updateMatchStateNoShortcut match c = resolveWidth match c $ unicodeWidth (unicodeRangeMap Narrow) c -- | Update a 'MatchState' by processing a character, without taking any -- shortcuts. This should give the same answer as 'updateMatchStateWide', but will -- be slower. It is here to test that the shortcuts are implemented correctly. updateMatchStateNoShortcutWide :: MatchState -> Char -> MatchState updateMatchStateNoShortcutWide match c = resolveWidth match c $ unicodeWidth (unicodeRangeMap Wide) c -- | Update a match state given a character and its class resolveWidth :: MatchState -> Char -> UnicodeWidth -> MatchState resolveWidth (MatchState firstChar tot lastChar tentative) !c = \case Narrow -> narrowState Wide -> wideState Combining -> combiningState Control -> controlState Ambiguous -> ambiguousState -- Zero width joiners will join two emoji together, so let's discard the -- state and parse the next emoji ZWJ | isLastCharEmojiLike -> MatchState False (tot - 2) c 2 ZWJ -> controlState -- Variation modifiers modify the emoji up to this point, so can be -- discarded. However, they always make it width 2, so we set the tentative -- width to 2. EmojiPresentationMod | Just (EmojiInfo True _) <- lastCharEmoji -> MatchState False tot c 2 EmojiPresentationMod -> controlState -- Skin tone modifiers make it width 2, but if they are not in a valid -- position they end the emoji and take up another width 2. EmojiSkinToneMod | Just (EmojiInfo _ True) <- lastCharEmoji -> MatchState False tot c 2 EmojiSkinToneMod -> wideState where narrowState = MatchState False (tot + tentative) c 1 wideState = MatchState False (tot + tentative) c 2 combiningState = let w = if firstChar then 1 else 0 in MatchState False (tot + tentative) c w controlState = MatchState False (tot + tentative) c 0 ambiguousState = MatchState False (tot + tentative) c 1 -- Should be handled already, but treat it as 1 lastCharEmoji = IM.lookup (ord lastChar) emojiMap isLastCharEmojiLike = isJust lastCharEmoji || lastChar == '\xFE0F' || isSkinToneModifier lastChar -- | Keeps track of state in length calculations, determining whether we're at -- the first character, the width so far, possibly a tentative width for this -- group, and the Map for possible emoji continuations. data MatchState = MatchState { matchIsFirst :: !Bool , matchTotal :: !Int , matchLastChar :: !Char , matchTentative :: !Int } deriving (Show) -- | Get the final width from a 'MatchState'. extractLength :: MatchState -> Int extractLength (MatchState _ tot _ tentative) = tot + tentative -- | The unicode width of a given character. data UnicodeWidth = Narrow | Wide | Combining | Control | Ambiguous | ZWJ | EmojiPresentationMod | EmojiSkinToneMod deriving (Show, Eq) -- | Checks whether a character is a skin tone modifier. isSkinToneModifier :: Char -> Bool isSkinToneModifier c = c >= '\x1F3FB' && c <= '\x1F3FF' -- | Checks whether a character is an emoji variation modifier. isEmojiVariation :: Char -> Bool isEmojiVariation c = c >= '\xFE0E' && c <= '\xFE0F' -- | Checks whether a character is a zero-width joiner. isZWJ :: Char -> Bool isZWJ c = c == '\x200D' data EmojiInfo = EmojiInfo { acceptsVariation :: !Bool , acceptsSkinTones :: !Bool } deriving (Eq, Show) instance Semigroup EmojiInfo where EmojiInfo v1 s1 <> EmojiInfo v2 s2 = EmojiInfo (v1 || v2) (s1 || s2) -- | Check a character to see how it modifies emoji. variationState :: Char -> EmojiInfo variationState y = EmojiInfo (isEmojiVariation y) (isSkinToneModifier y) -- | A map of all emoji start characters and the modifiers they take. emojiMap :: IM.IntMap EmojiInfo emojiMap = foldl' (flip addEmoji) mempty $ mapMaybe T.uncons baseEmojis where addEmoji (x, xs) = IM.insertWith (<>) (ord x) (emojiInfo xs) emojiInfo = maybe (EmojiInfo False False) (variationState . fst) . T.uncons -- | Denotes the contiguous ranges of Unicode characters which have a given -- width: 1 for a regular character, 2 for an East Asian wide character. -- Ambiguous characters are resolved in the specified way. unicodeRangeMap :: UnicodeWidth -> UnicodeMap unicodeRangeMap ambiguous = repack . addEmojiClasses . M.fromList . mergeRanges $ map (second resolve) unicodeSpec where resolve Ambiguous = ambiguous resolve x = x -- | Add zero-width joiner and emoji modifiers to a Map. addEmojiClasses :: M.Map Char UnicodeWidth -> M.Map Char UnicodeWidth addEmojiClasses = addAndRestoreBoundary '\x200D' '\x200D' ZWJ . addAndRestoreBoundary '\xFE0F' '\xFE0F' EmojiPresentationMod . addAndRestoreBoundary '\x1F3FB' '\x1F3FF' EmojiSkinToneMod where addAndRestoreBoundary k1 k2 v m = insertAfter $ M.insert k1 v m where insertAfter = case M.lookupLE k1 m of Just (_, prev) -> M.insertWith (\_ old -> old) (succ k2) prev Nothing -> id -- | Collapse unicode character ranges if the general category doesn't make a -- difference for width. mergeRanges :: Eq b => [(a, b)] -> [(a, b)] mergeRanges [] = [] mergeRanges [x] = [x] mergeRanges (x@(_,xw):y@(_,yw):xs) | xw == yw = mergeRanges (x:xs) | otherwise = x : mergeRanges (y:xs) data UnicodeMap = Bin {-# UNPACK #-} !Char !UnicodeWidth !UnicodeMap !UnicodeMap | Tip -- | Find the width of a unicode character unicodeWidth :: UnicodeMap -> Char -> UnicodeWidth unicodeWidth = goNothing where goNothing Tip !_ = Control goNothing (Bin kx x l r) k = case compare k kx of LT -> goNothing l k EQ -> x GT -> goJust r k kx x goJust Tip !_ !_ x' = x' goJust (Bin kx x l r) k kx' x' = case compare k kx of LT -> goJust l k kx' x' EQ -> x GT -> goJust r k kx x {-# INLINABLE unicodeWidth #-} -- | Convert a Map to a UnicodeMap for faster code. repack :: M.Map Char UnicodeWidth -> UnicodeMap repack MInt.Tip = Tip repack (MInt.Bin _ k v l r) = Bin k v (repack l) (repack r) -- | A list of Unicode ranges and the width assigned to them unicodeSpec :: [(Char, UnicodeWidth)] #include "unicodeWidth.inc" doclayout-0.4.0.1/src/Text/unicodeWidth.inc0000644000000000000000000007171007346545000016710 0ustar0000000000000000unicodeSpec = [ ( '\NUL' , Control ) , ( ' ' , Narrow ) , ( '\DEL' , Control ) , ( '\160' , Narrow ) , ( '\161' , Ambiguous ) , ( '\162' , Narrow ) , ( '\164' , Ambiguous ) , ( '\165' , Narrow ) , ( '\167' , Ambiguous ) , ( '\169' , Narrow ) , ( '\170' , Ambiguous ) , ( '\171' , Narrow ) , ( '\173' , Control ) , ( '\174' , Ambiguous ) , ( '\175' , Narrow ) , ( '\176' , Ambiguous ) , ( '\181' , Narrow ) , ( '\182' , Ambiguous ) , ( '\187' , Narrow ) , ( '\188' , Ambiguous ) , ( '\192' , Narrow ) , ( '\198' , Ambiguous ) , ( '\199' , Narrow ) , ( '\208' , Ambiguous ) , ( '\209' , Narrow ) , ( '\215' , Ambiguous ) , ( '\217' , Narrow ) , ( '\222' , Ambiguous ) , ( '\226' , Narrow ) , ( '\230' , Ambiguous ) , ( '\231' , Narrow ) , ( '\232' , Ambiguous ) , ( '\235' , Narrow ) , ( '\236' , Ambiguous ) , ( '\238' , Narrow ) , ( '\240' , Ambiguous ) , ( '\241' , Narrow ) , ( '\242' , Ambiguous ) , ( '\244' , Narrow ) , ( '\247' , Ambiguous ) , ( '\251' , Narrow ) , ( '\252' , Ambiguous ) , ( '\253' , Narrow ) , ( '\254' , Ambiguous ) , ( '\255' , Narrow ) , ( '\257' , Ambiguous ) , ( '\258' , Narrow ) , ( '\273' , Ambiguous ) , ( '\274' , Narrow ) , ( '\275' , Ambiguous ) , ( '\276' , Narrow ) , ( '\283' , Ambiguous ) , ( '\284' , Narrow ) , ( '\294' , Ambiguous ) , ( '\296' , Narrow ) , ( '\299' , Ambiguous ) , ( '\300' , Narrow ) , ( '\305' , Ambiguous ) , ( '\308' , Narrow ) , ( '\312' , Ambiguous ) , ( '\313' , Narrow ) , ( '\319' , Ambiguous ) , ( '\323' , Narrow ) , ( '\324' , Ambiguous ) , ( '\325' , Narrow ) , ( '\328' , Ambiguous ) , ( '\332' , Narrow ) , ( '\333' , Ambiguous ) , ( '\334' , Narrow ) , ( '\338' , Ambiguous ) , ( '\340' , Narrow ) , ( '\358' , Ambiguous ) , ( '\360' , Narrow ) , ( '\363' , Ambiguous ) , ( '\364' , Narrow ) , ( '\462' , Ambiguous ) , ( '\463' , Narrow ) , ( '\464' , Ambiguous ) , ( '\465' , Narrow ) , ( '\466' , Ambiguous ) , ( '\467' , Narrow ) , ( '\468' , Ambiguous ) , ( '\469' , Narrow ) , ( '\470' , Ambiguous ) , ( '\471' , Narrow ) , ( '\472' , Ambiguous ) , ( '\473' , Narrow ) , ( '\474' , Ambiguous ) , ( '\475' , Narrow ) , ( '\476' , Ambiguous ) , ( '\477' , Narrow ) , ( '\593' , Ambiguous ) , ( '\594' , Narrow ) , ( '\609' , Ambiguous ) , ( '\610' , Narrow ) , ( '\708' , Ambiguous ) , ( '\709' , Narrow ) , ( '\711' , Ambiguous ) , ( '\712' , Narrow ) , ( '\713' , Ambiguous ) , ( '\716' , Narrow ) , ( '\717' , Ambiguous ) , ( '\718' , Narrow ) , ( '\720' , Ambiguous ) , ( '\721' , Narrow ) , ( '\728' , Ambiguous ) , ( '\732' , Narrow ) , ( '\733' , Ambiguous ) , ( '\734' , Narrow ) , ( '\735' , Ambiguous ) , ( '\736' , Narrow ) , ( '\768' , Combining ) , ( '\880' , Narrow ) , ( '\913' , Ambiguous ) , ( '\938' , Narrow ) , ( '\945' , Ambiguous ) , ( '\962' , Narrow ) , ( '\963' , Ambiguous ) , ( '\970' , Narrow ) , ( '\1025' , Ambiguous ) , ( '\1026' , Narrow ) , ( '\1040' , Ambiguous ) , ( '\1104' , Narrow ) , ( '\1105' , Ambiguous ) , ( '\1106' , Narrow ) , ( '\1155' , Combining ) , ( '\1162' , Narrow ) , ( '\1425' , Combining ) , ( '\1470' , Narrow ) , ( '\1471' , Combining ) , ( '\1472' , Narrow ) , ( '\1473' , Combining ) , ( '\1475' , Narrow ) , ( '\1476' , Combining ) , ( '\1478' , Narrow ) , ( '\1479' , Combining ) , ( '\1488' , Narrow ) , ( '\1536' , Control ) , ( '\1542' , Narrow ) , ( '\1552' , Combining ) , ( '\1563' , Narrow ) , ( '\1564' , Control ) , ( '\1565' , Narrow ) , ( '\1611' , Combining ) , ( '\1632' , Narrow ) , ( '\1648' , Combining ) , ( '\1649' , Narrow ) , ( '\1750' , Combining ) , ( '\1757' , Control ) , ( '\1758' , Narrow ) , ( '\1759' , Combining ) , ( '\1765' , Narrow ) , ( '\1767' , Combining ) , ( '\1769' , Narrow ) , ( '\1770' , Combining ) , ( '\1774' , Narrow ) , ( '\1807' , Control ) , ( '\1808' , Narrow ) , ( '\1809' , Combining ) , ( '\1810' , Narrow ) , ( '\1840' , Combining ) , ( '\1869' , Narrow ) , ( '\1958' , Combining ) , ( '\1969' , Narrow ) , ( '\2027' , Combining ) , ( '\2036' , Narrow ) , ( '\2045' , Combining ) , ( '\2046' , Narrow ) , ( '\2070' , Combining ) , ( '\2074' , Narrow ) , ( '\2075' , Combining ) , ( '\2084' , Narrow ) , ( '\2085' , Combining ) , ( '\2088' , Narrow ) , ( '\2089' , Combining ) , ( '\2096' , Narrow ) , ( '\2137' , Combining ) , ( '\2142' , Narrow ) , ( '\2192' , Control ) , ( '\2200' , Combining ) , ( '\2208' , Narrow ) , ( '\2250' , Combining ) , ( '\2274' , Control ) , ( '\2275' , Combining ) , ( '\2307' , Narrow ) , ( '\2362' , Combining ) , ( '\2363' , Narrow ) , ( '\2364' , Combining ) , ( '\2365' , Narrow ) , ( '\2369' , Combining ) , ( '\2377' , Narrow ) , ( '\2381' , Combining ) , ( '\2382' , Narrow ) , ( '\2385' , Combining ) , ( '\2392' , Narrow ) , ( '\2402' , Combining ) , ( '\2404' , Narrow ) , ( '\2433' , Combining ) , ( '\2434' , Narrow ) , ( '\2492' , Combining ) , ( '\2493' , Narrow ) , ( '\2497' , Combining ) , ( '\2503' , Narrow ) , ( '\2509' , Combining ) , ( '\2510' , Narrow ) , ( '\2530' , Combining ) , ( '\2534' , Narrow ) , ( '\2558' , Combining ) , ( '\2563' , Narrow ) , ( '\2620' , Combining ) , ( '\2622' , Narrow ) , ( '\2625' , Combining ) , ( '\2649' , Narrow ) , ( '\2672' , Combining ) , ( '\2674' , Narrow ) , ( '\2677' , Combining ) , ( '\2678' , Narrow ) , ( '\2689' , Combining ) , ( '\2691' , Narrow ) , ( '\2748' , Combining ) , ( '\2749' , Narrow ) , ( '\2753' , Combining ) , ( '\2761' , Narrow ) , ( '\2765' , Combining ) , ( '\2768' , Narrow ) , ( '\2786' , Combining ) , ( '\2790' , Narrow ) , ( '\2810' , Combining ) , ( '\2818' , Narrow ) , ( '\2876' , Combining ) , ( '\2877' , Narrow ) , ( '\2879' , Combining ) , ( '\2880' , Narrow ) , ( '\2881' , Combining ) , ( '\2887' , Narrow ) , ( '\2893' , Combining ) , ( '\2903' , Narrow ) , ( '\2914' , Combining ) , ( '\2918' , Narrow ) , ( '\2946' , Combining ) , ( '\2947' , Narrow ) , ( '\3008' , Combining ) , ( '\3009' , Narrow ) , ( '\3021' , Combining ) , ( '\3024' , Narrow ) , ( '\3072' , Combining ) , ( '\3073' , Narrow ) , ( '\3076' , Combining ) , ( '\3077' , Narrow ) , ( '\3132' , Combining ) , ( '\3133' , Narrow ) , ( '\3134' , Combining ) , ( '\3137' , Narrow ) , ( '\3142' , Combining ) , ( '\3160' , Narrow ) , ( '\3170' , Combining ) , ( '\3174' , Narrow ) , ( '\3201' , Combining ) , ( '\3202' , Narrow ) , ( '\3260' , Combining ) , ( '\3261' , Narrow ) , ( '\3263' , Combining ) , ( '\3264' , Narrow ) , ( '\3270' , Combining ) , ( '\3271' , Narrow ) , ( '\3276' , Combining ) , ( '\3285' , Narrow ) , ( '\3298' , Combining ) , ( '\3302' , Narrow ) , ( '\3328' , Combining ) , ( '\3330' , Narrow ) , ( '\3387' , Combining ) , ( '\3389' , Narrow ) , ( '\3393' , Combining ) , ( '\3398' , Narrow ) , ( '\3405' , Combining ) , ( '\3406' , Narrow ) , ( '\3426' , Combining ) , ( '\3430' , Narrow ) , ( '\3457' , Combining ) , ( '\3458' , Narrow ) , ( '\3530' , Combining ) , ( '\3535' , Narrow ) , ( '\3538' , Combining ) , ( '\3544' , Narrow ) , ( '\3633' , Combining ) , ( '\3634' , Narrow ) , ( '\3636' , Combining ) , ( '\3647' , Narrow ) , ( '\3655' , Combining ) , ( '\3663' , Narrow ) , ( '\3761' , Combining ) , ( '\3762' , Narrow ) , ( '\3764' , Combining ) , ( '\3773' , Narrow ) , ( '\3784' , Combining ) , ( '\3792' , Narrow ) , ( '\3864' , Combining ) , ( '\3866' , Narrow ) , ( '\3893' , Combining ) , ( '\3894' , Narrow ) , ( '\3895' , Combining ) , ( '\3896' , Narrow ) , ( '\3897' , Combining ) , ( '\3898' , Narrow ) , ( '\3953' , Combining ) , ( '\3967' , Narrow ) , ( '\3968' , Combining ) , ( '\3973' , Narrow ) , ( '\3974' , Combining ) , ( '\3976' , Narrow ) , ( '\3981' , Combining ) , ( '\4030' , Narrow ) , ( '\4038' , Combining ) , ( '\4039' , Narrow ) , ( '\4141' , Combining ) , ( '\4145' , Narrow ) , ( '\4146' , Combining ) , ( '\4152' , Narrow ) , ( '\4153' , Combining ) , ( '\4155' , Narrow ) , ( '\4157' , Combining ) , ( '\4159' , Narrow ) , ( '\4184' , Combining ) , ( '\4186' , Narrow ) , ( '\4190' , Combining ) , ( '\4193' , Narrow ) , ( '\4209' , Combining ) , ( '\4213' , Narrow ) , ( '\4226' , Combining ) , ( '\4227' , Narrow ) , ( '\4229' , Combining ) , ( '\4231' , Narrow ) , ( '\4237' , Combining ) , ( '\4238' , Narrow ) , ( '\4253' , Combining ) , ( '\4254' , Narrow ) , ( '\4352' , Wide ) , ( '\4448' , Narrow ) , ( '\4957' , Combining ) , ( '\4960' , Narrow ) , ( '\5906' , Combining ) , ( '\5909' , Narrow ) , ( '\5938' , Combining ) , ( '\5940' , Narrow ) , ( '\5970' , Combining ) , ( '\5984' , Narrow ) , ( '\6002' , Combining ) , ( '\6016' , Narrow ) , ( '\6068' , Combining ) , ( '\6070' , Narrow ) , ( '\6071' , Combining ) , ( '\6078' , Narrow ) , ( '\6086' , Combining ) , ( '\6087' , Narrow ) , ( '\6089' , Combining ) , ( '\6100' , Narrow ) , ( '\6109' , Combining ) , ( '\6112' , Narrow ) , ( '\6155' , Combining ) , ( '\6158' , Control ) , ( '\6159' , Combining ) , ( '\6160' , Narrow ) , ( '\6277' , Combining ) , ( '\6279' , Narrow ) , ( '\6313' , Combining ) , ( '\6314' , Narrow ) , ( '\6432' , Combining ) , ( '\6435' , Narrow ) , ( '\6439' , Combining ) , ( '\6441' , Narrow ) , ( '\6450' , Combining ) , ( '\6451' , Narrow ) , ( '\6457' , Combining ) , ( '\6464' , Narrow ) , ( '\6679' , Combining ) , ( '\6681' , Narrow ) , ( '\6683' , Combining ) , ( '\6686' , Narrow ) , ( '\6742' , Combining ) , ( '\6743' , Narrow ) , ( '\6744' , Combining ) , ( '\6753' , Narrow ) , ( '\6754' , Combining ) , ( '\6755' , Narrow ) , ( '\6757' , Combining ) , ( '\6765' , Narrow ) , ( '\6771' , Combining ) , ( '\6784' , Narrow ) , ( '\6832' , Combining ) , ( '\6916' , Narrow ) , ( '\6964' , Combining ) , ( '\6965' , Narrow ) , ( '\6966' , Combining ) , ( '\6971' , Narrow ) , ( '\6972' , Combining ) , ( '\6973' , Narrow ) , ( '\6978' , Combining ) , ( '\6979' , Narrow ) , ( '\7019' , Combining ) , ( '\7028' , Narrow ) , ( '\7040' , Combining ) , ( '\7042' , Narrow ) , ( '\7074' , Combining ) , ( '\7078' , Narrow ) , ( '\7080' , Combining ) , ( '\7082' , Narrow ) , ( '\7083' , Combining ) , ( '\7086' , Narrow ) , ( '\7142' , Combining ) , ( '\7143' , Narrow ) , ( '\7144' , Combining ) , ( '\7146' , Narrow ) , ( '\7149' , Combining ) , ( '\7150' , Narrow ) , ( '\7151' , Combining ) , ( '\7154' , Narrow ) , ( '\7212' , Combining ) , ( '\7220' , Narrow ) , ( '\7222' , Combining ) , ( '\7227' , Narrow ) , ( '\7376' , Combining ) , ( '\7379' , Narrow ) , ( '\7380' , Combining ) , ( '\7393' , Narrow ) , ( '\7394' , Combining ) , ( '\7401' , Narrow ) , ( '\7405' , Combining ) , ( '\7406' , Narrow ) , ( '\7412' , Combining ) , ( '\7413' , Narrow ) , ( '\7416' , Combining ) , ( '\7418' , Narrow ) , ( '\7616' , Combining ) , ( '\7680' , Narrow ) , ( '\8203' , Control ) , ( '\8208' , Ambiguous ) , ( '\8209' , Narrow ) , ( '\8211' , Ambiguous ) , ( '\8215' , Narrow ) , ( '\8216' , Ambiguous ) , ( '\8218' , Narrow ) , ( '\8220' , Ambiguous ) , ( '\8222' , Narrow ) , ( '\8224' , Ambiguous ) , ( '\8227' , Narrow ) , ( '\8228' , Ambiguous ) , ( '\8232' , Narrow ) , ( '\8234' , Control ) , ( '\8239' , Narrow ) , ( '\8240' , Ambiguous ) , ( '\8241' , Narrow ) , ( '\8242' , Ambiguous ) , ( '\8244' , Narrow ) , ( '\8245' , Ambiguous ) , ( '\8246' , Narrow ) , ( '\8251' , Ambiguous ) , ( '\8252' , Narrow ) , ( '\8254' , Ambiguous ) , ( '\8255' , Narrow ) , ( '\8288' , Control ) , ( '\8304' , Narrow ) , ( '\8308' , Ambiguous ) , ( '\8309' , Narrow ) , ( '\8319' , Ambiguous ) , ( '\8320' , Narrow ) , ( '\8321' , Ambiguous ) , ( '\8325' , Narrow ) , ( '\8364' , Ambiguous ) , ( '\8365' , Narrow ) , ( '\8400' , Combining ) , ( '\8448' , Narrow ) , ( '\8451' , Ambiguous ) , ( '\8452' , Narrow ) , ( '\8453' , Ambiguous ) , ( '\8454' , Narrow ) , ( '\8457' , Ambiguous ) , ( '\8458' , Narrow ) , ( '\8467' , Ambiguous ) , ( '\8468' , Narrow ) , ( '\8470' , Ambiguous ) , ( '\8471' , Narrow ) , ( '\8481' , Ambiguous ) , ( '\8483' , Narrow ) , ( '\8486' , Ambiguous ) , ( '\8487' , Narrow ) , ( '\8491' , Ambiguous ) , ( '\8492' , Narrow ) , ( '\8531' , Ambiguous ) , ( '\8533' , Narrow ) , ( '\8539' , Ambiguous ) , ( '\8543' , Narrow ) , ( '\8544' , Ambiguous ) , ( '\8556' , Narrow ) , ( '\8560' , Ambiguous ) , ( '\8570' , Narrow ) , ( '\8585' , Ambiguous ) , ( '\8586' , Narrow ) , ( '\8592' , Ambiguous ) , ( '\8602' , Narrow ) , ( '\8632' , Ambiguous ) , ( '\8634' , Narrow ) , ( '\8658' , Ambiguous ) , ( '\8659' , Narrow ) , ( '\8660' , Ambiguous ) , ( '\8661' , Narrow ) , ( '\8679' , Ambiguous ) , ( '\8680' , Narrow ) , ( '\8704' , Ambiguous ) , ( '\8705' , Narrow ) , ( '\8706' , Ambiguous ) , ( '\8708' , Narrow ) , ( '\8711' , Ambiguous ) , ( '\8713' , Narrow ) , ( '\8715' , Ambiguous ) , ( '\8716' , Narrow ) , ( '\8719' , Ambiguous ) , ( '\8720' , Narrow ) , ( '\8721' , Ambiguous ) , ( '\8722' , Narrow ) , ( '\8725' , Ambiguous ) , ( '\8726' , Narrow ) , ( '\8730' , Ambiguous ) , ( '\8731' , Narrow ) , ( '\8733' , Ambiguous ) , ( '\8737' , Narrow ) , ( '\8739' , Ambiguous ) , ( '\8740' , Narrow ) , ( '\8741' , Ambiguous ) , ( '\8742' , Narrow ) , ( '\8743' , Ambiguous ) , ( '\8749' , Narrow ) , ( '\8750' , Ambiguous ) , ( '\8751' , Narrow ) , ( '\8756' , Ambiguous ) , ( '\8760' , Narrow ) , ( '\8764' , Ambiguous ) , ( '\8766' , Narrow ) , ( '\8776' , Ambiguous ) , ( '\8777' , Narrow ) , ( '\8780' , Ambiguous ) , ( '\8781' , Narrow ) , ( '\8786' , Ambiguous ) , ( '\8787' , Narrow ) , ( '\8800' , Ambiguous ) , ( '\8802' , Narrow ) , ( '\8804' , Ambiguous ) , ( '\8808' , Narrow ) , ( '\8810' , Ambiguous ) , ( '\8812' , Narrow ) , ( '\8814' , Ambiguous ) , ( '\8816' , Narrow ) , ( '\8834' , Ambiguous ) , ( '\8836' , Narrow ) , ( '\8838' , Ambiguous ) , ( '\8840' , Narrow ) , ( '\8853' , Ambiguous ) , ( '\8854' , Narrow ) , ( '\8857' , Ambiguous ) , ( '\8858' , Narrow ) , ( '\8869' , Ambiguous ) , ( '\8870' , Narrow ) , ( '\8895' , Ambiguous ) , ( '\8896' , Narrow ) , ( '\8978' , Ambiguous ) , ( '\8979' , Narrow ) , ( '\8986' , Wide ) , ( '\8988' , Narrow ) , ( '\9001' , Wide ) , ( '\9003' , Narrow ) , ( '\9193' , Wide ) , ( '\9197' , Narrow ) , ( '\9200' , Wide ) , ( '\9201' , Narrow ) , ( '\9203' , Wide ) , ( '\9204' , Narrow ) , ( '\9312' , Ambiguous ) , ( '\9450' , Narrow ) , ( '\9451' , Ambiguous ) , ( '\9548' , Narrow ) , ( '\9552' , Ambiguous ) , ( '\9588' , Narrow ) , ( '\9600' , Ambiguous ) , ( '\9616' , Narrow ) , ( '\9618' , Ambiguous ) , ( '\9622' , Narrow ) , ( '\9632' , Ambiguous ) , ( '\9634' , Narrow ) , ( '\9635' , Ambiguous ) , ( '\9642' , Narrow ) , ( '\9650' , Ambiguous ) , ( '\9652' , Narrow ) , ( '\9654' , Ambiguous ) , ( '\9656' , Narrow ) , ( '\9660' , Ambiguous ) , ( '\9662' , Narrow ) , ( '\9664' , Ambiguous ) , ( '\9666' , Narrow ) , ( '\9670' , Ambiguous ) , ( '\9673' , Narrow ) , ( '\9675' , Ambiguous ) , ( '\9676' , Narrow ) , ( '\9678' , Ambiguous ) , ( '\9682' , Narrow ) , ( '\9698' , Ambiguous ) , ( '\9702' , Narrow ) , ( '\9711' , Ambiguous ) , ( '\9712' , Narrow ) , ( '\9725' , Wide ) , ( '\9727' , Narrow ) , ( '\9733' , Ambiguous ) , ( '\9735' , Narrow ) , ( '\9737' , Ambiguous ) , ( '\9738' , Narrow ) , ( '\9742' , Ambiguous ) , ( '\9744' , Narrow ) , ( '\9748' , Wide ) , ( '\9750' , Narrow ) , ( '\9756' , Ambiguous ) , ( '\9757' , Narrow ) , ( '\9758' , Ambiguous ) , ( '\9759' , Narrow ) , ( '\9792' , Ambiguous ) , ( '\9793' , Narrow ) , ( '\9794' , Ambiguous ) , ( '\9795' , Narrow ) , ( '\9800' , Wide ) , ( '\9812' , Narrow ) , ( '\9824' , Ambiguous ) , ( '\9826' , Narrow ) , ( '\9827' , Ambiguous ) , ( '\9830' , Narrow ) , ( '\9831' , Ambiguous ) , ( '\9835' , Narrow ) , ( '\9836' , Ambiguous ) , ( '\9838' , Narrow ) , ( '\9839' , Ambiguous ) , ( '\9840' , Narrow ) , ( '\9855' , Wide ) , ( '\9856' , Narrow ) , ( '\9875' , Wide ) , ( '\9876' , Narrow ) , ( '\9886' , Ambiguous ) , ( '\9888' , Narrow ) , ( '\9889' , Wide ) , ( '\9890' , Narrow ) , ( '\9898' , Wide ) , ( '\9900' , Narrow ) , ( '\9917' , Wide ) , ( '\9919' , Ambiguous ) , ( '\9920' , Narrow ) , ( '\9924' , Wide ) , ( '\9926' , Ambiguous ) , ( '\9934' , Wide ) , ( '\9935' , Ambiguous ) , ( '\9940' , Wide ) , ( '\9941' , Ambiguous ) , ( '\9954' , Narrow ) , ( '\9955' , Ambiguous ) , ( '\9956' , Narrow ) , ( '\9960' , Ambiguous ) , ( '\9962' , Wide ) , ( '\9963' , Ambiguous ) , ( '\9970' , Wide ) , ( '\9972' , Ambiguous ) , ( '\9973' , Wide ) , ( '\9974' , Ambiguous ) , ( '\9978' , Wide ) , ( '\9979' , Ambiguous ) , ( '\9981' , Wide ) , ( '\9982' , Ambiguous ) , ( '\9984' , Narrow ) , ( '\9989' , Wide ) , ( '\9990' , Narrow ) , ( '\9994' , Wide ) , ( '\9996' , Narrow ) , ( '\10024' , Wide ) , ( '\10025' , Narrow ) , ( '\10045' , Ambiguous ) , ( '\10046' , Narrow ) , ( '\10060' , Wide ) , ( '\10061' , Narrow ) , ( '\10062' , Wide ) , ( '\10063' , Narrow ) , ( '\10067' , Wide ) , ( '\10070' , Narrow ) , ( '\10071' , Wide ) , ( '\10072' , Narrow ) , ( '\10102' , Ambiguous ) , ( '\10112' , Narrow ) , ( '\10133' , Wide ) , ( '\10136' , Narrow ) , ( '\10160' , Wide ) , ( '\10161' , Narrow ) , ( '\10175' , Wide ) , ( '\10176' , Narrow ) , ( '\11035' , Wide ) , ( '\11037' , Narrow ) , ( '\11088' , Wide ) , ( '\11089' , Narrow ) , ( '\11093' , Wide ) , ( '\11094' , Ambiguous ) , ( '\11098' , Narrow ) , ( '\11503' , Combining ) , ( '\11506' , Narrow ) , ( '\11647' , Combining ) , ( '\11648' , Narrow ) , ( '\11744' , Combining ) , ( '\11776' , Narrow ) , ( '\11904' , Wide ) , ( '\12330' , Combining ) , ( '\12334' , Wide ) , ( '\12351' , Narrow ) , ( '\12353' , Wide ) , ( '\12441' , Combining ) , ( '\12443' , Wide ) , ( '\12872' , Ambiguous ) , ( '\12880' , Wide ) , ( '\19904' , Narrow ) , ( '\19968' , Wide ) , ( '\42192' , Narrow ) , ( '\42607' , Combining ) , ( '\42611' , Narrow ) , ( '\42612' , Combining ) , ( '\42622' , Narrow ) , ( '\42654' , Combining ) , ( '\42656' , Narrow ) , ( '\42736' , Combining ) , ( '\42738' , Narrow ) , ( '\43010' , Combining ) , ( '\43011' , Narrow ) , ( '\43014' , Combining ) , ( '\43015' , Narrow ) , ( '\43019' , Combining ) , ( '\43020' , Narrow ) , ( '\43045' , Combining ) , ( '\43047' , Narrow ) , ( '\43052' , Combining ) , ( '\43056' , Narrow ) , ( '\43204' , Combining ) , ( '\43214' , Narrow ) , ( '\43232' , Combining ) , ( '\43250' , Narrow ) , ( '\43263' , Combining ) , ( '\43264' , Narrow ) , ( '\43302' , Combining ) , ( '\43310' , Narrow ) , ( '\43335' , Combining ) , ( '\43346' , Narrow ) , ( '\43360' , Wide ) , ( '\43392' , Combining ) , ( '\43395' , Narrow ) , ( '\43443' , Combining ) , ( '\43444' , Narrow ) , ( '\43446' , Combining ) , ( '\43450' , Narrow ) , ( '\43452' , Combining ) , ( '\43454' , Narrow ) , ( '\43493' , Combining ) , ( '\43494' , Narrow ) , ( '\43561' , Combining ) , ( '\43567' , Narrow ) , ( '\43569' , Combining ) , ( '\43571' , Narrow ) , ( '\43573' , Combining ) , ( '\43584' , Narrow ) , ( '\43587' , Combining ) , ( '\43588' , Narrow ) , ( '\43596' , Combining ) , ( '\43597' , Narrow ) , ( '\43644' , Combining ) , ( '\43645' , Narrow ) , ( '\43696' , Combining ) , ( '\43697' , Narrow ) , ( '\43698' , Combining ) , ( '\43701' , Narrow ) , ( '\43703' , Combining ) , ( '\43705' , Narrow ) , ( '\43710' , Combining ) , ( '\43712' , Narrow ) , ( '\43713' , Combining ) , ( '\43714' , Narrow ) , ( '\43756' , Combining ) , ( '\43758' , Narrow ) , ( '\43766' , Combining ) , ( '\43777' , Narrow ) , ( '\44005' , Combining ) , ( '\44006' , Narrow ) , ( '\44008' , Combining ) , ( '\44009' , Narrow ) , ( '\44013' , Combining ) , ( '\44016' , Narrow ) , ( '\44032' , Wide ) , ( '\55216' , Narrow ) , ( '\57344' , Ambiguous ) , ( '\63744' , Wide ) , ( '\64256' , Narrow ) , ( '\64286' , Combining ) , ( '\64287' , Narrow ) , ( '\65024' , Combining ) , ( '\65040' , Wide ) , ( '\65056' , Combining ) , ( '\65072' , Wide ) , ( '\65136' , Narrow ) , ( '\65279' , Control ) , ( '\65281' , Wide ) , ( '\65377' , Narrow ) , ( '\65504' , Wide ) , ( '\65512' , Narrow ) , ( '\65529' , Control ) , ( '\65532' , Narrow ) , ( '\65533' , Ambiguous ) , ( '\65536' , Narrow ) , ( '\66045' , Combining ) , ( '\66176' , Narrow ) , ( '\66272' , Combining ) , ( '\66273' , Narrow ) , ( '\66422' , Combining ) , ( '\66432' , Narrow ) , ( '\68097' , Combining ) , ( '\68112' , Narrow ) , ( '\68152' , Combining ) , ( '\68160' , Narrow ) , ( '\68325' , Combining ) , ( '\68331' , Narrow ) , ( '\68900' , Combining ) , ( '\68912' , Narrow ) , ( '\69291' , Combining ) , ( '\69293' , Narrow ) , ( '\69446' , Combining ) , ( '\69457' , Narrow ) , ( '\69506' , Combining ) , ( '\69510' , Narrow ) , ( '\69633' , Combining ) , ( '\69634' , Narrow ) , ( '\69688' , Combining ) , ( '\69703' , Narrow ) , ( '\69744' , Combining ) , ( '\69745' , Narrow ) , ( '\69747' , Combining ) , ( '\69749' , Narrow ) , ( '\69759' , Combining ) , ( '\69762' , Narrow ) , ( '\69811' , Combining ) , ( '\69815' , Narrow ) , ( '\69817' , Combining ) , ( '\69819' , Narrow ) , ( '\69821' , Control ) , ( '\69822' , Narrow ) , ( '\69826' , Combining ) , ( '\69837' , Control ) , ( '\69840' , Narrow ) , ( '\69888' , Combining ) , ( '\69891' , Narrow ) , ( '\69927' , Combining ) , ( '\69932' , Narrow ) , ( '\69933' , Combining ) , ( '\69942' , Narrow ) , ( '\70003' , Combining ) , ( '\70004' , Narrow ) , ( '\70016' , Combining ) , ( '\70018' , Narrow ) , ( '\70070' , Combining ) , ( '\70079' , Narrow ) , ( '\70089' , Combining ) , ( '\70093' , Narrow ) , ( '\70095' , Combining ) , ( '\70096' , Narrow ) , ( '\70191' , Combining ) , ( '\70194' , Narrow ) , ( '\70196' , Combining ) , ( '\70197' , Narrow ) , ( '\70198' , Combining ) , ( '\70200' , Narrow ) , ( '\70206' , Combining ) , ( '\70272' , Narrow ) , ( '\70367' , Combining ) , ( '\70368' , Narrow ) , ( '\70371' , Combining ) , ( '\70384' , Narrow ) , ( '\70400' , Combining ) , ( '\70402' , Narrow ) , ( '\70459' , Combining ) , ( '\70461' , Narrow ) , ( '\70464' , Combining ) , ( '\70465' , Narrow ) , ( '\70502' , Combining ) , ( '\70656' , Narrow ) , ( '\70712' , Combining ) , ( '\70720' , Narrow ) , ( '\70722' , Combining ) , ( '\70725' , Narrow ) , ( '\70726' , Combining ) , ( '\70727' , Narrow ) , ( '\70750' , Combining ) , ( '\70751' , Narrow ) , ( '\70835' , Combining ) , ( '\70841' , Narrow ) , ( '\70842' , Combining ) , ( '\70843' , Narrow ) , ( '\70847' , Combining ) , ( '\70849' , Narrow ) , ( '\70850' , Combining ) , ( '\70852' , Narrow ) , ( '\71090' , Combining ) , ( '\71096' , Narrow ) , ( '\71100' , Combining ) , ( '\71102' , Narrow ) , ( '\71103' , Combining ) , ( '\71105' , Narrow ) , ( '\71132' , Combining ) , ( '\71168' , Narrow ) , ( '\71219' , Combining ) , ( '\71227' , Narrow ) , ( '\71229' , Combining ) , ( '\71230' , Narrow ) , ( '\71231' , Combining ) , ( '\71233' , Narrow ) , ( '\71339' , Combining ) , ( '\71340' , Narrow ) , ( '\71341' , Combining ) , ( '\71342' , Narrow ) , ( '\71344' , Combining ) , ( '\71350' , Narrow ) , ( '\71351' , Combining ) , ( '\71352' , Narrow ) , ( '\71453' , Combining ) , ( '\71456' , Narrow ) , ( '\71458' , Combining ) , ( '\71462' , Narrow ) , ( '\71463' , Combining ) , ( '\71472' , Narrow ) , ( '\71727' , Combining ) , ( '\71736' , Narrow ) , ( '\71737' , Combining ) , ( '\71739' , Narrow ) , ( '\71995' , Combining ) , ( '\71997' , Narrow ) , ( '\71998' , Combining ) , ( '\71999' , Narrow ) , ( '\72003' , Combining ) , ( '\72004' , Narrow ) , ( '\72148' , Combining ) , ( '\72156' , Narrow ) , ( '\72160' , Combining ) , ( '\72161' , Narrow ) , ( '\72193' , Combining ) , ( '\72203' , Narrow ) , ( '\72243' , Combining ) , ( '\72249' , Narrow ) , ( '\72251' , Combining ) , ( '\72255' , Narrow ) , ( '\72263' , Combining ) , ( '\72272' , Narrow ) , ( '\72273' , Combining ) , ( '\72279' , Narrow ) , ( '\72281' , Combining ) , ( '\72284' , Narrow ) , ( '\72330' , Combining ) , ( '\72343' , Narrow ) , ( '\72344' , Combining ) , ( '\72346' , Narrow ) , ( '\72752' , Combining ) , ( '\72766' , Narrow ) , ( '\72767' , Combining ) , ( '\72768' , Narrow ) , ( '\72850' , Combining ) , ( '\72873' , Narrow ) , ( '\72874' , Combining ) , ( '\72881' , Narrow ) , ( '\72882' , Combining ) , ( '\72884' , Narrow ) , ( '\72885' , Combining ) , ( '\72960' , Narrow ) , ( '\73009' , Combining ) , ( '\73030' , Narrow ) , ( '\73031' , Combining ) , ( '\73040' , Narrow ) , ( '\73104' , Combining ) , ( '\73107' , Narrow ) , ( '\73109' , Combining ) , ( '\73110' , Narrow ) , ( '\73111' , Combining ) , ( '\73112' , Narrow ) , ( '\73459' , Combining ) , ( '\73461' , Narrow ) , ( '\78896' , Control ) , ( '\82944' , Narrow ) , ( '\92912' , Combining ) , ( '\92917' , Narrow ) , ( '\92976' , Combining ) , ( '\92983' , Narrow ) , ( '\94031' , Combining ) , ( '\94032' , Narrow ) , ( '\94095' , Combining ) , ( '\94099' , Narrow ) , ( '\94176' , Wide ) , ( '\94180' , Combining ) , ( '\94192' , Wide ) , ( '\113664' , Narrow ) , ( '\113821' , Combining ) , ( '\113823' , Narrow ) , ( '\113824' , Control ) , ( '\118528' , Combining ) , ( '\118608' , Narrow ) , ( '\119143' , Combining ) , ( '\119146' , Narrow ) , ( '\119155' , Control ) , ( '\119163' , Combining ) , ( '\119171' , Narrow ) , ( '\119173' , Combining ) , ( '\119180' , Narrow ) , ( '\119210' , Combining ) , ( '\119214' , Narrow ) , ( '\119362' , Combining ) , ( '\119365' , Narrow ) , ( '\121344' , Combining ) , ( '\121399' , Narrow ) , ( '\121403' , Combining ) , ( '\121453' , Narrow ) , ( '\121461' , Combining ) , ( '\121462' , Narrow ) , ( '\121476' , Combining ) , ( '\121477' , Narrow ) , ( '\121499' , Combining ) , ( '\122624' , Narrow ) , ( '\122880' , Combining ) , ( '\123136' , Narrow ) , ( '\123184' , Combining ) , ( '\123191' , Narrow ) , ( '\123566' , Combining ) , ( '\123584' , Narrow ) , ( '\123628' , Combining ) , ( '\123632' , Narrow ) , ( '\125136' , Combining ) , ( '\125184' , Narrow ) , ( '\125252' , Combining ) , ( '\125259' , Narrow ) , ( '\126980' , Wide ) , ( '\126981' , Narrow ) , ( '\127183' , Wide ) , ( '\127185' , Narrow ) , ( '\127232' , Ambiguous ) , ( '\127243' , Narrow ) , ( '\127248' , Ambiguous ) , ( '\127278' , Narrow ) , ( '\127280' , Ambiguous ) , ( '\127338' , Narrow ) , ( '\127344' , Ambiguous ) , ( '\127374' , Wide ) , ( '\127375' , Ambiguous ) , ( '\127377' , Wide ) , ( '\127387' , Ambiguous ) , ( '\127405' , Narrow ) , ( '\127488' , Wide ) , ( '\127777' , Narrow ) , ( '\127789' , Wide ) , ( '\127798' , Narrow ) , ( '\127799' , Wide ) , ( '\127869' , Narrow ) , ( '\127870' , Wide ) , ( '\127892' , Narrow ) , ( '\127904' , Wide ) , ( '\127947' , Narrow ) , ( '\127951' , Wide ) , ( '\127956' , Narrow ) , ( '\127968' , Wide ) , ( '\127985' , Narrow ) , ( '\127988' , Wide ) , ( '\127989' , Narrow ) , ( '\127992' , Wide ) , ( '\128063' , Narrow ) , ( '\128064' , Wide ) , ( '\128065' , Narrow ) , ( '\128066' , Wide ) , ( '\128253' , Narrow ) , ( '\128255' , Wide ) , ( '\128318' , Narrow ) , ( '\128331' , Wide ) , ( '\128335' , Narrow ) , ( '\128336' , Wide ) , ( '\128360' , Narrow ) , ( '\128378' , Wide ) , ( '\128379' , Narrow ) , ( '\128405' , Wide ) , ( '\128407' , Narrow ) , ( '\128420' , Wide ) , ( '\128421' , Narrow ) , ( '\128507' , Wide ) , ( '\128592' , Narrow ) , ( '\128640' , Wide ) , ( '\128710' , Narrow ) , ( '\128716' , Wide ) , ( '\128717' , Narrow ) , ( '\128720' , Wide ) , ( '\128723' , Narrow ) , ( '\128725' , Wide ) , ( '\128736' , Narrow ) , ( '\128747' , Wide ) , ( '\128752' , Narrow ) , ( '\128756' , Wide ) , ( '\128768' , Narrow ) , ( '\128992' , Wide ) , ( '\129024' , Narrow ) , ( '\129292' , Wide ) , ( '\129339' , Narrow ) , ( '\129340' , Wide ) , ( '\129350' , Narrow ) , ( '\129351' , Wide ) , ( '\129536' , Narrow ) , ( '\129648' , Wide ) , ( '\129792' , Narrow ) , ( '\131072' , Wide ) , ( '\917505' , Control ) , ( '\917760' , Combining ) , ( '\983040' , Ambiguous ) ] doclayout-0.4.0.1/test/0000755000000000000000000000000007346545000013025 5ustar0000000000000000doclayout-0.4.0.1/test/test.hs0000644000000000000000000002223307346545000014342 0ustar0000000000000000{-# LANGUAGE CPP #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ScopedTypeVariables #-} import Text.DocLayout import Text.Emoji import Test.Tasty import Test.Tasty.HUnit import Test.Tasty.QuickCheck import Data.Functor ((<&>)) import Data.Text (Text) import qualified Data.Text as T #if MIN_VERSION_base(4,11,0) #else import Data.Semigroup #endif timeout :: Integer timeout = 480 * 10^(6 :: Int) main :: IO () main = defaultMain $ localOption (mkTimeout timeout) $ testGroup "Tests" tests -- 8 minute timeout renderTest :: String -> Maybe Int -> Doc Text -> Text -> TestTree renderTest title mblen doc expected = testCase title $ render mblen doc @?= expected tests :: [TestTree] tests = [ testCase "offset prefixed" $ (offset (nest 3 (text "**" <> text "thisIsGoingToBeTooLongAnyway" <> text "**") <> blankline :: Doc Text) @?= 35) , renderTest "lblock with chop" Nothing (lblock 4 (text "hi there" :: Doc Text)) "hi t\nhere" , renderTest "lblock with blank line" Nothing (Block 5 ["a", "", "b"] :: Doc Text) "a\nb" , renderTest "nesting should not wrap" (Just 10) (nest 3 (text "abcdefghi" :: Doc Text)) " abcdefghi" , renderTest "simple vcat" (Just 10) (vcat $ map chomp ["aaa", "bbb", "ccc"]) "aaa\nbbb\nccc" , renderTest "vcat with chomp" (Just 10) (chomp "aaa" $$ chomp "bbb" $$ chomp "ccc") "aaa\nbbb\nccc" , renderTest "simple text above line length" (Just 4) ("hello" <+> "there") "hello\nthere" , renderTest "cr" (Just 60) ("hello" <> cr <> "there") "hello\nthere" , renderTest "x $$ cr" Nothing ("x" $$ cr) "x\n" , renderTest "wrapping" (Just 10) (hsep ["hello", "there", "this", "is", "a", "test"]) "hello\nthere this\nis a test" , renderTest "simple box wrapping" (Just 50) (lblock 3 "aa" <> lblock 3 "bb" <> lblock 3 ("aa" <+> "bbbb")) "aa bb aa\n b\n bbb" , renderTest "prefixed with multi paragraphs" (Just 80) (prefixed "> " ("foo" <+> "bar" <> cr <> "baz" <> blankline <> "bim" <+> "bam")) "> foo bar\n> baz\n>\n> bim bam" , renderTest "prefixed with hsep" Nothing (prefixed "> " $ hsep ["a","b","c"]) "> a b c" , renderTest "nest" Nothing (nest 4 "aa\n\nbb\ncc") " aa\n\n bb\n cc" , renderTest "hang" Nothing (hang 4 " - " (chomp "aa\n\nbb\ncc" <> cr) <> hang 4 " - " (chomp "dd\n" <> cr)) " - aa\n\n bb\n cc\n - dd\n" , renderTest "align with box" Nothing ("aa" <> lblock 2 ("bb" $$ "cc") <> "dd") "aabb\n ccdd" , renderTest "centered box" Nothing ("aa" <> cblock 4 ("bb" $$ "cc")) "aa bb\n cc" , renderTest "blanks at beginning" Nothing (blanklines 2 <> "aa") "\naa" -- only one because we treat top of doc as implicit blank , renderTest "blanks at end" Nothing ("aa" <> blanklines 2) "aa\n" , renderTest "blanks at end with multiple" Nothing ("aa" <> cr <> blanklines 2 <> blanklines 0) "aa\n" , renderTest "blanks at end with nesting" Nothing (nest 2 (nest 3 ("aa" <> blankline) <> cr <> blanklines 2) <> blanklines 2) " aa\n" , renderTest "blanks around cr" Nothing ("aa" <> blankline <> cr <> blankline <> "bb") "aa\n\nbb" , renderTest "strange wrap case" (Just 8) (vcat [hang 2 "- " (chomp $ text "aaaaa" <> space <> "bbb"), hang 2 "- " (text "aoeu")]) "- aaaaa\n bbb\n- aoeu" , renderTest "strange wrap case" (Just 8) (text "aaaaa" <> space <> text "bbbbb" <> blankline <> text "ccccc") "aaaaa\nbbbbb\n\nccccc" , renderTest "chomp 1" Nothing (chomp (("aa" <> space) <> blankline) <> "bb") "aabb" , renderTest "chomp 2" Nothing (chomp ("aa" <> space) <> "bb") "aabb" , renderTest "chomp 3" Nothing (chomp "aa") "aa" , renderTest "chomp with nesting" Nothing (chomp (nest 3 ("aa" <> blankline))) " aa" , renderTest "chomp with box at end" Nothing ("aa" <> cr <> chomp (lblock 2 ("aa" <> blankline) <> blankline)) "aa\naa" , renderTest "empty and $$" Nothing ("aa" $$ empty $$ "bb") "aa\nbb" , renderTest "table" Nothing ((rblock 4 "aa" <> lblock 3 " | " <> cblock 4 "bb" <> lblock 3 " | " <> lblock 4 "cc") $$ (rblock 4 "----" <> lblock 3 " | " <> cblock 4 "----" <> lblock 3 " | " <> lblock 4 "----") $$ (rblock 4 "dd" <> lblock 3 " | " <> cblock 4 "ee" <> lblock 3 " | " <> lblock 4 "ff")) " aa | bb | cc\n---- | ---- | ----\n dd | ee | ff" , renderTest "proper wrapping with multiple components" (Just 10) ("aa" <> space <> "bbbbb" <> "ccccc") "aa\nbbbbbccccc" , renderTest "nested wrapped text" (Just 10) (nest 5 (hsep ["hi", "there", "my", "friend"]) <> cr) " hi\n there\n my\n friend\n" , renderTest "aligned wrapped text" Nothing (cblock 7 ("hi" <+> "there")) " hi\n there" , renderTest "afterBreak" (Just 2) ("hi" <+> afterBreak "!" <> afterBreak "?" <> "x" <> afterBreak "?") "hi\n!x" , renderTest "breaks and nest" (Just 5) ("[" <> nest 1 ("aaaaaaaaa" $$ "bbbbbbb") <> "]") "[aaaaaaaaa\n bbbbbbb]" , renderTest "empty nest" Nothing ("aa" $$ nest 3 mempty $$ "bb") "aa\nbb" , renderTest "hsep with empty" Nothing (hsep ["a",mempty,"b"]) "a b" , renderTest "(<+>) with empty" Nothing ("a" <+> mempty <+> "b") "a b" , renderTest "vcat doesn't create newline at end" Nothing (vcat ["aa","bb"] <> "cc") "aa\nbbcc" , renderTest "vcat []" Nothing (vcat []) "" , renderTest "nestle" Nothing (nestle $ blanklines 2 $$ "aa" $$ blanklines 2 <> cr) "aa\n" , renderTest "prefix with box" Nothing (prefixed "> " $ cblock 4 ("aa" $$ "bb")) "> aa\n> bb" , renderTest "breaking space after empty" Nothing (empty <> space <> "x") "x" , renderTest "hang with empty content" Nothing (hang 5 (nowrap "xxx") mempty) "xxx" , renderTest "beforeNonBlank" Nothing (beforeNonBlank "!!" <> " ab" $$ beforeNonBlank "!!" <> "a b") " ab\n!!a b" , renderTest "vfill" Nothing (vfill "|" <> cblock 5 ("a" $$ "bbb" $$ "ccccc") <> lblock 3 "dd" <> vfill "+") "| a dd +\n| bbb +\n|ccccc +" , renderTest "vfill 2" Nothing (vfill "| " <> cblock 5 ("a" $$ "bbb") <> vfill " | " <> lblock 2 ("dd" $$ "ee" $$ "ff") <> vfill " |") "| a | dd |\n| bbb | ee |\n| | ff |" , renderTest "combining character at start" Nothing (text "\870" <> space <> text "a") "\870 a" , renderTest "newline after cr" -- #20 Nothing (literal "a" <> cr <> literal "\nb" <> cr <> literal "c") "a\n\nb\nc" , testCase "length of normal text" $ realLength ("This is going to be too long anyway" :: String) @?= 35 , testCase "length of an ambiguous character in normal context" $ realLength ("Circle\x25EF\&Circle" :: String) @?= 13 , testCase "length of an ambiguous character in wide context" $ realLengthWideContext ("圆\x25EF\&圆" :: String) @?= 6 , testCase "length of normal character, which could be continued to an emoji, but isn't" $ realLength ("*a" :: String) @?= 2 , testCase "length of normal character, which could be continued to an emoji, and is" $ realLength ("*\xFE0F\x20E3\&a" :: String) @?= 3 , testCase "length emoji consisting of one code point" $ realLength ("\x231A" :: String) @?= 2 , testCase "length of an emoji constructed using the variating modifier" $ realLength ("\x00A9\xFE0F" :: String) @?= 2 , testCase "length of a non-emoji which would be an emoji with a variation modifier" $ realLength ("\x00A9" :: String) @?= 1 , testCase "length of two emoji in a row" $ realLength ("\x1F170\xFE0F\x1F1E6\x1F1E8" :: String) @?= 4 , testCase "length of an emoji with skin tone modifier, where stripping results in a non-emoji" $ realLength ("\x1F590\x1F3FF" :: String) @?= 2 , testCase "a digit with a skin tone modifier is invalid but might appear, and shouldn't be mistaken for a variation modifier" $ realLength ("1\x1F3FF" :: String) @?= 3 , testGroup "all base emoji have width 2" $ baseEmojis <&> \emoji -> testCase (T.unpack emoji) $ realLength emoji @?= 2 , testGroup "all zero-width joiner emoji sequences have width 2" $ zwjEmojis <&> \emoji -> testCase (T.unpack emoji) $ realLength emoji @?= 2 , testProperty "shortcut provides same answer for string length in a narrow context" . withMaxSuccess 10000 $ \(x :: String) -> realLengthNarrowContext x === realLengthNarrowContextNoShortcut x , testProperty "shortcut provides same answer for string length in a wide context" . withMaxSuccess 10000 $ \(x :: String) -> realLengthWideContext x === realLengthWideContextNoShortcut x ] doclayout-0.4.0.1/udhr/0000755000000000000000000000000007346545000013010 5ustar0000000000000000doclayout-0.4.0.1/udhr/README0000644000000000000000000000314107346545000013667 0ustar0000000000000000Universal Declaration of Human Rights The original have been retrieved from the OHCHR web site: http://www.ohchr.org/EN/UDHR/Pages/Introduction.aspx The Universal Declaration of Human Rights (UDHR) is a milestone document in the history of human rights. Drafted by representatives with different legal and cultural backgrounds from all regions of the world, the Declaration was proclaimed by the United Nations General Assembly in Paris on 10 December 1948 General Assembly resolution 217 A (III) as a common standard of achievements for all peoples and all nations. It sets out, for the first time, fundamental human rights to be universally protected. Translations contain no distinction between languages and dialects since all of them serve the purpose of global dissemination. At present, there are 360 different translations of UDHR, available in PDF format. Text can be extracted from 298 of the translations (The rest are mostly messy scans of handwritten documents). Text format translations contain 11446 characters on average (5064 – 42210 characters). pdf/ original pdf files downloaded from the UN website. txt/ text extracted from pdf files with pdftotext (UTF-8). languages.txt language codes and names This dataset has been retreived on 2021-10-21 from http://research.ics.aalto.fi/cog/data/udhr/. Tommi Vatanen, Jaakko J. Väyrynen and Sami Virpioja (2010) Language identification of short text segments with n-gram models. In Proceedings of the Seventh International Conference on Language Resources and Evaluation (LREC'10), pages 3423-3430. European Language Resources Association (ELRA). doclayout-0.4.0.1/udhr/languages.txt0000644000000000000000000001440607346545000015524 0ustar0000000000000000abk Abkhaz atj Achehnese jiv Achuar Chicham acu Achuar-Shiwiar aja Adja gax Afaan Oromo (Oromiffa) afk Afrikaans agr Aguaruna ccc A'ingae tws1 Akuapem Twi aln Albanian alt Altay amc Amahuaca amr Amarakaeri amh Amharic ame Amuesha-Yanesha arl Arabela arz Arabic (Alarabia) arm Armenian ass Asante cni Ashàninca cpu Ashéninca asm Assamese aii Assyrian (Atoraya) aub Asturian (Bable) kwi Awapit aym Aymara azb1 Azeri/Azerbaijani (Cyrillic) azb Azeri/Azerbaijani (Latin) inz Bahasa Indonesia mli Bahasa Melayu (Malay) 1121 Bai Coca bvi Balanda Viri bzc Balinese bgp Balochi bra Bambara bci Baoulé/Baule bfa Bari bsq Basque (Euskara) bba Batonu (Bariba) ruw Belorus (Belaruski) bem Bemba bng Bengali btb Béti bhj Bhojpuri bcy Bichelamar bkl Bikol/Bicolano boa Bora src4 Bosnian (Cyrillic script) src1 Bosnian (Latin script) brt Breton bpr Bugisnese blg Bulgarian (Balgarski) bms Burmese/Myanmar cak1 Cakchiquel cpp Campa pajonalino cbu Candoshi-Shapra cot Caquinte cbr Cashibo-Cacataibo cbs Cashinahua cln Catalan (Català) ceb Cebuano 1122 Chaa'pala cjd Chamorro tso Changane (Mozambique) cbt Chayahuita nyj1 Chechewa (Nyanja) cic Chickasaw csa Chinanteco chj Chinanteco, Ajitlán chn Chinese (Mandarin) fal Chin Falam hak Chin Hakha tid Chin Tiddim tru1 Chuuk (Trukese) cjk Cokwe coi Corsican kea Crioulo (Cabo Verde) gbc Crioulo da Guiné-Bissau (Guinea Bissau Creole) src2 Croatian wls Cymraeg (Welsh) czc Czech (Cesky) dga Dagaare dag Dagbani gac1 Dangme dns Danish (Dansk) prs1 Dari den Dendi ger Deutsch (German) div Dhivehi (Maldivian) nav Dine, Navajo (Navaho) dinka Dinka dyo Diola (Jola-Fogny) tbz Ditammari dut Dutch (Nederlands) dzo Dzongkha/Bhutanese edo Edo ibb Efik (Ibibio) grk Ellinika' (Greek) eng English spn Español (Spanish) 1115 Esperanto est Estonian (Eesti) eve Even evn Evenki ewe Ewe/Eve tws3 Fante fae Faroese prs Farsi/Persian fji Fijian tgl Filipino (Tagalog) fin Finnish kng Fiote (Angola) foa Fon 1128 Forro frn French (Français) fri Frisian frl Friulian (Friulano) gac2 Ga gli1 Gaeilge (Irish Gaelic) gag Gagauz gls Gàidhlig Albanach (Scottish Gaelic) gln Galician (Galego) cab Garifuna geo Georgian dum Gonja esg Greenlandic (Inuktikut) gun Guarani gua Guarayo hna Guen (Mina) gjr Gujarati hat Haitian Creole (Kreyol) hat1 Haitian Creole (popular) hni Hani kkn Hankuko (Korean) gej Hausa/Haoussa hwi Hawaiian hbr Hebrew hil Hiligaynon hnd Hindi hea Hmong (Miao) Northern East-Guizhou blu Hmong (Miao), Sichuan-Guizhou-Yunnan hms Hmong (Miao) Southern East-Guizhou hva Huasteco huu Huitoto Murui hng Hungarian ice Icelandic (Yslenska) 1120 Ido igr Igbo ilo Iloko/Ilocano 1119 Interlingua esb Inuktitut itn Italian jpn Japanese (Nihongo) jan Javanese maz Jñatrjo (Mazahua) kbp Kabyè kjv Kannada kph Kanuri Yerwa kqn Kaonde pmp Kapampangan krl Karelian pwo Karen (Pwo) ksw Karen (S'gaw) kas Kasem ksh Kashmiri kaz Kazakh kjh Khakas khk Khalkha (Mongolian) khm Khmer 1117 K'iche' (Quiché) qug Kichwa kon Kikongo ya L'Etat (Kikongo/Kituba) mlo Kimbundu nyz Kinyamwezi (Nyamwezi) rua1 Kinyarwanda gkp1 Kpelewo kri Krio kdb1 Kurdish kur Kurmanji kdo Kyrgyz nso Lamnso' (Lám nso') nol Lao ltn1 Latin (Latina) ltn Latin (Latina) lat Latvian lia Limba lin Lingala lit Lithuanian (Lietuviskai) lbm1 Lozi lub Luba-Kasai (Tshiluba) lap1 Luganda/Ganda mlo1 Lunda/Chokwe-lunda lue Luvale lux Luxembourgish (Lëtzebuergeusch) mkj Macedonian mhj Madurese mqm Magahi kde Makonde vmw Makua (Mozambique) mex Malagasy mjs Malayalam mls Maltese mam Mam mni Maninka mbf Maori rrt Maori (Cook Islands) (Rarotongan) aru Mapudungun (Mapuzgun) mrt Marathi mzm Marshallese mkd Marwari mcf Matsés yua Mayan (Yucateco) mao Mazateco mfy Mende mic Mikmaq/Micmac mpu Minangkabau miq Miskito mxv Mixteco mhm Mooré/More moz Mozarabic (Ajami) 1111 Ñahñú (Otomí) nhn Nahuatl nel Ndebele yrk Nenets nep Nepali nio Nganasan nba Ngangela (Nyemba) pcm Nigerian Pidgin English not Nomatsiguenga srt Northern Sotho/Pedi/Sepedi nrr Norwegian (Bokmål) (Norsk, Bokmål) nrn Norwegian (Nynorsk) (Norsk, Nynorsk) nus Nuer nyj Nyanja/Chinyanja nze Nzema auv1 Occitan Auvergnat prv1 Occitan Languedocien ojb Ojibway (Ojibwe) ory Oriya ose Osetin (Ossetian) 1114 Oshiwambo (Ndonga) lot Otuho pbb Paez 1123 Pai Koka plu Palauan pbu Pashto/Pakhto fum Peuhl frn2 Picard pis Pijin (Solomons Pidgin) ppl Pipil pql Polish (Polski) pnf Ponapean por Portuguese pro Provençal fum1 Pulaar pnj1 Punjabi/Panjabi 1112 Purhépecha 1116 Q'echi/Kekchi qec1 Quechua qeg Quechua de Ambo-Pasco quy Quechua de Ayacucho qnt Quechua de Cajamarca qar Quechua de Cotahuasi (Arequipa) qej Quechua de Huamalies (Huanuco) qan Quechua del Callejon de Huaylas quz Quechua del Cusco qju Quechua del Norte de Junin qei Quechua de Margos (Sur de Dios de Mayo, Huanuco) qed Quechua de Pomabamba (Ancash) qud1 Quichua rhe Rhaeto-Romance (Rumantsch) rmn1 Romani rum Romanian (Româna) koo1 Rukonzo (Konjo) rud1 Rundi/Kirundi nyn1 Runyankore-rukiga/Nkore-kiga rus Russian (Russky) lpi Sami/Lappish eml Sammarinese smy Samoan saj Sango (Sangho) skt Sanskrit 1124 Sapara Atupama skr Saraiki srd Sardinian sco Scots ses Seereer src5 Serbian (Cyrillic) (Srpski) src3 Serbian (Latin) (Srpski) crs Seselwa Creole French sjn Shan mcd Sharanahua shk Shilluk shp Shipibo-Conibo shd Shona cjs Shor 1125 Shuar Chicham 1126 Sia Pedee snd Sindhi snh Sinhala swz1 Siswati slo Slovak (Slovencina) slv Slovenian (Slovenscina) som Somali snn Soninké (Soninkanxaane) wee Sorbian sso Southern Sotho/Sotho/Sesotho/Sutu/Sesutu sua Sukuma suo Sundanese sud Sussu/Soussou/Sosso/Soso/Susu swa Swahili/Kiswahili crm Swampy Cree swd Swedish (Svenska) tht Tahitian pet Tajik taj Tamang (Tam) tzm Tamazight (Beraber) tcv Tamil ttr Tatar tcw Telugu 1118 Tének (Huasteco) ttm Tetum thj Thai tej Themne (Temne) tic Tibetan tca Ticuna tgn Tigrinya (Tigrigna) tiv Tiv tob Toba toj Tojol-a'b'al pdg Tok Pisin toi Tonga tov Tongan (Tonga) top Totonaco cof Tsafiki tsh Tshivenda trk Turkish (Türkçe) tck Turkmen tyv Tuvan tzc1 Tzeltal tzc Tzotzil uig Uighur ukr Ukrainian (Ukrayins'ka) mnf Umbundu ura Urarina urd Urdu uzb1 Uzbek (Cyrillic) uzb Uzbek (Latin) vep Veps vie Vietnamese rmy1 Vlach frn1 Walloon/Wallon ako Wama 1127 Wao Tededo wry Waray guc Wayuu tsw Western Sotho/Tswana/Setswana wol Wolof xos Xhosa yad Yagua sah Yakut yao Yao yps Yapese iii Yi ydd Yiddish yor Yoruba (Yorùbá) ykg Yukagir zam Zapoteco ztu1 Zapoteco, San Lucas Quiaviní ccx Zhuang zuu Zulu doclayout-0.4.0.1/udhr/txt/0000755000000000000000000000000007346545000013627 5ustar0000000000000000doclayout-0.4.0.1/udhr/txt/arz.txt0000644000000000000000000000271507346545000015171 0ustar0000000000000000الإعلان العالمي لحقوق الإنسان اعتُمد بموجب قرار الجمعية العامة 217 ألف (د-3) المؤرخ في 10 كانون الأول / ديسمبر 1948. الديباجة لمّا كان الاعتراف بالكرامة المتأصلة في جميع أعضاء الأسرة البشرية وبحقوقهم المتساوية الثابتة هو أساس الحرية والعدل والسلام في العالم. ولما كان تناسي حقوق الإنسان وازدراؤها قد أفضيا إلى أعمال همجية آذت الضمير الإنساني. وكان غاية ما يرنو إليه عامة البشر انبثاق عالم يتمتع فيه الفرد بحرية القول والعقيدة ويتحرر من الفزع والفاقة. ولما كان من الضروري أن يتولى القانون حماية حقوق الإنسان لكيلا يضطر المرء آخر الأمر إلى التمرد على الاستبداد والظلم. ولما كانت شعوب الأمم المتحدة قد أكدت في الميثاق من جديد إيمانها بحقوق الإنسان الأساسية وبكرامة الفرد وقدره وبما للرجال والنساء من حقوق متساوية وحزمت أمرها على أن تدفع بالرقي الاجتماعي قدمًا وأن ترفع مستوى الحياة في جو من الحرية أفسح. ولما كانت الدول الأع doclayout-0.4.0.1/udhr/txt/bng.txt0000644000000000000000000000415107346545000015137 0ustar0000000000000000মানবাধিকারের সার্বজনীন ঘোষণাপত্র মুখবন্ধ যেহেতু মানব পরিবারের সকল সদস্যের সমান ও অবিচ্ছেদ্য অধিকারসমূহ এবং সহজাত মর্যাদার স্বীকৃতি‌ই হচ্ছে বিশ্বে শান্তি, স্বাধীনতা এবং ন্যায়বিচারের ভিত্তি; যেহেতু মানব অধিকারের প্রতি অবজ্ঞা এবং ঘৃণার ফলে মানুবের বিবেক লাঞ্ছিত বোধ করে এমন সব বর্বরোচিত ঘটনা সংঘটিত হয়েছে এবং যেহেতু এমন একটি পৃথিবীর উদ্ভবকে সাধারণ মানুষের সর্বোচ্চ কাঙ্খা রূপে ঘোষণা করা হয়েছে, যেখানে সকল মানুষ ধর্ম এবং বাক স্বাধীনতা ভোগ করবে এবং অভাব ও শংকামুক্ত জীবন যাপন করবে; যেহেতু মানুষ যাতে অত্যাচার ও উত্‍পীড়নের মুখে সর্বশেষ উপায় হিসেবে বিদ্রোহ করতে বাধ্য না হয় সেজন্য আ‌ইনের শাসন দ্বারা মানবাধিকার সংরক্ষণ করা অতি প্রয়োজনীয়; যেহেতু জাতিসমূহের মধ্যে বন্ধুত্বপূর্ণ সম্পর্ক উন্নয়নের প্রয়াস গ্রহণ করা অত্যাবশ্যক; যেহেতু সদস্য জাতিসমূহ জাতিসংঘের সনদে মৌলিক মানবাধিকার, মানব দেহের মর্যাদা doclayout-0.4.0.1/udhr/txt/chn.txt0000644000000000000000000000326507346545000015146 0ustar0000000000000000世界人权宣言 联合国大会一九四八年十二月十日第217A(III)号决议通过并颁布 1948 年 12 月 10 日, 联 合 国 大 会 通 过 并 颁 布《 世 界 人 权 宣 言》。 这 一 具 有 历 史 意 义 的《 宣 言》 颁 布 后, 大 会 要 求 所 有 会 员 国 广 为 宣 传, 并 且“ 不 分 国 家 或 领 土 的 政 治 地 位 , 主 要 在 各 级 学 校 和 其 他 教 育 机 构 加 以 传 播、 展 示、 阅 读 和 阐 述。” 《 宣 言 》 全 文 如 下: 序言 鉴于对人类家庭所有成员的固有尊严及其平等的和不移的权利的 承 认, 乃 是 世 界 自 由、 正 义 与 和 平 的 基 础, 鉴 于 对 人 权 的 无 视 和 侮 蔑 已 发 展 为 野 蛮 暴 行, 这 些 暴 行 玷 污 了 人 类 的 良 心, 而 一 个 人 人 享 有 言 论 和 信 仰 自 由 并 免 予 恐 惧 和 匮 乏 的 世 界 的 来 临, 已 被 宣 布 为 普 通 人 民 的 最 高 愿 望, 鉴 于 为 使 人 类 不 致 迫 不 得 已 铤 而 走 险 对 暴 政 和 压 迫 进 行 反 叛, 有 必 要 使 人 权 受 法 治 的 保 护, 鉴 于 有 必 要 促 进 各 国 间 友 好 关 系 的 发 展, 鉴于各联合国国家的人民已在联合国宪章中重申他们对基本人 权、 人 格 尊 严 和 价 值 以 及 男 女 平 等 权 利 的 信 念, 并 决 心 促 成 较 大 自 由 中 的 社 会 进 步 和 生 活 水 平 的 改 善, 鉴于各会员国业已誓愿同联合国合作以促进对人权和基本自由的 普 遍 尊 重 和 遵 行, 鉴于对这些权利和自由的普遍了解对于这个誓愿的充分实现具有 很 大 的 重 要 性, 因 此 现 在, 大 会, 发 doclayout-0.4.0.1/udhr/txt/eng.txt0000644000000000000000000000144107346545000015141 0ustar0000000000000000Universal Declaration of Human Rights Preamble Whereas recognition of the inherent dignity and of the equal and inalienable rights of all members of the human family is the foundation of freedom, justice and peace in the world, Whereas disregard and contempt for human rights have resulted in barbarous acts which have outraged the conscience of mankind, and the advent of a world in which human beings shall enjoy freedom of speech and belief and freedom from fear and want has been proclaimed as the highest aspiration of the common people, Whereas it is essential, if man is not to be compelled to have recourse, as a last resort, to rebellion against tyranny and oppression, that human rights should be protected by the rule of law, Whereas it is essential to promote the development of friendly doclayout-0.4.0.1/udhr/txt/frn.txt0000644000000000000000000000150007346545000015151 0ustar0000000000000000Déclaration universelle des droits de l'homme Préambule Considérant que la reconnaissance de la dignité inhérente à tous les membres de la famille humaine et de leurs droits égaux et inaliénables constitue le fondement de la liberté, de la justice et de la paix dans le monde, Considérant que la méconnaissance et le mépris des droits de l'homme ont conduit à des actes de barbarie qui révoltent la conscience de l'humanité et que l'avènement d'un monde où les êtres humains seront libres de parler et de croire, libérés de la terreur et de la misère, a été proclamé comme la plus haute aspiration de l'homme, Considérant qu'il est essentiel que les droits de l'homme soient protégés par un régime de droit pour que l'homme ne soit pas contraint, en suprême recours, à la révolte contre la tyrannie e doclayout-0.4.0.1/udhr/txt/grk.txt0000644000000000000000000000267207346545000015162 0ustar0000000000000000ΟΙΚΟΥΜΕΝΙΚΗ ΔΙΑΚΗΡΥΞΗ ΓΙΑ ΤΑ ΑΝΘΡΩΠΙΝΑ ΔΙΚΑΙΩΜΑΤΑ 10 ΔΕΚΕΜΒΡΙΟΥ 1948 ΠΡΟΟΙΜΙΟ Επειδή η αναγνώριση της αξιοπρέπειας, που είναι σύμφυτη σε όλα τα μέλη της ανθρώπινης οικογένειας, καθώς και των ίσων και αναπαλλοτρίωτων δικαιωμάτων τους αποτελεί το θεμέλιο της ελευθερίας, της δικαιοσύνης και της ειρήνης στον κόσμο. Επειδή η παραγνώριση και η περιφρόνηση των δικαιωμάτων του ανθρώπου οδήγησαν σε πράξεις βαρβαρότητας, που εξεγείρουν την ανθρώπινη συνείδηση, και η προοπτική ενός κόσμου όπου οι άνθρωποι θα είναι ελεύθεροι να μιλούν και να πιστεύουν, λυτρωμένοι από τον τρόμο και την αθλιότητα, έχει διακηρυχθεί ως η πιο υψηλή επιδίωξη του ανθρώπου. Επειδή έχει ουσιαστική σημασία να προστατεύονται τα ανθρώπινα δικαιώματα από ένα καθεστώς δικαίου, ώστε ο άνθρωπος να μην αναγκάζεται να προσφεύγει, ως έσ doclayout-0.4.0.1/udhr/txt/hnd.txt0000644000000000000000000000405307346545000015143 0ustar0000000000000000 मानव अधिकारों की सार्वभौम घोषणा १० दिसम्बर १९४८ को यूनाइटेड नेशन्स की जनरल असेम्बली ने मानव अधिकारों की सार्वभौम घोषणा को स्वीकृत और घोषित किया । इसका पूर्ण पाठ आगे के पृष्ठों में दिया गया है । इस ऐतिहासिक कार्य के बाद ही असेम्बली ने सभी सदस्य देशों से अपील की कि वे इस घोषणा का प्रचार करें और देशों अथवा प्रदेशों की राजनैतिक स्थिति पर आधारित भेदभाव का विचार किए बिना, विशेषतः स्कूलों और अन्य शिक्षा संस्थाओं में इसके प्रचार, प्रदर्शन, पठन और व्याख्या का प्रबन्ध करें । इसी घोषणा का सरकारी पाठ संयुक्त राष्ट्रों की इन पांच भाषाओं में प्राप्य हैः—अंग्रेजी, चीनी, फ्रांसीसी, रूसी और स्पेनिश । अनुवाद का जो पाठ यहां दिया गया है, वह भारत सरकार द्वारा स्वीकृत है । प्रस्तावना चूंकि मानव परिवार के सभी सदस्यों के जन्मजात गौरव और समान तथा अविच्छिन्न अधिकार की स्वीकृति ही विश्व-शान्ति, न्याय और स्वतन्त्रता doclayout-0.4.0.1/udhr/txt/jpn.txt0000644000000000000000000000442107346545000015160 0ustar0000000000000000『世界人権宣言』 (1948.12.10 第3回国連総会採択) 〈前文〉 人類社会のすべての構成員の固有の尊厳と平等で譲ることのできない権利とを承 認することは、世界における自由、正義及び平和の基礎であるので、 人権の無視及び軽侮が、人類の良心を踏みにじった野蛮行為をもたらし、言論及 び信仰の自由が受けられ、恐怖及び欠乏のない世界の到来が、一般の人々の最高 の願望として宣言されたので、 人間が専制と圧迫とに対する最後の手段として反逆に訴えることがないようにす るためには、法の支配によって人権を保護することが肝要であるので、 諸国間の友好関係の発展を促進することが肝要であるので、 国際連合の諸国民は、国連憲章において、基本的人権、人間の尊厳及び価値並び に男女の同権についての信念を再確認し、かつ、一層大きな自由のうちで社会的 進歩と生活水準の向上とを促進することを決意したので、 加盟国は、国際連合と協力して、人権及び基本的自由の普遍的な尊重及び遵守の 促進を達成することを誓約したので、 これらの権利及び自由に対する共通の理解は、この誓約を完全にするためにもっ とも重要であるので、 よって、ここに、国連総会は、 社会の各個人及び各機関が、この世界人権宣言を常に念頭に置きながら、加盟国 自身の人民の間にも、また、加盟国の管轄下にある地域の人民の間にも、これら の権利と自由との尊重を指導及び教育によって促進すること並びにそれらの普遍 的措置によって確保することに努力するように、すべての人民とすべての国とが 達成すべき共通の基準として、この人権宣言を公布する。 第1条 すべての人間は、生まれながらにして自由であり、かつ、尊厳と権利と について平等である。人間は、理性と良心とを授けられており、互いに同 胞の精神をもって行動しなければならない。 第2条 すべて人は、人種、皮膚の色、性、言 doclayout-0.4.0.1/udhr/txt/kkn.txt0000644000000000000000000000366307346545000015163 0ustar0000000000000000세계인권선언 전문 모든 인류 구성원의 천부의 존엄성과 동등하고 양도할 수 없는 권리를 인정하는 것이 세계의 자유 , 정의 및 평화의 기초이며 , 인권에 대한 무시와 경멸이 인류의 양심을 격분시키는 만행을 초래하였으며 , 인간이 언론과 신앙의 자유, 그리고 공포와 결핍으로부터의 자유를 누릴 수 있는 세계의 도래가 모든 사람들의 지고한 열망으로서 천명되어 왔으며 , 인간이 폭정과 억압에 대항하는 마지막 수단으로서 반란을 일으키도록 강요받지 않으려면 , 법에 의한 통치에 의하여 인권이 보호되어야 하는 것이 필수적이며 , 국가간에 우호관계의 발전을 증진하는 것이 필수적이며 , 국제연합의 모든 사람들은 그 헌장에서 기본적 인권, 인간의 존엄과 가치 , 그리고 남녀의 동등한 권리에 대한 신념을 재확인하였으며, 보다 폭넓은 자유속에서 사회적 진보와 보다 나은 생활수준을 증진하기로 다짐하였고, 회원국들은 국제연합과 협력하여 인권과 기본적 자유의 보편적 존중과 준수를 증진할 것을 스스로 서약하였으며 , 이러한 권리와 자유에 대한 공통의 이해가 이 서약의 완전한 이행을 위하여 가장 중요하므로 , 이에, 국제연합총회는, 모든 개인과 사회 각 기관이 이 선언을 항상 유념하면서 학습 및 교육을 통하여 이러한 권리와 자유에 대한 존중을 증진하기 위하여 노력하며 , 국내적 그리고 국제적인 점진적 조치를 통하여 회원국 국민들 자신과 그 관할 영토의 국민들 사이에서 이러한 권리와 자유가 보편적이고 효과적으로 인식되고 준수되도록 노력하도록 하기 위하여 , 모든 사람과 국가가 성취하여야 할 공통의 기준으로서 이 세계인권선언을 선포한다. 제1조 doclayout-0.4.0.1/udhr/txt/rus.txt0000644000000000000000000000266207346545000015207 0ustar0000000000000000Всеобщая декларация прав человека Принята и провозглашена резолюцией 217 А (III) Генеральной Ассамблеи от 10 декабря 1948 года. ПРЕАМБУЛА Принимая во внимание, что признание достоинства, присущего всем членам человеческой семьи, и равных и неотъемлемых прав их является основой свободы, справедливости и всеобщего мира; и принимая во внимание, что пренебрежение и презрение к правам человека привели к варварским актам, которые возмущают совесть человечества, и что создание такого мира, в котором люди будут иметь свободу слова и убеждений и будут свободны от страха и нужды, провозглашено как высокое стремление людей; и принимая во внимание, что необходимо, чтобы права человека охранялись властью закона в целях обеспечения того, чтобы человек не был вынужден прибегать, в качестве последнего сре doclayout-0.4.0.1/udhr/txt/tcv.txt0000644000000000000000000000422507346545000015167 0ustar0000000000000000 மனித உரிமைகள் பற்றிய உலகப் பிரகடனம் 1948 திசெம்பர் மாதம் 10ஆம் தேதி, ஐக்கிய நாடுகள் பொதுச்சபை, மனித உரிமை பற்றிய உலகப் பிரகடனத்தை ஏற்றுச் சாற்றியது. அப்பிரகடனம் மேல்வரும் பக்கங்களில் முற்றுமுழுதாகத் தரப்படுகின்றது. வரலாற்று முக்கியத்துவம் வாய்ந்த அந்நடவடிக்கையின் பின்னர் சபையானது, பிரகடனத்தை வெளியிடுமாறும், அவ்வாறு வெளியிடப் பெற்றதை "நாடுகள் அல்லது ஆள்பலங்களின் அரசியல் அந்தஸ்துக் காரணமாக எவ்வித வேறுபாடுமில்லாவகையில் எல்லாப் பள்ளிகளிலும் பிற கல்வி நிறுவனங்களிலும் பரப்பவும், காட்சிக்கு வைக்கவும், வாசிக்கச் செய்யவும், விளக்கவும்" செயற்படுமாறு அங்கத்துவ நாடுகள் யாவற்றையும் கேட்டுக் கொண்டது. மனிதக் குடும்பத்தினைச் சேர்ந்த யாவரதும் உள்ளார்ந்த மரியாதையையும், அவர்கள் யாவரதும் சமமான, மாற்றத்திற்குட்படுத்த முடியாத உரிமைகளையும் அங்கீகரித்தலே உலகத்தில் சுதந்திரம், நீதி, அமைதி என்பவற்றுக்கு அடிப்படையா doclayout-0.4.0.1/udhr/txt/tcw.txt0000644000000000000000000000427507346545000015175 0ustar0000000000000000మానవస్వత్వముల సార్వలౌకిక ప్రకటన ప్రస్తావన మానవకుటంబమునందలి వ్యక్తులందరికిని గల ఆజన్మసిద్ధమైన ప్రతిపత్తిని, అనన్యాక్రాంతములగు సమానస్వత్వములను అంగీకరించుట ప్రపంచమున స్వాతంత్ర్య, న్యాయ, శాంతుల స్థాపనకు పునాది యగును. మానవజాతి అంతఃకరణమును క్షోభపెట్టిన ఘోరచర్యలు, మానవస్వత్వములయెడ గలిగిన అవజ్ఞా నిరసన భావముల పరిణామమనియు, వాక్స్వాతంత్ర్య ప్రత్యయస్వాతంత్ర్యములను, భయవిముక్తిని, దారిద్ర్యవిముక్తిని మానవులు ఎల్లరు అనుభవించుటకు వీలగు లోకముయొక్క ఆవిర్భావమే సామాన్యప్రజానీకముయొక్క మహోన్నతమైన అభికాంక్షయనియు ఉద్ఘోషింపబడియున్నది. నిష్ఠురపాలనా ప్రజాపీడనములపై, గత్యంతరము లేక, మానవుడు తిరుగుబాటు చేయవలసిన బలాత్కార పరిస్థితులు ఏర్పడకుండనుండవలయునన్నచో మానవస్వత్వములు విధినియమముచే పరిరక్షితములగుట ముఖ్యము. రాష్ట్రముల మధ్య సౌహార్దబాన్ధవ్యముల అభివృద్ధికి దోహదము చేయుట అత్యావశ్యకము. మానవుల మూలస్వత్వముల యందును, వ్యక్తుల ప్ర doclayout-0.4.0.1/udhr/txt/thj.txt0000644000000000000000000000446307346545000015164 0ustar0000000000000000 ปฏิญญาสากลว่าด้วยสิทธิมนุษยชน คำปรารภ โดยที่การยอมรับนับถือเกียรติศักดิ์ประจำตัว และสิทธิเท่าเทียมกันและโอนมิได้ของบรรดา สมาชิก ทั้ง หลายแห่งครอบครัว มนุษย์เป็นหลักมูลเหตุแห่งอิสรภาพ ความยุติธรรม และสันติภาพในโลก โดยที่การไม่นำพาและการเหยียดหยามต่อสิทธิมนุษยชน ยังผลให้มีการหระทำอันป่าเถื่อน ซี่งเป็นการละเมิดมโนธรรมของมนุษยชาติอย่างร้ายแรง และใต้[ได้]มีการประกาศว่า ปณิธานสูงสุดของสามัญชนได้แก่ความต้องการให้มนุษย์มีชีวิตอยู่ในโลกด้วยอิสรภาพในการพูด และความเชื่อถือ และอิสรภาพพ้นจากความหวาดกลัวและความต้องการ โดยที่เป็นการจำเป็นอย่างยิ่งที่สิทธิมนุษยชนควรได้รับความคุ้มครองโดยหลักบังคับของกฎหมาย ถ้าไม่ประสงค์จะให้คนตกอยู่ในบังคับให้หันเข้าหาการขบถขัดขืนต่อทรราชและการกดขี่เป็นวิถีทางสุดท้าย โดยที่ประชากรแห่งสหประชาชาติได้ยืนยันไว้ในกฎบัตรถึงความเชื่อมั่นในสิทธิมนุษยชนอันเป็นหลักมูล ในเกียรติศักด doclayout-0.4.0.1/udhr/txt/vie.txt0000644000000000000000000000177207346545000015162 0ustar0000000000000000Tuyên ngôn toàn thế giới về nhân quyền của Liên Hợp Quốc Được Đại hội đồng Liên Hợp Quốc thông qua và công bố theo Nghị quyết số 217 (III), ngày 10 tháng 12 năm 1948. Lời nói đầu Với nhận thức rằng: Việc thừa nhận nhân phẩm vốn có, các quyền bình đẳng và không thể tách rời của mọi thành viên trong gia đình nhân loại là cơ sở cho tự do, công bằng và hoà bình trên thế giới, Sự xâm phạm và coi thường nhân quyền đã dẫn đến những hành động tàn bạo xâm phạm tới lương tâm của nhân loại, và việc xây dựng một thế giới trong đó con người được tự do ngôn luận và tín ngưỡng, không còn phải chịu nỗi sợ hãi và cùng cực được coi là nguyện vọng cao cả nhất của loài người, Nhân quyền phải được pháp