markdown-0.1.13.2/0000755000000000000000000000000012524343246011751 5ustar0000000000000000markdown-0.1.13.2/LICENSE0000644000000000000000000000276712524343246012772 0ustar0000000000000000Copyright (c)2011, Michael Snoyman All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of Michael Snoyman nor the names of other contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
markdown-0.1.13.2/markdown.cabal0000644000000000000000000000407612524343246014566 0ustar0000000000000000Name: markdown Version: 0.1.13.2 Synopsis: Convert Markdown to HTML, with XSS protection Description: This library leverages existing high-performance libraries (attoparsec, blaze-html, text, and conduit), and should integrate well with existing codebases. Homepage: https://github.com/snoyberg/markdown License: BSD3 License-file: LICENSE Author: Michael Snoyman Maintainer: michael@snoyman.com Category: Web Build-type: Simple Extra-source-files: test/examples/*.html , test/examples/*.md , test/Tests/*.html , test/Tests/*.text Cabal-version: >=1.8 Library Exposed-modules: Text.Markdown Text.Markdown.Block Text.Markdown.Inline other-modules: Text.Markdown.Types Build-depends: base >= 4 && < 5 , blaze-markup >= 0.6 , blaze-html >= 0.4 , attoparsec >= 0.10 , transformers >= 0.2.2 , conduit >= 1.1 , conduit-extra >= 1.1 , text , data-default >= 0.3 , xss-sanitize >= 0.3.3 , containers ghc-options: -Wall test-suite test hs-source-dirs: test main-is: main.hs other-modules: Block Inline type: exitcode-stdio-1.0 ghc-options: -Wall build-depends: markdown , base >= 4 && < 5 , hspec >= 1.3 , blaze-html , text , transformers , conduit , conduit-extra , containers , filepath , directory source-repository head type: git location: git://github.com/snoyberg/markdown.git markdown-0.1.13.2/Setup.hs0000644000000000000000000000005612524343246013406 0ustar0000000000000000import Distribution.Simple main = defaultMain markdown-0.1.13.2/test/0000755000000000000000000000000012524343246012730 5ustar0000000000000000markdown-0.1.13.2/test/main.hs0000644000000000000000000002557112524343246014222 0ustar0000000000000000{-# LANGUAGE OverloadedStrings #-} import Text.Blaze.Html (toHtml) import Text.Blaze.Html5 (figure) import Test.Hspec import Text.Markdown import Data.Text.Lazy (Text, unpack, snoc, fromStrict) import qualified Data.Text as T import qualified Data.Text.IO as TIO import qualified 
Data.Text.Lazy as TL import Text.Blaze.Html.Renderer.Text (renderHtml) import Control.Monad (forM_) import qualified Data.Set as Set import qualified Data.Map as Map import Data.List (isInfixOf, isSuffixOf) import Data.Maybe (fromMaybe) import System.Directory (getDirectoryContents) import System.FilePath ((), replaceExtension) import Block import Inline check :: Text -> Text -> Expectation check html md = renderHtml (markdown def md) `shouldBe` html checkSet :: MarkdownSettings -> Text -> Text -> Expectation checkSet set html md = renderHtml (markdown set md) `shouldBe` html check' :: Text -> Text -> Expectation check' html md = renderHtml (markdown def { msXssProtect = False } md) `shouldBe` html checkNoNL :: Text -> Text -> Expectation checkNoNL html md = f (renderHtml $ markdown def { msXssProtect = False } md) `shouldBe` f html where f = TL.filter (/= '\n') -- FIXME add quickcheck: all input is valid main :: IO () main = do examples <- getExamples gruber <- getGruber hspec $ do describe "block" blockSpecs describe "inline" inlineSpecs describe "line break" $ do it "is inserted for a single newline after two spaces" $ check "

Hello
World!

" "Hello \nWorld!" it "is also inserted for a single CRLF after two spaces" $ check "

Hello
World!

" "Hello \r\nWorld!" it "preserves quote nesting of the previous line" $ check "

Q1
Q2

P2

" "> Q1 \nQ2\n\nP2" it "consumes all trailing whitespace on the previous line" $ check "

Hello
World!

" "Hello \nWorld!" describe "paragraphs" $ do it "simple" $ check "

Hello World!

" "Hello World!" it "multiline" $ check "

Hello\nWorld!

" "Hello\nWorld!" it "multiple" $ check "

Hello

World!

" "Hello\n\nWorld!" describe "italics" $ do it "simple" $ check "

foo

" "*foo*" it "hanging" $ check "

foo *

" "*foo* *" it "two" $ check "

foo bar

" "*foo* *bar*" describe "italics under" $ do it "simple" $ check "

foo

" "_foo_" it "hanging" $ check "

foo _

" "_foo_ _" it "two" $ check "

foo bar

" "_foo_ _bar_" describe "bold" $ do it "simple" $ check "

foo

" "**foo**" it "hanging" $ check "

foo **

" "**foo** **" it "two" $ check "

foo bar

" "**foo** **bar**" describe "bold under" $ do it "simple" $ check "

foo

" "__foo__" it "hanging" $ check "

foo __

" "__foo__ __" it "two" $ check "

foo bar

" "__foo__ __bar__" describe "html" $ do it "simple" $ check "
Hello
" "
Hello
" it "dangerous" $ check "
Hello
" "
Hello
" it "dangerous and allowed" $ check' "
Hello
" "
Hello
" let ml = "
foo\nbar\nbaz
" it "multiline" $ check ml ml let close = "
foo\nbar\nbaz" it "autoclose" $ check ml close let close2 = "
foo\nbar\nbaz\n\nparagraph" it "autoclose 2" $ check "
foo\nbar\nbaz

paragraph

" close2 describe "inline code" $ do it "simple" $ check "

foo bar baz

" "foo `bar` baz" describe "code block" $ do it "simple" $ check "
foo\n bar\nbaz
" " foo\n bar\n baz" it "custom renderer" $ checkSet def { msBlockCodeRenderer = (\_ (u,_) -> figure (toHtml u)) } "
foo\n bar\nbaz
" "```haskell\nfoo\n bar\nbaz\n```" describe "escaping" $ do it "everything" $ check "

*foo_barbaz\\`bin

" "\\*foo\\_bar_baz_\\\\\\`bin" describe "bullets" $ do it "simple" $ check "" "* foo\n* bar\n* baz\n" describe "numbers" $ do it "simple" $ check "
  1. foo
  2. bar
  3. baz
" "5. foo\n2. bar\n1. baz\n" describe "headings" $ do it "hashes" $ check "

foo

bar

baz

" "# foo\n\n## bar\n\n###baz" it "trailing hashes" $ check "

foo

" "# foo ####" it "underline" $ check "

foo

bar

" "foo\n=============\n\nbar\n----------------\n" describe "headings with ID" $ do let withHeadingId = def { msAddHeadingId = True } it "without spaces" $ checkSet withHeadingId "

foo

bar

baz

" "# foo\n\n## bar\n\n###baz" it "with spaces" $ checkSet withHeadingId "

Executive summary

" "# Executive summary" it "with special characters" $ checkSet withHeadingId "

Executive summary .!@#$%^*()-_=:

" "# Executive summary .!@#$%^*()-_=:" describe "blockquotes" $ do it "simple" $ check "

foo

bar
" "> foo\n>\n> bar" describe "links" $ do it "simple" $ check "

bar

" "[bar](foo)" it "title" $ check "

bar

" "[bar](foo \"baz\")" it "escaped href" $ check "

bar

" "[bar](foo\\) \"baz\")" it "escaped title" $ check "

bar

" "[bar](foo\\) \"baz\\\"\")" it "inside a paragraph" $ check "

Hello bar World

" "Hello [bar](foo) World" it "not a link" $ check "

Not a [ link

" "Not a [ link" it "new tab" $ checkSet def { msLinkNewTab = True } "

bar

" "[bar](foo)" {- describe "github links" $ do it "simple" $ check "

bar

" "[[bar|foo]]" it "no link text" $ check "

foo

" "[[foo]]" it "escaping" $ check "

bar

" "[[bar|foo/baz bin]]" it "inside a list" $ check "" "* [[foo]]" -} describe "images" $ do it "simple" $ check "

\"foo\"

" "![foo](http://link.to/image.jpg)" it "title" $ check "

\"foo\"

" "![foo](http://link.to/image.jpg \"bar\")" it "inside a paragraph" $ check "

Hello \"foo\" World

" "Hello ![foo](http://link.to/image.jpg) World" it "not an image" $ check "

Not an ![ image

" "Not an ![ image" describe "rules" $ do let options = concatMap (\t -> [t, snoc t '\n']) [ "* * *" , "***" , "*****" , "- - -" , "---------------------------------------" , "----------------------------------" ] forM_ options $ \o -> it (unpack o) $ check "
" o describe "html" $ do it "inline" $ check "

foo
bar

" "foo
bar" it "inline xss" $ check "

foo
bar

" "foo
bar" it "block" $ check "
hello world
" "
hello world
" it "block xss" $ check "alert('evil')" "" it "should be escaped" $ check "

1 < 2

" "1 < 2" it "standalone" $ checkSet def { msStandaloneHtml = Set.fromList ["", ""], msXssProtect = False } "
foo\nbar
" "\n```haskell\nfoo\nbar\n```\n\n" describe "fencing" $ do it "custom fencing" $ checkSet def { msFencedHandlers = Map.union (htmlFencedHandler "@@@" (\clazz -> T.concat ["
"]) (const "
")) (msFencedHandlers def) } "

foo

bar

" "@@@ someclass\nfoo\n\n> bar\n@@@" describe "footnotes" $ do it "inline" $ check "

[1]hello

" "{1}hello" it "references" $ check "

[1]hello

" "{^1}hello" describe "examples" $ sequence_ examples describe "John Gruber's test suite" $ sequence_ gruber it "comments without spaces #22" $ check "" "" getExamples :: IO [Spec] getExamples = do files <- getDirectoryContents dir mapM go $ filter (".md" `isSuffixOf`) files where dir = "test/examples" go basename = do let fp = dir basename input <- TIO.readFile fp output <- TIO.readFile $ replaceExtension fp "html" let (checker, stripper) | "-spec" `isInfixOf` fp = (check', dropFinalLF) | otherwise = (check, T.strip) return $ it basename $ checker (fromStrict $ stripper output) (fromStrict input) dropFinalLF t = fromMaybe t $ T.stripSuffix "\n" t getGruber :: IO [Spec] getGruber = do files <- getDirectoryContents dir mapM go $ filter (".text" `isSuffixOf`) files where dir = "test/Tests" go basename = do let fp = dir basename input <- TIO.readFile fp output <- TIO.readFile $ replaceExtension fp "html" return $ it basename $ checkNoNL (fromStrict $ T.strip output) (fromStrict input) markdown-0.1.13.2/test/Inline.hs0000644000000000000000000000602212524343246014502 0ustar0000000000000000{-# LANGUAGE OverloadedStrings #-} module Inline ( inlineSpecs ) where import Test.Hspec import Text.Markdown.Inline import Data.Text (Text) import Data.Monoid (mempty) check :: Text -> [Inline] -> Expectation check md ins = toInline mempty md `shouldBe` ins inlineSpecs :: Spec inlineSpecs = do describe "raw text" $ do it "simple" $ check "raw text" [InlineText "raw text"] it "multiline" $ check "raw\ntext" [InlineText "raw\ntext"] describe "italic" $ do it "asterisk" $ check "raw *text*" [InlineText "raw ", InlineItalic [InlineText "text"]] it "underline" $ check "raw _text_" [InlineText "raw ", InlineItalic [InlineText "text"]] it "multiline" $ check "*raw\ntext*" [InlineItalic [InlineText "raw\ntext"]] it "mismatched" $ check "*foo* *bar" [InlineItalic [InlineText "foo"], InlineText " *bar"] describe "bold" $ do it "asterisk" $ check "raw **text**" [InlineText "raw ", InlineBold 
[InlineText "text"]] it "underline" $ check "raw __text__" [InlineText "raw ", InlineBold [InlineText "text"]] it "multiline" $ check "**raw\ntext**" [InlineBold [InlineText "raw\ntext"]] it "mismatched" $ check "**foo** *bar" [InlineBold [InlineText "foo"], InlineText " *bar"] describe "nested" $ do it "bold inside italic" $ check "*i __ib__ i*" [InlineItalic [InlineText "i ", InlineBold [InlineText "ib"], InlineText " i"]] it "bold inside italic swap" $ check "_i **ib** i_" [InlineItalic [InlineText "i ", InlineBold [InlineText "ib"], InlineText " i"]] it "italic inside bold" $ check "**b _ib_ b**" [InlineBold [InlineText "b ", InlineItalic [InlineText "ib"], InlineText " b"]] it "italic inside bold swap" $ check "__b *ib* b__" [InlineBold [InlineText "b ", InlineItalic [InlineText "ib"], InlineText " b"]] describe "code" $ do it "takes all characters" $ check "`foo*__*bar` baz`" [ InlineCode "foo*__*bar" , InlineText " baz`" ] describe "escaping" $ do it "asterisk" $ check "\\*foo*\\\\" [InlineText "*foo*\\"] describe "links" $ do it "simple" $ check "[bar](foo)" [InlineLink "foo" Nothing [InlineText "bar"]] it "title" $ check "[bar](foo \"baz\")" [InlineLink "foo" (Just "baz") [InlineText "bar"]] {- it "escaped href" $ check "

bar

" "[bar](foo\\) \"baz\")" it "escaped title" $ check "

bar

" "[bar](foo\\) \"baz\\\"\")" it "inside a paragraph" $ check "

Hello bar World

" "Hello [bar](foo) World" it "not a link" $ check "

Not a [ link

" "Not a [ link" -} markdown-0.1.13.2/test/Block.hs0000644000000000000000000000710312524343246014317 0ustar0000000000000000{-# LANGUAGE OverloadedStrings #-} module Block ( blockSpecs ) where import Test.Hspec import Data.Text (Text) import Data.Conduit import qualified Data.Conduit.List as CL import Text.Markdown (def, MarkdownSettings(..)) import Text.Markdown.Block import Data.Functor.Identity (runIdentity) checkWith :: MarkdownSettings -> Text -> [Block Text] -> Expectation checkWith ms md blocks = runIdentity (yield md $$ toBlocks ms =$ CL.consume) `shouldBe` blocks check :: Text -> [Block Text] -> Expectation check = checkWith def blockSpecs :: Spec blockSpecs = do describe "tilde code" $ do it "simple" $ check "~~~haskell\nfoo\n\nbar\n~~~" [BlockCode (Just "haskell") "foo\n\nbar"] it "no lang" $ check "~~~\nfoo\n\nbar\n~~~" [BlockCode Nothing "foo\n\nbar"] it "no close" $ check "~~~\nfoo\n\nbar\n" [BlockPara " ~~~\nfoo", BlockPara "bar"] describe "list" $ do it "simple unordered" $ check "* foo\n\n* bar\n\n*\t\tqux" [ BlockList Unordered (Right [BlockPara "foo"]) , BlockList Unordered (Right [BlockPara "bar"]) , BlockList Unordered (Right [BlockPara "qux"]) ] it "simple ordered" $ check "1. foo\n\n3. bar\n\n17.\t\tqux" [ BlockList Ordered (Right [BlockPara "foo"]) , BlockList Ordered (Right [BlockPara "bar"]) , BlockList Ordered (Right [BlockPara "qux"]) ] it "nested" $ check "* foo\n* \n 1. bar\n 2. 
baz" [ BlockList Unordered (Left "foo") , BlockList Unordered (Right [ BlockList Ordered $ Left "bar" , BlockList Ordered $ Left "baz" ]) ] it "with blank" $ check "* foo\n\n bar\n\n* baz" [ BlockList Unordered $ Right [ BlockPara "foo" , BlockPara "bar" ] , BlockList Unordered $ Right [ BlockPara "baz" ] ] it "without whitespace" $ check "*foo\n\n1.bar" [ BlockPara "*foo" , BlockPara "1.bar" ] describe "blockquote" $ do it "simple" $ check "> foo\n>\n> * bar" [ BlockQuote [ BlockPara "foo" , BlockList Unordered $ Left "bar" ] ] it "blank" $ check "> foo\n\n> * bar" [ BlockQuote [BlockPara "foo"] , BlockQuote [BlockList Unordered $ Left "bar"] ] it "require blank before blockquote" $ check "foo\n> bar" [ BlockPara "foo\n> bar" ] it "no blank before blockquote" $ checkWith def { msBlankBeforeBlockquote = False } "foo\n> bar" [ BlockPara "foo", BlockQuote [BlockPara "bar"]] describe "indented code" $ do it "simple" $ check " foo\n bar\n" [ BlockCode Nothing "foo\nbar" ] it "blank" $ check " foo\n\n bar\n" [ BlockCode Nothing "foo\n\nbar" ] it "extra space" $ check " foo\n\n bar\n" [ BlockCode Nothing "foo\n\n bar" ] describe "html" $ do it "simple" $ check "

Hello world!

" [ BlockHtml "

Hello world!

" ] it "multiline" $ check "

Hello world!\n

" [ BlockHtml "

Hello world!\n

" ] markdown-0.1.13.2/test/examples/0000755000000000000000000000000012524343246014546 5ustar0000000000000000markdown-0.1.13.2/test/examples/sublists.html0000644000000000000000000000014312524343246017302 0ustar0000000000000000
  1. No encounters
  2. Encounters
    1. First kind
    2. Second kind
markdown-0.1.13.2/test/examples/multiline-paragraphs.md0000644000000000000000000000037412524343246021224 0ustar0000000000000000This is a multiline paragraph. * Multiline paragraph in a list. * The purpose of classy prelude is *not* to encourage writing polymorphic code based on the typeclasses provided. Though it's certainly possible to write code such as: markdown-0.1.13.2/test/examples/entities.md0000644000000000000000000000010012524343246016703 0ustar00000000000000001 < 2 & 2 > 1, also 1 < 2 & 2 > 1   ý &#xP; markdown-0.1.13.2/test/examples/leading-indent.md0000644000000000000000000000005312524343246017750 0ustar0000000000000000``` some random code and other code ``` markdown-0.1.13.2/test/examples/html-blocks-spec9.html0000644000000000000000000000003312524343246020670 0ustar0000000000000000

Foo

bar
markdown-0.1.13.2/test/examples/html-blocks-spec7.html0000644000000000000000000000015612524343246020674 0ustar0000000000000000 markdown-0.1.13.2/test/examples/html-blocks-spec10.md0000644000000000000000000000002712524343246020377 0ustar0000000000000000
bar
*foo* markdown-0.1.13.2/test/examples/html-blocks-spec9.md0000644000000000000000000000002512524343246020325 0ustar0000000000000000Foo
bar
markdown-0.1.13.2/test/examples/html-blocks-spec5.md0000644000000000000000000000003012524343246020315 0ustar0000000000000000 markdown-0.1.13.2/test/examples/html-blocks-spec10.html0000644000000000000000000000002712524343246020743 0ustar0000000000000000
bar
*foo* markdown-0.1.13.2/test/examples/html-blocks-spec8.md0000644000000000000000000000004112524343246020322 0ustar0000000000000000 markdown-0.1.13.2/test/examples/lists-code.html0000644000000000000000000000014312524343246017500 0ustar0000000000000000
  1. hello world
  2. hello

    data Foo
markdown-0.1.13.2/test/examples/double-backtick.md0000644000000000000000000000021212524343246020106 0ustar0000000000000000This is a paragraph with `` double `backtick` inside `` with space. This is a paragraph with ``double `backtick` inside`` without space. markdown-0.1.13.2/test/examples/html-blocks-spec3.md0000644000000000000000000000004612524343246020322 0ustar0000000000000000
*Markdown*
markdown-0.1.13.2/test/examples/html-blocks-spec4.md0000644000000000000000000000004212524343246020317 0ustar0000000000000000
``` c int x = 33; ``` markdown-0.1.13.2/test/examples/lazy.md0000644000000000000000000000006412524343246016047 0ustar0000000000000000> this is lazy 1. This is a list 2. Another item markdown-0.1.13.2/test/examples/lists-code.md0000644000000000000000000000011112524343246017127 0ustar00000000000000001. hello world 2. hello ```haskell data Foo ``` markdown-0.1.13.2/test/examples/closing-tags.md0000644000000000000000000000001512524343246017456 0ustar0000000000000000*

foo

markdown-0.1.13.2/test/examples/tilde-code.md0000644000000000000000000000003412524343246017076 0ustar0000000000000000~~~haskell foo bar baz ~~~ markdown-0.1.13.2/test/examples/html-blocks-spec4.html0000644000000000000000000000004212524343246020663 0ustar0000000000000000
``` c int x = 33; ``` markdown-0.1.13.2/test/examples/multiline-paragraphs.html0000644000000000000000000000042712524343246021567 0ustar0000000000000000

This is a multiline paragraph.

markdown-0.1.13.2/test/examples/html-blocks-spec1.md0000644000000000000000000000011012524343246020310 0ustar0000000000000000
hi
okay. markdown-0.1.13.2/test/examples/fence-whitespace.html0000644000000000000000000000004612524343246020646 0ustar0000000000000000

foo

bar
markdown-0.1.13.2/test/examples/html-blocks-spec3.html0000644000000000000000000000005612524343246020667 0ustar0000000000000000

Markdown

markdown-0.1.13.2/test/examples/sublists.md0000644000000000000000000000010612524343246016735 0ustar00000000000000001. No encounters 2. Encounters 1. First kind 2. Second kind markdown-0.1.13.2/test/examples/lazy.html0000644000000000000000000000014212524343246016410 0ustar0000000000000000

this is lazy

  1. This is a list
  2. Another item
markdown-0.1.13.2/test/examples/html-blocks-spec6.html0000644000000000000000000000003012524343246020662 0ustar0000000000000000 markdown-0.1.13.2/test/examples/double-backtick.html0000644000000000000000000000024612524343246020461 0ustar0000000000000000

This is a paragraph with double `backtick` inside with space.

This is a paragraph with double `backtick` inside without space.

markdown-0.1.13.2/test/examples/html-blocks-spec6.md0000644000000000000000000000003012524343246020316 0ustar0000000000000000 markdown-0.1.13.2/test/examples/leading-indent.html0000644000000000000000000000007312524343246020316 0ustar0000000000000000
   some random code
and other code
markdown-0.1.13.2/test/examples/html-blocks-spec8.html0000644000000000000000000000007112524343246020671 0ustar0000000000000000
<!-- foo -->
markdown-0.1.13.2/test/examples/entities.html0000644000000000000000000000011512524343246017255 0ustar0000000000000000

1 < 2 & 2 > 1, also 1 < 2 & 2 > 1   ý &#xP;

markdown-0.1.13.2/test/examples/html-blocks-spec5.html0000644000000000000000000000003012524343246020661 0ustar0000000000000000 markdown-0.1.13.2/test/examples/html-blocks-spec7.md0000644000000000000000000000015612524343246020330 0ustar0000000000000000 markdown-0.1.13.2/test/examples/html-blocks-spec2.html0000644000000000000000000000004312524343246020662 0ustar0000000000000000
*hello* markdown-0.1.13.2/test/examples/tilde-code.html0000644000000000000000000000006512524343246017446 0ustar0000000000000000
foo
bar

baz
markdown-0.1.13.2/test/examples/html-blocks-spec2.md0000644000000000000000000000004312524343246020316 0ustar0000000000000000
*hello* markdown-0.1.13.2/test/examples/html-blocks-spec11.html0000644000000000000000000000001712524343246020743 0ustar0000000000000000
  • foo

  • markdown-0.1.13.2/test/examples/list-blocks.html0000644000000000000000000000031112524343246017655 0ustar0000000000000000
    • This is a paragraph.

      Another paragraph.

    • Non-paragraph.

    • Item.

      • Sublist item.
        1. Item 1
        2. Item 2
    markdown-0.1.13.2/test/examples/html-blocks-spec1.html0000644000000000000000000000011512524343246020661 0ustar0000000000000000
    hi

    okay.

    markdown-0.1.13.2/test/examples/list-blocks.md0000644000000000000000000000021612524343246017315 0ustar0000000000000000* This is a paragraph. Another paragraph. * Non-paragraph. * Item. * Sublist item. * 1. Item 1 2. Item 2 markdown-0.1.13.2/test/Tests/0000755000000000000000000000000012524343246014032 5ustar0000000000000000markdown-0.1.13.2/test/Tests/Inline HTML (Advanced).text0000644000000000000000000000047012524343246020513 0ustar0000000000000000Simple block on one line:
    foo
    And nested without indentation:
    foo
    bar
    And with attributes:
    This was broken in 1.0.2b7:
    foo
    markdown-0.1.13.2/test/Tests/Links, inline style.text0000644000000000000000000000110212524343246020446 0ustar0000000000000000Just a [URL](/url/). [URL and title](/url/ "title"). [URL and title](/url/ "title preceded by two spaces"). [URL and title](/url/ "title preceded by a tab"). [URL and title](/url/ "title has spaces afterward" ). [URL wrapped in angle brackets](). [URL w/ angle brackets + title]( "Here's the title"). [Empty](). [With parens in the URL](http://en.wikipedia.org/wiki/WIMP_(computing)) (With outer parens and [parens in url](/foo(bar))) [With parens in the URL](/foo(bar) "and a title") (With outer parens and [parens in url](/foo(bar) "and a title")) markdown-0.1.13.2/test/Tests/Links, reference style.html0000644000000000000000000000215112524343246021113 0ustar0000000000000000

    Foo bar.

    Foo bar.

    Foo bar.

    With embedded [brackets].

    Indented once.

    Indented twice.

    Indented thrice.

    Indented [four][] times.

    [four]: /url
    

    this should work

    So should this.

    And this.

    And this.

    And this.

    But not [that] [].

    Nor [that][].

    Nor [that].

    [Something in brackets like this should work]

    [Same with this.]

    In this case, this points to something else.

    Backslashing should suppress [this] and [this].


    Here's one where the link breaks across lines.

    Here's another where the link breaks across lines, but with a line-ending space.

    markdown-0.1.13.2/test/Tests/Inline HTML (Simple).html0000644000000000000000000000143012524343246020214 0ustar0000000000000000

    Here's a simple block:

    foo

    This should be a code block, though:

    <div>
        foo
    </div>
    

    As should this:

    <div>foo</div>
    

    Now, nested:

    foo

    This should just be an HTML comment:

    Multiline:

    Code block:

    <!-- Comment -->
    

    Just plain comment, with trailing spaces on the line:

    Code:

    <hr />
    

    Hr's:










    markdown-0.1.13.2/test/Tests/Nested blockquotes.text0000644000000000000000000000003012524343246020467 0ustar0000000000000000> foo > > > bar > > foo markdown-0.1.13.2/test/Tests/Code Spans.text0000644000000000000000000000024512524343246016660 0ustar0000000000000000`` Fix for backticks within HTML tag: like this Here's how you put `` `backticks` `` in a code span.markdown-0.1.13.2/test/Tests/Hard-wrapped paragraphs with list-like lines.text0000644000000000000000000000030512524343246025272 0ustar0000000000000000In Markdown 1.0.0 and earlier. Version 8. This line turns into a list item. Because a hard-wrapped line in the middle of a paragraph looked like a list item. Here's one with a bullet. * criminey. markdown-0.1.13.2/test/Tests/Links, inline style.html0000644000000000000000000000150012524343246020430 0ustar0000000000000000

    Just a URL.

    URL and title.

    URL and title.

    URL and title.

    URL and title.

    URL wrapped in angle brackets.

    URL w/ angle brackets + title.

    Empty.

    With parens in the URL

    (With outer parens and parens in url)

    With parens in the URL

    (With outer parens and parens in url)

    markdown-0.1.13.2/test/Tests/Backslash escapes.text0000644000000000000000000000234212524343246020240 0ustar0000000000000000These should all get escaped: Backslash: \\ Backtick: \` Asterisk: \* Underscore: \_ Left brace: \{ Right brace: \} Left bracket: \[ Right bracket: \] Left paren: \( Right paren: \) Greater-than: \> Hash: \# Period: \. Bang: \! Plus: \+ Minus: \- These should not, because they occur within a code block: Backslash: \\ Backtick: \` Asterisk: \* Underscore: \_ Left brace: \{ Right brace: \} Left bracket: \[ Right bracket: \] Left paren: \( Right paren: \) Greater-than: \> Hash: \# Period: \. Bang: \! Plus: \+ Minus: \- Nor should these, which occur in code spans: Backslash: `\\` Backtick: `` \` `` Asterisk: `\*` Underscore: `\_` Left brace: `\{` Right brace: `\}` Left bracket: `\[` Right bracket: `\]` Left paren: `\(` Right paren: `\)` Greater-than: `\>` Hash: `\#` Period: `\.` Bang: `\!` Plus: `\+` Minus: `\-` These should get escaped, even though they're matching pairs for other Markdown constructs: \*asterisks\* \_underscores\_ \`backticks\` This is a code span with a literal backslash-backtick sequence: `` \` `` This is a tag with unescaped backticks bar. This is a tag with backslashes bar. markdown-0.1.13.2/test/Tests/Code Blocks.html0000644000000000000000000000047012524343246016771 0ustar0000000000000000
    code block on the first line
    

    Regular text.

    code block indented by spaces
    

    Regular text.

    the lines in this block  
    all contain trailing spaces  
    

    Regular Text.

    code block on the last line
    
    markdown-0.1.13.2/test/Tests/Nested blockquotes.html0000644000000000000000000000013112524343246020451 0ustar0000000000000000

    foo

    bar

    foo

    markdown-0.1.13.2/test/Tests/Literal quotes in titles.text0000644000000000000000000000015412524343246021511 0ustar0000000000000000Foo [bar][]. Foo [bar](/url/ "Title with "quotes" inside"). [bar]: /url/ "Title with "quotes" inside" markdown-0.1.13.2/test/Tests/Inline HTML comments.html0000644000000000000000000000027412524343246020474 0ustar0000000000000000

    Paragraph one.

    Paragraph two.

    The end.

    markdown-0.1.13.2/test/Tests/Code Spans.html0000644000000000000000000000035412524343246016641 0ustar0000000000000000

    <test a=" content of attribute ">

    Fix for backticks within HTML tag: like this

    Here's how you put `backticks` in a code span.

    markdown-0.1.13.2/test/Tests/Tidyness.html0000644000000000000000000000020512524343246016517 0ustar0000000000000000

    A list within a blockquote:

    • asterisk 1
    • asterisk 2
    • asterisk 3
    markdown-0.1.13.2/test/Tests/Amps and angle encoding.html0000644000000000000000000000101412524343246021155 0ustar0000000000000000

    AT&T has an ampersand in their name.

    AT&T is another way to write it.

    This & that.

    4 < 5.

    6 > 5.

    Here's a link with an ampersand in the URL.

    Here's a link with an amersand in the link text: AT&T.

    Here's an inline link.

    Here's an inline link.

    markdown-0.1.13.2/test/Tests/Strong and em together.text0000644000000000000000000000015312524343246021122 0ustar0000000000000000***This is strong and em.*** So is ***this*** word. ___This is strong and em.___ So is ___this___ word. markdown-0.1.13.2/test/Tests/Literal quotes in titles.html0000644000000000000000000000024312524343246021470 0ustar0000000000000000

    Foo bar.

    Foo bar.

    markdown-0.1.13.2/test/Tests/Inline HTML comments.text0000644000000000000000000000024412524343246020511 0ustar0000000000000000Paragraph one. Paragraph two. The end. markdown-0.1.13.2/test/Tests/Ordered and unordered lists.html0000644000000000000000000000326712524343246022126 0ustar0000000000000000

    Unordered

    Asterisks tight:

    • asterisk 1
    • asterisk 2
    • asterisk 3

    Asterisks loose:

    • asterisk 1

    • asterisk 2

    • asterisk 3


    Pluses tight:

    • Plus 1
    • Plus 2
    • Plus 3

    Pluses loose:

    • Plus 1

    • Plus 2

    • Plus 3


    Minuses tight:

    • Minus 1
    • Minus 2
    • Minus 3

    Minuses loose:

    • Minus 1

    • Minus 2

    • Minus 3

    Ordered

    Tight:

    1. First
    2. Second
    3. Third

    and:

    1. One
    2. Two
    3. Three

    Loose using tabs:

    1. First

    2. Second

    3. Third

    and using spaces:

    1. One

    2. Two

    3. Three

    Multiple paragraphs:

    1. Item 1, graf one.

      Item 2. graf two. The quick brown fox jumped over the lazy dog's back.

    2. Item 2.

    3. Item 3.

    Nested

    • Tab

      • Tab

        • Tab

    Here's another:

    1. First
    2. Second:
      • Fee
      • Fie
      • Foe
    3. Third

    Same thing but with paragraphs:

    1. First

    2. Second:

      • Fee
      • Fie
      • Foe
    3. Third

    This was an error in Markdown 1.0.1:

    • this

      • sub

      that

    markdown-0.1.13.2/test/Tests/Blockquotes with code blocks.html0000644000000000000000000000031012524343246022272 0ustar0000000000000000

    Example:

    sub status {
        print "working";
    }
    

    Or:

    sub status {
        return "working";
    }
    
    markdown-0.1.13.2/test/Tests/Backslash escapes.html0000644000000000000000000000327012524343246020221 0ustar0000000000000000

    These should all get escaped:

    Backslash: \

    Backtick: `

    Asterisk: *

    Underscore: _

    Left brace: {

    Right brace: }

    Left bracket: [

    Right bracket: ]

    Left paren: (

    Right paren: )

    Greater-than: >

    Hash: #

    Period: .

    Bang: !

    Plus: +

    Minus: -

    These should not, because they occur within a code block:

    Backslash: \\
    
    Backtick: \`
    
    Asterisk: \*
    
    Underscore: \_
    
    Left brace: \{
    
    Right brace: \}
    
    Left bracket: \[
    
    Right bracket: \]
    
    Left paren: \(
    
    Right paren: \)
    
    Greater-than: \>
    
    Hash: \#
    
    Period: \.
    
    Bang: \!
    
    Plus: \+
    
    Minus: \-
    

    Nor should these, which occur in code spans:

    Backslash: \\

    Backtick: \`

    Asterisk: \*

    Underscore: \_

    Left brace: \{

    Right brace: \}

    Left bracket: \[

    Right bracket: \]

    Left paren: \(

    Right paren: \)

    Greater-than: \>

    Hash: \#

    Period: \.

    Bang: \!

    Plus: \+

    Minus: \-

    These should get escaped, even though they're matching pairs for other Markdown constructs:

    *asterisks*

    _underscores_

    `backticks`

    This is a code span with a literal backslash-backtick sequence: \`

    This is a tag with unescaped backticks bar.

    This is a tag with backslashes bar.

    markdown-0.1.13.2/test/Tests/Images.text0000644000000000000000000000067012524343246016150 0ustar0000000000000000![Alt text](/path/to/img.jpg) ![Alt text](/path/to/img.jpg "Optional title") Inline within a paragraph: [alt text](/url/). ![alt text](/url/ "title preceded by two spaces") ![alt text](/url/ "title has spaces afterward" ) ![alt text]() ![alt text]( "with a title"). ![Empty]() ![this is a stupid URL](http://example.com/(parens).jpg) ![alt text][foo] [foo]: /url/ ![alt text][bar] [bar]: /url/ "Title here"markdown-0.1.13.2/test/Tests/Inline HTML (Simple).text0000644000000000000000000000105112524343246020233 0ustar0000000000000000Here's a simple block:
    foo
    This should be a code block, though:
    foo
    As should this:
    foo
    Now, nested:
    foo
    This should just be an HTML comment: Multiline: Code block: Just plain comment, with trailing spaces on the line: Code:
    Hr's:








    markdown-0.1.13.2/test/Tests/Images.html0000644000000000000000000000122612524343246016126 0ustar0000000000000000

    Alt text

    Alt text

    Inline within a paragraph: alt text.

    alt text

    alt text

    alt text

    alt text.

    Empty

    this is a stupid URL

    alt text

    alt text

    markdown-0.1.13.2/test/Tests/Markdown Documentation - Basics.html0000644000000000000000000002225012524343246022577 0ustar0000000000000000

    Markdown: Basics

    Getting the Gist of Markdown's Formatting Syntax

    This page offers a brief overview of what it's like to use Markdown. The syntax page provides complete, detailed documentation for every feature, but Markdown should be very easy to pick up simply by looking at a few examples of it in action. The examples on this page are written in a before/after style, showing example syntax and the HTML output produced by Markdown.

    It's also helpful to simply try Markdown out; the Dingus is a web application that allows you to type your own Markdown-formatted text and translate it to XHTML.

    Note: This document is itself written using Markdown; you can see the source for it by adding '.text' to the URL.

    Paragraphs, Headers, Blockquotes

    A paragraph is simply one or more consecutive lines of text, separated by one or more blank lines. (A blank line is any line that looks like a blank line -- a line containing nothing but spaces or tabs is considered blank.) Normal paragraphs should not be indented with spaces or tabs.

    Markdown offers two styles of headers: Setext and atx. Setext-style headers for <h1> and <h2> are created by "underlining" with equal signs (=) and hyphens (-), respectively. To create an atx-style header, you put 1-6 hash marks (#) at the beginning of the line -- the number of hashes equals the resulting HTML header level.

    Blockquotes are indicated using email-style '>' angle brackets.

    Markdown:

    A First Level Header
    ====================
    
    A Second Level Header
    ---------------------
    
    Now is the time for all good men to come to
    the aid of their country. This is just a
    regular paragraph.
    
    The quick brown fox jumped over the lazy
    dog's back.
    
    ### Header 3
    
    > This is a blockquote.
    > 
    > This is the second paragraph in the blockquote.
    >
    > ## This is an H2 in a blockquote
    

    Output:

    <h1>A First Level Header</h1>
    
    <h2>A Second Level Header</h2>
    
    <p>Now is the time for all good men to come to
    the aid of their country. This is just a
    regular paragraph.</p>
    
    <p>The quick brown fox jumped over the lazy
    dog's back.</p>
    
    <h3>Header 3</h3>
    
    <blockquote>
        <p>This is a blockquote.</p>
    
        <p>This is the second paragraph in the blockquote.</p>
    
        <h2>This is an H2 in a blockquote</h2>
    </blockquote>
    

    Phrase Emphasis

    Markdown uses asterisks and underscores to indicate spans of emphasis.

    Markdown:

    Some of these words *are emphasized*.
    Some of these words _are emphasized also_.
    
    Use two asterisks for **strong emphasis**.
    Or, if you prefer, __use two underscores instead__.
    

    Output:

    <p>Some of these words <em>are emphasized</em>.
    Some of these words <em>are emphasized also</em>.</p>
    
    <p>Use two asterisks for <strong>strong emphasis</strong>.
    Or, if you prefer, <strong>use two underscores instead</strong>.</p>
    

    Lists

    Unordered (bulleted) lists use asterisks, pluses, and hyphens (*, +, and -) as list markers. These three markers are interchangeable; this:

    *   Candy.
    *   Gum.
    *   Booze.
    

    this:

    +   Candy.
    +   Gum.
    +   Booze.
    

    and this:

    -   Candy.
    -   Gum.
    -   Booze.
    

    all produce the same output:

    <ul>
    <li>Candy.</li>
    <li>Gum.</li>
    <li>Booze.</li>
    </ul>
    

    Ordered (numbered) lists use regular numbers, followed by periods, as list markers:

    1.  Red
    2.  Green
    3.  Blue
    

    Output:

    <ol>
    <li>Red</li>
    <li>Green</li>
    <li>Blue</li>
    </ol>
    

    If you put blank lines between items, you'll get <p> tags for the list item text. You can create multi-paragraph list items by indenting the paragraphs by 4 spaces or 1 tab:

    *   A list item.
    
        With multiple paragraphs.
    
    *   Another item in the list.
    

    Output:

    <ul>
    <li><p>A list item.</p>
    <p>With multiple paragraphs.</p></li>
    <li><p>Another item in the list.</p></li>
    </ul>
    

    Links

    Markdown supports two styles for creating links: inline and reference. With both styles, you use square brackets to delimit the text you want to turn into a link.

    Inline-style links use parentheses immediately after the link text. For example:

    This is an [example link](http://example.com/).
    

    Output:

    <p>This is an <a href="http://example.com/">
    example link</a>.</p>
    

    Optionally, you may include a title attribute in the parentheses:

    This is an [example link](http://example.com/ "With a Title").
    

    Output:

    <p>This is an <a href="http://example.com/" title="With a Title">
    example link</a>.</p>
    

    Reference-style links allow you to refer to your links by names, which you define elsewhere in your document:

    I get 10 times more traffic from [Google][1] than from
    [Yahoo][2] or [MSN][3].
    
    [1]: http://google.com/        "Google"
    [2]: http://search.yahoo.com/  "Yahoo Search"
    [3]: http://search.msn.com/    "MSN Search"
    

    Output:

    <p>I get 10 times more traffic from <a href="http://google.com/"
    title="Google">Google</a> than from <a href="http://search.yahoo.com/"
    title="Yahoo Search">Yahoo</a> or <a href="http://search.msn.com/"
    title="MSN Search">MSN</a>.</p>
    

    The title attribute is optional. Link names may contain letters, numbers and spaces, but are not case sensitive:

    I start my morning with a cup of coffee and
    [The New York Times][NY Times].
    
    [ny times]: http://www.nytimes.com/
    

    Output:

    <p>I start my morning with a cup of coffee and
    <a href="http://www.nytimes.com/">The New York Times</a>.</p>
    

    Images

    Image syntax is very much like link syntax.

    Inline (titles are optional):

    ![alt text](/path/to/img.jpg "Title")
    

    Reference-style:

    ![alt text][id]
    
    [id]: /path/to/img.jpg "Title"
    

    Both of the above examples produce the same output:

    <img src="/path/to/img.jpg" alt="alt text" title="Title" />
    

    Code

    In a regular paragraph, you can create code span by wrapping text in backtick quotes. Any ampersands (&) and angle brackets (< or >) will automatically be translated into HTML entities. This makes it easy to use Markdown to write about HTML example code:

    I strongly recommend against using any `<blink>` tags.
    
    I wish SmartyPants used named entities like `&mdash;`
    instead of decimal-encoded entities like `&#8212;`.
    

    Output:

    <p>I strongly recommend against using any
    <code>&lt;blink&gt;</code> tags.</p>
    
    <p>I wish SmartyPants used named entities like
    <code>&amp;mdash;</code> instead of decimal-encoded
    entities like <code>&amp;#8212;</code>.</p>
    

    To specify an entire block of pre-formatted code, indent every line of the block by 4 spaces or 1 tab. Just like with code spans, &, <, and > characters will be escaped automatically.

    Markdown:

    If you want your page to validate under XHTML 1.0 Strict,
    you've got to put paragraph tags in your blockquotes:
    
        <blockquote>
            <p>For example.</p>
        </blockquote>
    

    Output:

    <p>If you want your page to validate under XHTML 1.0 Strict,
    you've got to put paragraph tags in your blockquotes:</p>
    
    <pre><code>&lt;blockquote&gt;
        &lt;p&gt;For example.&lt;/p&gt;
    &lt;/blockquote&gt;
    </code></pre>
    
    markdown-0.1.13.2/test/Tests/Links, shortcut references.html0000755000000000000000000000040012524343246022007 0ustar0000000000000000

    This is the simple case.

    This one has a line break.

    This one has a line break with a line-ending space.

    this and the other

    markdown-0.1.13.2/test/Tests/Horizontal rules.html0000644000000000000000000000060112524343246020161 0ustar0000000000000000

    Dashes:





    ---
    




    - - -
    

    Asterisks:





    ***
    




    * * *
    

    Underscores:





    ___
    




    _ _ _
    
    markdown-0.1.13.2/test/Tests/Markdown Documentation - Syntax.html0000644000000000000000000007605112524343246022671 0ustar0000000000000000

    Markdown: Syntax

    Note: This document is itself written using Markdown; you can see the source for it by adding '.text' to the URL.


    Overview

    Philosophy

    Markdown is intended to be as easy-to-read and easy-to-write as is feasible.

    Readability, however, is emphasized above all else. A Markdown-formatted document should be publishable as-is, as plain text, without looking like it's been marked up with tags or formatting instructions. While Markdown's syntax has been influenced by several existing text-to-HTML filters -- including Setext, atx, Textile, reStructuredText, Grutatext, and EtText -- the single biggest source of inspiration for Markdown's syntax is the format of plain text email.

    To this end, Markdown's syntax is comprised entirely of punctuation characters, which punctuation characters have been carefully chosen so as to look like what they mean. E.g., asterisks around a word actually look like *emphasis*. Markdown lists look like, well, lists. Even blockquotes look like quoted passages of text, assuming you've ever used email.

    Inline HTML

    Markdown's syntax is intended for one purpose: to be used as a format for writing for the web.

    Markdown is not a replacement for HTML, or even close to it. Its syntax is very small, corresponding only to a very small subset of HTML tags. The idea is not to create a syntax that makes it easier to insert HTML tags. In my opinion, HTML tags are already easy to insert. The idea for Markdown is to make it easy to read, write, and edit prose. HTML is a publishing format; Markdown is a writing format. Thus, Markdown's formatting syntax only addresses issues that can be conveyed in plain text.

    For any markup that is not covered by Markdown's syntax, you simply use HTML itself. There's no need to preface it or delimit it to indicate that you're switching from Markdown to HTML; you just use the tags.

    The only restrictions are that block-level HTML elements -- e.g. <div>, <table>, <pre>, <p>, etc. -- must be separated from surrounding content by blank lines, and the start and end tags of the block should not be indented with tabs or spaces. Markdown is smart enough not to add extra (unwanted) <p> tags around HTML block-level tags.

    For example, to add an HTML table to a Markdown article:

    This is a regular paragraph.
    
    <table>
        <tr>
            <td>Foo</td>
        </tr>
    </table>
    
    This is another regular paragraph.
    

    Note that Markdown formatting syntax is not processed within block-level HTML tags. E.g., you can't use Markdown-style *emphasis* inside an HTML block.

    Span-level HTML tags -- e.g. <span>, <cite>, or <del> -- can be used anywhere in a Markdown paragraph, list item, or header. If you want, you can even use HTML tags instead of Markdown formatting; e.g. if you'd prefer to use HTML <a> or <img> tags instead of Markdown's link or image syntax, go right ahead.

    Unlike block-level HTML tags, Markdown syntax is processed within span-level tags.

    Automatic Escaping for Special Characters

    In HTML, there are two characters that demand special treatment: < and &. Left angle brackets are used to start tags; ampersands are used to denote HTML entities. If you want to use them as literal characters, you must escape them as entities, e.g. &lt;, and &amp;.

    Ampersands in particular are bedeviling for web writers. If you want to write about 'AT&T', you need to write 'AT&amp;T'. You even need to escape ampersands within URLs. Thus, if you want to link to:

    http://images.google.com/images?num=30&q=larry+bird
    

    you need to encode the URL as:

    http://images.google.com/images?num=30&amp;q=larry+bird
    

    in your anchor tag href attribute. Needless to say, this is easy to forget, and is probably the single most common source of HTML validation errors in otherwise well-marked-up web sites.

    Markdown allows you to use these characters naturally, taking care of all the necessary escaping for you. If you use an ampersand as part of an HTML entity, it remains unchanged; otherwise it will be translated into &amp;.

    So, if you want to include a copyright symbol in your article, you can write:

    &copy;
    

    and Markdown will leave it alone. But if you write:

    AT&T
    

    Markdown will translate it to:

    AT&amp;T
    

    Similarly, because Markdown supports inline HTML, if you use angle brackets as delimiters for HTML tags, Markdown will treat them as such. But if you write:

    4 < 5
    

    Markdown will translate it to:

    4 &lt; 5
    

    However, inside Markdown code spans and blocks, angle brackets and ampersands are always encoded automatically. This makes it easy to use Markdown to write about HTML code. (As opposed to raw HTML, which is a terrible format for writing about HTML syntax, because every single < and & in your example code needs to be escaped.)


    Block Elements

    Paragraphs and Line Breaks

    A paragraph is simply one or more consecutive lines of text, separated by one or more blank lines. (A blank line is any line that looks like a blank line -- a line containing nothing but spaces or tabs is considered blank.) Normal paragraphs should not be indented with spaces or tabs.

    The implication of the "one or more consecutive lines of text" rule is that Markdown supports "hard-wrapped" text paragraphs. This differs significantly from most other text-to-HTML formatters (including Movable Type's "Convert Line Breaks" option) which translate every line break character in a paragraph into a <br /> tag.

    When you do want to insert a <br /> break tag using Markdown, you end a line with two or more spaces, then type return.

    Yes, this takes a tad more effort to create a <br />, but a simplistic "every line break is a <br />" rule wouldn't work for Markdown. Markdown's email-style blockquoting and multi-paragraph list items work best -- and look better -- when you format them with hard breaks.

    Markdown supports two styles of headers, Setext and atx.

    Setext-style headers are "underlined" using equal signs (for first-level headers) and dashes (for second-level headers). For example:

    This is an H1
    =============
    
    This is an H2
    -------------
    

    Any number of underlining ='s or -'s will work.

    Atx-style headers use 1-6 hash characters at the start of the line, corresponding to header levels 1-6. For example:

    # This is an H1
    
    ## This is an H2
    
    ###### This is an H6
    

    Optionally, you may "close" atx-style headers. This is purely cosmetic -- you can use this if you think it looks better. The closing hashes don't even need to match the number of hashes used to open the header. (The number of opening hashes determines the header level.) :

    # This is an H1 #
    
    ## This is an H2 ##
    
    ### This is an H3 ######
    

    Blockquotes

    Markdown uses email-style > characters for blockquoting. If you're familiar with quoting passages of text in an email message, then you know how to create a blockquote in Markdown. It looks best if you hard wrap the text and put a > before every line:

    > This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet,
    > consectetuer adipiscing elit. Aliquam hendrerit mi posuere lectus.
    > Vestibulum enim wisi, viverra nec, fringilla in, laoreet vitae, risus.
    > 
    > Donec sit amet nisl. Aliquam semper ipsum sit amet velit. Suspendisse
    > id sem consectetuer libero luctus adipiscing.
    

    Markdown allows you to be lazy and only put the > before the first line of a hard-wrapped paragraph:

    > This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet,
    consectetuer adipiscing elit. Aliquam hendrerit mi posuere lectus.
    Vestibulum enim wisi, viverra nec, fringilla in, laoreet vitae, risus.
    
    > Donec sit amet nisl. Aliquam semper ipsum sit amet velit. Suspendisse
    id sem consectetuer libero luctus adipiscing.
    

    Blockquotes can be nested (i.e. a blockquote-in-a-blockquote) by adding additional levels of >:

    > This is the first level of quoting.
    >
    > > This is nested blockquote.
    >
    > Back to the first level.
    

    Blockquotes can contain other Markdown elements, including headers, lists, and code blocks:

    > ## This is a header.
    > 
    > 1.   This is the first list item.
    > 2.   This is the second list item.
    > 
    > Here's some example code:
    > 
    >     return shell_exec("echo $input | $markdown_script");
    

    Any decent text editor should make email-style quoting easy. For example, with BBEdit, you can make a selection and choose Increase Quote Level from the Text menu.

    Lists

    Markdown supports ordered (numbered) and unordered (bulleted) lists.

    Unordered lists use asterisks, pluses, and hyphens -- interchangeably -- as list markers:

    *   Red
    *   Green
    *   Blue
    

    is equivalent to:

    +   Red
    +   Green
    +   Blue
    

    and:

    -   Red
    -   Green
    -   Blue
    

    Ordered lists use numbers followed by periods:

    1.  Bird
    2.  McHale
    3.  Parish
    

    It's important to note that the actual numbers you use to mark the list have no effect on the HTML output Markdown produces. The HTML Markdown produces from the above list is:

    <ol>
    <li>Bird</li>
    <li>McHale</li>
    <li>Parish</li>
    </ol>
    

    If you instead wrote the list in Markdown like this:

    1.  Bird
    1.  McHale
    1.  Parish
    

    or even:

    3. Bird
    1. McHale
    8. Parish
    

    you'd get the exact same HTML output. The point is, if you want to, you can use ordinal numbers in your ordered Markdown lists, so that the numbers in your source match the numbers in your published HTML. But if you want to be lazy, you don't have to.

    If you do use lazy list numbering, however, you should still start the list with the number 1. At some point in the future, Markdown may support starting ordered lists at an arbitrary number.

    List markers typically start at the left margin, but may be indented by up to three spaces. List markers must be followed by one or more spaces or a tab.

    To make lists look nice, you can wrap items with hanging indents:

    *   Lorem ipsum dolor sit amet, consectetuer adipiscing elit.
        Aliquam hendrerit mi posuere lectus. Vestibulum enim wisi,
        viverra nec, fringilla in, laoreet vitae, risus.
    *   Donec sit amet nisl. Aliquam semper ipsum sit amet velit.
        Suspendisse id sem consectetuer libero luctus adipiscing.
    

    But if you want to be lazy, you don't have to:

    *   Lorem ipsum dolor sit amet, consectetuer adipiscing elit.
    Aliquam hendrerit mi posuere lectus. Vestibulum enim wisi,
    viverra nec, fringilla in, laoreet vitae, risus.
    *   Donec sit amet nisl. Aliquam semper ipsum sit amet velit.
    Suspendisse id sem consectetuer libero luctus adipiscing.
    

    If list items are separated by blank lines, Markdown will wrap the items in <p> tags in the HTML output. For example, this input:

    *   Bird
    *   Magic
    

    will turn into:

    <ul>
    <li>Bird</li>
    <li>Magic</li>
    </ul>
    

    But this:

    *   Bird
    
    *   Magic
    

    will turn into:

    <ul>
    <li><p>Bird</p></li>
    <li><p>Magic</p></li>
    </ul>
    

    List items may consist of multiple paragraphs. Each subsequent paragraph in a list item must be indented by either 4 spaces or one tab:

    1.  This is a list item with two paragraphs. Lorem ipsum dolor
        sit amet, consectetuer adipiscing elit. Aliquam hendrerit
        mi posuere lectus.
    
        Vestibulum enim wisi, viverra nec, fringilla in, laoreet
        vitae, risus. Donec sit amet nisl. Aliquam semper ipsum
        sit amet velit.
    
    2.  Suspendisse id sem consectetuer libero luctus adipiscing.
    

    It looks nice if you indent every line of the subsequent paragraphs, but here again, Markdown will allow you to be lazy:

    *   This is a list item with two paragraphs.
    
        This is the second paragraph in the list item. You're
    only required to indent the first line. Lorem ipsum dolor
    sit amet, consectetuer adipiscing elit.
    
    *   Another item in the same list.
    

    To put a blockquote within a list item, the blockquote's > delimiters need to be indented:

    *   A list item with a blockquote:
    
        > This is a blockquote
        > inside a list item.
    

    To put a code block within a list item, the code block needs to be indented twice -- 8 spaces or two tabs:

    *   A list item with a code block:
    
            <code goes here>
    

    It's worth noting that it's possible to trigger an ordered list by accident, by writing something like this:

    1986. What a great season.
    

    In other words, a number-period-space sequence at the beginning of a line. To avoid this, you can backslash-escape the period:

    1986\. What a great season.
    

    Code Blocks

    Pre-formatted code blocks are used for writing about programming or markup source code. Rather than forming normal paragraphs, the lines of a code block are interpreted literally. Markdown wraps a code block in both <pre> and <code> tags.

    To produce a code block in Markdown, simply indent every line of the block by at least 4 spaces or 1 tab. For example, given this input:

    This is a normal paragraph:
    
        This is a code block.
    

    Markdown will generate:

    <p>This is a normal paragraph:</p>
    
    <pre><code>This is a code block.
    </code></pre>
    

    One level of indentation -- 4 spaces or 1 tab -- is removed from each line of the code block. For example, this:

    Here is an example of AppleScript:
    
        tell application "Foo"
            beep
        end tell
    

    will turn into:

    <p>Here is an example of AppleScript:</p>
    
    <pre><code>tell application "Foo"
        beep
    end tell
    </code></pre>
    

    A code block continues until it reaches a line that is not indented (or the end of the article).

    Within a code block, ampersands (&) and angle brackets (< and >) are automatically converted into HTML entities. This makes it very easy to include example HTML source code using Markdown -- just paste it and indent it, and Markdown will handle the hassle of encoding the ampersands and angle brackets. For example, this:

        <div class="footer">
            &copy; 2004 Foo Corporation
        </div>
    

    will turn into:

    <pre><code>&lt;div class="footer"&gt;
        &amp;copy; 2004 Foo Corporation
    &lt;/div&gt;
    </code></pre>
    

    Regular Markdown syntax is not processed within code blocks. E.g., asterisks are just literal asterisks within a code block. This means it's also easy to use Markdown to write about Markdown's own syntax.

    Horizontal Rules

    You can produce a horizontal rule tag (<hr />) by placing three or more hyphens, asterisks, or underscores on a line by themselves. If you wish, you may use spaces between the hyphens or asterisks. Each of the following lines will produce a horizontal rule:

    * * *
    
    ***
    
    *****
    
    - - -
    
    ---------------------------------------
    
    _ _ _
    

    Span Elements

    Markdown supports two styles of links: inline and reference.

    In both styles, the link text is delimited by [square brackets].

    To create an inline link, use a set of regular parentheses immediately after the link text's closing square bracket. Inside the parentheses, put the URL where you want the link to point, along with an optional title for the link, surrounded in quotes. For example:

    This is [an example](http://example.com/ "Title") inline link.
    
    [This link](http://example.net/) has no title attribute.
    

    Will produce:

    <p>This is <a href="http://example.com/" title="Title">
    an example</a> inline link.</p>
    
    <p><a href="http://example.net/">This link</a> has no
    title attribute.</p>
    

    If you're referring to a local resource on the same server, you can use relative paths:

    See my [About](/about/) page for details.
    

    Reference-style links use a second set of square brackets, inside which you place a label of your choosing to identify the link:

    This is [an example][id] reference-style link.
    

    You can optionally use a space to separate the sets of brackets:

    This is [an example] [id] reference-style link.
    

    Then, anywhere in the document, you define your link label like this, on a line by itself:

    [id]: http://example.com/  "Optional Title Here"
    

    That is:

    • Square brackets containing the link identifier (optionally indented from the left margin using up to three spaces);
    • followed by a colon;
    • followed by one or more spaces (or tabs);
    • followed by the URL for the link;
    • optionally followed by a title attribute for the link, enclosed in double or single quotes.

    The link URL may, optionally, be surrounded by angle brackets:

    [id]: <http://example.com/>  "Optional Title Here"
    

    You can put the title attribute on the next line and use extra spaces or tabs for padding, which tends to look better with longer URLs:

    [id]: http://example.com/longish/path/to/resource/here
        "Optional Title Here"
    

    Link definitions are only used for creating links during Markdown processing, and are stripped from your document in the HTML output.

    Link definition names may consist of letters, numbers, spaces, and punctuation -- but they are not case sensitive. E.g. these two links:

    [link text][a]
    [link text][A]
    

    are equivalent.

    The implicit link name shortcut allows you to omit the name of the link, in which case the link text itself is used as the name. Just use an empty set of square brackets -- e.g., to link the word "Google" to the google.com web site, you could simply write:

    [Google][]
    

    And then define the link:

    [Google]: http://google.com/
    

    Because link names may contain spaces, this shortcut even works for multiple words in the link text:

    Visit [Daring Fireball][] for more information.
    

    And then define the link:

    [Daring Fireball]: http://daringfireball.net/
    

    Link definitions can be placed anywhere in your Markdown document. I tend to put them immediately after each paragraph in which they're used, but if you want, you can put them all at the end of your document, sort of like footnotes.

    Here's an example of reference links in action:

    I get 10 times more traffic from [Google] [1] than from
    [Yahoo] [2] or [MSN] [3].
    
      [1]: http://google.com/        "Google"
      [2]: http://search.yahoo.com/  "Yahoo Search"
      [3]: http://search.msn.com/    "MSN Search"
    

    Using the implicit link name shortcut, you could instead write:

    I get 10 times more traffic from [Google][] than from
    [Yahoo][] or [MSN][].
    
      [google]: http://google.com/        "Google"
      [yahoo]:  http://search.yahoo.com/  "Yahoo Search"
      [msn]:    http://search.msn.com/    "MSN Search"
    

    Both of the above examples will produce the following HTML output:

    <p>I get 10 times more traffic from <a href="http://google.com/"
    title="Google">Google</a> than from
    <a href="http://search.yahoo.com/" title="Yahoo Search">Yahoo</a>
    or <a href="http://search.msn.com/" title="MSN Search">MSN</a>.</p>
    

    For comparison, here is the same paragraph written using Markdown's inline link style:

    I get 10 times more traffic from [Google](http://google.com/ "Google")
    than from [Yahoo](http://search.yahoo.com/ "Yahoo Search") or
    [MSN](http://search.msn.com/ "MSN Search").
    

    The point of reference-style links is not that they're easier to write. The point is that with reference-style links, your document source is vastly more readable. Compare the above examples: using reference-style links, the paragraph itself is only 81 characters long; with inline-style links, it's 176 characters; and as raw HTML, it's 234 characters. In the raw HTML, there's more markup than there is text.

    With Markdown's reference-style links, a source document much more closely resembles the final output, as rendered in a browser. By allowing you to move the markup-related metadata out of the paragraph, you can add links without interrupting the narrative flow of your prose.

    Emphasis

    Markdown treats asterisks (*) and underscores (_) as indicators of emphasis. Text wrapped with one * or _ will be wrapped with an HTML <em> tag; double *'s or _'s will be wrapped with an HTML <strong> tag. E.g., this input:

    *single asterisks*
    
    _single underscores_
    
    **double asterisks**
    
    __double underscores__
    

    will produce:

    <em>single asterisks</em>
    
    <em>single underscores</em>
    
    <strong>double asterisks</strong>
    
    <strong>double underscores</strong>
    

    You can use whichever style you prefer; the lone restriction is that the same character must be used to open and close an emphasis span.

    Emphasis can be used in the middle of a word:

    un*fucking*believable
    

    But if you surround an * or _ with spaces, it'll be treated as a literal asterisk or underscore.

    To produce a literal asterisk or underscore at a position where it would otherwise be used as an emphasis delimiter, you can backslash escape it:

    \*this text is surrounded by literal asterisks\*
    

    Code

    To indicate a span of code, wrap it with backtick quotes (`). Unlike a pre-formatted code block, a code span indicates code within a normal paragraph. For example:

    Use the `printf()` function.
    

    will produce:

    <p>Use the <code>printf()</code> function.</p>
    

    To include a literal backtick character within a code span, you can use multiple backticks as the opening and closing delimiters:

    ``There is a literal backtick (`) here.``
    

    which will produce this:

    <p><code>There is a literal backtick (`) here.</code></p>
    

    The backtick delimiters surrounding a code span may include spaces -- one after the opening, one before the closing. This allows you to place literal backtick characters at the beginning or end of a code span:

    A single backtick in a code span: `` ` ``
    
    A backtick-delimited string in a code span: `` `foo` ``
    

    will produce:

    <p>A single backtick in a code span: <code>`</code></p>
    
    <p>A backtick-delimited string in a code span: <code>`foo`</code></p>
    

    With a code span, ampersands and angle brackets are encoded as HTML entities automatically, which makes it easy to include example HTML tags. Markdown will turn this:

    Please don't use any `<blink>` tags.
    

    into:

    <p>Please don't use any <code>&lt;blink&gt;</code> tags.</p>
    

    You can write this:

    `&#8212;` is the decimal-encoded equivalent of `&mdash;`.
    

    to produce:

    <p><code>&amp;#8212;</code> is the decimal-encoded
    equivalent of <code>&amp;mdash;</code>.</p>
    

    Images

    Admittedly, it's fairly difficult to devise a "natural" syntax for placing images into a plain text document format.

    Markdown uses an image syntax that is intended to resemble the syntax for links, allowing for two styles: inline and reference.

    Inline image syntax looks like this:

    ![Alt text](/path/to/img.jpg)
    
    ![Alt text](/path/to/img.jpg "Optional title")
    

    That is:

    • An exclamation mark: !;
    • followed by a set of square brackets, containing the alt attribute text for the image;
    • followed by a set of parentheses, containing the URL or path to the image, and an optional title attribute enclosed in double or single quotes.

    Reference-style image syntax looks like this:

    ![Alt text][id]
    

    Where "id" is the name of a defined image reference. Image references are defined using syntax identical to link references:

    [id]: url/to/image  "Optional title attribute"
    

    As of this writing, Markdown has no syntax for specifying the dimensions of an image; if this is important to you, you can simply use regular HTML <img> tags.


    Miscellaneous

    Markdown supports a shortcut style for creating "automatic" links for URLs and email addresses: simply surround the URL or email address with angle brackets. What this means is that if you want to show the actual text of a URL or email address, and also have it be a clickable link, you can do this:

    <http://example.com/>
    

    Markdown will turn this into:

    <a href="http://example.com/">http://example.com/</a>
    

    Automatic links for email addresses work similarly, except that Markdown will also perform a bit of randomized decimal and hex entity-encoding to help obscure your address from address-harvesting spambots. For example, Markdown will turn this:

    <address@example.com>
    

    into something like this:

    <a href="&#x6D;&#x61;i&#x6C;&#x74;&#x6F;:&#x61;&#x64;&#x64;&#x72;&#x65;
    &#115;&#115;&#64;&#101;&#120;&#x61;&#109;&#x70;&#x6C;e&#x2E;&#99;&#111;
    &#109;">&#x61;&#x64;&#x64;&#x72;&#x65;&#115;&#115;&#64;&#101;&#120;&#x61;
    &#109;&#x70;&#x6C;e&#x2E;&#99;&#111;&#109;</a>
    

    which will render in a browser as a clickable link to "address@example.com".

    (This sort of entity-encoding trick will indeed fool many, if not most, address-harvesting bots, but it definitely won't fool all of them. It's better than nothing, but an address published in this way will probably eventually start receiving spam.)

    Backslash Escapes

    Markdown allows you to use backslash escapes to generate literal characters which would otherwise have special meaning in Markdown's formatting syntax. For example, if you wanted to surround a word with literal asterisks (instead of an HTML <em> tag), you can use backslashes before the asterisks, like this:

    \*literal asterisks\*
    

    Markdown provides backslash escapes for the following characters:

    \   backslash
    `   backtick
    *   asterisk
    _   underscore
    {}  curly braces
    []  square brackets
    ()  parentheses
    #   hash mark
    +   plus sign
    -   minus sign (hyphen)
    .   dot
    !   exclamation mark
    
    markdown-0.1.13.2/test/Tests/Strong and em together.html0000644000000000000000000000024712524343246021106 0ustar0000000000000000

    This is strong and em.

    So is this word.

    This is strong and em.

    So is this word.

    markdown-0.1.13.2/test/Tests/Links, reference style.text0000644000000000000000000000142712524343246021140 0ustar0000000000000000Foo [bar] [1]. Foo [bar][1]. Foo [bar] [1]. [1]: /url/ "Title" With [embedded [brackets]] [b]. Indented [once][]. Indented [twice][]. Indented [thrice][]. Indented [four][] times. [once]: /url [twice]: /url [thrice]: /url [four]: /url [b]: /url/ * * * [this] [this] should work So should [this][this]. And [this] []. And [this][]. And [this]. But not [that] []. Nor [that][]. Nor [that]. [Something in brackets like [this][] should work] [Same with [this].] In this case, [this](/somethingelse/) points to something else. Backslashing should suppress \[this] and [this\]. [this]: foo * * * Here's one where the [link breaks] across lines. Here's another where the [link breaks] across lines, but with a line-ending space. [link breaks]: /url/ markdown-0.1.13.2/test/Tests/Tabs.text0000644000000000000000000000046712524343246015640 0ustar0000000000000000+ this is a list item indented with tabs + this is a list item indented with spaces Code: this code block is indented by one tab And: this code block is indented by two tabs And: + this is an example list item indented with tabs + this is an example list item indented with spaces markdown-0.1.13.2/test/Tests/Inline HTML (Advanced).html0000644000000000000000000000053212524343246020472 0ustar0000000000000000

    Simple block on one line:

    foo

    And nested without indentation:

    foo
    bar

    And with attributes:

    This was broken in 1.0.2b7:

    foo
    markdown-0.1.13.2/test/Tests/Horizontal rules.text0000644000000000000000000000041612524343246020205 0ustar0000000000000000Dashes: --- --- --- --- --- - - - - - - - - - - - - - - - Asterisks: *** *** *** *** *** * * * * * * * * * * * * * * * Underscores: ___ ___ ___ ___ ___ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ markdown-0.1.13.2/test/Tests/Links, shortcut references.text0000755000000000000000000000035412524343246022037 0ustar0000000000000000This is the [simple case]. [simple case]: /simple This one has a [line break]. This one has a [line break] with a line-ending space. [line break]: /foo [this] [that] and the [other] [this]: /this [that]: /that [other]: /other markdown-0.1.13.2/test/Tests/Code Blocks.text0000644000000000000000000000030712524343246017010 0ustar0000000000000000 code block on the first line Regular text. code block indented by spaces Regular text. the lines in this block all contain trailing spaces Regular Text. code block on the last linemarkdown-0.1.13.2/test/Tests/Tidyness.text0000644000000000000000000000011612524343246016540 0ustar0000000000000000> A list within a blockquote: > > * asterisk 1 > * asterisk 2 > * asterisk 3 markdown-0.1.13.2/test/Tests/Auto links.text0000644000000000000000000000040712524343246016752 0ustar0000000000000000Link: . With an ampersand: * In a list? * * It should. > Blockquoted: Auto-links should not occur here: `` or here: markdown-0.1.13.2/test/Tests/Auto links.html0000644000000000000000000000104012524343246016724 0ustar0000000000000000

    Link: http://example.com/.

    With an ampersand: http://example.com/?foo=1&bar=2

    Blockquoted: http://example.com/

    Auto-links should not occur here: <http://example.com/>

    or here: <http://example.com/>
    
    markdown-0.1.13.2/test/Tests/Amps and angle encoding.text0000644000000000000000000000057612524343246021211 0ustar0000000000000000AT&T has an ampersand in their name. AT&T is another way to write it. This & that. 4 < 5. 6 > 5. Here's a [link] [1] with an ampersand in the URL. Here's a link with an amersand in the link text: [AT&T] [2]. Here's an inline [link](/script?foo=1&bar=2). Here's an inline [link](). [1]: http://example.com/?foo=1&bar=2 [2]: http://att.com/ "AT&T" markdown-0.1.13.2/test/Tests/Tabs.html0000644000000000000000000000066712524343246015622 0ustar0000000000000000
    • this is a list item indented with tabs

    • this is a list item indented with spaces

    Code:

    this code block is indented by one tab
    

    And:

        this code block is indented by two tabs
    

    And:

    +   this is an example list item
        indented with tabs
    
    +   this is an example list item
        indented with spaces
    
    markdown-0.1.13.2/test/Tests/Hard-wrapped paragraphs with list-like lines.html0000644000000000000000000000032712524343246025256 0ustar0000000000000000

    In Markdown 1.0.0 and earlier. Version 8. This line turns into a list item. Because a hard-wrapped line in the middle of a paragraph looked like a list item.

    Here's one with a bullet. * criminey.

    markdown-0.1.13.2/test/Tests/Blockquotes with code blocks.text0000644000000000000000000000020712524343246022317 0ustar0000000000000000> Example: > > sub status { > print "working"; > } > > Or: > > sub status { > return "working"; > } markdown-0.1.13.2/Text/0000755000000000000000000000000012524343246012675 5ustar0000000000000000markdown-0.1.13.2/Text/Markdown.hs0000644000000000000000000001461512524343246015022 0ustar0000000000000000{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-} {-# LANGUAGE RankNTypes #-} module Text.Markdown ( -- * Functions markdown -- * Settings , MarkdownSettings , msXssProtect , msStandaloneHtml , msFencedHandlers , msBlockCodeRenderer , msLinkNewTab , msBlankBeforeBlockquote , msBlockFilter , msAddHeadingId -- * Newtype , Markdown (..) -- * Fenced handlers , FencedHandler (..) , codeFencedHandler , htmlFencedHandler -- * Convenience re-exports , def ) where import Control.Arrow ((&&&)) import Text.Markdown.Inline import Text.Markdown.Block import Text.Markdown.Types import Prelude hiding (sequence, takeWhile) import Data.Char (isAlphaNum) import Data.Default (Default (..)) import Data.List (intercalate, isInfixOf) import Data.Text (Text) import qualified Data.Text.Lazy as TL import Text.Blaze (toValue) import Text.Blaze.Html (ToMarkup (..), Html) import Text.Blaze.Html.Renderer.Text (renderHtml) import Data.Conduit import qualified Data.Conduit.List as CL import Data.Monoid (Monoid (mappend, mempty, mconcat)) import Data.Functor.Identity (runIdentity) import qualified Text.Blaze.Html5 as H import qualified Text.Blaze.Html5.Attributes as HA import Text.HTML.SanitizeXSS (sanitizeBalance) import qualified Data.Map as Map import Data.String (IsString) -- | A newtype wrapper providing a @ToHtml@ instance. newtype Markdown = Markdown TL.Text deriving(Eq, Ord, Monoid, IsString, Show) instance ToMarkup Markdown where toMarkup (Markdown t) = markdown def t -- | Convert the given textual markdown content to HTML. 
-- -- >>> :set -XOverloadedStrings -- >>> import Text.Blaze.Html.Renderer.Text -- >>> renderHtml $ markdown def "# Hello World!" -- "

    <h1>Hello World!</h1>

    " -- -- >>> renderHtml $ markdown def { msXssProtect = False } "" -- "" markdown :: MarkdownSettings -> TL.Text -> Html markdown ms tl = sanitize $ runIdentity $ CL.sourceList blocksH $= toHtmlB ms $$ CL.fold mappend mempty where sanitize | msXssProtect ms = preEscapedToMarkup . sanitizeBalance . TL.toStrict . renderHtml | otherwise = id blocksH :: [Block Html] blocksH = processBlocks blocks blocks :: [Block Text] blocks = runIdentity $ CL.sourceList (TL.toChunks tl) $$ toBlocks ms =$ CL.consume processBlocks :: [Block Text] -> [Block Html] processBlocks = map (fmap $ toHtmlI ms) . msBlockFilter ms . map (fmap $ intercalate [InlineHtml "
    "]) . map (fmap $ map $ toInline refs) . map toBlockLines refs = Map.unions $ map toRef blocks where toRef (BlockReference x y) = Map.singleton x y toRef _ = Map.empty data MState = NoState | InList ListType toHtmlB :: Monad m => MarkdownSettings -> Conduit (Block Html) m Html toHtmlB ms = loop NoState where loop state = await >>= maybe (closeState state) (\x -> do state' <- getState state x yield $ go x loop state') closeState NoState = return () closeState (InList Unordered) = yield $ escape "" closeState (InList Ordered) = yield $ escape "" getState NoState (BlockList ltype _) = do yield $ escape $ case ltype of Unordered -> "
      " Ordered -> "
        " return $ InList ltype getState NoState _ = return NoState getState state@(InList lt1) b@(BlockList lt2 _) | lt1 == lt2 = return state | otherwise = closeState state >> getState NoState b getState state@(InList _) _ = closeState state >> return NoState go (BlockPara h) = H.p h go (BlockPlainText h) = h go (BlockList _ (Left h)) = H.li h go (BlockList _ (Right bs)) = H.li $ blocksToHtml bs go (BlockHtml t) = escape t go (BlockCode a b) = msBlockCodeRenderer ms a (id &&& toMarkup $ b) go (BlockQuote bs) = H.blockquote $ blocksToHtml bs go BlockRule = H.hr go (BlockHeading level h) | msAddHeadingId ms = wrap level H.! HA.id (clean h) $ h | otherwise = wrap level h where wrap 1 = H.h1 wrap 2 = H.h2 wrap 3 = H.h3 wrap 4 = H.h4 wrap 5 = H.h5 wrap _ = H.h6 isValidChar c = isAlphaNum c || isInfixOf [c] "-_:." clean = toValue . TL.filter isValidChar . (TL.replace " " "-") . TL.toLower . renderHtml go BlockReference{} = return () blocksToHtml bs = runIdentity $ mapM_ yield bs $$ toHtmlB ms =$ CL.fold mappend mempty escape :: Text -> Html escape = preEscapedToMarkup toHtmlI :: MarkdownSettings -> [Inline] -> Html toHtmlI ms is0 | msXssProtect ms = escape $ sanitizeBalance $ TL.toStrict $ renderHtml final | otherwise = final where final = gos is0 gos = mconcat . map go go (InlineText t) = toMarkup t go (InlineItalic is) = H.i $ gos is go (InlineBold is) = H.b $ gos is go (InlineCode t) = H.code $ toMarkup t go (InlineLink url Nothing content) | msLinkNewTab ms = H.a H.! HA.href (H.toValue url) H.! HA.target "_blank" $ gos content | otherwise = H.a H.! HA.href (H.toValue url) $ gos content go (InlineLink url (Just title) content) | msLinkNewTab ms = H.a H.! HA.href (H.toValue url) H.! HA.title (H.toValue title) H.! HA.target "_blank" $ gos content | otherwise = H.a H.! HA.href (H.toValue url) H.! HA.title (H.toValue title) $ gos content go (InlineImage url Nothing content) = H.img H.! HA.src (H.toValue url) H.! 
HA.alt (H.toValue content) go (InlineImage url (Just title) content) = H.img H.! HA.src (H.toValue url) H.! HA.alt (H.toValue content) H.! HA.title (H.toValue title) go (InlineHtml t) = escape t go (InlineFootnoteRef x) = let ishown = TL.pack (show x) (<>) = mappend in H.a H.! HA.href (H.toValue $ "#footnote-" <> ishown) H.! HA.id (H.toValue $ "ref-" <> ishown) $ H.toHtml $ "[" <> ishown <> "]" go (InlineFootnote x) = let ishown = TL.pack (show x) (<>) = mappend in H.a H.! HA.href (H.toValue $ "#ref-" <> ishown) H.! HA.id (H.toValue $ "footnote-" <> ishown) $ H.toHtml $ "[" <> ishown <> "]" markdown-0.1.13.2/Text/Markdown/0000755000000000000000000000000012524343246014457 5ustar0000000000000000markdown-0.1.13.2/Text/Markdown/Types.hs0000644000000000000000000001631512524343246016125 0ustar0000000000000000{-# LANGUAGE OverloadedStrings #-} module Text.Markdown.Types where import Data.Text (Text) import qualified Data.Text as T import Data.Default (Default (def)) import Data.Set (Set, empty) import Data.Map (Map, singleton) import Data.Monoid (mappend) import Text.Blaze.Html (Html) import qualified Text.Blaze.Html5 as H import qualified Text.Blaze.Html5.Attributes as HA -- | A settings type providing various configuration options. -- -- See for more information on -- settings types. In general, you can use @def@. data MarkdownSettings = MarkdownSettings { msXssProtect :: Bool -- ^ Whether to automatically apply XSS protection to embedded HTML. Default: @True@. , msStandaloneHtml :: Set Text -- ^ HTML snippets which stand on their own. We do not require a blank line following these pieces of HTML. -- -- Default: empty set. -- -- Since: 0.1.2 , msFencedHandlers :: Map Text (Text -> FencedHandler) -- ^ Handlers for the special \"fenced\" format. This is most commonly -- used for fenced code, e.g.: -- -- > ```haskell -- > main = putStrLn "Hello" -- > ``` -- -- This is an extension of Markdown, but a fairly commonly used one. 
-- -- This setting allows you to create new kinds of fencing. Fencing goes -- into two categories: parsed and raw. Code fencing would be in the raw -- category, where the contents are not treated as Markdown. Parsed will -- treat the contents as Markdown and allow you to perform some kind of -- modification to it. -- -- For example, to create a new @\@\@\@@ fencing which wraps up the -- contents in an @article@ tag, you could use: -- -- > def { msFencedHandlers = htmlFencedHandler "@@@" (const "<article>
        ") (const " `Map.union` msFencedHandlers def -- > } -- -- Default: code fencing for @```@ and @~~~@. -- -- Since: 0.1.2 , msBlockCodeRenderer :: Maybe Text -> (Text,Html) -> Html -- ^ A rendering function through which code blocks are passed. -- -- The arguments are the block's language, if any, and the tuple -- @(unrendered content, rendered content)@. For example, if you wanted to pass -- code blocks in your markdown text through a highlighter like @highlighting-kate@, -- you might do something like: -- -- >>> :set -XOverloadedStrings -- >>> let renderer lang (src,_) = formatHtmlBlock defaultFormatOpts $ highlightAs (maybe "text" unpack lang) $ unpack src -- >>> let md = markdown def { msBlockCodeRenderer = renderer } "``` haskell\nmain = putStrLn \"Hello world!\"\n```" -- >>> putStrLn $ renderHtml md --
        main = putStrLn "Hello world!"
        -- -- Since: 0.1.2.1 , msLinkNewTab :: Bool -- ^ If @True@, all generated links have the attribute target=_blank set, -- causing them to be opened in a new tab or window. -- -- Default: @False@ -- -- Since 0.1.4 , msBlankBeforeBlockquote :: Bool -- ^ If @True@, a blank line is required before the start of a blockquote. Standard -- markdown syntax does not require a blank line before a blockquote, but it is all -- too easy for a > to end up at the beginning of a line by accident. -- -- Default: @True@ -- -- Since 0.1.5 , msBlockFilter :: [Block [Inline]] -> [Block [Inline]] -- ^ A function to filter and/or modify parsed blocks before they are -- written to Html -- -- Default: @id@ -- -- Since 0.1.7 , msAddHeadingId :: Bool -- ^ If @True@, an @id@ attribute is added to the heading tag with the value equal to -- the text with only valid CSS identifier characters. -- -- > ## Executive Summary -- -- >

        <h2 id="executive-summary">Executive Summary</h2>

        -- -- Default: @False@ -- -- Since 0.1.13 } -- | See 'msFencedHandlers. -- -- Since 0.1.2 data FencedHandler = FHRaw (Text -> [Block Text]) -- ^ Wrap up the given raw content. | FHParsed ([Block Text] -> [Block Text]) -- ^ Wrap up the given parsed content. instance Default MarkdownSettings where def = MarkdownSettings { msXssProtect = True , msStandaloneHtml = empty , msFencedHandlers = codeFencedHandler "```" `mappend` codeFencedHandler "~~~" , msBlockCodeRenderer = \lang (_,rendered) -> case lang of Just l -> H.pre $ H.code H.! HA.class_ (H.toValue l) $ rendered Nothing -> H.pre $ H.code $ rendered , msLinkNewTab = False , msBlankBeforeBlockquote = True , msBlockFilter = id , msAddHeadingId = False } -- | Helper for creating a 'FHRaw'. -- -- Since 0.1.2 codeFencedHandler :: Text -- ^ Delimiter -> Map Text (Text -> FencedHandler) codeFencedHandler key = singleton key $ \lang -> FHRaw $ return . BlockCode (if T.null lang then Nothing else Just lang) -- | Helper for creating a 'FHParsed'. -- -- Note that the start and end parameters take a @Text@ parameter; this is the -- text following the delimiter. For example, with the markdown: -- -- > @@@ foo -- -- @foo@ would be passed to start and end. 
-- -- Since 0.1.2 htmlFencedHandler :: Text -- ^ Delimiter -> (Text -> Text) -- ^ start HTML -> (Text -> Text) -- ^ end HTML -> Map Text (Text -> FencedHandler) htmlFencedHandler key start end = singleton key $ \lang -> FHParsed $ \blocks -> BlockHtml (start lang) : blocks ++ [BlockHtml $ end lang] data ListType = Ordered | Unordered deriving (Show, Eq) data Block inline = BlockPara inline | BlockList ListType (Either inline [Block inline]) | BlockCode (Maybe Text) Text | BlockQuote [Block inline] | BlockHtml Text | BlockRule | BlockHeading Int inline | BlockReference Text Text | BlockPlainText inline deriving (Show, Eq) instance Functor Block where fmap f (BlockPara i) = BlockPara (f i) fmap f (BlockList lt (Left i)) = BlockList lt $ Left $ f i fmap f (BlockList lt (Right bs)) = BlockList lt $ Right $ map (fmap f) bs fmap _ (BlockCode a b) = BlockCode a b fmap f (BlockQuote bs) = BlockQuote $ map (fmap f) bs fmap _ (BlockHtml t) = BlockHtml t fmap _ BlockRule = BlockRule fmap f (BlockHeading level i) = BlockHeading level (f i) fmap _ (BlockReference x y) = BlockReference x y fmap f (BlockPlainText x) = BlockPlainText (f x) data Inline = InlineText Text | InlineItalic [Inline] | InlineBold [Inline] | InlineCode Text | InlineHtml Text | InlineLink Text (Maybe Text) [Inline] -- ^ URL, title, content | InlineImage Text (Maybe Text) Text -- ^ URL, title, content | InlineFootnoteRef Integer -- ^ The footnote reference in the body | InlineFootnote Integer deriving (Show, Eq) markdown-0.1.13.2/Text/Markdown/Inline.hs0000644000000000000000000001640512524343246016237 0ustar0000000000000000{-# LANGUAGE OverloadedStrings #-} {-# OPTIONS_HADDOCK hide #-} module Text.Markdown.Inline ( Inline (..) 
, inlineParser , toInline ) where import Prelude hiding (takeWhile) import Data.Text (Text) import qualified Data.Text as T import Data.Attoparsec.Text import Control.Applicative import Data.Monoid (Monoid, mappend) import qualified Data.Map as Map import Text.Markdown.Types (Inline(..)) type RefMap = Map.Map Text Text toInline :: RefMap -> Text -> [Inline] toInline refmap t = case parseOnly (inlineParser refmap) t of Left s -> [InlineText $ T.pack s] Right is -> is (<>) :: Monoid m => m -> m -> m (<>) = mappend inlineParser :: RefMap -> Parser [Inline] inlineParser = fmap combine . many . inlineAny combine :: [Inline] -> [Inline] combine [] = [] combine (InlineText x:InlineText y:rest) = combine (InlineText (x <> y):rest) combine (InlineText x:rest) = InlineText x : combine rest combine (InlineItalic x:InlineItalic y:rest) = combine (InlineItalic (x <> y):rest) combine (InlineItalic x:rest) = InlineItalic (combine x) : combine rest combine (InlineBold x:InlineBold y:rest) = combine (InlineBold (x <> y):rest) combine (InlineBold x:rest) = InlineBold (combine x) : combine rest combine (InlineCode x:InlineCode y:rest) = combine (InlineCode (x <> y):rest) combine (InlineCode x:rest) = InlineCode x : combine rest combine (InlineLink u t c:rest) = InlineLink u t (combine c) : combine rest combine (InlineImage u t c:rest) = InlineImage u t c : combine rest combine (InlineHtml t:rest) = InlineHtml t : combine rest combine (InlineFootnote x:rest) = InlineFootnote x : combine rest combine (InlineFootnoteRef x:rest) = InlineFootnoteRef x : combine rest specials :: [Char] specials = "*_`\\[]!<&{}" inlineAny :: RefMap -> Parser Inline inlineAny refs = inline refs <|> special where special = InlineText . 
T.singleton <$> satisfy (`elem` specials) inline :: RefMap -> Parser Inline inline refs = text <|> escape <|> footnote <|> footnoteRef <|> paired "**" InlineBold <|> paired "__" InlineBold <|> paired "*" InlineItalic <|> paired "_" InlineItalic <|> doubleCodeSpace <|> doubleCode <|> code <|> link <|> image <|> autoLink <|> html <|> entity where inlinesTill :: Text -> Parser [Inline] inlinesTill end = go id where go front = (string end *> pure (front [])) <|> (do x <- inlineAny refs go $ front . (x:)) text = InlineText <$> takeWhile1 (`notElem` specials) paired t wrap = wrap <$> do _ <- string t is <- inlinesTill t if null is then fail "wrapped around something missing" else return is doubleCodeSpace = InlineCode . T.pack <$> (string "`` " *> manyTill anyChar (string " ``")) doubleCode = InlineCode . T.pack <$> (string "``" *> manyTill anyChar (string "``")) code = InlineCode <$> (char '`' *> takeWhile1 (/= '`') <* char '`') footnoteRef = InlineFootnoteRef <$> (char '{' *> decimal <* char '}') footnote = InlineFootnote <$> (string "{^" *> decimal <* char '}') escape = InlineText . T.singleton <$> (char '\\' *> satisfy (`elem` ("\\`*_{}[]()#+-.!>" :: String))) takeBalancedBrackets = T.pack <$> go (0 :: Int) where go i = do c <- anyChar case c of '[' -> (c:) <$> go (i + 1) ']' | i == 0 -> return [] | otherwise -> (c:) <$> go (i - 1) _ -> (c:) <$> go i parseUrl = fixUrl . 
T.pack <$> parseUrl' (0 :: Int) parseUrl' level | level > 0 = do c <- anyChar let level' | c == ')' = level - 1 | otherwise = level c' <- if c == '\\' then anyChar else return c cs <- parseUrl' level' return $ c' : cs | otherwise = (do c <- hrefChar if c == '(' then (c:) <$> parseUrl' 1 else (c:) <$> parseUrl' 0) <|> return [] parseUrlTitle defRef = parseUrlTitleInline <|> parseUrlTitleRef defRef parseUrlTitleInside endTitle = do url <- parseUrl mtitle <- (Just <$> title) <|> (skipSpace >> endTitle >> pure Nothing) return (url, mtitle) where title = do _ <- space skipSpace _ <- char '"' t <- T.stripEnd . T.pack <$> go return $ if not (T.null t) && T.last t == '"' then T.init t else t where go = (char '\\' *> anyChar >>= \c -> (c:) <$> go) <|> (endTitle *> return []) <|> (anyChar >>= \c -> (c:) <$> go) parseUrlTitleInline = char '(' *> parseUrlTitleInside (char ')') parseUrlTitleRef defRef = do ref' <- (skipSpace *> char '[' *> takeWhile (/= ']') <* char ']') <|> return "" let ref = if T.null ref' then defRef else ref' case Map.lookup (T.unwords $ T.words ref) refs of Nothing -> fail "ref not found" Just t -> either fail return $ parseOnly (parseUrlTitleInside endOfInput) t link = do _ <- char '[' rawContent <- takeBalancedBrackets content <- either fail return $ parseOnly (inlineParser refs) rawContent (url, mtitle) <- parseUrlTitle rawContent return $ InlineLink url mtitle content image = do _ <- string "![" content <- takeBalancedBrackets (url, mtitle) <- parseUrlTitle content return $ InlineImage url mtitle content fixUrl t | T.length t > 2 && T.head t == '<' && T.last t == '>' = T.init $ T.tail t | otherwise = t autoLink = do _ <- char '<' a <- string "http:" <|> string "https:" b <- takeWhile1 (/= '>') _ <- char '>' let url = a `T.append` b return $ InlineLink url Nothing [InlineText url] html = do c <- char '<' t <- takeWhile1 (\x -> ('A' <= x && x <= 'Z') || ('a' <= x && x <= 'z') || x == '/') if T.null t then fail "invalid tag" else do t2 <- takeWhile (/= 
'>') c2 <- char '>' return $ InlineHtml $ T.concat [ T.singleton c , t , t2 , T.singleton c2 ] entity = rawent "<" <|> rawent ">" <|> rawent "&" <|> rawent """ <|> rawent "'" <|> decEnt <|> hexEnt rawent t = InlineHtml <$> string t decEnt = do s <- string "&#" t <- takeWhile1 $ \x -> ('0' <= x && x <= '9') c <- char ';' return $ InlineHtml $ T.concat [ s , t , T.singleton c ] hexEnt = do s <- string "&#x" <|> string "&#X" t <- takeWhile1 $ \x -> ('0' <= x && x <= '9') || ('A' <= x && x <= 'F') || ('a' <= x && x <= 'f') c <- char ';' return $ InlineHtml $ T.concat [ s , t , T.singleton c ] hrefChar :: Parser Char hrefChar = (char '\\' *> anyChar) <|> satisfy (notInClass " )") markdown-0.1.13.2/Text/Markdown/Block.hs0000644000000000000000000003054612524343246016055 0ustar0000000000000000{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE PatternGuards #-} {-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE RankNTypes #-} {-# LANGUAGE CPP #-} {-# OPTIONS_HADDOCK hide #-} module Text.Markdown.Block ( Block (..) , ListType (..) , toBlocks , toBlockLines ) where import Prelude import Control.Monad (msum) #if MIN_VERSION_conduit(1, 0, 0) import Data.Conduit #else import Data.Conduit hiding ((=$=)) import Data.Conduit.Internal (pipeL) #endif import qualified Data.Conduit.Text as CT import qualified Data.Conduit.List as CL import Data.Text (Text) import qualified Data.Text as T import Data.Functor.Identity (runIdentity) import Data.Char (isDigit) import Text.Markdown.Types import qualified Data.Set as Set import qualified Data.Map as Map #if !MIN_VERSION_conduit(1, 0, 0) (=$=) :: Monad m => Pipe a a b x m y -> Pipe b b c y m z -> Pipe a a c x m z (=$=) = pipeL #endif toBlockLines :: Block Text -> Block [Text] toBlockLines = fmap $ map T.stripEnd . concatMap (T.splitOn " \r\n") . T.splitOn " \n" toBlocks :: Monad m => MarkdownSettings -> Conduit Text m (Block Text) toBlocks ms = mapOutput fixWS CT.lines =$= toBlocksLines ms where fixWS = T.pack . go 0 . 
T.unpack go _ [] = [] go i ('\r':cs) = go i cs go i ('\t':cs) = (replicate j ' ') ++ go (i + j) cs where j = 4 - (i `mod` 4) go i (c:cs) = c : go (i + 1) cs

-- | Convert a stream of individual lines into a stream of blocks.
--
-- Each line is dispatched through 'start' (which may consume further lines
-- itself), and the resulting stream is post-processed by 'tightenLists'.
toBlocksLines :: Monad m => MarkdownSettings -> Conduit Text m (Block Text)
toBlocksLines ms = awaitForever (start ms) =$= tightenLists

-- | Drop the 'Blank' markers from the stream and decide, for every run of
-- list items of the same list type, whether the items are rendered tight
-- (@Left@ contents) or loose (@Right@ contents).
--
-- The decision is taken once, when the first item of a run is seen, and is
-- remembered in @mTightList@ so that all following items of the same list
-- type receive the same treatment. Any non-list block resets that memory.
tightenLists :: Monad m => Conduit (Either Blank (Block Text)) m (Block Text)
tightenLists =
    go Nothing
  where
    go mTightList =
        await >>= maybe (return ()) go'
      where
        -- Blank markers are never passed downstream.
        go' (Left Blank) = go mTightList
        go' (Right (BlockList ltNew contents)) =
            case mTightList of
                -- Continuation of the current run: reuse the earlier decision.
                Just (ltOld, isTight) | ltOld == ltNew -> do
                    yield $ BlockList ltNew $ (if isTight then tighten else untighten) contents
                    go mTightList
                -- First item of a new run: look ahead to decide tightness.
                _ -> do
                    isTight <- checkTight ltNew False
                    yield $ BlockList ltNew $ (if isTight then tighten else untighten) contents
                    go $ Just (ltNew, isTight)
        go' (Right b) = yield b >> go Nothing

    -- A single paragraph (or no content at all) collapses to bare text.
    tighten (Right [BlockPara t]) = Left t
    tighten (Right []) = Left T.empty
    tighten x = x

    -- Bare text is wrapped back into a paragraph.
    untighten (Left t) = Right [BlockPara t]
    untighten x = x

    -- Look ahead past blank markers. The list is tight when the next
    -- non-blank element is an item of the same list type (or the stream
    -- ends) and no blank was skipped on the way. The non-blank element
    -- that ended the look-ahead is pushed back with 'leftover'; skipped
    -- blanks are not.
    checkTight lt sawBlank =
        await >>= maybe (return $ not sawBlank) go'
      where
        go' (Left Blank) = checkTight lt True
        go' b@(Right (BlockList ltNext _)) | ltNext == lt = do
            leftover b
            return $ not sawBlank
        go' b = leftover b >> return False

-- | Marker emitted by 'start' for a blank input line; consumed by
-- 'tightenLists'.
data Blank = Blank

-- | Classification of a single input line, as computed by 'lineType'.
data LineType = LineList ListType Text
              | LineCode Text
              | LineFenced Text FencedHandler -- ^ terminator, handler built from the text after the fence (the language)
              | LineBlockQuote Text
              | LineHeading Int Text
              | LineBlank
              | LineText Text
              | LineRule
              | LineHtml Text
              | LineReference Text Text -- ^ name, destination

-- | Classify one line. The guards are tried in order, so earlier
-- classifications win over later ones.
lineType :: MarkdownSettings -> Text -> LineType
lineType ms t
    | T.null $ T.strip t = LineBlank
    | Just (term, fh) <- getFenced (Map.toList $ msFencedHandlers ms) t = LineFenced term fh
    | Just t' <- T.stripPrefix "> " t = LineBlockQuote t'
    | Just (level, t') <- stripHeading t = LineHeading level t'
    -- An indented code line needs four leading spaces; this matches the
    -- @getIndented 4@ used by 'start' to collect the rest of the block and
    -- leaves lines with fewer leading spaces to the checks below.
    | Just t' <- T.stripPrefix "    " t = LineCode t'
    | isRule t = LineRule
    | isHtmlStart t = LineHtml t
    | Just (ltype, t') <- listStart t = LineList ltype t'
    | Just (name, dest) <- getReference t = LineReference name dest
    | otherwise = LineText t
  where
    -- Find the first configured fence prefix that starts the line. The
    -- handler is applied to the stripped remainder of the line.
    getFenced [] _ = Nothing
    getFenced ((x, fh):xs) t'
        | Just rest <- T.stripPrefix x t' = Just (x, fh $ T.strip rest)
        | otherwise = getFenced xs t'

    -- A rule is one of a fixed set of spellings, or a line whose stripped
    -- form starts with at least five dashes.
    isRule :: Text -> Bool
    isRule =
        go . T.strip
      where
        go "* * *" = True
        go "***" = True
        go "*****" = True
        go "- - -" = True
        go "---" = True
        go "___" = True
        go "_ _ _" = True
        go t' = T.length (T.takeWhile (== '-') t') >= 5

    -- Leading @#@ characters give the heading level; trailing @#@
    -- characters and surrounding whitespace are removed from the title.
    stripHeading :: Text -> Maybe (Int, Text)
    stripHeading t'
        | T.null x = Nothing
        | otherwise = Just (T.length x, T.strip $ T.dropWhileEnd (== '#') y)
      where
        (x, y) = T.span (== '#') t'

    -- Recognise @[name]: destination@, ignoring leading spaces.
    getReference :: Text -> Maybe (Text, Text)
    getReference a = do
        b <- T.stripPrefix "[" $ T.dropWhile (== ' ') a
        let (name, c) = T.break (== ']') b
        d <- T.stripPrefix "]:" c
        Just (name, T.strip d)

-- | Turn the line @t@ (plus however many following lines belong to the same
-- construct) into blocks. Blank lines are reported as @Left Blank@ so that
-- 'tightenLists' can use them.
start :: Monad m => MarkdownSettings -> Text -> Conduit Text m (Either Blank (Block Text))
start ms t =
    go $ lineType ms t
  where
    go LineBlank = yield $ Left Blank
    go (LineFenced term fh) = do
        (finished, ls) <- takeTillConsume (== term)
        case finished of
            Just _ -> do
                let block =
                        case fh of
                            FHRaw fh' -> fh' $ T.intercalate "\n" ls
                            FHParsed fh' -> fh' $ runIdentity $ mapM_ yield ls $$ toBlocksLines ms =$ CL.consume
                mapM_ (yield . Right) block
            -- No terminator was found: push every consumed line back. The
            -- opening line gets a leading space so that it is not
            -- classified as a fence again.
            Nothing -> mapM_ leftover (reverse $ T.cons ' ' t : ls)
    go (LineBlockQuote t') = do
        ls <- takeQuotes =$= CL.consume
        let blocks = runIdentity $ mapM_ yield (t' : ls) $$ toBlocksLines ms =$ CL.consume
        yield $ Right $ BlockQuote blocks
    go (LineHeading level t') = yield $ Right $ BlockHeading level t'
    go (LineCode t') = do
        ls <- getIndented 4 =$= CL.consume
        yield $ Right $ BlockCode Nothing $ T.intercalate "\n" $ t' : ls
    go LineRule = yield $ Right BlockRule
    go (LineHtml t') = do
        if t' `Set.member` msStandaloneHtml ms
            then yield $ Right $ BlockHtml t'
            else do
                -- The HTML block runs up to the next blank line.
                ls <- takeTill (T.null . T.strip) =$= CL.consume
                yield $ Right $ BlockHtml $ T.intercalate "\n" $ t' : ls
    go (LineList ltype t') = do
        t2 <- CL.peek
        case fmap (lineType ms) t2 of
            -- If the next line is a non-indented text line, then we have a
            -- lazy list.
            Just (LineText t2') | T.null (T.takeWhile (== ' ') t2') -> do
                CL.drop 1
                -- Get all of the non-indented lines.
                let loop front = do
                        x <- await
                        case x of
                            Nothing -> return $ front []
                            Just y ->
                                case lineType ms y of
                                    LineText z -> loop (front . (z:))
                                    _ -> leftover y >> return (front [])
                ls <- loop (\rest -> T.dropWhile (== ' ') t' : t2' : rest)
                yield $ Right $ BlockList ltype $ Right [BlockPara $ T.intercalate "\n" ls]
            -- If the next line is an indented list, then we have a sublist. I
            -- disagree with this interpretation of Markdown, but it's the way
            -- that Github implements things, so we will too.
            --
            -- The prefix is four spaces, matching the @getIndented 4@ that
            -- collects the sublist lines below.
            _ | Just t2' <- t2
              , Just t2'' <- T.stripPrefix "    " t2'
              , LineList _ltype' _t2''' <- lineType ms t2'' -> do
                ls <- getIndented 4 =$= CL.consume
                let blocks = runIdentity $ mapM_ yield ls $$ toBlocksLines ms =$ CL.consume
                let addPlainText
                        | T.null $ T.strip t' = id
                        | otherwise = (BlockPlainText (T.strip t'):)
                yield $ Right $ BlockList ltype $ Right $ addPlainText blocks
            _ -> do
                let t'' = T.dropWhile (== ' ') t'
                -- Width of everything before the item text on the first
                -- line; continuation lines must be indented that far.
                let leader = T.length t - T.length t''
                ls <- getIndented leader =$= CL.consume
                let blocks = runIdentity $ mapM_ yield (t'' : ls) $$ toBlocksLines ms =$ CL.consume
                yield $ Right $ BlockList ltype $ Right blocks
    go (LineReference x y) = yield $ Right $ BlockReference x y
    go (LineText t') = do
        -- Check for underline headings
        let getUnderline :: Text -> Maybe Int
            getUnderline s
                | T.length s < 2 = Nothing
                | T.all (== '=') s = Just 1
                | T.all (== '-') s = Just 2
                | otherwise = Nothing
        t2 <- CL.peek
        case t2 >>= getUnderline of
            Just level -> do
                CL.drop 1
                yield $ Right $ BlockHeading level t'
            Nothing -> do
                -- A list marker whose content starts with two spaces ends
                -- the paragraph. The literal has two characters because it
                -- is compared against a two-character prefix.
                let listStartIndent x =
                        case listStart x of
                            Just (_, y) -> T.take 2 y == "  "
                            Nothing -> False
                    isNonPara LineBlank = True
                    isNonPara LineFenced{} = True
                    isNonPara LineBlockQuote{} = not $ msBlankBeforeBlockquote ms
                    isNonPara LineHtml{} = True -- See example 95 in Common Markdown spec
                    isNonPara _ = False
                (mfinal, ls) <- takeTillConsume (\x -> isNonPara (lineType ms x) || listStartIndent x)
                -- The line that ended the paragraph still has to be processed.
                maybe (return ()) leftover mfinal
                yield $ Right $ BlockPara $ T.intercalate "\n" $ t' : ls

-- | Does this line open an HTML block (tag, processing instruction, comment
-- or other @<!@ construct)?
isHtmlStart :: T.Text -> Bool
-- Allow for up to three spaces before the opening tag.
isHtmlStart t | "    " `T.isPrefixOf` t = False
isHtmlStart t =
    case T.stripPrefix "<" $ T.dropWhile (== ' ') t of
        Nothing -> False
        Just t' ->
            let (name, rest)
                    | Just _ <- T.stripPrefix "!--" t' = ("--", t')
                    | otherwise = T.break (\c -> c == ' ' || c == '>') t'
             in (T.all isValidTagName name &&
                 not (T.null name) &&
                 (not ("/" `T.isPrefixOf` rest) || ("/>" `T.isPrefixOf` rest)))
                || isPI t'
                || isCommentCData t'
  where
    isValidTagName :: Char -> Bool
    isValidTagName c =
        ('A' <= c && c <= 'Z') ||
        ('a' <= c && c <= 'z') ||
        ('0' <= c && c <= '9') ||
        (c == '-') ||
        (c == '_') ||
        (c == '/') ||
        (c == '!')

    isPI = ("?" `T.isPrefixOf`)
    isCommentCData = ("!" `T.isPrefixOf`)

-- | Pass elements through until one satisfies @f@. The matching element is
-- consumed and dropped.
takeTill :: Monad m => (i -> Bool) -> Conduit i m i
takeTill f =
    loop
  where
    loop = await >>= maybe (return ()) (\x -> if f x then return () else yield x >> loop)

-- Collect elements until one satisfies @f@. Returns the matching element
-- (if the stream did not end first) together with everything before it, in
-- order. Nothing is pushed back onto the stream.
--takeTillConsume :: Monad m => (i -> Bool) -> Consumer i m (Maybe i, [i])
takeTillConsume f =
    loop id
  where
    loop front = await >>= maybe
        (return (Nothing, front []))
        (\x ->
            if f x
                then return (Just x, front [])
                else loop (front . (x:))
        )

-- | Recognise a list marker at the start of a line (leading whitespace is
-- ignored) and return the list type together with the text after the marker.
listStart :: Text -> Maybe (ListType, Text)
listStart t0
    | Just t' <- stripUnorderedListSeparator t = Just (Unordered, t')
    | Just t' <- stripNumber t, Just t'' <- stripOrderedListSeparator t' = Just (Ordered, t'')
    | otherwise = Nothing
  where
    t = T.stripStart t0

-- | Strip a non-empty run of leading digits; @Nothing@ when there is none.
stripNumber :: Text -> Maybe Text
stripNumber x
    | T.null y = Nothing
    | otherwise = Just z
  where
    (y, z) = T.span isDigit x

-- | Strip an unordered list marker (@*@, @+@ or @-@ followed by a space or
-- a tab).
stripUnorderedListSeparator :: Text -> Maybe Text
stripUnorderedListSeparator =
    stripPrefixChoice ["* ", "*\t", "+ ", "+\t", "- ", "-\t"]

-- | Strip the separator that follows the number of an ordered list item
-- (@.@ or @)@ followed by a space or a tab).
stripOrderedListSeparator :: Text -> Maybe Text
stripOrderedListSeparator =
    stripPrefixChoice [". ", ".\t", ") ", ")\t"]

-- | Attempt to strip each of the prefixes in @xs@ from the start of @x@. As
-- soon as one matches, return the remainder of @x@. Prefixes are tried in
-- order. If none match, return @Nothing@.
stripPrefixChoice :: [Text] -> Text -> Maybe Text
stripPrefixChoice xs x = msum $ map (flip T.stripPrefix x) xs

-- | Yield the following lines that are indented by at least @leader@
-- whitespace characters, with that indentation removed.
--
-- Blank lines are held back and only yielded once another indented line
-- follows; otherwise they are pushed back onto the stream (after having
-- @leader@ characters dropped) together with the line that ended the run.
getIndented :: Monad m => Int -> Conduit Text m Text
getIndented leader =
    go []
  where
    -- @blanks@ holds the pending blank lines, most recent first.
    go blanks = await >>= maybe (mapM_ leftover blanks) (go' blanks)

    go' blanks t
        | T.null $ T.strip t = go (T.drop leader t : blanks)
        | T.length x == leader && T.null (T.strip x) = do
            mapM_ yield $ reverse blanks
            yield y
            go []
        -- Pushing the newest line first leaves the oldest one on top.
        | otherwise = mapM_ leftover (t:blanks)
      where
        (x, y) = T.splitAt leader t

-- | Yield the continuation lines of a block quote, stripping a leading
-- @"> "@ where present. An empty line ends the quote and is consumed; a
-- bare @">"@ yields an empty line.
takeQuotes :: Monad m => Conduit Text m Text
takeQuotes =
    await >>= maybe (return ()) go
  where
    go "" = return ()
    go ">" = yield "" >> takeQuotes
    go t
        | Just t' <- T.stripPrefix "> " t = yield t' >> takeQuotes
        | otherwise = yield t >> takeQuotes