markdown-0.32/0000755000175000017500000000000011001614765012311 5ustar tassitassimarkdown-0.32/markdown-tests.lua0000755000175000017500000032033211001614765016004 0ustar tassitassiif not markdown then require "markdown" end -- Splits the text into an array of separate lines. local function split(text, sep) sep = sep or "\n" local lines = {} local pos = 1 while true do local b,e = text:find(sep, pos) if not b then table.insert(lines, text:sub(pos)) break end table.insert(lines, text:sub(pos, b-1)) pos = e + 1 end return lines end ---------------------------------------------------------------------- -- Unit tests ---------------------------------------------------------------------- -- Unit/regression tests for the markdown() function. Each test is specified -- as a sequence of input/expected output pairs, separated by the ~ character. -- Set up a table to store all the tests. Customize __newindex to record the -- order in which the tests are created so that we can process them in the right -- order. local TESTS = {} local TESTS_MT = { __index = _G, __newindex = function(t,k,v) if k:sub(1,1) == "_" then rawset(t,k,v) return end if t._indices == nil then t._indices = {} end table.insert(t._indices, k) rawset(t,k,v) end } setmetatable(TESTS, TESTS_MT) setfenv(1, TESTS) -- Test header markers headers = [[ This is an h1 ============ This is an h2 --------------- ~

This is an h1

This is an h2

~ # This is an h1 ## This is an h2 ###### This is an h6 # This is an h1 # ## This is an h2 ## ### This is an h3 ###### ~

This is an h1

This is an h2

This is an h6

This is an h1

This is an h2

This is an h3

~ # A header with *italics* in it ~

A header with italics in it

~ # A header with a [link][1] in it. [1]: http://google.com/ ~

A header with a link in it.

]] -- Test blockquotes blockquotes = [[ > This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet, > consectetuer adipiscing elit. Aliquam hendrerit mi posuere lectus. > Vestibulum enim wisi, viverra nec, fringilla in, laoreet vitae, risus. > > Donec sit amet nisl. Aliquam semper ipsum sit amet velit. Suspendisse > id sem consectetuer libero luctus adipiscing. ~

This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Aliquam hendrerit mi posuere lectus. Vestibulum enim wisi, viverra nec, fringilla in, laoreet vitae, risus.

Donec sit amet nisl. Aliquam semper ipsum sit amet velit. Suspendisse id sem consectetuer libero luctus adipiscing.

~ > This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Aliquam hendrerit mi posuere lectus. Vestibulum enim wisi, viverra nec, fringilla in, laoreet vitae, risus. > Donec sit amet nisl. Aliquam semper ipsum sit amet velit. Suspendisse id sem consectetuer libero luctus adipiscing. ~

This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Aliquam hendrerit mi posuere lectus. Vestibulum enim wisi, viverra nec, fringilla in, laoreet vitae, risus.

Donec sit amet nisl. Aliquam semper ipsum sit amet velit. Suspendisse id sem consectetuer libero luctus adipiscing.

~ > This is the first level of quoting. > > > This is nested blockquote. > > Back to the first level. ~

This is the first level of quoting.

Thisi s nested blockquote.

Back to the first level.

~ > ## This is a header. > > 1. This is the first list item. > 2. This is the second list item. > > Here's some example code: > > return shell_exec("echo $input | $markdown_script"); ~

This is a header.

  1. This is the first list item.
  2. This is the second list item.

Here's some example code:


		 return shell_exec("echo $input | $markdown_script");
]] -- Test lists lists = [[ * Red * Green * Blue ~ ~ + Red + Green + Blue ~ ~ - Red - Green - Blue ~ ~ 1. Bird 2. McHale 3. Parish ~
  1. Bird
  2. McHale
  3. Parish
~ 1. Bird 1. McHale 1. Parish ~
  1. Bird
  2. McHale
  3. Parish
~ 3. Bird 1. McHale 8. Parish ~
  1. Bird
  2. McHale
  3. Parish
~ * Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Aliquam hendrerit mi posuere lectus. Vestibulum enim wisi, viverra nec, fringilla in, laoreet vitae, risus. * Donec sit amet nisl. Aliquam semper ipsum sit amet velit. Suspendisse id sem consectetuer libero luctus adipiscing. ~ ~ * Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Aliquam hendrerit mi posuere lectus. Vestibulum enim wisi, viverra nec, fringilla in, laoreet vitae, risus. * Donec sit amet nisl. Aliquam semper ipsum sit amet velit. Suspendisse id sem consectetuer libero luctus adipiscing. ~ ~ * Bird * Magic ~ ~ 1. This is a list item with two paragraphs. Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Aliquam hendrerit mi posuere lectus. Vestibulum enim wisi, viverra nec, fringilla in, laoreet vitae, risus. Donec sit amet nisl. Aliquam semper ipsum sit amet velit. 2. Suspendisse id sem consectetuer libero luctus adipiscing. ~
  1. This is a list item with two paragraphs. Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Aliquam hendrerit mi posuere lectus.

    Vestibulum enim wisi, viverra nec, fringilla in, laoreet vitae, risus. Donec sit amet nisl. Aliquam semper ipsum sit amet velit.

  2. Suspendisse id sem consectetuer libero luctus adipiscing.

~ * This is a list item with two paragraphs. This is the second paragraph in the list item. You're only required to indent the first line. Lorem ipsum dolor sit amet, consectetuer adipiscing elit. * Another item in the same list. ~ ~ * A list item with a blockquote: > This is a blockquote > inside a list item. ~ ~ * A list item with a code block: ~ ~ 1986. What a great season. ~
  1. What a great season.
~ 1986\. What a great season. ~

1986. What a great season.

]] -- Test code blocks code_blocks = [[ This is a normal paragraph: This is a code block. ~

This is a normal paragraph:

This is a code block.
~ Here is an example of AppleScript: tell application "Foo" beep end tell ~

Here is an example of AppleScript:


    tell application "Foo"
        beep
    end tell
~ ~

    <div class="footer">
        &copy; 2004 Foo Corporation
    </div>
~ A code block with *asterisks*. And _underlines_. ~

	A code block with *asterisks*.
	And _underlines_.
]] -- Test rules rules = [[ * * * *** ***** - - - --------------------------------------- ~




]] -- Test links links = [[ This is [an example](http://example.com/ "Title") inline link. ~

This is an example inline link.

~ [This link](http://example.net/) has no title attribute. ~

This link has no title attribute.

~ See my [About](/about/) page for details. ~

See my About page for details.

~ This is [an example][id] reference-style link. [id]: http://example.com/ "Optional Title Here" Other text. ~

This is an example reference-style link.

Other text.

~ This is [an example] [id] reference-style link. [id]: http://example.com/ ~

This is an example reference-style link.

~ This is [an example][id] reference-style link. [id]: http://example.com/ 'Optional Title Here' ~

This is an example reference-style link.

~ This is [an example][id] reference-style link. [id]: http://example.com/ (Optional Title Here) ~

This is an example reference-style link.

~ This is [an example][id] reference-style link. [id]: (Optional Title Here) ~

This is an example reference-style link.

~ This is [an example][ID] reference-style link. [id]: (Optional Title Here) ~

This is an example reference-style link.

~ Visit [Daring Fireball][] for more information. [Daring Fireball]: http://daringfireball.net/ ~

Visit Daring Fireball for more information.

~ I get 10 times more traffic from [Google] [1] than from [Yahoo] [2] or [MSN] [3]. [1]: http://google.com/ "Google" [2]: http://search.yahoo.com/ "Yahoo Search" [3]: http://search.msn.com/ "MSN Search" ~

I get 10 times more traffic from Google than from Yahoo or MSN.

~ I get 10 times more traffic from [Google][] than from [Yahoo][] or [MSN][]. [google]: http://google.com/ "Google" [yahoo]: http://search.yahoo.com/ "Yahoo Search" [msn]: http://search.msn.com/ "MSN Search" ~

I get 10 times more traffic from Google than from Yahoo or MSN.

~ I get 10 times more traffic from [Google](http://google.com/ "Google") than from [Yahoo](http://search.yahoo.com/ "Yahoo Search") or [MSN](http://search.msn.com/ "MSN Search"). ~

I get 10 times more traffic from Google than from Yahoo or MSN.

]] -- Test emphasis emphasis = [[ Test *single asterisks* test. ~

Test single asterisks test.

~ Test _single underscore_ test. ~

Test single underscore test.

~ Test **double asterisks** test. ~

Test double asterisks test.

~ Test __double underscores__ test. ~

Test double underscores test.

~ un*fucking*believable ~

unfuckingbelievable

~ \*this text is surrounded by literal asterisks\* ~

*this text is surrounded by literal asterisks*

~ Test with *two* different *words*. ~

Test with two different words.

]] -- Test code code = [[ Use the `printf()` function. ~

Use the printf() function.

~ ``There is a literal backtick (`) here.`` ~

There is a literal backtick (`) here.

~ ``There is a literal backtick (`) here.`` ~

There is a literal backtick (`) here.

~ Please don't use any `` tags. ~

Please don't use any <blink> tags.

~ `—` is the decimal-encoded equivalent of `—`. ~

&#8212; is the decimal-encoded equivalent of &mdash;.

~ Markdown treats asterisks (`*`) and underscores (`_`) as indicators of emphasis. Text wrapped with one `*` or `_` will be wrapped with an ~

Markdown treats asterisks (*) and underscores (_) as indicators of emphasis. Text wrapped with one * or _ will be wrapped with an

]] -- Test images images = [[ ![Alt text](/path/to/img.jpg) ![Alt text](/path/to/img.jpg "Optional title") ~

Alt text

Alt text

~ ![Alt text][id] [id]: url/to/image "Optional title attribute" ~

Alt text

]] -- Test autolinking autolinks = [[ An auto link . ~

An auto link http://www.google.com/.

~ Mail links and . ~

Mail links niklas@frykholm .se and niklas@frykholm .se.

]] -- Test escape codes escapes = [[ \*literal asterisks\* ~

*literal asterisks*

]] -- Test hard linebreaks linebreaks = [[ Poetry in motion ~

Poetry
in
motion

]] -- Test amps and angles conversion amps = [[ © ~

©

~ AT&T ~

AT&T

~ 4 < 5 ~

4 < 5

]] -- Test raw blocks raw = [[
This should not be disturbed by Markdown.
~
This should not be disturbed by Markdown.
]] -- Markdown test suit markdown_amps_and_angles = [[ AT&T has an ampersand in their name. AT&T is another way to write it. This & that. 4 < 5. 6 > 5. Here's a [link] [1] with an ampersand in the URL. Here's a link with an amersand in the link text: [AT&T] [2]. Here's an inline [link](/script?foo=1&bar=2). Here's an inline [link](). [1]: http://example.com/?foo=1&bar=2 [2]: http://att.com/ "AT&T" ~

AT&T has an ampersand in their name.

AT&T is another way to write it.

This & that.

4 < 5.

6 > 5.

Here's a link with an ampersand in the URL.

Here's a link with an amersand in the link text: AT&T.

Here's an inline link.

Here's an inline link.

]] markdown_auto_links = [[ Link: . With an ampersand: * In a list? * * It should. > Blockquoted: Auto-links should not occur here: `` or here: ~

Link: http://example.com/.

With an ampersand: http://example.com/?foo=1&bar=2

Blockquoted: http://example.com/

Auto-links should not occur here: <http://example.com/>

or here: <http://example.com/>
]] markdown_backslash_escapes = [[ These should all get escaped: Backslash: \\ Backtick: \` Asterisk: \* Underscore: \_ Left brace: \{ Right brace: \} Left bracket: \[ Right bracket: \] Left paren: \( Right paren: \) Greater-than: \> Hash: \# Period: \. Bang: \! Plus: \+ Minus: \- These should not, because they occur within a code block: Backslash: \\ Backtick: \` Asterisk: \* Underscore: \_ Left brace: \{ Right brace: \} Left bracket: \[ Right bracket: \] Left paren: \( Right paren: \) Greater-than: \> Hash: \# Period: \. Bang: \! Plus: \+ Minus: \- Nor should these, which occur in code spans: Backslash: `\\` Backtick: `` \` `` Asterisk: `\*` Underscore: `\_` Left brace: `\{` Right brace: `\}` Left bracket: `\[` Right bracket: `\]` Left paren: `\(` Right paren: `\)` Greater-than: `\>` Hash: `\#` Period: `\.` Bang: `\!` Plus: `\+` Minus: `\-` ~

These should all get escaped:

Backslash: \

Backtick: `

Asterisk: *

Underscore: _

Left brace: {

Right brace: }

Left bracket: [

Right bracket: ]

Left paren: (

Right paren: )

Greater-than: >

Hash: #

Period: .

Bang: !

Plus: +

Minus: -

These should not, because they occur within a code block:

Backslash: \\

Backtick: \`

Asterisk: \*

Underscore: \_

Left brace: \{

Right brace: \}

Left bracket: \[

Right bracket: \]

Left paren: \(

Right paren: \)

Greater-than: \>

Hash: \#

Period: \.

Bang: \!

Plus: \+

Minus: \-

Nor should these, which occur in code spans:

Backslash: \\

Backtick: \`

Asterisk: \*

Underscore: \_

Left brace: \{

Right brace: \}

Left bracket: \[

Right bracket: \]

Left paren: \(

Right paren: \)

Greater-than: \>

Hash: \#

Period: \.

Bang: \!

Plus: \+

Minus: \-

]] markdown_blockquotes_with_codeblocks = [[ > Example: > > sub status { > print "working"; > } > > Or: > > sub status { > return "working"; > } ~

Example:

sub status {
    print "working";
}

Or:

sub status {
    return "working";
}
]] markdown_hard_wrapped_paragraphs_with_list_like_lines = [[ In Markdown 1.0.0 and earlier. Version 8. This line turns into a list item. Because a hard-wrapped line in the middle of a paragraph looked like a list item. Here's one with a bullet. * criminey. ~

In Markdown 1.0.0 and earlier. Version 8. This line turns into a list item. Because a hard-wrapped line in the middle of a paragraph looked like a list item.

Here's one with a bullet. * criminey.

]] markdown_horizontal_rules = [[ Dashes: --- --- --- --- --- - - - - - - - - - - - - - - - Asterisks: *** *** *** *** *** * * * * * * * * * * * * * * * Underscores: ___ ___ ___ ___ ___ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ ~

Dashes:





---




- - -

Asterisks:





***




* * *

Underscores:





___




_ _ _
]] -- currently not supported by regular markdown, so we ignore it too --[[ markdown_inline_html_advanced = Simple block on one line:
foo
And nested without indentation:
foo
bar
~

Simple block on one line:

foo

And nested without indentation:

foo
bar
]] markdown_inline_html_simple = [[ Here's a simple block:
foo
This should be a code block, though:
foo
As should this:
foo
Now, nested:
foo
This should just be an HTML comment: Multiline: Code block: Just plain comment, with trailing spaces on the line: Code:
Hr's:








~

Here's a simple block:

foo

This should be a code block, though:

<div>
    foo
</div>

As should this:

<div>foo</div>

Now, nested:

foo

This should just be an HTML comment:

Multiline:

Code block:

<!-- Comment -->

Just plain comment, with trailing spaces on the line:

Code:

<hr />

Hr's:










]] markdown_inline_html_comments = [[ Paragraph one. Paragraph two. The end. ~

Paragraph one.

Paragraph two.

The end.

]] markdown_links_inline_style = [[ Just a [URL](/url/). [URL and title](/url/ "title"). [URL and title](/url/ "title preceded by two spaces"). [URL and title](/url/ "title preceded by a tab"). [Empty](). ~

Just a URL.

URL and title.

URL and title.

URL and title.

Empty.

]] markdown_links_reference_style = [=[ Foo [bar] [1]. Foo [bar][1]. Foo [bar] [1]. [1]: /url/ "Title" With [embedded [brackets]] [b]. Indented [once][]. Indented [twice][]. Indented [thrice][]. Indented [four][] times. [once]: /url [twice]: /url [thrice]: /url [four]: /url [b]: /url/ ~

Foo bar.

Foo bar.

Foo bar.

With embedded [brackets].

Indented once.

Indented twice.

Indented thrice.

Indented [four][] times.

[four]: /url
]=] markdown_literal_quotes_in_text = [=[ Foo [bar][]. Foo [bar](/url/ "Title with "quotes" inside"). [bar]: /url/ "Title with "quotes" inside" ~

Foo bar.

Foo bar.

]=] markdown_basics = [[ Markdown: Basics ================ Getting the Gist of Markdown's Formatting Syntax ------------------------------------------------ This page offers a brief overview of what it's like to use Markdown. The [syntax page] [s] provides complete, detailed documentation for every feature, but Markdown should be very easy to pick up simply by looking at a few examples of it in action. The examples on this page are written in a before/after style, showing example syntax and the HTML output produced by Markdown. It's also helpful to simply try Markdown out; the [Dingus] [d] is a web application that allows you type your own Markdown-formatted text and translate it to XHTML. **Note:** This document is itself written using Markdown; you can [see the source for it by adding '.text' to the URL] [src]. [s]: /projects/markdown/syntax "Markdown Syntax" [d]: /projects/markdown/dingus "Markdown Dingus" [src]: /projects/markdown/basics.text ## Paragraphs, Headers, Blockquotes ## A paragraph is simply one or more consecutive lines of text, separated by one or more blank lines. (A blank line is any line that looks like a blank line -- a line containing nothing spaces or tabs is considered blank.) Normal paragraphs should not be intended with spaces or tabs. Markdown offers two styles of headers: *Setext* and *atx*. Setext-style headers for `

` and `

` are created by "underlining" with equal signs (`=`) and hyphens (`-`), respectively. To create an atx-style header, you put 1-6 hash marks (`#`) at the beginning of the line -- the number of hashes equals the resulting HTML header level. Blockquotes are indicated using email-style '`>`' angle brackets. Markdown: A First Level Header ==================== A Second Level Header --------------------- Now is the time for all good men to come to the aid of their country. This is just a regular paragraph. The quick brown fox jumped over the lazy dog's back. ### Header 3 > This is a blockquote. > > This is the second paragraph in the blockquote. > > ## This is an H2 in a blockquote Output:

A First Level Header

A Second Level Header

Now is the time for all good men to come to the aid of their country. This is just a regular paragraph.

The quick brown fox jumped over the lazy dog's back.

Header 3

This is a blockquote.

This is the second paragraph in the blockquote.

This is an H2 in a blockquote

### Phrase Emphasis ### Markdown uses asterisks and underscores to indicate spans of emphasis. Markdown: Some of these words *are emphasized*. Some of these words _are emphasized also_. Use two asterisks for **strong emphasis**. Or, if you prefer, __use two underscores instead__. Output:

Some of these words are emphasized. Some of these words are emphasized also.

Use two asterisks for strong emphasis. Or, if you prefer, use two underscores instead.

## Lists ## Unordered (bulleted) lists use asterisks, pluses, and hyphens (`*`, `+`, and `-`) as list markers. These three markers are interchangable; this: * Candy. * Gum. * Booze. this: + Candy. + Gum. + Booze. and this: - Candy. - Gum. - Booze. all produce the same output:
  • Candy.
  • Gum.
  • Booze.
Ordered (numbered) lists use regular numbers, followed by periods, as list markers: 1. Red 2. Green 3. Blue Output:
  1. Red
  2. Green
  3. Blue
If you put blank lines between items, you'll get `

` tags for the list item text. You can create multi-paragraph list items by indenting the paragraphs by 4 spaces or 1 tab: * A list item. With multiple paragraphs. * Another item in the list. Output:

  • A list item.

    With multiple paragraphs.

  • Another item in the list.

### Links ### Markdown supports two styles for creating links: *inline* and *reference*. With both styles, you use square brackets to delimit the text you want to turn into a link. Inline-style links use parentheses immediately after the link text. For example: This is an [example link](http://example.com/). Output:

This is an example link.

Optionally, you may include a title attribute in the parentheses: This is an [example link](http://example.com/ "With a Title"). Output:

This is an example link.

Reference-style links allow you to refer to your links by names, which you define elsewhere in your document: I get 10 times more traffic from [Google][1] than from [Yahoo][2] or [MSN][3]. [1]: http://google.com/ "Google" [2]: http://search.yahoo.com/ "Yahoo Search" [3]: http://search.msn.com/ "MSN Search" Output:

I get 10 times more traffic from Google than from Yahoo or MSN.

The title attribute is optional. Link names may contain letters, numbers and spaces, but are *not* case sensitive: I start my morning with a cup of coffee and [The New York Times][NY Times]. [ny times]: http://www.nytimes.com/ Output:

I start my morning with a cup of coffee and The New York Times.

### Images ### Image syntax is very much like link syntax. Inline (titles are optional): ![alt text](/path/to/img.jpg "Title") Reference-style: ![alt text][id] [id]: /path/to/img.jpg "Title" Both of the above examples produce the same output: alt text ### Code ### In a regular paragraph, you can create code span by wrapping text in backtick quotes. Any ampersands (`&`) and angle brackets (`<` or `>`) will automatically be translated into HTML entities. This makes it easy to use Markdown to write about HTML example code: I strongly recommend against using any `` tags. I wish SmartyPants used named entities like `—` instead of decimal-encoded entites like `—`. Output:

I strongly recommend against using any <blink> tags.

I wish SmartyPants used named entities like &mdash; instead of decimal-encoded entites like &#8212;.

To specify an entire block of pre-formatted code, indent every line of the block by 4 spaces or 1 tab. Just like with code spans, `&`, `<`, and `>` characters will be escaped automatically. Markdown: If you want your page to validate under XHTML 1.0 Strict, you've got to put paragraph tags in your blockquotes:

For example.

Output:

If you want your page to validate under XHTML 1.0 Strict, you've got to put paragraph tags in your blockquotes:

<blockquote>
        <p>For example.</p>
    </blockquote>
    
~

Markdown: Basics

Getting the Gist of Markdown's Formatting Syntax

This page offers a brief overview of what it's like to use Markdown. The syntax page provides complete, detailed documentation for every feature, but Markdown should be very easy to pick up simply by looking at a few examples of it in action. The examples on this page are written in a before/after style, showing example syntax and the HTML output produced by Markdown.

It's also helpful to simply try Markdown out; the Dingus is a web application that allows you type your own Markdown-formatted text and translate it to XHTML.

Note: This document is itself written using Markdown; you can see the source for it by adding '.text' to the URL.

Paragraphs, Headers, Blockquotes

A paragraph is simply one or more consecutive lines of text, separated by one or more blank lines. (A blank line is any line that looks like a blank line -- a line containing nothing spaces or tabs is considered blank.) Normal paragraphs should not be intended with spaces or tabs.

Markdown offers two styles of headers: Setext and atx. Setext-style headers for <h1> and <h2> are created by "underlining" with equal signs (=) and hyphens (-), respectively. To create an atx-style header, you put 1-6 hash marks (#) at the beginning of the line -- the number of hashes equals the resulting HTML header level.

Blockquotes are indicated using email-style '>' angle brackets.

Markdown:

A First Level Header
====================

A Second Level Header
---------------------

Now is the time for all good men to come to
the aid of their country. This is just a
regular paragraph.

The quick brown fox jumped over the lazy
dog's back.

### Header 3

> This is a blockquote.
> 
> This is the second paragraph in the blockquote.
>
> ## This is an H2 in a blockquote

Output:

<h1>A First Level Header</h1>

<h2>A Second Level Header</h2>

<p>Now is the time for all good men to come to
the aid of their country. This is just a
regular paragraph.</p>

<p>The quick brown fox jumped over the lazy
dog's back.</p>

<h3>Header 3</h3>

<blockquote>
    <p>This is a blockquote.</p>

    <p>This is the second paragraph in the blockquote.</p>

    <h2>This is an H2 in a blockquote</h2>
</blockquote>

Phrase Emphasis

Markdown uses asterisks and underscores to indicate spans of emphasis.

Markdown:

Some of these words *are emphasized*.
Some of these words _are emphasized also_.

Use two asterisks for **strong emphasis**.
Or, if you prefer, __use two underscores instead__.

Output:

<p>Some of these words <em>are emphasized</em>.
Some of these words <em>are emphasized also</em>.</p>

<p>Use two asterisks for <strong>strong emphasis</strong>.
Or, if you prefer, <strong>use two underscores instead</strong>.</p>

Lists

Unordered (bulleted) lists use asterisks, pluses, and hyphens (*, +, and -) as list markers. These three markers are interchangable; this:

*   Candy.
*   Gum.
*   Booze.

this:

+   Candy.
+   Gum.
+   Booze.

and this:

-   Candy.
-   Gum.
-   Booze.

all produce the same output:

<ul>
<li>Candy.</li>
<li>Gum.</li>
<li>Booze.</li>
</ul>

Ordered (numbered) lists use regular numbers, followed by periods, as list markers:

1.  Red
2.  Green
3.  Blue

Output:

<ol>
<li>Red</li>
<li>Green</li>
<li>Blue</li>
</ol>

If you put blank lines between items, you'll get <p> tags for the list item text. You can create multi-paragraph list items by indenting the paragraphs by 4 spaces or 1 tab:

*   A list item.

    With multiple paragraphs.

*   Another item in the list.

Output:

<ul>
<li><p>A list item.</p>
<p>With multiple paragraphs.</p></li>
<li><p>Another item in the list.</p></li>
</ul>

Links

Markdown supports two styles for creating links: inline and reference. With both styles, you use square brackets to delimit the text you want to turn into a link.

Inline-style links use parentheses immediately after the link text. For example:

This is an [example link](http://example.com/).

Output:

<p>This is an <a href="http://example.com/">
example link</a>.</p>

Optionally, you may include a title attribute in the parentheses:

This is an [example link](http://example.com/ "With a Title").

Output:

<p>This is an <a href="http://example.com/" title="With a Title">
example link</a>.</p>

Reference-style links allow you to refer to your links by names, which you define elsewhere in your document:

I get 10 times more traffic from [Google][1] than from
[Yahoo][2] or [MSN][3].

[1]: http://google.com/        "Google"
[2]: http://search.yahoo.com/  "Yahoo Search"
[3]: http://search.msn.com/    "MSN Search"

Output:

<p>I get 10 times more traffic from <a href="http://google.com/"
title="Google">Google</a> than from <a href="http://search.yahoo.com/"
title="Yahoo Search">Yahoo</a> or <a href="http://search.msn.com/"
title="MSN Search">MSN</a>.</p>

The title attribute is optional. Link names may contain letters, numbers and spaces, but are not case sensitive:

I start my morning with a cup of coffee and
[The New York Times][NY Times].

[ny times]: http://www.nytimes.com/

Output:

<p>I start my morning with a cup of coffee and
<a href="http://www.nytimes.com/">The New York Times</a>.</p>

Images

Image syntax is very much like link syntax.

Inline (titles are optional):

![alt text](/path/to/img.jpg "Title")

Reference-style:

![alt text][id]

[id]: /path/to/img.jpg "Title"

Both of the above examples produce the same output:

<img src="/path/to/img.jpg" alt="alt text" title="Title" />

Code

In a regular paragraph, you can create code span by wrapping text in backtick quotes. Any ampersands (&) and angle brackets (< or >) will automatically be translated into HTML entities. This makes it easy to use Markdown to write about HTML example code:

I strongly recommend against using any `<blink>` tags.

I wish SmartyPants used named entities like `&mdash;`
instead of decimal-encoded entites like `&#8212;`.

Output:

<p>I strongly recommend against using any
<code>&lt;blink&gt;</code> tags.</p>

<p>I wish SmartyPants used named entities like
<code>&amp;mdash;</code> instead of decimal-encoded
entites like <code>&amp;#8212;</code>.</p>

To specify an entire block of pre-formatted code, indent every line of the block by 4 spaces or 1 tab. Just like with code spans, &, <, and > characters will be escaped automatically.

Markdown:

If you want your page to validate under XHTML 1.0 Strict,
you've got to put paragraph tags in your blockquotes:

    <blockquote>
        <p>For example.</p>
    </blockquote>

Output:

<p>If you want your page to validate under XHTML 1.0 Strict,
you've got to put paragraph tags in your blockquotes:</p>

<pre><code>&lt;blockquote&gt;
    &lt;p&gt;For example.&lt;/p&gt;
&lt;/blockquote&gt;
</code></pre>
]] markdown_syntax = [[ Markdown: Syntax ================ * [Overview](#overview) * [Philosophy](#philosophy) * [Inline HTML](#html) * [Automatic Escaping for Special Characters](#autoescape) * [Block Elements](#block) * [Paragraphs and Line Breaks](#p) * [Headers](#header) * [Blockquotes](#blockquote) * [Lists](#list) * [Code Blocks](#precode) * [Horizontal Rules](#hr) * [Span Elements](#span) * [Links](#link) * [Emphasis](#em) * [Code](#code) * [Images](#img) * [Miscellaneous](#misc) * [Backslash Escapes](#backslash) * [Automatic Links](#autolink) **Note:** This document is itself written using Markdown; you can [see the source for it by adding '.text' to the URL][src]. [src]: /projects/markdown/syntax.text * * *

Overview

Philosophy

Markdown is intended to be as easy-to-read and easy-to-write as is feasible. Readability, however, is emphasized above all else. A Markdown-formatted document should be publishable as-is, as plain text, without looking like it's been marked up with tags or formatting instructions. While Markdown's syntax has been influenced by several existing text-to-HTML filters -- including [Setext] [1], [atx] [2], [Textile] [3], [reStructuredText] [4], [Grutatext] [5], and [EtText] [6] -- the single biggest source of inspiration for Markdown's syntax is the format of plain text email. [1]: http://docutils.sourceforge.net/mirror/setext.html [2]: http://www.aaronsw.com/2002/atx/ [3]: http://textism.com/tools/textile/ [4]: http://docutils.sourceforge.net/rst.html [5]: http://www.triptico.com/software/grutatxt.html [6]: http://ettext.taint.org/doc/ To this end, Markdown's syntax is comprised entirely of punctuation characters, which punctuation characters have been carefully chosen so as to look like what they mean. E.g., asterisks around a word actually look like \*emphasis\*. Markdown lists look like, well, lists. Even blockquotes look like quoted passages of text, assuming you've ever used email.

Inline HTML

Markdown's syntax is intended for one purpose: to be used as a format for *writing* for the web. Markdown is not a replacement for HTML, or even close to it. Its syntax is very small, corresponding only to a very small subset of HTML tags. The idea is *not* to create a syntax that makes it easier to insert HTML tags. In my opinion, HTML tags are already easy to insert. The idea for Markdown is to make it easy to read, write, and edit prose. HTML is a *publishing* format; Markdown is a *writing* format. Thus, Markdown's formatting syntax only addresses issues that can be conveyed in plain text. For any markup that is not covered by Markdown's syntax, you simply use HTML itself. There's no need to preface it or delimit it to indicate that you're switching from Markdown to HTML; you just use the tags. The only restrictions are that block-level HTML elements -- e.g. `
`, ``, `
`, `

`, etc. -- must be separated from surrounding content by blank lines, and the start and end tags of the block should not be indented with tabs or spaces. Markdown is smart enough not to add extra (unwanted) `

` tags around HTML block-level tags. For example, to add an HTML table to a Markdown article: This is a regular paragraph.

Foo
This is another regular paragraph. Note that Markdown formatting syntax is not processed within block-level HTML tags. E.g., you can't use Markdown-style `*emphasis*` inside an HTML block. Span-level HTML tags -- e.g. ``, ``, or `` -- can be used anywhere in a Markdown paragraph, list item, or header. If you want, you can even use HTML tags instead of Markdown formatting; e.g. if you'd prefer to use HTML `` or `` tags instead of Markdown's link or image syntax, go right ahead. Unlike block-level HTML tags, Markdown syntax *is* processed within span-level tags.

Automatic Escaping for Special Characters

In HTML, there are two characters that demand special treatment: `<` and `&`. Left angle brackets are used to start tags; ampersands are used to denote HTML entities. If you want to use them as literal characters, you must escape them as entities, e.g. `<`, and `&`. Ampersands in particular are bedeviling for web writers. If you want to write about 'AT&T', you need to write '`AT&T`'. You even need to escape ampersands within URLs. Thus, if you want to link to: http://images.google.com/images?num=30&q=larry+bird you need to encode the URL as: http://images.google.com/images?num=30&q=larry+bird in your anchor tag `href` attribute. Needless to say, this is easy to forget, and is probably the single most common source of HTML validation errors in otherwise well-marked-up web sites. Markdown allows you to use these characters naturally, taking care of all the necessary escaping for you. If you use an ampersand as part of an HTML entity, it remains unchanged; otherwise it will be translated into `&`. So, if you want to include a copyright symbol in your article, you can write: © and Markdown will leave it alone. But if you write: AT&T Markdown will translate it to: AT&T Similarly, because Markdown supports [inline HTML](#html), if you use angle brackets as delimiters for HTML tags, Markdown will treat them as such. But if you write: 4 < 5 Markdown will translate it to: 4 < 5 However, inside Markdown code spans and blocks, angle brackets and ampersands are *always* encoded automatically. This makes it easy to use Markdown to write about HTML code. (As opposed to raw HTML, which is a terrible format for writing about HTML syntax, because every single `<` and `&` in your example code needs to be escaped.) * * *

Block Elements

Paragraphs and Line Breaks

A paragraph is simply one or more consecutive lines of text, separated by one or more blank lines. (A blank line is any line that looks like a blank line -- a line containing nothing but spaces or tabs is considered blank.) Normal paragraphs should not be intended with spaces or tabs. The implication of the "one or more consecutive lines of text" rule is that Markdown supports "hard-wrapped" text paragraphs. This differs significantly from most other text-to-HTML formatters (including Movable Type's "Convert Line Breaks" option) which translate every line break character in a paragraph into a `
` tag. When you *do* want to insert a `
` break tag using Markdown, you end a line with two or more spaces, then type return. Yes, this takes a tad more effort to create a `
`, but a simplistic "every line break is a `
`" rule wouldn't work for Markdown. Markdown's email-style [blockquoting][bq] and multi-paragraph [list items][l] work best -- and look better -- when you format them with hard breaks. [bq]: #blockquote [l]: #list Markdown supports two styles of headers, [Setext] [1] and [atx] [2]. Setext-style headers are "underlined" using equal signs (for first-level headers) and dashes (for second-level headers). For example: This is an H1 ============= This is an H2 ------------- Any number of underlining `=`'s or `-`'s will work. Atx-style headers use 1-6 hash characters at the start of the line, corresponding to header levels 1-6. For example: # This is an H1 ## This is an H2 ###### This is an H6 Optionally, you may "close" atx-style headers. This is purely cosmetic -- you can use this if you think it looks better. The closing hashes don't even need to match the number of hashes used to open the header. (The number of opening hashes determines the header level.) : # This is an H1 # ## This is an H2 ## ### This is an H3 ######

Blockquotes

Markdown uses email-style `>` characters for blockquoting. If you're familiar with quoting passages of text in an email message, then you know how to create a blockquote in Markdown. It looks best if you hard wrap the text and put a `>` before every line: > This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet, > consectetuer adipiscing elit. Aliquam hendrerit mi posuere lectus. > Vestibulum enim wisi, viverra nec, fringilla in, laoreet vitae, risus. > > Donec sit amet nisl. Aliquam semper ipsum sit amet velit. Suspendisse > id sem consectetuer libero luctus adipiscing. Markdown allows you to be lazy and only put the `>` before the first line of a hard-wrapped paragraph: > This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Aliquam hendrerit mi posuere lectus. Vestibulum enim wisi, viverra nec, fringilla in, laoreet vitae, risus. > Donec sit amet nisl. Aliquam semper ipsum sit amet velit. Suspendisse id sem consectetuer libero luctus adipiscing. Blockquotes can be nested (i.e. a blockquote-in-a-blockquote) by adding additional levels of `>`: > This is the first level of quoting. > > > This is nested blockquote. > > Back to the first level. Blockquotes can contain other Markdown elements, including headers, lists, and code blocks: > ## This is a header. > > 1. This is the first list item. > 2. This is the second list item. > > Here's some example code: > > return shell_exec("echo $input | $markdown_script"); Any decent text editor should make email-style quoting easy. For example, with BBEdit, you can make a selection and choose Increase Quote Level from the Text menu.

Lists

Markdown supports ordered (numbered) and unordered (bulleted) lists. Unordered lists use asterisks, pluses, and hyphens -- interchangably -- as list markers: * Red * Green * Blue is equivalent to: + Red + Green + Blue and: - Red - Green - Blue Ordered lists use numbers followed by periods: 1. Bird 2. McHale 3. Parish It's important to note that the actual numbers you use to mark the list have no effect on the HTML output Markdown produces. The HTML Markdown produces from the above list is:
  1. Bird
  2. McHale
  3. Parish
If you instead wrote the list in Markdown like this: 1. Bird 1. McHale 1. Parish or even: 3. Bird 1. McHale 8. Parish you'd get the exact same HTML output. The point is, if you want to, you can use ordinal numbers in your ordered Markdown lists, so that the numbers in your source match the numbers in your published HTML. But if you want to be lazy, you don't have to. If you do use lazy list numbering, however, you should still start the list with the number 1. At some point in the future, Markdown may support starting ordered lists at an arbitrary number. List markers typically start at the left margin, but may be indented by up to three spaces. List markers must be followed by one or more spaces or a tab. To make lists look nice, you can wrap items with hanging indents: * Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Aliquam hendrerit mi posuere lectus. Vestibulum enim wisi, viverra nec, fringilla in, laoreet vitae, risus. * Donec sit amet nisl. Aliquam semper ipsum sit amet velit. Suspendisse id sem consectetuer libero luctus adipiscing. But if you want to be lazy, you don't have to: * Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Aliquam hendrerit mi posuere lectus. Vestibulum enim wisi, viverra nec, fringilla in, laoreet vitae, risus. * Donec sit amet nisl. Aliquam semper ipsum sit amet velit. Suspendisse id sem consectetuer libero luctus adipiscing. If list items are separated by blank lines, Markdown will wrap the items in `

` tags in the HTML output. For example, this input: * Bird * Magic will turn into:

  • Bird
  • Magic
But this: * Bird * Magic will turn into:
  • Bird

  • Magic

List items may consist of multiple paragraphs. Each subsequent paragraph in a list item must be intended by either 4 spaces or one tab: 1. This is a list item with two paragraphs. Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Aliquam hendrerit mi posuere lectus. Vestibulum enim wisi, viverra nec, fringilla in, laoreet vitae, risus. Donec sit amet nisl. Aliquam semper ipsum sit amet velit. 2. Suspendisse id sem consectetuer libero luctus adipiscing. It looks nice if you indent every line of the subsequent paragraphs, but here again, Markdown will allow you to be lazy: * This is a list item with two paragraphs. This is the second paragraph in the list item. You're only required to indent the first line. Lorem ipsum dolor sit amet, consectetuer adipiscing elit. * Another item in the same list. To put a blockquote within a list item, the blockquote's `>` delimiters need to be indented: * A list item with a blockquote: > This is a blockquote > inside a list item. To put a code block within a list item, the code block needs to be indented *twice* -- 8 spaces or two tabs: * A list item with a code block: It's worth noting that it's possible to trigger an ordered list by accident, by writing something like this: 1986. What a great season. In other words, a *number-period-space* sequence at the beginning of a line. To avoid this, you can backslash-escape the period: 1986\. What a great season.

Code Blocks

Pre-formatted code blocks are used for writing about programming or markup source code. Rather than forming normal paragraphs, the lines of a code block are interpreted literally. Markdown wraps a code block in both `
` and `` tags.

To produce a code block in Markdown, simply indent every line of the
block by at least 4 spaces or 1 tab. For example, given this input:

    This is a normal paragraph:

        This is a code block.

Markdown will generate:

    

This is a normal paragraph:

This is a code block.
    
One level of indentation -- 4 spaces or 1 tab -- is removed from each line of the code block. For example, this: Here is an example of AppleScript: tell application "Foo" beep end tell will turn into:

Here is an example of AppleScript:

tell application "Foo"
        beep
    end tell
    
A code block continues until it reaches a line that is not indented (or the end of the article). Within a code block, ampersands (`&`) and angle brackets (`<` and `>`) are automatically converted into HTML entities. This makes it very easy to include example HTML source code using Markdown -- just paste it and indent it, and Markdown will handle the hassle of encoding the ampersands and angle brackets. For example, this: will turn into:
<div class="footer">
        &copy; 2004 Foo Corporation
    </div>
    
Regular Markdown syntax is not processed within code blocks. E.g., asterisks are just literal asterisks within a code block. This means it's also easy to use Markdown to write about Markdown's own syntax.

Horizontal Rules

You can produce a horizontal rule tag (`
`) by placing three or more hyphens, asterisks, or underscores on a line by themselves. If you wish, you may use spaces between the hyphens or asterisks. Each of the following lines will produce a horizontal rule: * * * *** ***** - - - --------------------------------------- _ _ _ * * *

Span Elements

Markdown supports two style of links: *inline* and *reference*. In both styles, the link text is delimited by [square brackets]. To create an inline link, use a set of regular parentheses immediately after the link text's closing square bracket. Inside the parentheses, put the URL where you want the link to point, along with an *optional* title for the link, surrounded in quotes. For example: This is [an example](http://example.com/ "Title") inline link. [This link](http://example.net/) has no title attribute. Will produce:

This is an example inline link.

This link has no title attribute.

If you're referring to a local resource on the same server, you can use relative paths: See my [About](/about/) page for details. Reference-style links use a second set of square brackets, inside which you place a label of your choosing to identify the link: This is [an example][id] reference-style link. You can optionally use a space to separate the sets of brackets: This is [an example] [id] reference-style link. Then, anywhere in the document, you define your link label like this, on a line by itself: [id]: http://example.com/ "Optional Title Here" That is: * Square brackets containing the link identifier (optionally indented from the left margin using up to three spaces); * followed by a colon; * followed by one or more spaces (or tabs); * followed by the URL for the link; * optionally followed by a title attribute for the link, enclosed in double or single quotes. The link URL may, optionally, be surrounded by angle brackets: [id]: "Optional Title Here" You can put the title attribute on the next line and use extra spaces or tabs for padding, which tends to look better with longer URLs: [id]: http://example.com/longish/path/to/resource/here "Optional Title Here" Link definitions are only used for creating links during Markdown processing, and are stripped from your document in the HTML output. Link definition names may constist of letters, numbers, spaces, and punctuation -- but they are *not* case sensitive. E.g. these two links: [link text][a] [link text][A] are equivalent. The *implicit link name* shortcut allows you to omit the name of the link, in which case the link text itself is used as the name. Just use an empty set of square brackets -- e.g., to link the word "Google" to the google.com web site, you could simply write: [Google][] And then define the link: [Google]: http://google.com/ Because link names may contain spaces, this shortcut even works for multiple words in the link text: Visit [Daring Fireball][] for more information. And then define the link: [Daring Fireball]: http://daringfireball.net/ Link definitions can be placed anywhere in your Markdown document. I tend to put them immediately after each paragraph in which they're used, but if you want, you can put them all at the end of your document, sort of like footnotes. Here's an example of reference links in action: I get 10 times more traffic from [Google] [1] than from [Yahoo] [2] or [MSN] [3]. [1]: http://google.com/ "Google" [2]: http://search.yahoo.com/ "Yahoo Search" [3]: http://search.msn.com/ "MSN Search" Using the implicit link name shortcut, you could instead write: I get 10 times more traffic from [Google][] than from [Yahoo][] or [MSN][]. [google]: http://google.com/ "Google" [yahoo]: http://search.yahoo.com/ "Yahoo Search" [msn]: http://search.msn.com/ "MSN Search" Both of the above examples will produce the following HTML output:

I get 10 times more traffic from Google than from Yahoo or MSN.

For comparison, here is the same paragraph written using Markdown's inline link style: I get 10 times more traffic from [Google](http://google.com/ "Google") than from [Yahoo](http://search.yahoo.com/ "Yahoo Search") or [MSN](http://search.msn.com/ "MSN Search"). The point of reference-style links is not that they're easier to write. The point is that with reference-style links, your document source is vastly more readable. Compare the above examples: using reference-style links, the paragraph itself is only 81 characters long; with inline-style links, it's 176 characters; and as raw HTML, it's 234 characters. In the raw HTML, there's more markup than there is text. With Markdown's reference-style links, a source document much more closely resembles the final output, as rendered in a browser. By allowing you to move the markup-related metadata out of the paragraph, you can add links without interrupting the narrative flow of your prose.

Emphasis

Markdown treats asterisks (`*`) and underscores (`_`) as indicators of emphasis. Text wrapped with one `*` or `_` will be wrapped with an HTML `` tag; double `*`'s or `_`'s will be wrapped with an HTML `` tag. E.g., this input: *single asterisks* _single underscores_ **double asterisks** __double underscores__ will produce: single asterisks single underscores double asterisks double underscores You can use whichever style you prefer; the lone restriction is that the same character must be used to open and close an emphasis span. Emphasis can be used in the middle of a word: un*fucking*believable But if you surround an `*` or `_` with spaces, it'll be treated as a literal asterisk or underscore. To produce a literal asterisk or underscore at a position where it would otherwise be used as an emphasis delimiter, you can backslash escape it: \*this text is surrounded by literal asterisks\*

Code

To indicate a span of code, wrap it with backtick quotes (`` ` ``). Unlike a pre-formatted code block, a code span indicates code within a normal paragraph. For example: Use the `printf()` function. will produce:

Use the printf() function.

To include a literal backtick character within a code span, you can use multiple backticks as the opening and closing delimiters: ``There is a literal backtick (`) here.`` which will produce this:

There is a literal backtick (`) here.

The backtick delimiters surrounding a code span may include spaces -- one after the opening, one before the closing. This allows you to place literal backtick characters at the beginning or end of a code span: A single backtick in a code span: `` ` `` A backtick-delimited string in a code span: `` `foo` `` will produce:

A single backtick in a code span: `

A backtick-delimited string in a code span: `foo`

With a code span, ampersands and angle brackets are encoded as HTML entities automatically, which makes it easy to include example HTML tags. Markdown will turn this: Please don't use any `` tags. into:

Please don't use any <blink> tags.

You can write this: `—` is the decimal-encoded equivalent of `—`. to produce:

&#8212; is the decimal-encoded equivalent of &mdash;.

Images

Admittedly, it's fairly difficult to devise a "natural" syntax for placing images into a plain text document format. Markdown uses an image syntax that is intended to resemble the syntax for links, allowing for two styles: *inline* and *reference*. Inline image syntax looks like this: ![Alt text](/path/to/img.jpg) ![Alt text](/path/to/img.jpg "Optional title") That is: * An exclamation mark: `!`; * followed by a set of square brackets, containing the `alt` attribute text for the image; * followed by a set of parentheses, containing the URL or path to the image, and an optional `title` attribute enclosed in double or single quotes. Reference-style image syntax looks like this: ![Alt text][id] Where "id" is the name of a defined image reference. Image references are defined using syntax identical to link references: [id]: url/to/image "Optional title attribute" As of this writing, Markdown has no syntax for specifying the dimensions of an image; if this is important to you, you can simply use regular HTML `` tags. * * *

Miscellaneous

Markdown supports a shortcut style for creating "automatic" links for URLs and email addresses: simply surround the URL or email address with angle brackets. What this means is that if you want to show the actual text of a URL or email address, and also have it be a clickable link, you can do this: Markdown will turn this into: http://example.com/ Automatic links for email addresses work similarly, except that Markdown will also perform a bit of randomized decimal and hex entity-encoding to help obscure your address from address-harvesting spambots. For example, Markdown will turn this: into something like this: address@exa mple.com which will render in a browser as a clickable link to "address@example.com". (This sort of entity-encoding trick will indeed fool many, if not most, address-harvesting bots, but it definitely won't fool all of them. It's better than nothing, but an address published in this way will probably eventually start receiving spam.)

Backslash Escapes

Markdown allows you to use backslash escapes to generate literal characters which would otherwise have special meaning in Markdown's formatting syntax. For example, if you wanted to surround a word with literal asterisks (instead of an HTML `` tag), you can backslashes before the asterisks, like this: \*literal asterisks\* Markdown provides backslash escapes for the following characters: \ backslash ` backtick * asterisk _ underscore {} curly braces [] square brackets () parentheses # hash mark + plus sign - minus sign (hyphen) . dot ! exclamation mark ~

Markdown: Syntax

Note: This document is itself written using Markdown; you can see the source for it by adding '.text' to the URL.


Overview

Philosophy

Markdown is intended to be as easy-to-read and easy-to-write as is feasible.

Readability, however, is emphasized above all else. A Markdown-formatted document should be publishable as-is, as plain text, without looking like it's been marked up with tags or formatting instructions. While Markdown's syntax has been influenced by several existing text-to-HTML filters -- including Setext, atx, Textile, reStructuredText, Grutatext, and EtText -- the single biggest source of inspiration for Markdown's syntax is the format of plain text email.

To this end, Markdown's syntax is comprised entirely of punctuation characters, which punctuation characters have been carefully chosen so as to look like what they mean. E.g., asterisks around a word actually look like *emphasis*. Markdown lists look like, well, lists. Even blockquotes look like quoted passages of text, assuming you've ever used email.

Inline HTML

Markdown's syntax is intended for one purpose: to be used as a format for writing for the web.

Markdown is not a replacement for HTML, or even close to it. Its syntax is very small, corresponding only to a very small subset of HTML tags. The idea is not to create a syntax that makes it easier to insert HTML tags. In my opinion, HTML tags are already easy to insert. The idea for Markdown is to make it easy to read, write, and edit prose. HTML is a publishing format; Markdown is a writing format. Thus, Markdown's formatting syntax only addresses issues that can be conveyed in plain text.

For any markup that is not covered by Markdown's syntax, you simply use HTML itself. There's no need to preface it or delimit it to indicate that you're switching from Markdown to HTML; you just use the tags.

The only restrictions are that block-level HTML elements -- e.g. <div>, <table>, <pre>, <p>, etc. -- must be separated from surrounding content by blank lines, and the start and end tags of the block should not be indented with tabs or spaces. Markdown is smart enough not to add extra (unwanted) <p> tags around HTML block-level tags.

For example, to add an HTML table to a Markdown article:

This is a regular paragraph.

<table>
    <tr>
        <td>Foo</td>
    </tr>
</table>

This is another regular paragraph.

Note that Markdown formatting syntax is not processed within block-level HTML tags. E.g., you can't use Markdown-style *emphasis* inside an HTML block.

Span-level HTML tags -- e.g. <span>, <cite>, or <del> -- can be used anywhere in a Markdown paragraph, list item, or header. If you want, you can even use HTML tags instead of Markdown formatting; e.g. if you'd prefer to use HTML <a> or <img> tags instead of Markdown's link or image syntax, go right ahead.

Unlike block-level HTML tags, Markdown syntax is processed within span-level tags.

Automatic Escaping for Special Characters

In HTML, there are two characters that demand special treatment: < and &. Left angle brackets are used to start tags; ampersands are used to denote HTML entities. If you want to use them as literal characters, you must escape them as entities, e.g. &lt;, and &amp;.

Ampersands in particular are bedeviling for web writers. If you want to write about 'AT&T', you need to write 'AT&amp;T'. You even need to escape ampersands within URLs. Thus, if you want to link to:

http://images.google.com/images?num=30&q=larry+bird

you need to encode the URL as:

http://images.google.com/images?num=30&amp;q=larry+bird

in your anchor tag href attribute. Needless to say, this is easy to forget, and is probably the single most common source of HTML validation errors in otherwise well-marked-up web sites.

Markdown allows you to use these characters naturally, taking care of all the necessary escaping for you. If you use an ampersand as part of an HTML entity, it remains unchanged; otherwise it will be translated into &amp;.

So, if you want to include a copyright symbol in your article, you can write:

&copy;

and Markdown will leave it alone. But if you write:

AT&T

Markdown will translate it to:

AT&amp;T

Similarly, because Markdown supports inline HTML, if you use angle brackets as delimiters for HTML tags, Markdown will treat them as such. But if you write:

4 < 5

Markdown will translate it to:

4 &lt; 5

However, inside Markdown code spans and blocks, angle brackets and ampersands are always encoded automatically. This makes it easy to use Markdown to write about HTML code. (As opposed to raw HTML, which is a terrible format for writing about HTML syntax, because every single < and & in your example code needs to be escaped.)


Block Elements

Paragraphs and Line Breaks

A paragraph is simply one or more consecutive lines of text, separated by one or more blank lines. (A blank line is any line that looks like a blank line -- a line containing nothing but spaces or tabs is considered blank.) Normal paragraphs should not be intended with spaces or tabs.

The implication of the "one or more consecutive lines of text" rule is that Markdown supports "hard-wrapped" text paragraphs. This differs significantly from most other text-to-HTML formatters (including Movable Type's "Convert Line Breaks" option) which translate every line break character in a paragraph into a <br /> tag.

When you do want to insert a <br /> break tag using Markdown, you end a line with two or more spaces, then type return.

Yes, this takes a tad more effort to create a <br />, but a simplistic "every line break is a <br />" rule wouldn't work for Markdown. Markdown's email-style blockquoting and multi-paragraph list items work best -- and look better -- when you format them with hard breaks.

Markdown supports two styles of headers, Setext and atx.

Setext-style headers are "underlined" using equal signs (for first-level headers) and dashes (for second-level headers). For example:

This is an H1
=============

This is an H2
-------------

Any number of underlining ='s or -'s will work.

Atx-style headers use 1-6 hash characters at the start of the line, corresponding to header levels 1-6. For example:

# This is an H1

## This is an H2

###### This is an H6

Optionally, you may "close" atx-style headers. This is purely cosmetic -- you can use this if you think it looks better. The closing hashes don't even need to match the number of hashes used to open the header. (The number of opening hashes determines the header level.) :

# This is an H1 #

## This is an H2 ##

### This is an H3 ######

Blockquotes

Markdown uses email-style > characters for blockquoting. If you're familiar with quoting passages of text in an email message, then you know how to create a blockquote in Markdown. It looks best if you hard wrap the text and put a > before every line:

> This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet,
> consectetuer adipiscing elit. Aliquam hendrerit mi posuere lectus.
> Vestibulum enim wisi, viverra nec, fringilla in, laoreet vitae, risus.
> 
> Donec sit amet nisl. Aliquam semper ipsum sit amet velit. Suspendisse
> id sem consectetuer libero luctus adipiscing.

Markdown allows you to be lazy and only put the > before the first line of a hard-wrapped paragraph:

> This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet,
consectetuer adipiscing elit. Aliquam hendrerit mi posuere lectus.
Vestibulum enim wisi, viverra nec, fringilla in, laoreet vitae, risus.

> Donec sit amet nisl. Aliquam semper ipsum sit amet velit. Suspendisse
id sem consectetuer libero luctus adipiscing.

Blockquotes can be nested (i.e. a blockquote-in-a-blockquote) by adding additional levels of >:

> This is the first level of quoting.
>
> > This is nested blockquote.
>
> Back to the first level.

Blockquotes can contain other Markdown elements, including headers, lists, and code blocks:

> ## This is a header.
> 
> 1.   This is the first list item.
> 2.   This is the second list item.
> 
> Here's some example code:
> 
>     return shell_exec("echo $input | $markdown_script");

Any decent text editor should make email-style quoting easy. For example, with BBEdit, you can make a selection and choose Increase Quote Level from the Text menu.

Lists

Markdown supports ordered (numbered) and unordered (bulleted) lists.

Unordered lists use asterisks, pluses, and hyphens -- interchangably -- as list markers:

*   Red
*   Green
*   Blue

is equivalent to:

+   Red
+   Green
+   Blue

and:

-   Red
-   Green
-   Blue

Ordered lists use numbers followed by periods:

1.  Bird
2.  McHale
3.  Parish

It's important to note that the actual numbers you use to mark the list have no effect on the HTML output Markdown produces. The HTML Markdown produces from the above list is:

<ol>
<li>Bird</li>
<li>McHale</li>
<li>Parish</li>
</ol>

If you instead wrote the list in Markdown like this:

1.  Bird
1.  McHale
1.  Parish

or even:

3. Bird
1. McHale
8. Parish

you'd get the exact same HTML output. The point is, if you want to, you can use ordinal numbers in your ordered Markdown lists, so that the numbers in your source match the numbers in your published HTML. But if you want to be lazy, you don't have to.

If you do use lazy list numbering, however, you should still start the list with the number 1. At some point in the future, Markdown may support starting ordered lists at an arbitrary number.

List markers typically start at the left margin, but may be indented by up to three spaces. List markers must be followed by one or more spaces or a tab.

To make lists look nice, you can wrap items with hanging indents:

*   Lorem ipsum dolor sit amet, consectetuer adipiscing elit.
    Aliquam hendrerit mi posuere lectus. Vestibulum enim wisi,
    viverra nec, fringilla in, laoreet vitae, risus.
*   Donec sit amet nisl. Aliquam semper ipsum sit amet velit.
    Suspendisse id sem consectetuer libero luctus adipiscing.

But if you want to be lazy, you don't have to:

*   Lorem ipsum dolor sit amet, consectetuer adipiscing elit.
Aliquam hendrerit mi posuere lectus. Vestibulum enim wisi,
viverra nec, fringilla in, laoreet vitae, risus.
*   Donec sit amet nisl. Aliquam semper ipsum sit amet velit.
Suspendisse id sem consectetuer libero luctus adipiscing.

If list items are separated by blank lines, Markdown will wrap the items in <p> tags in the HTML output. For example, this input:

*   Bird
*   Magic

will turn into:

<ul>
<li>Bird</li>
<li>Magic</li>
</ul>

But this:

*   Bird

*   Magic

will turn into:

<ul>
<li><p>Bird</p></li>
<li><p>Magic</p></li>
</ul>

List items may consist of multiple paragraphs. Each subsequent paragraph in a list item must be intended by either 4 spaces or one tab:

1.  This is a list item with two paragraphs. Lorem ipsum dolor
    sit amet, consectetuer adipiscing elit. Aliquam hendrerit
    mi posuere lectus.

    Vestibulum enim wisi, viverra nec, fringilla in, laoreet
    vitae, risus. Donec sit amet nisl. Aliquam semper ipsum
    sit amet velit.

2.  Suspendisse id sem consectetuer libero luctus adipiscing.

It looks nice if you indent every line of the subsequent paragraphs, but here again, Markdown will allow you to be lazy:

*   This is a list item with two paragraphs.

    This is the second paragraph in the list item. You're
only required to indent the first line. Lorem ipsum dolor
sit amet, consectetuer adipiscing elit.

*   Another item in the same list.

To put a blockquote within a list item, the blockquote's > delimiters need to be indented:

*   A list item with a blockquote:

    > This is a blockquote
    > inside a list item.

To put a code block within a list item, the code block needs to be indented twice -- 8 spaces or two tabs:

*   A list item with a code block:

        <code goes here>

It's worth noting that it's possible to trigger an ordered list by accident, by writing something like this:

1986. What a great season.

In other words, a number-period-space sequence at the beginning of a line. To avoid this, you can backslash-escape the period:

1986\. What a great season.

Code Blocks

Pre-formatted code blocks are used for writing about programming or markup source code. Rather than forming normal paragraphs, the lines of a code block are interpreted literally. Markdown wraps a code block in both <pre> and <code> tags.

To produce a code block in Markdown, simply indent every line of the block by at least 4 spaces or 1 tab. For example, given this input:

This is a normal paragraph:

    This is a code block.

Markdown will generate:

<p>This is a normal paragraph:</p>

<pre><code>This is a code block.
</code></pre>

One level of indentation -- 4 spaces or 1 tab -- is removed from each line of the code block. For example, this:

Here is an example of AppleScript:

    tell application "Foo"
        beep
    end tell

will turn into:

<p>Here is an example of AppleScript:</p>

<pre><code>tell application "Foo"
    beep
end tell
</code></pre>

A code block continues until it reaches a line that is not indented (or the end of the article).

Within a code block, ampersands (&) and angle brackets (< and >) are automatically converted into HTML entities. This makes it very easy to include example HTML source code using Markdown -- just paste it and indent it, and Markdown will handle the hassle of encoding the ampersands and angle brackets. For example, this:

    <div class="footer">
        &copy; 2004 Foo Corporation
    </div>

will turn into:

<pre><code>&lt;div class="footer"&gt;
    &amp;copy; 2004 Foo Corporation
&lt;/div&gt;
</code></pre>

Regular Markdown syntax is not processed within code blocks. E.g., asterisks are just literal asterisks within a code block. This means it's also easy to use Markdown to write about Markdown's own syntax.

Horizontal Rules

You can produce a horizontal rule tag (<hr />) by placing three or more hyphens, asterisks, or underscores on a line by themselves. If you wish, you may use spaces between the hyphens or asterisks. Each of the following lines will produce a horizontal rule:

* * *

***

*****

- - -

---------------------------------------

_ _ _

Span Elements

Markdown supports two style of links: inline and reference.

In both styles, the link text is delimited by [square brackets].

To create an inline link, use a set of regular parentheses immediately after the link text's closing square bracket. Inside the parentheses, put the URL where you want the link to point, along with an optional title for the link, surrounded in quotes. For example:

This is [an example](http://example.com/ "Title") inline link.

[This link](http://example.net/) has no title attribute.

Will produce:

<p>This is <a href="http://example.com/" title="Title">
an example</a> inline link.</p>

<p><a href="http://example.net/">This link</a> has no
title attribute.</p>

If you're referring to a local resource on the same server, you can use relative paths:

See my [About](/about/) page for details.

Reference-style links use a second set of square brackets, inside which you place a label of your choosing to identify the link:

This is [an example][id] reference-style link.

You can optionally use a space to separate the sets of brackets:

This is [an example] [id] reference-style link.

Then, anywhere in the document, you define your link label like this, on a line by itself:

[id]: http://example.com/  "Optional Title Here"

That is:

  • Square brackets containing the link identifier (optionally indented from the left margin using up to three spaces);
  • followed by a colon;
  • followed by one or more spaces (or tabs);
  • followed by the URL for the link;
  • optionally followed by a title attribute for the link, enclosed in double or single quotes.

The link URL may, optionally, be surrounded by angle brackets:

[id]: <http://example.com/>  "Optional Title Here"

You can put the title attribute on the next line and use extra spaces or tabs for padding, which tends to look better with longer URLs:

[id]: http://example.com/longish/path/to/resource/here
    "Optional Title Here"

Link definitions are only used for creating links during Markdown processing, and are stripped from your document in the HTML output.

Link definition names may constist of letters, numbers, spaces, and punctuation -- but they are not case sensitive. E.g. these two links:

[link text][a]
[link text][A]

are equivalent.

The implicit link name shortcut allows you to omit the name of the link, in which case the link text itself is used as the name. Just use an empty set of square brackets -- e.g., to link the word "Google" to the google.com web site, you could simply write:

[Google][]

And then define the link:

[Google]: http://google.com/

Because link names may contain spaces, this shortcut even works for multiple words in the link text:

Visit [Daring Fireball][] for more information.

And then define the link:

[Daring Fireball]: http://daringfireball.net/

Link definitions can be placed anywhere in your Markdown document. I tend to put them immediately after each paragraph in which they're used, but if you want, you can put them all at the end of your document, sort of like footnotes.

Here's an example of reference links in action:

I get 10 times more traffic from [Google] [1] than from
[Yahoo] [2] or [MSN] [3].

  [1]: http://google.com/        "Google"
  [2]: http://search.yahoo.com/  "Yahoo Search"
  [3]: http://search.msn.com/    "MSN Search"

Using the implicit link name shortcut, you could instead write:

I get 10 times more traffic from [Google][] than from
[Yahoo][] or [MSN][].

  [google]: http://google.com/        "Google"
  [yahoo]:  http://search.yahoo.com/  "Yahoo Search"
  [msn]:    http://search.msn.com/    "MSN Search"

Both of the above examples will produce the following HTML output:

<p>I get 10 times more traffic from <a href="http://google.com/"
title="Google">Google</a> than from
<a href="http://search.yahoo.com/" title="Yahoo Search">Yahoo</a>
or <a href="http://search.msn.com/" title="MSN Search">MSN</a>.</p>

For comparison, here is the same paragraph written using Markdown's inline link style:

I get 10 times more traffic from [Google](http://google.com/ "Google")
than from [Yahoo](http://search.yahoo.com/ "Yahoo Search") or
[MSN](http://search.msn.com/ "MSN Search").

The point of reference-style links is not that they're easier to write. The point is that with reference-style links, your document source is vastly more readable. Compare the above examples: using reference-style links, the paragraph itself is only 81 characters long; with inline-style links, it's 176 characters; and as raw HTML, it's 234 characters. In the raw HTML, there's more markup than there is text.

With Markdown's reference-style links, a source document much more closely resembles the final output, as rendered in a browser. By allowing you to move the markup-related metadata out of the paragraph, you can add links without interrupting the narrative flow of your prose.

Emphasis

Markdown treats asterisks (*) and underscores (_) as indicators of emphasis. Text wrapped with one * or _ will be wrapped with an HTML <em> tag; double *'s or _'s will be wrapped with an HTML <strong> tag. E.g., this input:

*single asterisks*

_single underscores_

**double asterisks**

__double underscores__

will produce:

<em>single asterisks</em>

<em>single underscores</em>

<strong>double asterisks</strong>

<strong>double underscores</strong>

You can use whichever style you prefer; the lone restriction is that the same character must be used to open and close an emphasis span.

Emphasis can be used in the middle of a word:

un*fucking*believable

But if you surround an * or _ with spaces, it'll be treated as a literal asterisk or underscore.

To produce a literal asterisk or underscore at a position where it would otherwise be used as an emphasis delimiter, you can backslash escape it:

\*this text is surrounded by literal asterisks\*

Code

To indicate a span of code, wrap it with backtick quotes (`). Unlike a pre-formatted code block, a code span indicates code within a normal paragraph. For example:

Use the `printf()` function.

will produce:

<p>Use the <code>printf()</code> function.</p>

To include a literal backtick character within a code span, you can use multiple backticks as the opening and closing delimiters:

``There is a literal backtick (`) here.``

which will produce this:

<p><code>There is a literal backtick (`) here.</code></p>

The backtick delimiters surrounding a code span may include spaces -- one after the opening, one before the closing. This allows you to place literal backtick characters at the beginning or end of a code span:

A single backtick in a code span: `` ` ``

A backtick-delimited string in a code span: `` `foo` ``

will produce:

<p>A single backtick in a code span: <code>`</code></p>

<p>A backtick-delimited string in a code span: <code>`foo`</code></p>

With a code span, ampersands and angle brackets are encoded as HTML entities automatically, which makes it easy to include example HTML tags. Markdown will turn this:

Please don't use any `<blink>` tags.

into:

<p>Please don't use any <code>&lt;blink&gt;</code> tags.</p>

You can write this:

`&#8212;` is the decimal-encoded equivalent of `&mdash;`.

to produce:

<p><code>&amp;#8212;</code> is the decimal-encoded
equivalent of <code>&amp;mdash;</code>.</p>

Images

Admittedly, it's fairly difficult to devise a "natural" syntax for placing images into a plain text document format.

Markdown uses an image syntax that is intended to resemble the syntax for links, allowing for two styles: inline and reference.

Inline image syntax looks like this:

![Alt text](/path/to/img.jpg)

![Alt text](/path/to/img.jpg "Optional title")

That is:

  • An exclamation mark: !;
  • followed by a set of square brackets, containing the alt attribute text for the image;
  • followed by a set of parentheses, containing the URL or path to the image, and an optional title attribute enclosed in double or single quotes.

Reference-style image syntax looks like this:

![Alt text][id]

Where "id" is the name of a defined image reference. Image references are defined using syntax identical to link references:

[id]: url/to/image  "Optional title attribute"

As of this writing, Markdown has no syntax for specifying the dimensions of an image; if this is important to you, you can simply use regular HTML <img> tags.


Miscellaneous

Markdown supports a shortcut style for creating "automatic" links for URLs and email addresses: simply surround the URL or email address with angle brackets. What this means is that if you want to show the actual text of a URL or email address, and also have it be a clickable link, you can do this:

<http://example.com/>

Markdown will turn this into:

<a href="http://example.com/">http://example.com/</a>

Automatic links for email addresses work similarly, except that Markdown will also perform a bit of randomized decimal and hex entity-encoding to help obscure your address from address-harvesting spambots. For example, Markdown will turn this:

<address@example.com>

into something like this:

<a href="&#x6D;&#x61;i&#x6C;&#x74;&#x6F;:&#x61;&#x64;&#x64;&#x72;&#x65;
&#115;&#115;&#64;&#101;&#120;&#x61;&#109;&#x70;&#x6C;e&#x2E;&#99;&#111;
&#109;">&#x61;&#x64;&#x64;&#x72;&#x65;&#115;&#115;&#64;&#101;&#120;&#x61;
&#109;&#x70;&#x6C;e&#x2E;&#99;&#111;&#109;</a>

which will render in a browser as a clickable link to "address@example.com".

(This sort of entity-encoding trick will indeed fool many, if not most, address-harvesting bots, but it definitely won't fool all of them. It's better than nothing, but an address published in this way will probably eventually start receiving spam.)

Backslash Escapes

Markdown allows you to use backslash escapes to generate literal characters which would otherwise have special meaning in Markdown's formatting syntax. For example, if you wanted to surround a word with literal asterisks (instead of an HTML <em> tag), you can backslashes before the asterisks, like this:

\*literal asterisks\*

Markdown provides backslash escapes for the following characters:

\   backslash
`   backtick
*   asterisk
_   underscore
{}  curly braces
[]  square brackets
()  parentheses
#   hash mark
+   plus sign
-   minus sign (hyphen)
.   dot
!   exclamation mark
]] markdown_nested_blockquotes = [[ > foo > > > bar > > foo ~

foo

bar

foo

]] markdown_ordered_and_unordered_lists = [[ ## Unordered Asterisks tight: * asterisk 1 * asterisk 2 * asterisk 3 Asterisks loose: * asterisk 1 * asterisk 2 * asterisk 3 * * * Pluses tight: + Plus 1 + Plus 2 + Plus 3 Pluses loose: + Plus 1 + Plus 2 + Plus 3 * * * Minuses tight: - Minus 1 - Minus 2 - Minus 3 Minuses loose: - Minus 1 - Minus 2 - Minus 3 ## Ordered Tight: 1. First 2. Second 3. Third and: 1. One 2. Two 3. Three Loose using tabs: 1. First 2. Second 3. Third and using spaces: 1. One 2. Two 3. Three Multiple paragraphs: 1. Item 1, graf one. Item 2. graf two. The quick brown fox jumped over the lazy dog's back. 2. Item 2. 3. Item 3. ## Nested * Tab * Tab * Tab Here's another: 1. First 2. Second: * Fee * Fie * Foe 3. Third Same thing but with paragraphs: 1. First 2. Second: * Fee * Fie * Foe 3. Third ~

Unordered

Asterisks tight:

  • asterisk 1
  • asterisk 2
  • asterisk 3

Asterisks loose:

  • asterisk 1

  • asterisk 2

  • asterisk 3


Pluses tight:

  • Plus 1
  • Plus 2
  • Plus 3

Pluses loose:

  • Plus 1

  • Plus 2

  • Plus 3


Minuses tight:

  • Minus 1
  • Minus 2
  • Minus 3

Minuses loose:

  • Minus 1

  • Minus 2

  • Minus 3

Ordered

Tight:

  1. First
  2. Second
  3. Third

and:

  1. One
  2. Two
  3. Three

Loose using tabs:

  1. First

  2. Second

  3. Third

and using spaces:

  1. One

  2. Two

  3. Three

Multiple paragraphs:

  1. Item 1, graf one.

    Item 2. graf two. The quick brown fox jumped over the lazy dog's back.

  2. Item 2.

  3. Item 3.

Nested

  • Tab
    • Tab
      • Tab

Here's another:

  1. First
  2. Second:
    • Fee
    • Fie
    • Foe
  3. Third

Same thing but with paragraphs:

  1. First

  2. Second:

    • Fee
    • Fie
    • Foe
  3. Third

]] markdown_strong_and_em_together = [[ ***This is strong and em.*** So is ***this*** word. ___This is strong and em.___ So is ___this___ word. ~

This is strong and em.

So is this word.

This is strong and em.

So is this word.

]] markdown_tabs = [[ + this is a list item indented with tabs + this is a list item indented with spaces Code: this code block is indented by one tab And: this code block is indented by two tabs And: + this is an example list item indented with tabs + this is an example list item indented with spaces ~
  • this is a list item indented with tabs

  • this is a list item indented with spaces

Code:

this code block is indented by one tab

And:

    this code block is indented by two tabs

And:

+   this is an example list item
    indented with tabs

+   this is an example list item
    indented with spaces
]] markdown_tidyness = [[ > A list within a blockquote: > > * asterisk 1 > * asterisk 2 > * asterisk 3 ~

A list within a blockquote:

  • asterisk 1
  • asterisk 2
  • asterisk 3
]] bug_from_paul_chiusano_gmail_com = [[ [context-free grammar][CFG] [notated][BNF] [unrestricted grammar][] [CFG]: [BNF]: [unrestricted grammar]: ~

context-free grammar
notated
unrestricted grammar

]] setfenv(1, _G) -- Test running function local function run_tests() -- Are s1 and s2 equal except for whitespace issues. local function nonspace_equal(s1, s2) s1 = s1:gsub("[ \t\n\r]", "") s2 = s2:gsub("[ \t\n\r]", "") return s1 == s2 end -- Runs a single test local function run_single_test(name, source, desired) local result = markdown(source) local res = nonspace_equal(result, desired) if not res then print("********** TEST FAILED **********") print(name) print("----- Input:") print(source) print("----- Expected output:") print(desired) print("----- Actual output:") print(result) print("*********************************") print() end return res end -- Runs a specified test battery local function run_test(name, code) local failed, succeeded = 0,0 local data = split(code, "\n~[ \t]*\n") for i = 1, #data, 2 do if run_single_test(name, data[i], data[i+1]) then succeeded = succeeded + 1 else failed = failed + 1 end end print(string.format("%-20s %15s %5i %15s %5i", name, "Succeeded:", succeeded, "Failed:", failed)) end for _,k in ipairs(TESTS._indices) do run_test(k,TESTS[k]) end end run_tests()markdown-0.32/markdown.lua0000644000175000017500000012013211020272623014627 0ustar tassitassi#!/usr/bin/env lua --[[ # markdown.lua -- version 0.32 **Author:** Niklas Frykholm, **Date:** 31 May 2008 This is an implementation of the popular text markup language Markdown in pure Lua. Markdown can convert documents written in a simple and easy to read text format to well-formatted HTML. For a more thourough description of Markdown and the Markdown syntax, see . The original Markdown source is written in Perl and makes heavy use of advanced regular expression techniques (such as negative look-ahead, etc) which are not available in Lua's simple regex engine. Therefore this Lua port has been rewritten from the ground up. It is probably not completely bug free. If you notice any bugs, please report them to me. A unit test that exposes the error is helpful. ## Usage require "markdown" markdown(source) ``markdown.lua`` exposes a single global function named ``markdown(s)`` which applies the Markdown transformation to the specified string. ``markdown.lua`` can also be used directly from the command line: lua markdown.lua test.md Creates a file ``test.html`` with the converted content of ``test.md``. Run: lua markdown.lua -h For a description of the command-line options. ``markdown.lua`` uses the same license as Lua, the MIT license. ## License Copyright © 2008 Niklas Frykholm. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ## Version history - **0.32** -- 31 May 2008 - Fix for links containing brackets - **0.31** -- 1 Mar 2008 - Fix for link definitions followed by spaces - **0.30** -- 25 Feb 2008 - Consistent behavior with Markdown when the same link reference is reused - **0.29** -- 24 Feb 2008 - Fix for
 blocks with spaces in them
-	**0.28** -- 18 Feb 2008
	-	Fix for link encoding
-	**0.27** -- 14 Feb 2008
	-	Fix for link database links with ()
-	**0.26** -- 06 Feb 2008
	-	Fix for nested italic and bold markers
-	**0.25** -- 24 Jan 2008
	-	Fix for encoding of naked <
-	**0.24** -- 21 Jan 2008
	-	Fix for link behavior.
-	**0.23** -- 10 Jan 2008
	-	Fix for a regression bug in longer expressions in italic or bold.
-	**0.22** -- 27 Dec 2007
	-	Fix for crash when processing blocks with a percent sign in them.
-	**0.21** -- 27 Dec 2007
	- 	Fix for combined strong and emphasis tags
-	**0.20** -- 13 Oct 2007
	-	Fix for < as well in image titles, now matches Dingus behavior
-	**0.19** -- 28 Sep 2007
	-	Fix for quotation marks " and ampersands & in link and image titles.
-	**0.18** -- 28 Jul 2007
	-	Does not crash on unmatched tags (behaves like standard markdown)
-	**0.17** -- 12 Apr 2007
	-	Fix for links with %20 in them.
-	**0.16** -- 12 Apr 2007
	-	Do not require arg global to exist.
-	**0.15** -- 28 Aug 2006
	-	Better handling of links with underscores in them.
-	**0.14** -- 22 Aug 2006
	-	Bug for *`foo()`*
-	**0.13** -- 12 Aug 2006
	-	Added -l option for including stylesheet inline in document.
	-	Fixed bug in -s flag.
	-	Fixed emphasis bug.
-	**0.12** -- 15 May 2006
	-	Fixed several bugs to comply with MarkdownTest 1.0 
-	**0.11** -- 12 May 2006
	-	Fixed bug for escaping `*` and `_` inside code spans.
	-	Added license terms.
	-	Changed join() to table.concat().
-	**0.10** -- 3 May 2006
	-	Initial public release.

// Niklas
]]


-- Set up a table for holding local functions to avoid polluting the global namespace
local M = {}
local MT = {__index = _G}
setmetatable(M, MT)
setfenv(1, M)

----------------------------------------------------------------------
-- Utility functions
----------------------------------------------------------------------

-- Locks table t from changes, writes an error if someone attempts to change the table.
-- This is useful for detecting variables that have "accidently" been made global. Something
-- I tend to do all too much.
function lock(t)
	function lock_new_index(t, k, v)
		error("module has been locked -- " .. k .. " must be declared local", 2)
	end

	local mt = {__newindex = lock_new_index}
	if getmetatable(t) then mt.__index = getmetatable(t).__index end
	setmetatable(t, mt)
end

-- Returns the result of mapping the values in table t through the function f
function map(t, f)
	local out = {}
	for k,v in pairs(t) do out[k] = f(v,k) end
	return out
end

-- The identity function, useful as a placeholder.
function identity(text) return text end

-- Functional style if statement. (NOTE: no short circuit evaluation)
function iff(t, a, b) if t then return a else return b end end

-- Splits the text into an array of separate lines.
function split(text, sep)
	sep = sep or "\n"
	local lines = {}
	local pos = 1
	while true do
		local b,e = text:find(sep, pos)
		if not b then table.insert(lines, text:sub(pos)) break end
		table.insert(lines, text:sub(pos, b-1))
		pos = e + 1
	end
	return lines
end

-- Converts tabs to spaces
function detab(text)
	local tab_width = 4
	local function rep(match)
		local spaces = -match:len()
		while spaces<1 do spaces = spaces + tab_width end
		return match .. string.rep(" ", spaces)
	end
	text = text:gsub("([^\n]-)\t", rep)
	return text
end

-- Applies string.find for every pattern in the list and returns the first match
function find_first(s, patterns, index)
	local res = {}
	for _,p in ipairs(patterns) do
		local match = {s:find(p, index)}
		if #match>0 and (#res==0 or match[1] < res[1]) then res = match end
	end
	return unpack(res)
end

-- If a replacement array is specified, the range [start, stop] in the array is replaced
-- with the replacement array and the resulting array is returned. Without a replacement
-- array the section of the array between start and stop is returned.
function splice(array, start, stop, replacement)
	if replacement then
		local n = stop - start + 1
		while n > 0 do
			table.remove(array, start)
			n = n - 1
		end
		for i,v in ipairs(replacement) do
			table.insert(array, start, v)
		end
		return array
	else
		local res = {}
		for i = start,stop do
			table.insert(res, array[i])
		end
		return res
	end
end

-- Outdents the text one step.
function outdent(text)
	text = "\n" .. text
	text = text:gsub("\n  ? ? ?", "\n")
	text = text:sub(2)
	return text
end

-- Indents the text one step.
function indent(text)
	text = text:gsub("\n", "\n    ")
	return text
end

-- Does a simple tokenization of html data. Returns the data as a list of tokens. 
-- Each token is a table with a type field (which is either "tag" or "text") and
-- a text field (which contains the original token data).
function tokenize_html(html)
	local tokens = {}
	local pos = 1
	while true do
		local start = find_first(html, {"", start)
		elseif html:match("^<%?", start) then
			_,stop = html:find("?>", start)
		else
			_,stop = html:find("%b<>", start)
		end
		if not stop then
			-- error("Could not match html tag " .. html:sub(start,start+30)) 
		 	table.insert(tokens, {type="text", text=html:sub(start, start)})
			pos = start + 1
		else
			table.insert(tokens, {type="tag", text=html:sub(start, stop)})
			pos = stop + 1
		end
	end
	return tokens
end

----------------------------------------------------------------------
-- Hash
----------------------------------------------------------------------

-- This is used to "hash" data into alphanumeric strings that are unique
-- in the document. (Note that this is not cryptographic hash, the hash
-- function is not one-way.) The hash procedure is used to protect parts
-- of the document from further processing.

local HASH = {
	-- Has the hash been inited.
	inited = false,
	
	-- The unique string prepended to all hash values. This is to ensure
	-- that hash values do not accidently coincide with an actual existing
	-- string in the document.
	identifier = "",
	
	-- Counter that counts up for each new hash instance.
	counter = 0,
	
	-- Hash table.
	table = {}
}

-- Inits hashing. Creates a hash_identifier that doesn't occur anywhere
-- in the text.
function init_hash(text)
	HASH.inited = true
	HASH.identifier = ""
	HASH.counter = 0
	HASH.table = {}
	
	local s = "HASH"
	local counter = 0
	local id
	while true do
		id  = s .. counter
		if not text:find(id, 1, true) then break end
		counter = counter + 1
	end
	HASH.identifier = id
end

-- Returns the hashed value for s.
function hash(s)
	assert(HASH.inited)
	if not HASH.table[s] then
		HASH.counter = HASH.counter + 1
		local id = HASH.identifier .. HASH.counter .. "X"
		HASH.table[s] = id
	end
	return HASH.table[s]
end

----------------------------------------------------------------------
-- Protection
----------------------------------------------------------------------

-- The protection module is used to "protect" parts of a document
-- so that they are not modified by subsequent processing steps. 
-- Protected parts are saved in a table for later unprotection

-- Protection data
local PD = {
	-- Saved blocks that have been converted
	blocks = {},

	-- Block level tags that will be protected
	tags = {"p", "div", "h1", "h2", "h3", "h4", "h5", "h6", "blockquote",
	"pre", "table", "dl", "ol", "ul", "script", "noscript", "form", "fieldset",
	"iframe", "math", "ins", "del"}
}

-- Pattern for matching a block tag that begins and ends in the leftmost
-- column and may contain indented subtags, i.e.
-- 
-- A nested block. --
-- Nested data. --
--
function block_pattern(tag) return "\n<" .. tag .. ".-\n[ \t]*\n" end -- Pattern for matching a block tag that begins and ends with a newline function line_pattern(tag) return "\n<" .. tag .. ".-[ \t]*\n" end -- Protects the range of characters from start to stop in the text and -- returns the protected string. function protect_range(text, start, stop) local s = text:sub(start, stop) local h = hash(s) PD.blocks[h] = s text = text:sub(1,start) .. h .. text:sub(stop) return text end -- Protect every part of the text that matches any of the patterns. The first -- matching pattern is protected first, etc. function protect_matches(text, patterns) while true do local start, stop = find_first(text, patterns) if not start then break end text = protect_range(text, start, stop) end return text end -- Protects blocklevel tags in the specified text function protect(text) -- First protect potentially nested block tags text = protect_matches(text, map(PD.tags, block_pattern)) -- Then protect block tags at the line level. text = protect_matches(text, map(PD.tags, line_pattern)) -- Protect
and comment tags text = protect_matches(text, {"\n]->[ \t]*\n"}) text = protect_matches(text, {"\n[ \t]*\n"}) return text end -- Returns true if the string s is a hash resulting from protection function is_protected(s) return PD.blocks[s] end -- Unprotects the specified text by expanding all the nonces function unprotect(text) for k,v in pairs(PD.blocks) do v = v:gsub("%%", "%%%%") text = text:gsub(k, v) end return text end ---------------------------------------------------------------------- -- Block transform ---------------------------------------------------------------------- -- The block transform functions transform the text on the block level. -- They work with the text as an array of lines rather than as individual -- characters. -- Returns true if the line is a ruler of (char) characters. -- The line must contain at least three char characters and contain only spaces and -- char characters. function is_ruler_of(line, char) if not line:match("^[ %" .. char .. "]*$") then return false end if not line:match("%" .. char .. ".*%" .. char .. ".*%" .. char) then return false end return true end -- Identifies the block level formatting present in the line function classify(line) local info = {line = line, text = line} if line:match("^ ") then info.type = "indented" info.outdented = line:sub(5) return info end for _,c in ipairs({'*', '-', '_', '='}) do if is_ruler_of(line, c) then info.type = "ruler" info.ruler_char = c return info end end if line == "" then info.type = "blank" return info end if line:match("^(#+)[ \t]*(.-)[ \t]*#*[ \t]*$") then local m1, m2 = line:match("^(#+)[ \t]*(.-)[ \t]*#*[ \t]*$") info.type = "header" info.level = m1:len() info.text = m2 return info end if line:match("^ ? ? ?(%d+)%.[ \t]+(.+)") then local number, text = line:match("^ ? ? ?(%d+)%.[ \t]+(.+)") info.type = "list_item" info.list_type = "numeric" info.number = 0 + number info.text = text return info end if line:match("^ ? ? ?([%*%+%-])[ \t]+(.+)") then local bullet, text = line:match("^ ? ? ?([%*%+%-])[ \t]+(.+)") info.type = "list_item" info.list_type = "bullet" info.bullet = bullet info.text= text return info end if line:match("^>[ \t]?(.*)") then info.type = "blockquote" info.text = line:match("^>[ \t]?(.*)") return info end if is_protected(line) then info.type = "raw" info.html = unprotect(line) return info end info.type = "normal" return info end -- Find headers constisting of a normal line followed by a ruler and converts them to -- header entries. function headers(array) local i = 1 while i <= #array - 1 do if array[i].type == "normal" and array[i+1].type == "ruler" and (array[i+1].ruler_char == "-" or array[i+1].ruler_char == "=") then local info = {line = array[i].line} info.text = info.line info.type = "header" info.level = iff(array[i+1].ruler_char == "=", 1, 2) table.remove(array, i+1) array[i] = info end i = i + 1 end return array end -- Find list blocks and convert them to protected data blocks function lists(array, sublist) local function process_list(arr) local function any_blanks(arr) for i = 1, #arr do if arr[i].type == "blank" then return true end end return false end local function split_list_items(arr) local acc = {arr[1]} local res = {} for i=2,#arr do if arr[i].type == "list_item" then table.insert(res, acc) acc = {arr[i]} else table.insert(acc, arr[i]) end end table.insert(res, acc) return res end local function process_list_item(lines, block) while lines[#lines].type == "blank" do table.remove(lines) end local itemtext = lines[1].text for i=2,#lines do itemtext = itemtext .. "\n" .. outdent(lines[i].line) end if block then itemtext = block_transform(itemtext, true) if not itemtext:find("
") then itemtext = indent(itemtext) end
				return "    
  • " .. itemtext .. "
  • " else local lines = split(itemtext) lines = map(lines, classify) lines = lists(lines, true) lines = blocks_to_html(lines, true) itemtext = table.concat(lines, "\n") if not itemtext:find("
    ") then itemtext = indent(itemtext) end
    				return "    
  • " .. itemtext .. "
  • " end end local block_list = any_blanks(arr) local items = split_list_items(arr) local out = "" for _, item in ipairs(items) do out = out .. process_list_item(item, block_list) .. "\n" end if arr[1].list_type == "numeric" then return "
      \n" .. out .. "
    " else return "
      \n" .. out .. "
    " end end -- Finds the range of lines composing the first list in the array. A list -- starts with (^ list_item) or (blank list_item) and ends with -- (blank* $) or (blank normal). -- -- A sublist can start with just (list_item) does not need a blank... local function find_list(array, sublist) local function find_list_start(array, sublist) if array[1].type == "list_item" then return 1 end if sublist then for i = 1,#array do if array[i].type == "list_item" then return i end end else for i = 1, #array-1 do if array[i].type == "blank" and array[i+1].type == "list_item" then return i+1 end end end return nil end local function find_list_end(array, start) local pos = #array for i = start, #array-1 do if array[i].type == "blank" and array[i+1].type ~= "list_item" and array[i+1].type ~= "indented" and array[i+1].type ~= "blank" then pos = i-1 break end end while pos > start and array[pos].type == "blank" do pos = pos - 1 end return pos end local start = find_list_start(array, sublist) if not start then return nil end return start, find_list_end(array, start) end while true do local start, stop = find_list(array, sublist) if not start then break end local text = process_list(splice(array, start, stop)) local info = { line = text, type = "raw", html = text } array = splice(array, start, stop, {info}) end -- Convert any remaining list items to normal for _,line in ipairs(array) do if line.type == "list_item" then line.type = "normal" end end return array end -- Find and convert blockquote markers. function blockquotes(lines) local function find_blockquote(lines) local start for i,line in ipairs(lines) do if line.type == "blockquote" then start = i break end end if not start then return nil end local stop = #lines for i = start+1, #lines do if lines[i].type == "blank" or lines[i].type == "blockquote" then elseif lines[i].type == "normal" then if lines[i-1].type == "blank" then stop = i-1 break end else stop = i-1 break end end while lines[stop].type == "blank" do stop = stop - 1 end return start, stop end local function process_blockquote(lines) local raw = lines[1].text for i = 2,#lines do raw = raw .. "\n" .. lines[i].text end local bt = block_transform(raw) if not bt:find("
    ") then bt = indent(bt) end
    		return "
    \n " .. bt .. "\n
    " end while true do local start, stop = find_blockquote(lines) if not start then break end local text = process_blockquote(splice(lines, start, stop)) local info = { line = text, type = "raw", html = text } lines = splice(lines, start, stop, {info}) end return lines end -- Find and convert codeblocks. function codeblocks(lines) local function find_codeblock(lines) local start for i,line in ipairs(lines) do if line.type == "indented" then start = i break end end if not start then return nil end local stop = #lines for i = start+1, #lines do if lines[i].type ~= "indented" and lines[i].type ~= "blank" then stop = i-1 break end end while lines[stop].type == "blank" do stop = stop - 1 end return start, stop end local function process_codeblock(lines) local raw = detab(encode_code(outdent(lines[1].line))) for i = 2,#lines do raw = raw .. "\n" .. detab(encode_code(outdent(lines[i].line))) end return "
    " .. raw .. "\n
    " end while true do local start, stop = find_codeblock(lines) if not start then break end local text = process_codeblock(splice(lines, start, stop)) local info = { line = text, type = "raw", html = text } lines = splice(lines, start, stop, {info}) end return lines end -- Convert lines to html code function blocks_to_html(lines, no_paragraphs) local out = {} local i = 1 while i <= #lines do local line = lines[i] if line.type == "ruler" then table.insert(out, "
    ") elseif line.type == "raw" then table.insert(out, line.html) elseif line.type == "normal" then local s = line.line while i+1 <= #lines and lines[i+1].type == "normal" do i = i + 1 s = s .. "\n" .. lines[i].line end if no_paragraphs then table.insert(out, span_transform(s)) else table.insert(out, "

    " .. span_transform(s) .. "

    ") end elseif line.type == "header" then local s = "" .. span_transform(line.text) .. "" table.insert(out, s) else table.insert(out, line.line) end i = i + 1 end return out end -- Perform all the block level transforms function block_transform(text, sublist) local lines = split(text) lines = map(lines, classify) lines = headers(lines) lines = lists(lines, sublist) lines = codeblocks(lines) lines = blockquotes(lines) lines = blocks_to_html(lines) local text = table.concat(lines, "\n") return text end -- Debug function for printing a line array to see the result -- of partial transforms. function print_lines(lines) for i, line in ipairs(lines) do print(i, line.type, line.text or line.line) end end ---------------------------------------------------------------------- -- Span transform ---------------------------------------------------------------------- -- Functions for transforming the text at the span level. -- These characters may need to be escaped because they have a special -- meaning in markdown. escape_chars = "'\\`*_{}[]()>#+-.!'" escape_table = {} function init_escape_table() escape_table = {} for i = 1,#escape_chars do local c = escape_chars:sub(i,i) escape_table[c] = hash(c) end end -- Adds a new escape to the escape table. function add_escape(text) if not escape_table[text] then escape_table[text] = hash(text) end return escape_table[text] end -- Escape characters that should not be disturbed by markdown. function escape_special_chars(text) local tokens = tokenize_html(text) local out = "" for _, token in ipairs(tokens) do local t = token.text if token.type == "tag" then -- In tags, encode * and _ so they don't conflict with their use in markdown. t = t:gsub("%*", escape_table["*"]) t = t:gsub("%_", escape_table["_"]) else t = encode_backslash_escapes(t) end out = out .. t end return out end -- Encode backspace-escaped characters in the markdown source. function encode_backslash_escapes(t) for i=1,escape_chars:len() do local c = escape_chars:sub(i,i) t = t:gsub("\\%" .. c, escape_table[c]) end return t end -- Unescape characters that have been encoded. function unescape_special_chars(t) local tin = t for k,v in pairs(escape_table) do k = k:gsub("%%", "%%%%") t = t:gsub(v,k) end if t ~= tin then t = unescape_special_chars(t) end return t end -- Encode/escape certain characters inside Markdown code runs. -- The point is that in code, these characters are literals, -- and lose their special Markdown meanings. function encode_code(s) s = s:gsub("%&", "&") s = s:gsub("<", "<") s = s:gsub(">", ">") for k,v in pairs(escape_table) do s = s:gsub("%"..k, v) end return s end -- Handle backtick blocks. function code_spans(s) s = s:gsub("\\\\", escape_table["\\"]) s = s:gsub("\\`", escape_table["`"]) local pos = 1 while true do local start, stop = s:find("`+", pos) if not start then return s end local count = stop - start + 1 -- Find a matching numbert of backticks local estart, estop = s:find(string.rep("`", count), stop+1) local brstart = s:find("\n", stop+1) if estart and (not brstart or estart < brstart) then local code = s:sub(stop+1, estart-1) code = code:gsub("^[ \t]+", "") code = code:gsub("[ \t]+$", "") code = code:gsub(escape_table["\\"], escape_table["\\"] .. escape_table["\\"]) code = code:gsub(escape_table["`"], escape_table["\\"] .. escape_table["`"]) code = "" .. encode_code(code) .. "" code = add_escape(code) s = s:sub(1, start-1) .. code .. s:sub(estop+1) pos = start + code:len() else pos = stop + 1 end end return s end -- Encode alt text... enodes &, and ". function encode_alt(s) if not s then return s end s = s:gsub('&', '&') s = s:gsub('"', '"') s = s:gsub('<', '<') return s end -- Handle image references function images(text) local function reference_link(alt, id) alt = encode_alt(alt:match("%b[]"):sub(2,-2)) id = id:match("%[(.*)%]"):lower() if id == "" then id = text:lower() end link_database[id] = link_database[id] or {} if not link_database[id].url then return nil end local url = link_database[id].url or id url = encode_alt(url) local title = encode_alt(link_database[id].title) if title then title = " title=\"" .. title .. "\"" else title = "" end return add_escape ('' .. alt .. '") end local function inline_link(alt, link) alt = encode_alt(alt:match("%b[]"):sub(2,-2)) local url, title = link:match("%(?[ \t]*['\"](.+)['\"]") url = url or link:match("%(?%)") url = encode_alt(url) title = encode_alt(title) if title then return add_escape('' .. alt .. '') else return add_escape('' .. alt .. '') end end text = text:gsub("!(%b[])[ \t]*\n?[ \t]*(%b[])", reference_link) text = text:gsub("!(%b[])(%b())", inline_link) return text end -- Handle anchor references function anchors(text) local function reference_link(text, id) text = text:match("%b[]"):sub(2,-2) id = id:match("%b[]"):sub(2,-2):lower() if id == "" then id = text:lower() end link_database[id] = link_database[id] or {} if not link_database[id].url then return nil end local url = link_database[id].url or id url = encode_alt(url) local title = encode_alt(link_database[id].title) if title then title = " title=\"" .. title .. "\"" else title = "" end return add_escape("") .. text .. add_escape("") end local function inline_link(text, link) text = text:match("%b[]"):sub(2,-2) local url, title = link:match("%(?[ \t]*['\"](.+)['\"]") title = encode_alt(title) url = url or link:match("%(?%)") or "" url = encode_alt(url) if title then return add_escape("") .. text .. "" else return add_escape("") .. text .. add_escape("") end end text = text:gsub("(%b[])[ \t]*\n?[ \t]*(%b[])", reference_link) text = text:gsub("(%b[])(%b())", inline_link) return text end -- Handle auto links, i.e. . function auto_links(text) local function link(s) return add_escape("") .. s .. "" end -- Encode chars as a mix of dec and hex entitites to (perhaps) fool -- spambots. local function encode_email_address(s) -- Use a deterministic encoding to make unit testing possible. -- Code 45% hex, 45% dec, 10% plain. local hex = {code = function(c) return "&#x" .. string.format("%x", c:byte()) .. ";" end, count = 1, rate = 0.45} local dec = {code = function(c) return "&#" .. c:byte() .. ";" end, count = 0, rate = 0.45} local plain = {code = function(c) return c end, count = 0, rate = 0.1} local codes = {hex, dec, plain} local function swap(t,k1,k2) local temp = t[k2] t[k2] = t[k1] t[k1] = temp end local out = "" for i = 1,s:len() do for _,code in ipairs(codes) do code.count = code.count + code.rate end if codes[1].count < codes[2].count then swap(codes,1,2) end if codes[2].count < codes[3].count then swap(codes,2,3) end if codes[1].count < codes[2].count then swap(codes,1,2) end local code = codes[1] local c = s:sub(i,i) -- Force encoding of "@" to make email address more invisible. if c == "@" and code == plain then code = codes[2] end out = out .. code.code(c) code.count = code.count - 1 end return out end local function mail(s) s = unescape_special_chars(s) local address = encode_email_address("mailto:" .. s) local text = encode_email_address(s) return add_escape("") .. text .. "" end -- links text = text:gsub("<(https?:[^'\">%s]+)>", link) text = text:gsub("<(ftp:[^'\">%s]+)>", link) -- mail text = text:gsub("%s]+)>", mail) text = text:gsub("<([-.%w]+%@[-.%w]+)>", mail) return text end -- Encode free standing amps (&) and angles (<)... note that this does not -- encode free >. function amps_and_angles(s) -- encode amps not part of &..; expression local pos = 1 while true do local amp = s:find("&", pos) if not amp then break end local semi = s:find(";", amp+1) local stop = s:find("[ \t\n&]", amp+1) if not semi or (stop and stop < semi) or (semi - amp) > 15 then s = s:sub(1,amp-1) .. "&" .. s:sub(amp+1) pos = amp+1 else pos = amp+1 end end -- encode naked <'s s = s:gsub("<([^a-zA-Z/?$!])", "<%1") s = s:gsub("<$", "<") -- what about >, nothing done in the original markdown source to handle them return s end -- Handles emphasis markers (* and _) in the text. function emphasis(text) for _, s in ipairs {"%*%*", "%_%_"} do text = text:gsub(s .. "([^%s][%*%_]?)" .. s, "%1") text = text:gsub(s .. "([^%s][^<>]-[^%s][%*%_]?)" .. s, "%1") end for _, s in ipairs {"%*", "%_"} do text = text:gsub(s .. "([^%s_])" .. s, "%1") text = text:gsub(s .. "([^%s_])" .. s, "%1") text = text:gsub(s .. "([^%s_][^<>_]-[^%s_])" .. s, "%1") text = text:gsub(s .. "([^<>_]-[^<>_]-[^<>_]-)" .. s, "%1") end return text end -- Handles line break markers in the text. function line_breaks(text) return text:gsub(" +\n", "
    \n") end -- Perform all span level transforms. function span_transform(text) text = code_spans(text) text = escape_special_chars(text) text = images(text) text = anchors(text) text = auto_links(text) text = amps_and_angles(text) text = emphasis(text) text = line_breaks(text) return text end ---------------------------------------------------------------------- -- Markdown ---------------------------------------------------------------------- -- Cleanup the text by normalizing some possible variations to make further -- processing easier. function cleanup(text) -- Standardize line endings text = text:gsub("\r\n", "\n") -- DOS to UNIX text = text:gsub("\r", "\n") -- Mac to UNIX -- Convert all tabs to spaces text = detab(text) -- Strip lines with only spaces and tabs while true do local subs text, subs = text:gsub("\n[ \t]+\n", "\n\n") if subs == 0 then break end end return "\n" .. text .. "\n" end -- Strips link definitions from the text and stores the data in a lookup table. function strip_link_definitions(text) local linkdb = {} local function link_def(id, url, title) id = id:match("%[(.+)%]"):lower() linkdb[id] = linkdb[id] or {} linkdb[id].url = url or linkdb[id].url linkdb[id].title = title or linkdb[id].title return "" end local def_no_title = "\n ? ? ?(%b[]):[ \t]*\n?[ \t]*]+)>?[ \t]*" local def_title1 = def_no_title .. "[ \t]+\n?[ \t]*[\"'(]([^\n]+)[\"')][ \t]*" local def_title2 = def_no_title .. "[ \t]*\n[ \t]*[\"'(]([^\n]+)[\"')][ \t]*" local def_title3 = def_no_title .. "[ \t]*\n?[ \t]+[\"'(]([^\n]+)[\"')][ \t]*" text = text:gsub(def_title1, link_def) text = text:gsub(def_title2, link_def) text = text:gsub(def_title3, link_def) text = text:gsub(def_no_title, link_def) return text, linkdb end link_database = {} -- Main markdown processing function function markdown(text) init_hash(text) init_escape_table() text = cleanup(text) text = protect(text) text, link_database = strip_link_definitions(text) text = block_transform(text) text = unescape_special_chars(text) return text end ---------------------------------------------------------------------- -- End of module ---------------------------------------------------------------------- setfenv(1, _G) M.lock(M) -- Expose markdown function to the world markdown = M.markdown -- Class for parsing command-line options local OptionParser = {} OptionParser.__index = OptionParser -- Creates a new option parser function OptionParser:new() local o = {short = {}, long = {}} setmetatable(o, self) return o end -- Calls f() whenever a flag with specified short and long name is encountered function OptionParser:flag(short, long, f) local info = {type = "flag", f = f} if short then self.short[short] = info end if long then self.long[long] = info end end -- Calls f(param) whenever a parameter flag with specified short and long name is encountered function OptionParser:param(short, long, f) local info = {type = "param", f = f} if short then self.short[short] = info end if long then self.long[long] = info end end -- Calls f(v) for each non-flag argument function OptionParser:arg(f) self.arg = f end -- Runs the option parser for the specified set of arguments. Returns true if all arguments -- where successfully parsed and false otherwise. function OptionParser:run(args) local pos = 1 while pos <= #args do local arg = args[pos] if arg == "--" then for i=pos+1,#args do if self.arg then self.arg(args[i]) end return true end end if arg:match("^%-%-") then local info = self.long[arg:sub(3)] if not info then print("Unknown flag: " .. arg) return false end if info.type == "flag" then info.f() pos = pos + 1 else param = args[pos+1] if not param then print("No parameter for flag: " .. arg) return false end info.f(param) pos = pos+2 end elseif arg:match("^%-") then for i=2,arg:len() do local c = arg:sub(i,i) local info = self.short[c] if not info then print("Unknown flag: -" .. c) return false end if info.type == "flag" then info.f() else if i == arg:len() then param = args[pos+1] if not param then print("No parameter for flag: -" .. c) return false end info.f(param) pos = pos + 1 else param = arg:sub(i+1) info.f(param) end break end end pos = pos + 1 else if self.arg then self.arg(arg) end pos = pos + 1 end end return true end -- Handles the case when markdown is run from the command line local function run_command_line(arg) -- Generate output for input s given options local function run(s, options) s = markdown(s) if not options.wrap_header then return s end local header = "" if options.header then local f = io.open(options.header) or error("Could not open file: " .. options.header) header = f:read("*a") f:close() else header = [[ TITLE ]] local title = options.title or s:match("

    (.-)

    ") or s:match("

    (.-)

    ") or s:match("

    (.-)

    ") or "Untitled" header = header:gsub("TITLE", title) if options.inline_style then local style = "" local f = io.open(options.stylesheet) if f then style = f:read("*a") f:close() else error("Could not include style sheet " .. options.stylesheet .. ": File not found") end header = header:gsub('', "") else header = header:gsub("STYLESHEET", options.stylesheet) end header = header:gsub("CHARSET", options.charset) end local footer = "" if options.footer then local f = io.open(options.footer) or error("Could not open file: " .. options.footer) footer = f:read("*a") f:close() end return header .. s .. footer end -- Generate output path name from input path name given options. local function outpath(path, options) if options.append then return path .. ".html" end local m = path:match("^(.+%.html)[^/\\]+$") if m then return m end m = path:match("^(.+%.)[^/\\]*$") if m and path ~= m .. "html" then return m .. "html" end return path .. ".html" end -- Default commandline options local options = { wrap_header = true, header = nil, footer = nil, charset = "utf-8", title = nil, stylesheet = "default.css", inline_style = false } local help = [[ Usage: markdown.lua [OPTION] [FILE] Runs the markdown text markup to HTML converter on each file specified on the command line. If no files are specified, runs on standard input. No header: -n, --no-wrap Don't wrap the output in ... tags. Custom header: -e, --header FILE Use content of FILE for header. -f, --footer FILE Use content of FILE for footer. Generated header: -c, --charset SET Specifies charset (default utf-8). -i, --title TITLE Specifies title (default from first

    tag). -s, --style STYLE Specifies style sheet file (default default.css). -l, --inline-style Include the style sheet file inline in the header. Generated files: -a, --append Append .html extension (instead of replacing). Other options: -h, --help Print this help text. -t, --test Run the unit tests. ]] local run_stdin = true local op = OptionParser:new() op:flag("n", "no-wrap", function () options.wrap_header = false end) op:param("e", "header", function (x) options.header = x end) op:param("f", "footer", function (x) options.footer = x end) op:param("c", "charset", function (x) options.charset = x end) op:param("i", "title", function(x) options.title = x end) op:param("s", "style", function(x) options.stylesheet = x end) op:flag("l", "inline-style", function(x) options.inline_style = true end) op:flag("a", "append", function() options.append = true end) op:flag("t", "test", function() local n = arg[0]:gsub("markdown.lua", "markdown-tests.lua") local f = io.open(n) if f then f:close() dofile(n) else error("Cannot find markdown-tests.lua") end run_stdin = false end) op:flag("h", "help", function() print(help) run_stdin = false end) op:arg(function(path) local file = io.open(path) or error("Could not open file: " .. path) local s = file:read("*a") file:close() s = run(s, options) file = io.open(outpath(path, options), "w") or error("Could not open output file: " .. outpath(path, options)) file:write(s) file:close() run_stdin = false end ) if not op:run(arg) then print(help) run_stdin = false end if run_stdin then local s = io.read("*a") s = run(s, options) io.write(s) end end -- If we are being run from the command-line, act accordingly if arg and arg[0]:find("markdown%.lua$") then run_command_line(arg) else return markdown end