1require 'rubygems'
2require 'minitest/autorun'
3require 'pp'
4
5require 'rdoc'
6require 'rdoc/markdown'
7
8class TestRDocMarkdownTest < RDoc::TestCase
9
10  MARKDOWN_TEST_PATH = File.expand_path '../MarkdownTest_1.0.3/', __FILE__
11
12  def setup
13    super
14
15    @parser = RDoc::Markdown.new
16  end
17
18  def mu_pp obj
19    s = ''
20    s = PP.pp obj, s
21    s.force_encoding Encoding.default_external if defined? Encoding
22    s.chomp
23  end
24
25  def test_amps_and_angle_encoding
26    input = File.read "#{MARKDOWN_TEST_PATH}/Amps and angle encoding.text"
27
28    doc = @parser.parse input
29
30    expected =
31      doc(
32        para("AT&T has an ampersand in their name."),
33        para("AT&T is another way to write it."),
34        para("This & that."),
35        para("4 < 5."),
36        para("6 > 5."),
37        para("Here's a {link}[http://example.com/?foo=1&bar=2] with " +
38             "an ampersand in the URL."),
39        para("Here's a link with an amersand in the link text: " +
40             "{AT&T}[http://att.com/]."),
41        para("Here's an inline {link}[/script?foo=1&bar=2]."),
42        para("Here's an inline {link}[/script?foo=1&bar=2]."))
43
44    assert_equal expected, doc
45  end
46
47  def test_auto_links
48    input = File.read "#{MARKDOWN_TEST_PATH}/Auto links.text"
49
50    doc = @parser.parse input
51
52    # TODO verify rdoc auto-links too
53    expected =
54      doc(
55        para("Link: http://example.com/."),
56        para("With an ampersand: http://example.com/?foo=1&bar=2"),
57        list(:BULLET,
58          item(nil, para("In a list?")),
59          item(nil, para("http://example.com/")),
60          item(nil, para("It should."))),
61        block(
62          para("Blockquoted: http://example.com/")),
63        para("Auto-links should not occur here: " +
64             "<code><http://example.com/></code>"),
65        verb("or here: <http://example.com/>\n"))
66
67    assert_equal expected, doc
68  end
69
70  def test_backslash_escapes
71    input = File.read "#{MARKDOWN_TEST_PATH}/Backslash escapes.text"
72
73    doc = @parser.parse input
74
75    expected =
76      doc(
77        para("These should all get escaped:"),
78
79        para("Backslash: \\"),
80        para("Backtick: `"),
81        para("Asterisk: *"),
82        para("Underscore: _"),
83        para("Left brace: {"),
84        para("Right brace: }"),
85        para("Left bracket: ["),
86        para("Right bracket: ]"),
87        para("Left paren: ("),
88        para("Right paren: )"),
89        para("Greater-than: >"),
90        para("Hash: #"),
91        para("Period: ."),
92        para("Bang: !"),
93        para("Plus: +"),
94        para("Minus: -"),
95
96        para("These should not, because they occur within a code block:"),
97
98        verb("Backslash: \\\\\n",
99             "\n",
100             "Backtick: \\`\n",
101             "\n",
102             "Asterisk: \\*\n",
103             "\n",
104             "Underscore: \\_\n",
105             "\n",
106             "Left brace: \\{\n",
107             "\n",
108             "Right brace: \\}\n",
109             "\n",
110             "Left bracket: \\[\n",
111             "\n",
112             "Right bracket: \\]\n",
113             "\n",
114             "Left paren: \\(\n",
115             "\n",
116             "Right paren: \\)\n",
117             "\n",
118             "Greater-than: \\>\n",
119             "\n",
120             "Hash: \\#\n",
121             "\n",
122             "Period: \\.\n",
123             "\n",
124             "Bang: \\!\n",
125             "\n",
126             "Plus: \\+\n",
127             "\n",
128             "Minus: \\-\n"),
129
130        para("Nor should these, which occur in code spans:"),
131
132        para("Backslash: <code>\\\\</code>"),
133        para("Backtick: <code>\\`</code>"),
134        para("Asterisk: <code>\\*</code>"),
135        para("Underscore: <code>\\_</code>"),
136        para("Left brace: <code>\\{</code>"),
137        para("Right brace: <code>\\}</code>"),
138        para("Left bracket: <code>\\[</code>"),
139        para("Right bracket: <code>\\]</code>"),
140        para("Left paren: <code>\\(</code>"),
141        para("Right paren: <code>\\)</code>"),
142        para("Greater-than: <code>\\></code>"),
143        para("Hash: <code>\\#</code>"),
144        para("Period: <code>\\.</code>"),
145        para("Bang: <code>\\!</code>"),
146        para("Plus: <code>\\+</code>"),
147        para("Minus: <code>\\-</code>"),
148
149        para("These should get escaped, even though they're matching pairs for\n" +
150             "other Markdown constructs:"),
151
152        para("\*asterisks\*"),
153        para("\_underscores\_"),
154        para("`backticks`"),
155
156        para("This is a code span with a literal backslash-backtick " +
157             "sequence: <code>\\`</code>"),
158
159        para("This is a tag with unescaped backticks " +
160             "<span attr='`ticks`'>bar</span>."),
161
162        para("This is a tag with backslashes " +
163             "<span attr='\\\\backslashes\\\\'>bar</span>."))
164
165    assert_equal expected, doc
166  end
167
168  def test_blockquotes_with_code_blocks
169    input = File.read "#{MARKDOWN_TEST_PATH}/Blockquotes with code blocks.text"
170
171    doc = @parser.parse input
172
173    expected =
174      doc(
175        block(
176          para("Example:"),
177          verb("sub status {\n",
178               "    print \"working\";\n",
179               "}\n"),
180          para("Or:"),
181          verb("sub status {\n",
182               "    return \"working\";\n",
183               "}\n")))
184
185    assert_equal expected, doc
186  end
187
188  def test_code_blocks
189    input = File.read "#{MARKDOWN_TEST_PATH}/Code Blocks.text"
190
191    doc = @parser.parse input
192
193    expected =
194      doc(
195        verb("code block on the first line\n"),
196        para("Regular text."),
197
198        verb("code block indented by spaces\n"),
199        para("Regular text."),
200
201        verb("the lines in this block  \n",
202             "all contain trailing spaces  \n"),
203        para("Regular Text."),
204
205        verb("code block on the last line\n"))
206
207    assert_equal expected, doc
208  end
209
210  def test_code_spans
211    input = File.read "#{MARKDOWN_TEST_PATH}/Code Spans.text"
212
213    doc = @parser.parse input
214
215    expected = doc(
216      para("<code><test a=\"</code> content of attribute <code>\"></code>"),
217      para("Fix for backticks within HTML tag: " +
218           "<span attr='`ticks`'>like this</span>"),
219      para("Here's how you put <code>`backticks`</code> in a code span."))
220
221    assert_equal expected, doc
222  end
223
224  def test_hard_wrapped_paragraphs_with_list_like_lines
225    input = File.read "#{MARKDOWN_TEST_PATH}/Hard-wrapped paragraphs with list-like lines.text"
226
227    doc = @parser.parse input
228
229    expected =
230      doc(
231        para("In Markdown 1.0.0 and earlier. Version\n" +
232             "8. This line turns into a list item.\n"   +
233             "Because a hard-wrapped line in the\n"     +
234             "middle of a paragraph looked like a\n"    +
235             "list item."),
236        para("Here's one with a bullet.\n" +
237             "* criminey."))
238
239    assert_equal expected, doc
240  end
241
242  def test_horizontal_rules
243    input = File.read "#{MARKDOWN_TEST_PATH}/Horizontal rules.text"
244
245    doc = @parser.parse input
246
247    expected =
248      doc(
249        para("Dashes:"),
250
251        rule(1),
252        rule(1),
253        rule(1),
254        rule(1),
255        verb("---\n"),
256
257        rule(1),
258        rule(1),
259        rule(1),
260        rule(1),
261        verb("- - -\n"),
262
263        para("Asterisks:"),
264
265        rule(1),
266        rule(1),
267        rule(1),
268        rule(1),
269        verb("***\n"),
270
271        rule(1),
272        rule(1),
273        rule(1),
274        rule(1),
275        verb("* * *\n"),
276
277        para("Underscores:"),
278
279        rule(1),
280        rule(1),
281        rule(1),
282        rule(1),
283        verb("___\n"),
284
285        rule(1),
286        rule(1),
287        rule(1),
288        rule(1),
289        verb("_ _ _\n"))
290
291    assert_equal expected, doc
292  end
293
294  def test_inline_html_advanced
295    input = File.read "#{MARKDOWN_TEST_PATH}/Inline HTML (Advanced).text"
296
297    @parser.html = true
298
299    doc = @parser.parse input
300
301    expected =
302      doc(
303        para("Simple block on one line:"),
304        raw("<div>foo</div>"),
305        para("And nested without indentation:"),
306        raw(<<-RAW.chomp))
307<div>
308<div>
309<div>
310foo
311</div>
312<div style=">"/>
313</div>
314<div>bar</div>
315</div>
316        RAW
317
318    assert_equal expected, doc
319  end
320
321  def test_inline_html_simple
322    input = File.read "#{MARKDOWN_TEST_PATH}/Inline HTML (Simple).text"
323
324    @parser.html = true
325
326    doc = @parser.parse input
327
328    expected =
329      doc(
330       para("Here's a simple block:"),
331       raw("<div>\n\tfoo\n</div>"),
332
333       para("This should be a code block, though:"),
334       verb("<div>\n",
335            "\tfoo\n",
336            "</div>\n"),
337
338       para("As should this:"),
339       verb("<div>foo</div>\n"),
340
341       para("Now, nested:"),
342       raw("<div>\n\t<div>\n\t\t<div>\n\t\t\tfoo\n" +
343           "\t\t</div>\n\t</div>\n</div>"),
344
345       para("This should just be an HTML comment:"),
346       raw("<!-- Comment -->"),
347
348       para("Multiline:"),
349       raw("<!--\nBlah\nBlah\n-->"),
350
351       para("Code block:"),
352       verb("<!-- Comment -->\n"),
353
354       para("Just plain comment, with trailing spaces on the line:"),
355       raw("<!-- foo -->"),
356
357       para("Code:"),
358       verb("<hr />\n"),
359
360       para("Hr's:"),
361       raw("<hr>"),
362       raw("<hr/>"),
363       raw("<hr />"),
364
365       raw("<hr>"),
366       raw("<hr/>"),
367       raw("<hr />"),
368
369       raw("<hr class=\"foo\" id=\"bar\" />"),
370       raw("<hr class=\"foo\" id=\"bar\"/>"),
371       raw("<hr class=\"foo\" id=\"bar\" >"))
372
373    assert_equal expected, doc
374  end
375
376  def test_inline_html_comments
377    input = File.read "#{MARKDOWN_TEST_PATH}/Inline HTML comments.text"
378
379    doc = @parser.parse input
380
381    expected =
382      doc(
383        para("Paragraph one."),
384
385        raw("<!-- This is a simple comment -->"),
386
387        raw("<!--\n\tThis is another comment.\n-->"),
388
389        para("Paragraph two."),
390
391        raw("<!-- one comment block -- -- with two comments -->"),
392
393        para("The end."))
394
395    assert_equal expected, doc
396  end
397
398  def test_links_inline_style
399    input = File.read "#{MARKDOWN_TEST_PATH}/Links, inline style.text"
400
401    doc = @parser.parse input
402
403    expected =
404      doc(
405        para("Just a {URL}[/url/]."),
406        para("{URL and title}[/url/]."),
407        para("{URL and title}[/url/]."),
408        para("{URL and title}[/url/]."),
409        para("{URL and title}[/url/]."),
410        para("{Empty}[]."))
411
412    assert_equal expected, doc
413  end
414
415  def test_links_reference_style
416    input = File.read "#{MARKDOWN_TEST_PATH}/Links, reference style.text"
417
418    doc = @parser.parse input
419
420    expected =
421      doc(
422        para("Foo {bar}[/url/]."),
423        para("Foo {bar}[/url/]."),
424        para("Foo {bar}[/url/]."),
425
426        para("With {embedded [brackets]}[/url/]."),
427
428        para("Indented {once}[/url]."),
429        para("Indented {twice}[/url]."),
430        para("Indented {thrice}[/url]."),
431        para("Indented [four][] times."),
432
433        verb("[four]: /url\n"),
434
435        rule(1),
436
437        para("{this}[foo] should work"),
438        para("So should {this}[foo]."),
439        para("And {this}[foo]."),
440        para("And {this}[foo]."),
441        para("And {this}[foo]."),
442
443        para("But not [that] []."),
444        para("Nor [that][]."),
445        para("Nor [that]."),
446
447        para("[Something in brackets like {this}[foo] should work]"),
448        para("[Same with {this}[foo].]"),
449
450        para("In this case, {this}[/somethingelse/] points to something else."),
451        para("Backslashing should suppress [this] and [this]."),
452
453        rule(1),
454
455        para("Here's one where the {link breaks}[/url/] across lines."),
456        para("Here's another where the {link breaks}[/url/] across lines, " +
457             "but with a line-ending space."))
458
459    assert_equal expected, doc
460  end
461
462  def test_links_shortcut_references
463    input = File.read "#{MARKDOWN_TEST_PATH}/Links, shortcut references.text"
464
465    doc = @parser.parse input
466
467    expected =
468      doc(
469        para("This is the {simple case}[/simple]."),
470        para("This one has a {line break}[/foo]."),
471        para("This one has a {line break}[/foo] with a line-ending space."),
472        para("{this}[/that] and the {other}[/other]"))
473
474    assert_equal expected, doc
475  end
476
477  def test_literal_quotes_in_titles
478    input = File.read "#{MARKDOWN_TEST_PATH}/Literal quotes in titles.text"
479
480    doc = @parser.parse input
481
482    # TODO support title attribute
483    expected =
484      doc(
485        para("Foo {bar}[/url/]."),
486        para("Foo {bar}[/url/]."))
487
488    assert_equal expected, doc
489  end
490
491  def test_markdown_documentation_basics
492    input = File.read "#{MARKDOWN_TEST_PATH}/Markdown Documentation - Basics.text"
493
494    doc = @parser.parse input
495
496    expected =
497      doc(
498        head(1, "Markdown: Basics"),
499
500        raw(<<-RAW.chomp),
501<ul id="ProjectSubmenu">
502    <li><a href="/projects/markdown/" title="Markdown Project Page">Main</a></li>
503    <li><a class="selected" title="Markdown Basics">Basics</a></li>
504    <li><a href="/projects/markdown/syntax" title="Markdown Syntax Documentation">Syntax</a></li>
505    <li><a href="/projects/markdown/license" title="Pricing and License Information">License</a></li>
506    <li><a href="/projects/markdown/dingus" title="Online Markdown Web Form">Dingus</a></li>
507</ul>
508        RAW
509
510        head(2, "Getting the Gist of Markdown's Formatting Syntax"),
511
512        para("This page offers a brief overview of what it's like to use Markdown.\n" +
513             "The {syntax page}[/projects/markdown/syntax] provides complete, detailed documentation for\n" +
514             "every feature, but Markdown should be very easy to pick up simply by\n" +
515             "looking at a few examples of it in action. The examples on this page\n" +
516             "are written in a before/after style, showing example syntax and the\n" +
517             "HTML output produced by Markdown."),
518
519        para("It's also helpful to simply try Markdown out; the {Dingus}[/projects/markdown/dingus] is a\n" +
520             "web application that allows you type your own Markdown-formatted text\n" +
521             "and translate it to XHTML."),
522
523        para("<b>Note:</b> This document is itself written using Markdown; you\n" +
524             "can {see the source for it by adding '.text' to the URL}[/projects/markdown/basics.text]."),
525
526        head(2, "Paragraphs, Headers, Blockquotes"),
527
528        para("A paragraph is simply one or more consecutive lines of text, separated\n" +
529             "by one or more blank lines. (A blank line is any line that looks like a\n" +
530             "blank line -- a line containing nothing spaces or tabs is considered\n" +
531             "blank.) Normal paragraphs should not be intended with spaces or tabs."),
532
533        para("Markdown offers two styles of headers: _Setext_ and _atx_.\n" +
534             "Setext-style headers for <code><h1></code> and <code><h2></code> are created by\n" +
535             "\"underlining\" with equal signs (<code>=</code>) and hyphens (<code>-</code>), respectively.\n" +
536             "To create an atx-style header, you put 1-6 hash marks (<code>#</code>) at the\n" +
537             "beginning of the line -- the number of hashes equals the resulting\n" +
538             "HTML header level."),
539
540        para("Blockquotes are indicated using email-style '<code>></code>' angle brackets."),
541
542        para("Markdown:"),
543
544        verb("A First Level Header\n",
545             "====================\n",
546             "\n",
547             "A Second Level Header\n",
548             "---------------------\n",
549             "\n",
550             "Now is the time for all good men to come to\n",
551             "the aid of their country. This is just a\n",
552             "regular paragraph.\n",
553             "\n",
554             "The quick brown fox jumped over the lazy\n",
555             "dog's back.\n",
556             "\n",
557             "### Header 3\n",
558             "\n",
559             "> This is a blockquote.\n",
560             "> \n",
561             "> This is the second paragraph in the blockquote.\n",
562             ">\n",
563             "> ## This is an H2 in a blockquote\n"),
564
565        para("Output:"),
566
567        verb("<h1>A First Level Header</h1>\n",
568             "\n",
569             "<h2>A Second Level Header</h2>\n",
570             "\n",
571             "<p>Now is the time for all good men to come to\n",
572             "the aid of their country. This is just a\n",
573             "regular paragraph.</p>\n",
574             "\n",
575             "<p>The quick brown fox jumped over the lazy\n",
576             "dog's back.</p>\n",
577             "\n",
578             "<h3>Header 3</h3>\n",
579             "\n",
580             "<blockquote>\n",
581             "    <p>This is a blockquote.</p>\n",
582             "\n",
583             "    <p>This is the second paragraph in the blockquote.</p>\n",
584             "\n",
585             "    <h2>This is an H2 in a blockquote</h2>\n",
586             "</blockquote>\n"),
587
588        head(3, "Phrase Emphasis"),
589        para("Markdown uses asterisks and underscores to indicate spans of emphasis."),
590
591        para("Markdown:"),
592
593        verb("Some of these words *are emphasized*.\n",
594             "Some of these words _are emphasized also_.\n",
595             "\n",
596             "Use two asterisks for **strong emphasis**.\n",
597             "Or, if you prefer, __use two underscores instead__.\n"),
598
599        para("Output:"),
600
601        verb("<p>Some of these words <em>are emphasized</em>.\n",
602             "Some of these words <em>are emphasized also</em>.</p>\n",
603             "\n",
604             "<p>Use two asterisks for <strong>strong emphasis</strong>.\n",
605             "Or, if you prefer, <strong>use two underscores instead</strong>.</p>\n"),
606
607        head(2, "Lists"),
608
609        para("Unordered (bulleted) lists use asterisks, pluses, and hyphens (<code>*</code>,\n" +
610             "<code>+</code>, and <code>-</code>) as list markers. These three markers are\n" +
611             "interchangable; this:"),
612
613        verb("*   Candy.\n",
614             "*   Gum.\n",
615             "*   Booze.\n"),
616
617        para("this:"),
618
619        verb("+   Candy.\n",
620             "+   Gum.\n",
621             "+   Booze.\n"),
622
623        para("and this:"),
624
625        verb("-   Candy.\n",
626             "-   Gum.\n",
627             "-   Booze.\n"),
628
629        para("all produce the same output:"),
630
631        verb("<ul>\n",
632             "<li>Candy.</li>\n",
633             "<li>Gum.</li>\n",
634             "<li>Booze.</li>\n",
635             "</ul>\n"),
636
637        para("Ordered (numbered) lists use regular numbers, followed by periods, as\n" +
638             "list markers:"),
639
640        verb("1.  Red\n",
641             "2.  Green\n",
642             "3.  Blue\n"),
643
644        para("Output:"),
645
646        verb("<ol>\n",
647             "<li>Red</li>\n",
648             "<li>Green</li>\n",
649             "<li>Blue</li>\n",
650             "</ol>\n"),
651
652        para("If you put blank lines between items, you'll get <code><p></code> tags for the\n" +
653             "list item text. You can create multi-paragraph list items by indenting\n" +
654             "the paragraphs by 4 spaces or 1 tab:"),
655
656        verb("*   A list item.\n",
657             "\n",
658             "    With multiple paragraphs.\n",
659             "\n",
660             "*   Another item in the list.\n"),
661
662        para("Output:"),
663
664        verb("<ul>\n",
665             "<li><p>A list item.</p>\n",
666             "<p>With multiple paragraphs.</p></li>\n",
667             "<li><p>Another item in the list.</p></li>\n",
668             "</ul>\n"),
669
670        head(3, "Links"),
671
672        para("Markdown supports two styles for creating links: _inline_ and\n" +
673             "_reference_. With both styles, you use square brackets to delimit the\n" +
674             "text you want to turn into a link."),
675
676        para("Inline-style links use parentheses immediately after the link text.\n" +
677             "For example:"),
678
679        verb("This is an [example link](http://example.com/).\n"),
680
681        para("Output:"),
682
683        verb("<p>This is an <a href=\"http://example.com/\">\n",
684             "example link</a>.</p>\n"),
685
686        para("Optionally, you may include a title attribute in the parentheses:"),
687
688        verb("This is an [example link](http://example.com/ \"With a Title\").\n"),
689
690        para("Output:"),
691
692        verb("<p>This is an <a href=\"http://example.com/\" title=\"With a Title\">\n",
693             "example link</a>.</p>\n"),
694
695        para("Reference-style links allow you to refer to your links by names, which\n" +
696             "you define elsewhere in your document:"),
697
698        verb("I get 10 times more traffic from [Google][1] than from\n",
699             "[Yahoo][2] or [MSN][3].\n",
700             "\n",
701             "[1]: http://google.com/        \"Google\"\n",
702             "[2]: http://search.yahoo.com/  \"Yahoo Search\"\n",
703             "[3]: http://search.msn.com/    \"MSN Search\"\n"),
704
705        para("Output:"),
706
707        verb("<p>I get 10 times more traffic from <a href=\"http://google.com/\"\n",
708             "title=\"Google\">Google</a> than from <a href=\"http://search.yahoo.com/\"\n",
709             "title=\"Yahoo Search\">Yahoo</a> or <a href=\"http://search.msn.com/\"\n",
710             "title=\"MSN Search\">MSN</a>.</p>\n"),
711
712        para("The title attribute is optional. Link names may contain letters,\n" +
713             "numbers and spaces, but are _not_ case sensitive:"),
714
715        verb("I start my morning with a cup of coffee and\n",
716             "[The New York Times][NY Times].\n",
717             "\n",
718             "[ny times]: http://www.nytimes.com/\n"),
719
720        para("Output:"),
721
722        verb("<p>I start my morning with a cup of coffee and\n",
723             "<a href=\"http://www.nytimes.com/\">The New York Times</a>.</p>\n"),
724
725        head(3, "Images"),
726
727        para("Image syntax is very much like link syntax."),
728
729        para("Inline (titles are optional):"),
730
731        verb("![alt text](/path/to/img.jpg \"Title\")\n"),
732
733        para("Reference-style:"),
734
735        verb("![alt text][id]\n",
736             "\n",
737             "[id]: /path/to/img.jpg \"Title\"\n"),
738
739        para("Both of the above examples produce the same output:"),
740
741        verb("<img src=\"/path/to/img.jpg\" alt=\"alt text\" title=\"Title\" />\n"),
742
743        head(3, "Code"),
744
745        para("In a regular paragraph, you can create code span by wrapping text in\n" +
746             "backtick quotes. Any ampersands (<code>&</code>) and angle brackets (<code><</code> or\n" +
747             "<code>></code>) will automatically be translated into HTML entities. This makes\n" +
748             "it easy to use Markdown to write about HTML example code:"),
749
750        verb(
751             "I strongly recommend against using any `<blink>` tags.\n",
752             "\n",
753             "I wish SmartyPants used named entities like `&mdash;`\n",
754             "instead of decimal-encoded entites like `&#8212;`.\n"),
755
756        para("Output:"),
757
758        verb("<p>I strongly recommend against using any\n",
759             "<code>&lt;blink&gt;</code> tags.</p>\n",
760             "\n",
761             "<p>I wish SmartyPants used named entities like\n",
762             "<code>&amp;mdash;</code> instead of decimal-encoded\n",
763             "entites like <code>&amp;#8212;</code>.</p>\n"),
764
765        para("To specify an entire block of pre-formatted code, indent every line of\n" +
766             "the block by 4 spaces or 1 tab. Just like with code spans, <code>&</code>, <code><</code>,\n" +
767             "and <code>></code> characters will be escaped automatically."),
768
769        para("Markdown:"),
770
771        verb("If you want your page to validate under XHTML 1.0 Strict,\n",
772             "you've got to put paragraph tags in your blockquotes:\n",
773             "\n",
774             "    <blockquote>\n",
775             "        <p>For example.</p>\n",
776             "    </blockquote>\n"),
777
778        para("Output:"),
779
780        verb("<p>If you want your page to validate under XHTML 1.0 Strict,\n",
781             "you've got to put paragraph tags in your blockquotes:</p>\n",
782             "\n",
783             "<pre><code>&lt;blockquote&gt;\n",
784             "    &lt;p&gt;For example.&lt;/p&gt;\n",
785             "&lt;/blockquote&gt;\n",
786             "</code></pre>\n"))
787
788    assert_equal expected, doc
789  end
790
791  def test_markdown_documentation_syntax
792    input = File.read "#{MARKDOWN_TEST_PATH}/Markdown Documentation - Syntax.text"
793
794    doc = @parser.parse input
795
796    expected =
797      doc(
798        head(1, "Markdown: Syntax"),
799
800        raw(<<-RAW.chomp),
801<ul id="ProjectSubmenu">
802    <li><a href="/projects/markdown/" title="Markdown Project Page">Main</a></li>
803    <li><a href="/projects/markdown/basics" title="Markdown Basics">Basics</a></li>
804    <li><a class="selected" title="Markdown Syntax Documentation">Syntax</a></li>
805    <li><a href="/projects/markdown/license" title="Pricing and License Information">License</a></li>
806    <li><a href="/projects/markdown/dingus" title="Online Markdown Web Form">Dingus</a></li>
807</ul>
808        RAW
809
810        list(:BULLET,
811          item(nil,
812            para("{Overview}[#overview]"),
813            list(:BULLET,
814              item(nil,
815                para("{Philosophy}[#philosophy]")),
816              item(nil,
817                para("{Inline HTML}[#html]")),
818              item(nil,
819                para("{Automatic Escaping for Special Characters}[#autoescape]")))),
820          item(nil,
821            para("{Block Elements}[#block]"),
822            list(:BULLET,
823              item(nil,
824                para("{Paragraphs and Line Breaks}[#p]")),
825              item(nil,
826                para("{Headers}[#header]")),
827              item(nil,
828                para("{Blockquotes}[#blockquote]")),
829              item(nil,
830                para("{Lists}[#list]")),
831              item(nil,
832                para("{Code Blocks}[#precode]")),
833              item(nil,
834                para("{Horizontal Rules}[#hr]")))),
835          item(nil,
836            para("{Span Elements}[#span]"),
837            list(:BULLET,
838              item(nil,
839                para("{Links}[#link]")),
840              item(nil,
841                para("{Emphasis}[#em]")),
842              item(nil,
843                para("{Code}[#code]")),
844              item(nil,
845                para("{Images}[#img]")))),
846          item(nil,
847            para("{Miscellaneous}[#misc]"),
848            list(:BULLET,
849              item(nil,
850                para("{Backslash Escapes}[#backslash]")),
851              item(nil,
852                para("{Automatic Links}[#autolink]"))))),
853
854        para("<b>Note:</b> This document is itself written using Markdown; you\n" +
855             "can {see the source for it by adding '.text' to the URL}[/projects/markdown/syntax.text]."),
856
857        rule(1),
858
859        raw("<h2 id=\"overview\">Overview</h2>"),
860
861        raw("<h3 id=\"philosophy\">Philosophy</h3>"),
862
863        para("Markdown is intended to be as easy-to-read and easy-to-write as is feasible."),
864
865        para("Readability, however, is emphasized above all else. A Markdown-formatted\n" +
866             "document should be publishable as-is, as plain text, without looking\n" +
867             "like it's been marked up with tags or formatting instructions. While\n" +
868             "Markdown's syntax has been influenced by several existing text-to-HTML\n" +
869             "filters -- including {Setext}[http://docutils.sourceforge.net/mirror/setext.html], {atx}[http://www.aaronsw.com/2002/atx/], {Textile}[http://textism.com/tools/textile/], {reStructuredText}[http://docutils.sourceforge.net/rst.html],\n" +
870             "{Grutatext}[http://www.triptico.com/software/grutatxt.html], and {EtText}[http://ettext.taint.org/doc/] -- the single biggest source of\n" +
871             "inspiration for Markdown's syntax is the format of plain text email."),
872
873        para("To this end, Markdown's syntax is comprised entirely of punctuation\n" +
874             "characters, which punctuation characters have been carefully chosen so\n" +
875             "as to look like what they mean. E.g., asterisks around a word actually\n" +
876             "look like \*emphasis\*. Markdown lists look like, well, lists. Even\n" +
877             "blockquotes look like quoted passages of text, assuming you've ever\n" +
878             "used email."),
879
880        raw("<h3 id=\"html\">Inline HTML</h3>"),
881
882        para("Markdown's syntax is intended for one purpose: to be used as a\n" +
883             "format for _writing_ for the web."),
884
885        para("Markdown is not a replacement for HTML, or even close to it. Its\n" +
886             "syntax is very small, corresponding only to a very small subset of\n" +
887             "HTML tags. The idea is _not_ to create a syntax that makes it easier\n" +
888             "to insert HTML tags. In my opinion, HTML tags are already easy to\n" +
889             "insert. The idea for Markdown is to make it easy to read, write, and\n" +
890             "edit prose. HTML is a _publishing_ format; Markdown is a _writing_\n" +
891             "format. Thus, Markdown's formatting syntax only addresses issues that\n" +
892             "can be conveyed in plain text."),
893
894        para("For any markup that is not covered by Markdown's syntax, you simply\n" +
895             "use HTML itself. There's no need to preface it or delimit it to\n" +
896             "indicate that you're switching from Markdown to HTML; you just use\n" +
897             "the tags."),
898
899        para("The only restrictions are that block-level HTML elements -- e.g. <code><div></code>,\n" +
900             "<code><table></code>, <code><pre></code>, <code><p></code>, etc. -- must be separated from surrounding\n" +
901             "content by blank lines, and the start and end tags of the block should\n" +
902             "not be indented with tabs or spaces. Markdown is smart enough not\n" +
903             "to add extra (unwanted) <code><p></code> tags around HTML block-level tags."),
904
905        para("For example, to add an HTML table to a Markdown article:"),
906
907        verb("This is a regular paragraph.\n",
908             "\n",
909             "<table>\n",
910             "    <tr>\n",
911             "        <td>Foo</td>\n",
912             "    </tr>\n",
913             "</table>\n",
914             "\n",
915             "This is another regular paragraph.\n"),
916
917        para("Note that Markdown formatting syntax is not processed within block-level\n" +
918             "HTML tags. E.g., you can't use Markdown-style <code>*emphasis*</code> inside an\n" +
919             "HTML block."),
920
921        para("Span-level HTML tags -- e.g. <code><span></code>, <code><cite></code>, or <code><del></code> -- can be\n" +
922             "used anywhere in a Markdown paragraph, list item, or header. If you\n" +
923             "want, you can even use HTML tags instead of Markdown formatting; e.g. if\n" +
924             "you'd prefer to use HTML <code><a></code> or <code><img></code> tags instead of Markdown's\n" +
925             "link or image syntax, go right ahead."),
926
927        para("Unlike block-level HTML tags, Markdown syntax _is_ processed within\n" +
928             "span-level tags."),
929
930        raw("<h3 id=\"autoescape\">Automatic Escaping for Special Characters</h3>"),
931
932        para("In HTML, there are two characters that demand special treatment: <code><</code>\n" +
933             "and <code>&</code>. Left angle brackets are used to start tags; ampersands are\n" +
934             "used to denote HTML entities. If you want to use them as literal\n" +
935             "characters, you must escape them as entities, e.g. <code>&lt;</code>, and\n" +
936             "<code>&amp;</code>."),
937
938        para("Ampersands in particular are bedeviling for web writers. If you want to\n" +
939             "write about 'AT&T', you need to write '<code>AT&amp;T</code>'. You even need to\n" +
940             "escape ampersands within URLs. Thus, if you want to link to:"),
941
942        verb("http://images.google.com/images?num=30&q=larry+bird\n"),
943
944        para("you need to encode the URL as:"),
945
946        verb("http://images.google.com/images?num=30&amp;q=larry+bird\n"),
947
948        para("in your anchor tag <code>href</code> attribute. Needless to say, this is easy to\n" +
949             "forget, and is probably the single most common source of HTML validation\n" +
950             "errors in otherwise well-marked-up web sites."),
951
952        para("Markdown allows you to use these characters naturally, taking care of\n" +
953             "all the necessary escaping for you. If you use an ampersand as part of\n" +
954             "an HTML entity, it remains unchanged; otherwise it will be translated\n" +
955             "into <code>&amp;</code>."),
956
957        para("So, if you want to include a copyright symbol in your article, you can write:"),
958
959        verb("&copy;\n"),
960
961        para("and Markdown will leave it alone. But if you write:"),
962
963        verb("AT&T\n"),
964
965        para("Markdown will translate it to:"),
966
967        verb("AT&amp;T\n"),
968
969        para("Similarly, because Markdown supports {inline HTML}[#html], if you use\n" +
970             "angle brackets as delimiters for HTML tags, Markdown will treat them as\n" +
971             "such. But if you write:"),
972
973        verb("4 < 5\n"),
974
975        para("Markdown will translate it to:"),
976
977        verb("4 &lt; 5\n"),
978
979        para("However, inside Markdown code spans and blocks, angle brackets and\n" +
980             "ampersands are _always_ encoded automatically. This makes it easy to use\n" +
981             "Markdown to write about HTML code. (As opposed to raw HTML, which is a\n" +
982             "terrible format for writing about HTML syntax, because every single <code><</code>\n" +
983             "and <code>&</code> in your example code needs to be escaped.)"),
984
985        rule(1),
986
987        raw("<h2 id=\"block\">Block Elements</h2>"),
988
989        raw("<h3 id=\"p\">Paragraphs and Line Breaks</h3>"),
990
991        para("A paragraph is simply one or more consecutive lines of text, separated\n" +
992             "by one or more blank lines. (A blank line is any line that looks like a\n" +
993             "blank line -- a line containing nothing but spaces or tabs is considered\n" +
994             "blank.) Normal paragraphs should not be intended with spaces or tabs."),
995
996        para("The implication of the \"one or more consecutive lines of text\" rule is\n" +
997             "that Markdown supports \"hard-wrapped\" text paragraphs. This differs\n" +
998             "significantly from most other text-to-HTML formatters (including Movable\n" +
999             "Type's \"Convert Line Breaks\" option) which translate every line break\n" +
1000             "character in a paragraph into a <code><br /></code> tag."),
1001
1002        para("When you _do_ want to insert a <code><br /></code> break tag using Markdown, you\n" +
1003             "end a line with two or more spaces, then type return."),
1004
1005        para("Yes, this takes a tad more effort to create a <code><br /></code>, but a simplistic\n" +
1006             "\"every line break is a <code><br /></code>\" rule wouldn't work for Markdown.\n" +
1007             "Markdown's email-style {blockquoting}[#blockquote] and multi-paragraph {list items}[#list]\n" +
1008             "work best -- and look better -- when you format them with hard breaks."),
1009
1010        raw("<h3 id=\"header\">Headers</h3>"),
1011
1012        para("Markdown supports two styles of headers, {Setext}[http://docutils.sourceforge.net/mirror/setext.html] and {atx}[http://www.aaronsw.com/2002/atx/]."),
1013
1014        para("Setext-style headers are \"underlined\" using equal signs (for first-level\n" +
1015             "headers) and dashes (for second-level headers). For example:"),
1016
1017        verb("This is an H1\n",
1018             "=============\n",
1019             "\n",
1020             "This is an H2\n",
1021             "-------------\n"),
1022
1023        para("Any number of underlining <code>=</code>'s or <code>-</code>'s will work."),
1024
1025        para("Atx-style headers use 1-6 hash characters at the start of the line,\n" +
1026             "corresponding to header levels 1-6. For example:"),
1027
1028        verb("# This is an H1\n",
1029             "\n",
1030             "## This is an H2\n",
1031             "\n",
1032             "###### This is an H6\n"),
1033
1034        para("Optionally, you may \"close\" atx-style headers. This is purely\n" +
1035             "cosmetic -- you can use this if you think it looks better. The\n" +
1036             "closing hashes don't even need to match the number of hashes\n" +
1037             "used to open the header. (The number of opening hashes\n" +
1038             "determines the header level.) :"),
1039
1040        verb("# This is an H1 #\n",
1041             "\n",
1042             "## This is an H2 ##\n",
1043             "\n",
1044             "### This is an H3 ######\n"),
1045
1046        raw("<h3 id=\"blockquote\">Blockquotes</h3>"),
1047
1048        para(
1049             "Markdown uses email-style <code>></code> characters for blockquoting. If you're\n" +
1050             "familiar with quoting passages of text in an email message, then you\n" +
1051             "know how to create a blockquote in Markdown. It looks best if you hard\n" +
1052             "wrap the text and put a <code>></code> before every line:"),
1053
1054        verb("> This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet,\n",
1055             "> consectetuer adipiscing elit. Aliquam hendrerit mi posuere lectus.\n",
1056             "> Vestibulum enim wisi, viverra nec, fringilla in, laoreet vitae, risus.\n",
1057             "> \n",
1058             "> Donec sit amet nisl. Aliquam semper ipsum sit amet velit. Suspendisse\n",
1059             "> id sem consectetuer libero luctus adipiscing.\n"),
1060
1061        para("Markdown allows you to be lazy and only put the <code>></code> before the first\n" +
1062             "line of a hard-wrapped paragraph:"),
1063
1064        verb("> This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet,\n",
1065             "consectetuer adipiscing elit. Aliquam hendrerit mi posuere lectus.\n",
1066             "Vestibulum enim wisi, viverra nec, fringilla in, laoreet vitae, risus.\n",
1067             "\n",
1068             "> Donec sit amet nisl. Aliquam semper ipsum sit amet velit. Suspendisse\n",
1069             "id sem consectetuer libero luctus adipiscing.\n"),
1070
1071        para("Blockquotes can be nested (i.e. a blockquote-in-a-blockquote) by\n" +
1072             "adding additional levels of <code>></code>:"),
1073
1074        verb("> This is the first level of quoting.\n",
1075             ">\n",
1076             "> > This is nested blockquote.\n",
1077             ">\n",
1078             "> Back to the first level.\n"),
1079
1080        para("Blockquotes can contain other Markdown elements, including headers, lists,\n" +
1081             "and code blocks:"),
1082
1083        verb("> ## This is a header.\n",
1084             "> \n",
1085             "> 1.   This is the first list item.\n",
1086             "> 2.   This is the second list item.\n",
1087             "> \n",
1088             "> Here's some example code:\n",
1089             "> \n",
1090             ">     return shell_exec(\"echo $input | $markdown_script\");\n"),
1091
1092        para("Any decent text editor should make email-style quoting easy. For\n" +
1093             "example, with BBEdit, you can make a selection and choose Increase\n" +
1094             "Quote Level from the Text menu."),
1095
1096        raw("<h3 id=\"list\">Lists</h3>"),
1097
1098        para("Markdown supports ordered (numbered) and unordered (bulleted) lists."),
1099
1100        para("Unordered lists use asterisks, pluses, and hyphens -- interchangably\n" +
1101             "-- as list markers:"),
1102
1103        verb("*   Red\n",
1104             "*   Green\n",
1105             "*   Blue\n"),
1106
1107        para("is equivalent to:"),
1108
1109        verb("+   Red\n",
1110             "+   Green\n",
1111             "+   Blue\n"),
1112
1113        para("and:"),
1114
1115        verb("-   Red\n",
1116             "-   Green\n",
1117             "-   Blue\n"),
1118
1119        para("Ordered lists use numbers followed by periods:"),
1120
1121        verb("1.  Bird\n",
1122             "2.  McHale\n",
1123             "3.  Parish\n"),
1124
1125        para("It's important to note that the actual numbers you use to mark the\n" +
1126             "list have no effect on the HTML output Markdown produces. The HTML\n" +
1127             "Markdown produces from the above list is:"),
1128
1129        verb("<ol>\n",
1130             "<li>Bird</li>\n",
1131             "<li>McHale</li>\n",
1132             "<li>Parish</li>\n",
1133             "</ol>\n"),
1134
1135        para("If you instead wrote the list in Markdown like this:"),
1136
1137        verb("1.  Bird\n",
1138             "1.  McHale\n",
1139             "1.  Parish\n"),
1140
1141        para("or even:"),
1142
1143        verb("3. Bird\n",
1144             "1. McHale\n",
1145             "8. Parish\n"),
1146
1147        para("you'd get the exact same HTML output. The point is, if you want to,\n" +
1148             "you can use ordinal numbers in your ordered Markdown lists, so that\n" +
1149             "the numbers in your source match the numbers in your published HTML.\n" +
1150             "But if you want to be lazy, you don't have to."),
1151
1152        para("If you do use lazy list numbering, however, you should still start the\n" +
1153             "list with the number 1. At some point in the future, Markdown may support\n" +
1154             "starting ordered lists at an arbitrary number."),
1155
1156        para("List markers typically start at the left margin, but may be indented by\n" +
1157             "up to three spaces. List markers must be followed by one or more spaces\n" +
1158             "or a tab."),
1159
1160        para("To make lists look nice, you can wrap items with hanging indents:"),
1161
1162        verb("*   Lorem ipsum dolor sit amet, consectetuer adipiscing elit.\n",
1163             "    Aliquam hendrerit mi posuere lectus. Vestibulum enim wisi,\n",
1164             "    viverra nec, fringilla in, laoreet vitae, risus.\n",
1165             "*   Donec sit amet nisl. Aliquam semper ipsum sit amet velit.\n",
1166             "    Suspendisse id sem consectetuer libero luctus adipiscing.\n"),
1167
1168        para("But if you want to be lazy, you don't have to:"),
1169
1170        verb("*   Lorem ipsum dolor sit amet, consectetuer adipiscing elit.\n",
1171             "Aliquam hendrerit mi posuere lectus. Vestibulum enim wisi,\n",
1172             "viverra nec, fringilla in, laoreet vitae, risus.\n",
1173             "*   Donec sit amet nisl. Aliquam semper ipsum sit amet velit.\n",
1174             "Suspendisse id sem consectetuer libero luctus adipiscing.\n"),
1175
1176        para("If list items are separated by blank lines, Markdown will wrap the\n" +
1177             "items in <code><p></code> tags in the HTML output. For example, this input:"),
1178
1179        verb("*   Bird\n",
1180             "*   Magic\n"),
1181
1182        para("will turn into:"),
1183
1184        verb("<ul>\n",
1185             "<li>Bird</li>\n",
1186             "<li>Magic</li>\n",
1187             "</ul>\n"),
1188
1189        para("But this:"),
1190
1191        verb("*   Bird\n",
1192             "\n",
1193             "*   Magic\n"),
1194
1195        para("will turn into:"),
1196
1197        verb("<ul>\n",
1198             "<li><p>Bird</p></li>\n",
1199             "<li><p>Magic</p></li>\n",
1200             "</ul>\n"),
1201
1202        para("List items may consist of multiple paragraphs. Each subsequent\n" +
1203             "paragraph in a list item must be intended by either 4 spaces\n" +
1204             "or one tab:"),
1205
1206        verb("1.  This is a list item with two paragraphs. Lorem ipsum dolor\n",
1207             "    sit amet, consectetuer adipiscing elit. Aliquam hendrerit\n",
1208             "    mi posuere lectus.\n",
1209             "\n",
1210             "    Vestibulum enim wisi, viverra nec, fringilla in, laoreet\n",
1211             "    vitae, risus. Donec sit amet nisl. Aliquam semper ipsum\n",
1212             "    sit amet velit.\n",
1213             "\n",
1214             "2.  Suspendisse id sem consectetuer libero luctus adipiscing.\n"),
1215
1216        para("It looks nice if you indent every line of the subsequent\n" +
1217             "paragraphs, but here again, Markdown will allow you to be\n" +
1218             "lazy:"),
1219
1220        verb("*   This is a list item with two paragraphs.\n",
1221             "\n",
1222             "    This is the second paragraph in the list item. You're\n",
1223             "only required to indent the first line. Lorem ipsum dolor\n",
1224             "sit amet, consectetuer adipiscing elit.\n",
1225             "\n",
1226             "*   Another item in the same list.\n"),
1227
1228        para("To put a blockquote within a list item, the blockquote's <code>></code>\n" +
1229             "delimiters need to be indented:"),
1230
1231        verb("*   A list item with a blockquote:\n",
1232             "\n",
1233             "    > This is a blockquote\n",
1234             "    > inside a list item.\n"),
1235
1236        para(
1237             "To put a code block within a list item, the code block needs\n" +
1238             "to be indented _twice_ -- 8 spaces or two tabs:"),
1239
1240        verb("*   A list item with a code block:\n",
1241             "\n",
1242             "        <code goes here>\n"),
1243
1244        para("It's worth noting that it's possible to trigger an ordered list by\n" +
1245             "accident, by writing something like this:"),
1246
1247        verb("1986. What a great season.\n"),
1248
1249        para("In other words, a <em>number-period-space</em> sequence at the beginning of a\n" +
1250             "line. To avoid this, you can backslash-escape the period:"),
1251
1252        verb("1986\\. What a great season.\n"),
1253
1254        raw("<h3 id=\"precode\">Code Blocks</h3>"),
1255
1256        para("Pre-formatted code blocks are used for writing about programming or\n" +
1257             "markup source code. Rather than forming normal paragraphs, the lines\n" +
1258             "of a code block are interpreted literally. Markdown wraps a code block\n" +
1259             "in both <code><pre></code> and <code><code></code> tags."),
1260
1261        para("To produce a code block in Markdown, simply indent every line of the\n" +
1262             "block by at least 4 spaces or 1 tab. For example, given this input:"),
1263
1264        verb("This is a normal paragraph:\n",
1265             "\n",
1266             "    This is a code block.\n"),
1267
1268        para("Markdown will generate:"),
1269
1270        verb("<p>This is a normal paragraph:</p>\n",
1271             "\n",
1272             "<pre><code>This is a code block.\n",
1273             "</code></pre>\n"),
1274
1275        para("One level of indentation -- 4 spaces or 1 tab -- is removed from each\n" +
1276             "line of the code block. For example, this:"),
1277
1278        verb("Here is an example of AppleScript:\n",
1279             "\n",
1280             "    tell application \"Foo\"\n",
1281             "        beep\n",
1282             "    end tell\n"),
1283
1284        para("will turn into:"),
1285
1286        verb("<p>Here is an example of AppleScript:</p>\n",
1287             "\n",
1288             "<pre><code>tell application \"Foo\"\n",
1289             "    beep\n",
1290             "end tell\n",
1291             "</code></pre>\n"),
1292
1293        para("A code block continues until it reaches a line that is not indented\n" +
1294             "(or the end of the article)."),
1295
1296        para("Within a code block, ampersands (<code>&</code>) and angle brackets (<code><</code> and <code>></code>)\n" +
1297             "are automatically converted into HTML entities. This makes it very\n" +
1298             "easy to include example HTML source code using Markdown -- just paste\n" +
1299             "it and indent it, and Markdown will handle the hassle of encoding the\n" +
1300             "ampersands and angle brackets. For example, this:"),
1301
1302        verb("    <div class=\"footer\">\n",
1303             "        &copy; 2004 Foo Corporation\n",
1304             "    </div>\n"),
1305
1306        para("will turn into:"),
1307
1308        verb("<pre><code>&lt;div class=\"footer\"&gt;\n",
1309             "    &amp;copy; 2004 Foo Corporation\n",
1310             "&lt;/div&gt;\n",
1311             "</code></pre>\n"),
1312
1313        para("Regular Markdown syntax is not processed within code blocks. E.g.,\n" +
1314             "asterisks are just literal asterisks within a code block. This means\n" +
1315             "it's also easy to use Markdown to write about Markdown's own syntax."),
1316
1317        raw("<h3 id=\"hr\">Horizontal Rules</h3>"),
1318
1319        para("You can produce a horizontal rule tag (<code><hr /></code>) by placing three or\n" +
1320             "more hyphens, asterisks, or underscores on a line by themselves. If you\n" +
1321             "wish, you may use spaces between the hyphens or asterisks. Each of the\n" +
1322             "following lines will produce a horizontal rule:"),
1323
1324        verb("* * *\n",
1325             "\n",
1326             "***\n",
1327             "\n",
1328             "*****\n",
1329             "\n",
1330             "- - -\n",
1331             "\n",
1332             "---------------------------------------\n",
1333             "\n",
1334             "_ _ _\n"),
1335
1336        rule(1),
1337
1338        raw("<h2 id=\"span\">Span Elements</h2>"),
1339
1340        raw("<h3 id=\"link\">Links</h3>"),
1341
1342        para("Markdown supports two style of links: _inline_ and _reference_."),
1343
1344        para("In both styles, the link text is delimited by [square brackets]."),
1345
1346        para("To create an inline link, use a set of regular parentheses immediately\n" +
1347             "after the link text's closing square bracket. Inside the parentheses,\n" +
1348             "put the URL where you want the link to point, along with an _optional_\n" +
1349             "title for the link, surrounded in quotes. For example:"),
1350
1351        verb("This is [an example](http://example.com/ \"Title\") inline link.\n",
1352             "\n",
1353             "[This link](http://example.net/) has no title attribute.\n"),
1354
1355        para("Will produce:"),
1356
1357        verb("<p>This is <a href=\"http://example.com/\" title=\"Title\">\n",
1358             "an example</a> inline link.</p>\n",
1359             "\n",
1360             "<p><a href=\"http://example.net/\">This link</a> has no\n",
1361             "title attribute.</p>\n"),
1362
1363        para("If you're referring to a local resource on the same server, you can\n" +
1364             "use relative paths:"),
1365
1366        verb("See my [About](/about/) page for details.\n"),
1367
1368        para("Reference-style links use a second set of square brackets, inside\n" +
1369             "which you place a label of your choosing to identify the link:"),
1370
1371        verb("This is [an example][id] reference-style link.\n"),
1372
1373        para("You can optionally use a space to separate the sets of brackets:"),
1374
1375        verb("This is [an example] [id] reference-style link.\n"),
1376
1377        para("Then, anywhere in the document, you define your link label like this,\n" +
1378             "on a line by itself:"),
1379
1380        verb("[id]: http://example.com/  \"Optional Title Here\"\n"),
1381
1382        para("That is:"),
1383
1384        list(:BULLET,
1385          item(nil,
1386            para("Square brackets containing the link identifier (optionally\n" +
1387                 "indented from the left margin using up to three spaces);")),
1388          item(nil,
1389            para("followed by a colon;")),
1390          item(nil,
1391            para("followed by one or more spaces (or tabs);")),
1392          item(nil,
1393            para("followed by the URL for the link;")),
1394          item(nil,
1395            para("optionally followed by a title attribute for the link, enclosed\n" +
1396                 "in double or single quotes."))),
1397
1398        para("The link URL may, optionally, be surrounded by angle brackets:"),
1399
1400        verb("[id]: <http://example.com/>  \"Optional Title Here\"\n"),
1401
1402        para("You can put the title attribute on the next line and use extra spaces\n" +
1403             "or tabs for padding, which tends to look better with longer URLs:"),
1404
1405        verb("[id]: http://example.com/longish/path/to/resource/here\n",
1406             "    \"Optional Title Here\"\n"),
1407
1408        para("Link definitions are only used for creating links during Markdown\n" +
1409             "processing, and are stripped from your document in the HTML output."),
1410
1411        para("Link definition names may constist of letters, numbers, spaces, and punctuation -- but they are _not_ case sensitive. E.g. these two links:"),
1412
1413        verb("[link text][a]\n",
1414             "[link text][A]\n"),
1415
1416        para("are equivalent."),
1417
1418        para("The <em>implicit link name</em> shortcut allows you to omit the name of the\n" +
1419             "link, in which case the link text itself is used as the name.\n" +
1420             "Just use an empty set of square brackets -- e.g., to link the word\n" +
1421             "\"Google\" to the google.com web site, you could simply write:"),
1422
1423        verb("[Google][]\n"),
1424
1425        para("And then define the link:"),
1426
1427        verb("[Google]: http://google.com/\n"),
1428
1429        para("Because link names may contain spaces, this shortcut even works for\n" +
1430            "multiple words in the link text:"),
1431
1432
1433        verb("Visit [Daring Fireball][] for more information.\n"),
1434
1435        para("And then define the link:"),
1436
1437        verb("[Daring Fireball]: http://daringfireball.net/\n"),
1438
1439        para("Link definitions can be placed anywhere in your Markdown document. I\n" +
1440             "tend to put them immediately after each paragraph in which they're\n" +
1441             "used, but if you want, you can put them all at the end of your\n" +
1442             "document, sort of like footnotes."),
1443
1444        para("Here's an example of reference links in action:"),
1445
1446        verb("I get 10 times more traffic from [Google] [1] than from\n",
1447             "[Yahoo] [2] or [MSN] [3].\n",
1448             "\n",
1449             "  [1]: http://google.com/        \"Google\"\n",
1450             "  [2]: http://search.yahoo.com/  \"Yahoo Search\"\n",
1451             "  [3]: http://search.msn.com/    \"MSN Search\"\n"),
1452
1453        para("Using the implicit link name shortcut, you could instead write:"),
1454
1455        verb("I get 10 times more traffic from [Google][] than from\n",
1456             "[Yahoo][] or [MSN][].\n",
1457             "\n",
1458             "  [google]: http://google.com/        \"Google\"\n",
1459             "  [yahoo]:  http://search.yahoo.com/  \"Yahoo Search\"\n",
1460             "  [msn]:    http://search.msn.com/    \"MSN Search\"\n"),
1461
1462        para("Both of the above examples will produce the following HTML output:"),
1463
1464        verb("<p>I get 10 times more traffic from <a href=\"http://google.com/\"\n",
1465             "title=\"Google\">Google</a> than from\n",
1466             "<a href=\"http://search.yahoo.com/\" title=\"Yahoo Search\">Yahoo</a>\n",
1467             "or <a href=\"http://search.msn.com/\" title=\"MSN Search\">MSN</a>.</p>\n"),
1468
1469        para("For comparison, here is the same paragraph written using\n" +
1470             "Markdown's inline link style:"),
1471
1472        verb("I get 10 times more traffic from [Google](http://google.com/ \"Google\")\n",
1473             "than from [Yahoo](http://search.yahoo.com/ \"Yahoo Search\") or\n",
1474             "[MSN](http://search.msn.com/ \"MSN Search\").\n"),
1475
1476        para("The point of reference-style links is not that they're easier to\n" +
1477             "write. The point is that with reference-style links, your document\n" +
1478             "source is vastly more readable. Compare the above examples: using\n" +
1479             "reference-style links, the paragraph itself is only 81 characters\n" +
1480             "long; with inline-style links, it's 176 characters; and as raw HTML,\n" +
1481             "it's 234 characters. In the raw HTML, there's more markup than there\n" +
1482             "is text."),
1483
1484        para("With Markdown's reference-style links, a source document much more\n" +
1485             "closely resembles the final output, as rendered in a browser. By\n" +
1486             "allowing you to move the markup-related metadata out of the paragraph,\n" +
1487             "you can add links without interrupting the narrative flow of your\n" +
1488             "prose."),
1489
1490        raw("<h3 id=\"em\">Emphasis</h3>"),
1491
1492        para("Markdown treats asterisks (<code>*</code>) and underscores (<code>_</code>) as indicators of\n" +
1493             "emphasis. Text wrapped with one <code>*</code> or <code>_</code> will be wrapped with an\n" +
1494             "HTML <code><em></code> tag; double <code>*</code>'s or <code>_</code>'s will be wrapped with an HTML\n" +
1495             "<code><strong></code> tag. E.g., this input:"),
1496
1497        verb("*single asterisks*\n",
1498             "\n",
1499             "_single underscores_\n",
1500             "\n",
1501             "**double asterisks**\n",
1502             "\n",
1503             "__double underscores__\n"),
1504
1505        para("will produce:"),
1506
1507        verb("<em>single asterisks</em>\n",
1508             "\n",
1509             "<em>single underscores</em>\n",
1510             "\n",
1511             "<strong>double asterisks</strong>\n",
1512             "\n",
1513             "<strong>double underscores</strong>\n"),
1514
1515        para("You can use whichever style you prefer; the lone restriction is that\n" +
1516             "the same character must be used to open and close an emphasis span."),
1517
1518        para("Emphasis can be used in the middle of a word:"),
1519
1520        verb("un*fucking*believable\n"),
1521
1522        para("But if you surround an <code>*</code> or <code>_</code> with spaces, it'll be treated as a\n" +
1523             "literal asterisk or underscore."),
1524
1525        para("To produce a literal asterisk or underscore at a position where it\n" +
1526             "would otherwise be used as an emphasis delimiter, you can backslash\n" +
1527             "escape it:"),
1528
1529        verb("\\*this text is surrounded by literal asterisks\\*\n"),
1530
1531        raw("<h3 id=\"code\">Code</h3>"),
1532
1533        para("To indicate a span of code, wrap it with backtick quotes (<code>`</code>).\n" +
1534             "Unlike a pre-formatted code block, a code span indicates code within a\n" +
1535             "normal paragraph. For example:"),
1536
1537        verb("Use the `printf()` function.\n"),
1538
1539        para("will produce:"),
1540
1541        verb("<p>Use the <code>printf()</code> function.</p>\n"),
1542
1543        para("To include a literal backtick character within a code span, you can use\n" +
1544             "multiple backticks as the opening and closing delimiters:"),
1545
1546        verb("``There is a literal backtick (`) here.``\n"),
1547
1548        para("which will produce this:"),
1549
1550        verb("<p><code>There is a literal backtick (`) here.</code></p>\n"),
1551
1552        para("The backtick delimiters surrounding a code span may include spaces --\n" +
1553             "one after the opening, one before the closing. This allows you to place\n" +
1554             "literal backtick characters at the beginning or end of a code span:"),
1555
1556        verb("A single backtick in a code span: `` ` ``\n",
1557             "\n",
1558             "A backtick-delimited string in a code span: `` `foo` ``\n"),
1559
1560        para("will produce:"),
1561
1562        verb("<p>A single backtick in a code span: <code>`</code></p>\n",
1563             "\n",
1564             "<p>A backtick-delimited string in a code span: <code>`foo`</code></p>\n"),
1565
1566        para("With a code span, ampersands and angle brackets are encoded as HTML\n" +
1567             "entities automatically, which makes it easy to include example HTML\n" +
1568             "tags. Markdown will turn this:"),
1569
1570        verb("Please don't use any `<blink>` tags.\n"),
1571
1572        para("into:"),
1573
1574        verb("<p>Please don't use any <code>&lt;blink&gt;</code> tags.</p>\n"),
1575
1576        para("You can write this:"),
1577
1578        verb("`&#8212;` is the decimal-encoded equivalent of `&mdash;`.\n"),
1579
1580        para("to produce:"),
1581
1582        verb( "<p><code>&amp;#8212;</code> is the decimal-encoded\n",
1583             "equivalent of <code>&amp;mdash;</code>.</p>\n"),
1584
1585        raw("<h3 id=\"img\">Images</h3>"),
1586
1587        para("Admittedly, it's fairly difficult to devise a \"natural\" syntax for\n" +
1588             "placing images into a plain text document format."),
1589
1590        para("Markdown uses an image syntax that is intended to resemble the syntax\n" +
1591             "for links, allowing for two styles: _inline_ and _reference_."),
1592
1593        para("Inline image syntax looks like this:"),
1594
1595        verb("![Alt text](/path/to/img.jpg)\n",
1596             "\n",
1597             "![Alt text](/path/to/img.jpg \"Optional title\")\n"),
1598
1599        para("That is:"),
1600
1601        list(:BULLET,
1602          item(nil,
1603            para("An exclamation mark: <code>!</code>;")),
1604          item(nil,
1605            para("followed by a set of square brackets, containing the <code>alt</code>\n" +
1606                 "attribute text for the image;")),
1607          item(nil,
1608            para("followed by a set of parentheses, containing the URL or path to\n" +
1609                 "the image, and an optional <code>title</code> attribute enclosed in double\n" +
1610                 "or single quotes."))),
1611
1612        para("Reference-style image syntax looks like this:"),
1613
1614        verb("![Alt text][id]\n"),
1615
1616        para("Where \"id\" is the name of a defined image reference. Image references\n" +
1617             "are defined using syntax identical to link references:"),
1618
1619        verb("[id]: url/to/image  \"Optional title attribute\"\n"),
1620
1621        para("As of this writing, Markdown has no syntax for specifying the\n" +
1622             "dimensions of an image; if this is important to you, you can simply\n" +
1623             "use regular HTML <code><img></code> tags."),
1624
1625        rule(1),
1626
1627        raw("<h2 id=\"misc\">Miscellaneous</h2>"),
1628
1629        raw("<h3 id=\"autolink\">Automatic Links</h3>"),
1630
1631        para("Markdown supports a shortcut style for creating \"automatic\" links for URLs and email addresses: simply surround the URL or email address with angle brackets. What this means is that if you want to show the actual text of a URL or email address, and also have it be a clickable link, you can do this:"),
1632
1633        verb("<http://example.com/>\n"),
1634
1635        para("Markdown will turn this into:"),
1636
1637        verb("<a href=\"http://example.com/\">http://example.com/</a>\n"),
1638
1639        para("Automatic links for email addresses work similarly, except that\n" +
1640             "Markdown will also perform a bit of randomized decimal and hex\n" +
1641             "entity-encoding to help obscure your address from address-harvesting\n" +
1642             "spambots. For example, Markdown will turn this:"),
1643
1644        verb("<address@example.com>\n"),
1645
1646        para("into something like this:"),
1647
1648        verb("<a href=\"&#x6D;&#x61;i&#x6C;&#x74;&#x6F;:&#x61;&#x64;&#x64;&#x72;&#x65;\n",
1649             "&#115;&#115;&#64;&#101;&#120;&#x61;&#109;&#x70;&#x6C;e&#x2E;&#99;&#111;\n",
1650             "&#109;\">&#x61;&#x64;&#x64;&#x72;&#x65;&#115;&#115;&#64;&#101;&#120;&#x61;\n",
1651             "&#109;&#x70;&#x6C;e&#x2E;&#99;&#111;&#109;</a>\n"),
1652
1653        para("which will render in a browser as a clickable link to \"address@example.com\"."),
1654
1655        para("(This sort of entity-encoding trick will indeed fool many, if not\n" +
1656               "most, address-harvesting bots, but it definitely won't fool all of\n" +
1657               "them. It's better than nothing, but an address published in this way\n" +
1658               "will probably eventually start receiving spam.)"),
1659
1660        raw("<h3 id=\"backslash\">Backslash Escapes</h3>"),
1661
1662        para("Markdown allows you to use backslash escapes to generate literal\n" +
1663             "characters which would otherwise have special meaning in Markdown's\n" +
1664             "formatting syntax. For example, if you wanted to surround a word with\n" +
1665             "literal asterisks (instead of an HTML <code><em></code> tag), you can backslashes\n" +
1666             "before the asterisks, like this:"),
1667
1668        verb("\\*literal asterisks\\*\n"),
1669
1670        para("Markdown provides backslash escapes for the following characters:"),
1671
1672        verb("\\   backslash\n",
1673             "`   backtick\n",
1674             "*   asterisk\n",
1675             "_   underscore\n",
1676             "{}  curly braces\n",
1677             "[]  square brackets\n",
1678             "()  parentheses\n",
1679             "#   hash mark\n",
1680             "+	plus sign\n",
1681             "-	minus sign (hyphen)\n",
1682             ".   dot\n",
1683             "!   exclamation mark\n"))
1684
1685    assert_equal expected, doc
1686  end
1687
1688  def test_nested_blockquotes
1689    input = File.read "#{MARKDOWN_TEST_PATH}/Nested blockquotes.text"
1690
1691    doc = @parser.parse input
1692
1693    expected =
1694      doc(
1695        block(
1696          para("foo"),
1697          block(
1698            para("bar")),
1699          para("foo")))
1700
1701    assert_equal expected, doc
1702  end
1703
1704  def test_ordered_and_unordered_lists
1705    input = File.read "#{MARKDOWN_TEST_PATH}/Ordered and unordered lists.text"
1706
1707    doc = @parser.parse input
1708
1709    expected =
1710      doc(
1711        head(2, 'Unordered'),
1712
1713        para('Asterisks tight:'),
1714        list(:BULLET,
1715          item(nil, para("asterisk 1")),
1716          item(nil, para("asterisk 2")),
1717          item(nil, para("asterisk 3"))),
1718        para('Asterisks loose:'),
1719        list(:BULLET,
1720          item(nil, para("asterisk 1")),
1721          item(nil, para("asterisk 2")),
1722          item(nil, para("asterisk 3"))),
1723
1724        rule(1),
1725
1726        para("Pluses tight:"),
1727        list(:BULLET,
1728          item(nil, para("Plus 1")),
1729          item(nil, para("Plus 2")),
1730          item(nil, para("Plus 3"))),
1731        para("Pluses loose:"),
1732        list(:BULLET,
1733          item(nil, para("Plus 1")),
1734          item(nil, para("Plus 2")),
1735          item(nil, para("Plus 3"))),
1736
1737        rule(1),
1738
1739        para("Minuses tight:"),
1740        list(:BULLET,
1741          item(nil, para("Minus 1")),
1742          item(nil, para("Minus 2")),
1743          item(nil, para("Minus 3"))),
1744        para("Minuses loose:"),
1745        list(:BULLET,
1746          item(nil, para("Minus 1")),
1747          item(nil, para("Minus 2")),
1748          item(nil, para("Minus 3"))),
1749
1750        head(2, "Ordered"),
1751
1752        para("Tight:"),
1753        list(:NUMBER,
1754          item(nil, para("First")),
1755          item(nil, para("Second")),
1756          item(nil, para("Third"))),
1757        para("and:"),
1758        list(:NUMBER,
1759          item(nil, para("One")),
1760          item(nil, para("Two")),
1761          item(nil, para("Three"))),
1762
1763        para("Loose using tabs:"),
1764        list(:NUMBER,
1765          item(nil, para("First")),
1766          item(nil, para("Second")),
1767          item(nil, para("Third"))),
1768        para("and using spaces:"),
1769        list(:NUMBER,
1770          item(nil, para("One")),
1771          item(nil, para("Two")),
1772          item(nil, para("Three"))),
1773
1774        para("Multiple paragraphs:"),
1775        list(:NUMBER,
1776          item(nil,
1777            para("Item 1, graf one."),
1778            para("Item 2. graf two. The quick brown fox " +
1779                 "jumped over the lazy dog's\nback.")),
1780          item(nil, para("Item 2.")),
1781          item(nil, para("Item 3."))),
1782
1783        head(2, "Nested"),
1784        list(:BULLET,
1785          item(nil,
1786            para("Tab"),
1787            list(:BULLET,
1788              item(nil,
1789                para("Tab"),
1790                list(:BULLET,
1791                  item(nil,
1792                    para("Tab"))))))),
1793
1794        para("Here's another:"),
1795        list(:NUMBER,
1796          item(nil, para("First")),
1797          item(nil, para("Second:"),
1798            list(:BULLET,
1799              item(nil, para("Fee")),
1800              item(nil, para("Fie")),
1801              item(nil, para("Foe")))),
1802          item(nil, para("Third"))),
1803
1804        para("Same thing but with paragraphs:"),
1805        list(:NUMBER,
1806          item(nil, para("First")),
1807          item(nil, para("Second:"),
1808            list(:BULLET,
1809              item(nil, para("Fee")),
1810              item(nil, para("Fie")),
1811              item(nil, para("Foe")))),
1812          item(nil, para("Third"))),
1813
1814        para("This was an error in Markdown 1.0.1:"),
1815        list(:BULLET,
1816          item(nil,
1817            para("this"),
1818            list(:BULLET,
1819              item(nil, para("sub"))),
1820            para("that"))))
1821
1822    assert_equal expected, doc
1823  end
1824
1825  def test_strong_and_em_together
1826    input = File.read "#{MARKDOWN_TEST_PATH}/Strong and em together.text"
1827
1828    doc = @parser.parse input
1829
1830    expected =
1831      doc(
1832        para("<b><em>This is strong and em.</em></b>"),
1833        para("So is <b>_this_</b> word."),
1834        para("<b><em>This is strong and em.</em></b>"),
1835        para("So is <b>_this_</b> word."))
1836
1837    assert_equal expected, doc
1838  end
1839
1840  def test_tabs
1841    input = File.read "#{MARKDOWN_TEST_PATH}/Tabs.text"
1842
1843    doc = @parser.parse input
1844
1845    expected =
1846      doc(
1847        list(:BULLET,
1848          item(nil,
1849            para("this is a list item\nindented with tabs")),
1850          item(nil,
1851            para("this is a list item\nindented with spaces"))),
1852
1853        para("Code:"),
1854
1855        verb("this code block is indented by one tab\n"),
1856
1857        para("And:"),
1858
1859        verb("\tthis code block is indented by two tabs\n"),
1860
1861        para("And:"),
1862
1863        verb(
1864          "+\tthis is an example list item\n",
1865          "\tindented with tabs\n",
1866          "\n",
1867          "+   this is an example list item\n",
1868          "    indented with spaces\n"))
1869
1870    assert_equal expected, doc
1871  end
1872
1873  def test_tidyness
1874    input = File.read "#{MARKDOWN_TEST_PATH}/Tidyness.text"
1875
1876    doc = @parser.parse input
1877
1878    expected =
1879      doc(
1880        block(
1881          para("A list within a blockquote:"),
1882          list(:BULLET,
1883            item(nil, para("asterisk 1")),
1884            item(nil, para("asterisk 2")),
1885            item(nil, para("asterisk 3")))))
1886
1887    assert_equal expected, doc
1888  end
1889
1890end
1891
1892