about summary refs log tree commit diff stats
path: root/filters/html-converters
diff options
context:
space:
mode:
authorJason A. Donenfeld2013-05-27 21:39:43 +0200
committerJason A. Donenfeld2013-05-27 21:54:16 +0200
commit8149be213f1c8f52b0dbe6c213f6073af57fa954 (patch)
treee4d0315f53022bb7335f782ad394d8e7602f1b52 /filters/html-converters
parentreadme: use string_list instead of space deliminations (diff)
downloadcgit-8149be213f1c8f52b0dbe6c213f6073af57fa954.tar.gz
cgit-8149be213f1c8f52b0dbe6c213f6073af57fa954.zip
filters: import more modern scripts
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Diffstat (limited to 'filters/html-converters')
-rwxr-xr-xfilters/html-converters/man2html5
-rwxr-xr-xfilters/html-converters/md2html2
-rwxr-xr-xfilters/html-converters/resources/markdown.pl1731
-rw-r--r--filters/html-converters/resources/rst-template.txt4
-rwxr-xr-xfilters/html-converters/rst2html2
-rwxr-xr-xfilters/html-converters/txt2html4
6 files changed, 1748 insertions, 0 deletions
diff --git a/filters/html-converters/man2html b/filters/html-converters/man2html new file mode 100755 index 0000000..1b28437 --- /dev/null +++ b/filters/html-converters/man2html
@@ -0,0 +1,5 @@
1#!/bin/sh
2echo "<div style=\"font-family: monospace\">"
3groff -mandoc -T html -P -r -P -l | egrep -v '(<html>|<head>|<meta|<title>|</title>|</head>|<body>|</body>|</html>|<!DOCTYPE|"http://www.w3.org)'
4echo "</div>"
5
diff --git a/filters/html-converters/md2html b/filters/html-converters/md2html new file mode 100755 index 0000000..5cab749 --- /dev/null +++ b/filters/html-converters/md2html
@@ -0,0 +1,2 @@
1#!/bin/sh
2exec "$(dirname "$0")/resources/markdown.pl"
diff --git a/filters/html-converters/resources/markdown.pl b/filters/html-converters/resources/markdown.pl new file mode 100755 index 0000000..abec173 --- /dev/null +++ b/filters/html-converters/resources/markdown.pl
@@ -0,0 +1,1731 @@
1#!/usr/bin/perl
2
3#
4# Markdown -- A text-to-HTML conversion tool for web writers
5#
6# Copyright (c) 2004 John Gruber
7# <http://daringfireball.net/projects/markdown/>
8#
9
10
11package Markdown;
12require 5.006_000;
13use strict;
14use warnings;
15
16use Digest::MD5 qw(md5_hex);
17use vars qw($VERSION);
18$VERSION = '1.0.1';
19# Tue 14 Dec 2004
20
21## Disabled; causes problems under Perl 5.6.1:
22use utf8;
23binmode( STDOUT, ":utf8" ); # c.f.: http://acis.openlib.org/dev/perl-unicode-struggle.html
24
25
26#
27# Global default settings:
28#
29my $g_empty_element_suffix = " />"; # Change to ">" for HTML output
30my $g_tab_width = 4;
31
32
33#
34# Globals:
35#
36
37# Regex to match balanced [brackets]. See Friedl's
38# "Mastering Regular Expressions", 2nd Ed., pp. 328-331.
39my $g_nested_brackets;
40$g_nested_brackets = qr{
41 (?> # Atomic matching
42 [^\[\]]+ # Anything other than brackets
43 |
44 \[
45 (??{ $g_nested_brackets }) # Recursive set of nested brackets
46 \]
47 )*
48}x;
49
50
51# Table of hash values for escaped characters:
52my %g_escape_table;
53foreach my $char (split //, '\\`*_{}[]()>#+-.!') {
54 $g_escape_table{$char} = md5_hex($char);
55}
56
57
58# Global hashes, used by various utility routines
59my %g_urls;
60my %g_titles;
61my %g_html_blocks;
62
63# Used to track when we're inside an ordered or unordered list
64# (see _ProcessListItems() for details):
65my $g_list_level = 0;
66
67
68#### Blosxom plug-in interface ##########################################
69
70# Set $g_blosxom_use_meta to 1 to use Blosxom's meta plug-in to determine
71# which posts Markdown should process, using a "meta-markup: markdown"
72# header. If it's set to 0 (the default), Markdown will process all
73# entries.
74my $g_blosxom_use_meta = 0;
75
76sub start { 1; }
77sub story {
78 my($pkg, $path, $filename, $story_ref, $title_ref, $body_ref) = @_;
79
80 if ( (! $g_blosxom_use_meta) or
81 (defined($meta::markup) and ($meta::markup =~ /^\s*markdown\s*$/i))
82 ){
83 $$body_ref = Markdown($$body_ref);
84 }
85 1;
86}
87
88
89#### Movable Type plug-in interface #####################################
90eval {require MT}; # Test to see if we're running in MT.
91unless ($@) {
92 require MT;
93 import MT;
94 require MT::Template::Context;
95 import MT::Template::Context;
96
97 eval {require MT::Plugin}; # Test to see if we're running >= MT 3.0.
98 unless ($@) {
99 require MT::Plugin;
100 import MT::Plugin;
101 my $plugin = new MT::Plugin({
102 name => "Markdown",
103 description => "A plain-text-to-HTML formatting plugin. (Version: $VERSION)",
104 doc_link => 'http://daringfireball.net/projects/markdown/'
105 });
106 MT->add_plugin( $plugin );
107 }
108
109 MT::Template::Context->add_container_tag(MarkdownOptions => sub {
110 my $ctx = shift;
111 my $args = shift;
112 my $builder = $ctx->stash('builder');
113 my $tokens = $ctx->stash('tokens');
114
115 if (defined ($args->{'output'}) ) {
116 $ctx->stash('markdown_output', lc $args->{'output'});
117 }
118
119 defined (my $str = $builder->build($ctx, $tokens) )
120 or return $ctx->error($builder->errstr);
121 $str; # return value
122 });
123
124 MT->add_text_filter('markdown' => {
125 label => 'Markdown',
126 docs => 'http://daringfireball.net/projects/markdown/',
127 on_format => sub {
128 my $text = shift;
129 my $ctx = shift;
130 my $raw = 0;
131 if (defined $ctx) {
132 my $output = $ctx->stash('markdown_output');
133 if (defined $output && $output =~ m/^html/i) {
134 $g_empty_element_suffix = ">";
135 $ctx->stash('markdown_output', '');
136 }
137 elsif (defined $output && $output eq 'raw') {
138 $raw = 1;
139 $ctx->stash('markdown_output', '');
140 }
141 else {
142 $raw = 0;
143 $g_empty_element_suffix = " />";
144 }
145 }
146 $text = $raw ? $text : Markdown($text);
147 $text;
148 },
149 });
150
151 # If SmartyPants is loaded, add a combo Markdown/SmartyPants text filter:
152 my $smartypants;
153
154 {
155 no warnings "once";
156 $smartypants = $MT::Template::Context::Global_filters{'smarty_pants'};
157 }
158
159 if ($smartypants) {
160 MT->add_text_filter('markdown_with_smartypants' => {
161 label => 'Markdown With SmartyPants',
162 docs => 'http://daringfireball.net/projects/markdown/',
163 on_format => sub {
164 my $text = shift;
165 my $ctx = shift;
166 if (defined $ctx) {
167 my $output = $ctx->stash('markdown_output');
168 if (defined $output && $output eq 'html') {
169 $g_empty_element_suffix = ">";
170 }
171 else {
172 $g_empty_element_suffix = " />";
173 }
174 }
175 $text = Markdown($text);
176 $text = $smartypants->($text, '1');
177 },
178 });
179 }
180}
181else {
182#### BBEdit/command-line text filter interface ##########################
183# Needs to be hidden from MT (and Blosxom when running in static mode).
184
185 # We're only using $blosxom::version once; tell Perl not to warn us:
186 no warnings 'once';
187 unless ( defined($blosxom::version) ) {
188 use warnings;
189
190 #### Check for command-line switches: #################
191 my %cli_opts;
192 use Getopt::Long;
193 Getopt::Long::Configure('pass_through');
194 GetOptions(\%cli_opts,
195 'version',
196 'shortversion',
197 'html4tags',
198 );
199 if ($cli_opts{'version'}) { # Version info
200 print "\nThis is Markdown, version $VERSION.\n";
201 print "Copyright 2004 John Gruber\n";
202 print "http://daringfireball.net/projects/markdown/\n\n";
203 exit 0;
204 }
205 if ($cli_opts{'shortversion'}) { # Just the version number string.
206 print $VERSION;
207 exit 0;
208 }
209 if ($cli_opts{'html4tags'}) { # Use HTML tag style instead of XHTML
210 $g_empty_element_suffix = ">";
211 }
212
213
214 #### Process incoming text: ###########################
215 my $text;
216 {
217 local $/; # Slurp the whole file
218 $text = <>;
219 }
220 print <<'EOT';
221<style>
222.markdown-body {
223 font-size: 14px;
224 line-height: 1.6;
225 overflow: hidden;
226}
227.markdown-body>*:first-child {
228 margin-top: 0 !important;
229}
230.markdown-body>*:last-child {
231 margin-bottom: 0 !important;
232}
233.markdown-body a.absent {
234 color: #c00;
235}
236.markdown-body a.anchor {
237 display: block;
238 padding-left: 30px;
239 margin-left: -30px;
240 cursor: pointer;
241 position: absolute;
242 top: 0;
243 left: 0;
244 bottom: 0;
245}
246.markdown-body h1, .markdown-body h2, .markdown-body h3, .markdown-body h4, .markdown-body h5, .markdown-body h6 {
247 margin: 20px 0 10px;
248 padding: 0;
249 font-weight: bold;
250 -webkit-font-smoothing: antialiased;
251 cursor: text;
252 position: relative;
253}
254.markdown-body h1 .mini-icon-link, .markdown-body h2 .mini-icon-link, .markdown-body h3 .mini-icon-link, .markdown-body h4 .mini-icon-link, .markdown-body h5 .mini-icon-link, .markdown-body h6 .mini-icon-link {
255 display: none;
256 color: #000;
257}
258.markdown-body h1:hover a.anchor, .markdown-body h2:hover a.anchor, .markdown-body h3:hover a.anchor, .markdown-body h4:hover a.anchor, .markdown-body h5:hover a.anchor, .markdown-body h6:hover a.anchor {
259 text-decoration: none;
260 line-height: 1;
261 padding-left: 0;
262 margin-left: -22px;
263 top: 15%}
264.markdown-body h1:hover a.anchor .mini-icon-link, .markdown-body h2:hover a.anchor .mini-icon-link, .markdown-body h3:hover a.anchor .mini-icon-link, .markdown-body h4:hover a.anchor .mini-icon-link, .markdown-body h5:hover a.anchor .mini-icon-link, .markdown-body h6:hover a.anchor .mini-icon-link {
265 display: inline-block;
266}
267.markdown-body h1 tt, .markdown-body h1 code, .markdown-body h2 tt, .markdown-body h2 code, .markdown-body h3 tt, .markdown-body h3 code, .markdown-body h4 tt, .markdown-body h4 code, .markdown-body h5 tt, .markdown-body h5 code, .markdown-body h6 tt, .markdown-body h6 code {
268 font-size: inherit;
269}
270.markdown-body h1 {
271 font-size: 28px;
272 color: #000;
273}
274.markdown-body h2 {
275 font-size: 24px;
276 border-bottom: 1px solid #ccc;
277 color: #000;
278}
279.markdown-body h3 {
280 font-size: 18px;
281}
282.markdown-body h4 {
283 font-size: 16px;
284}
285.markdown-body h5 {
286 font-size: 14px;
287}
288.markdown-body h6 {
289 color: #777;
290 font-size: 14px;
291}
292.markdown-body p, .markdown-body blockquote, .markdown-body ul, .markdown-body ol, .markdown-body dl, .markdown-body table, .markdown-body pre {
293 margin: 15px 0;
294}
295.markdown-body hr {
296 background: transparent url("/dirty-shade.png") repeat-x 0 0;
297 border: 0 none;
298 color: #ccc;
299 height: 4px;
300 padding: 0;
301}
302.markdown-body>h2:first-child, .markdown-body>h1:first-child, .markdown-body>h1:first-child+h2, .markdown-body>h3:first-child, .markdown-body>h4:first-child, .markdown-body>h5:first-child, .markdown-body>h6:first-child {
303 margin-top: 0;
304 padding-top: 0;
305}
306.markdown-body a:first-child h1, .markdown-body a:first-child h2, .markdown-body a:first-child h3, .markdown-body a:first-child h4, .markdown-body a:first-child h5, .markdown-body a:first-child h6 {
307 margin-top: 0;
308 padding-top: 0;
309}
310.markdown-body h1+p, .markdown-body h2+p, .markdown-body h3+p, .markdown-body h4+p, .markdown-body h5+p, .markdown-body h6+p {
311 margin-top: 0;
312}
313.markdown-body li p.first {
314 display: inline-block;
315}
316.markdown-body ul, .markdown-body ol {
317 padding-left: 30px;
318}
319.markdown-body ul.no-list, .markdown-body ol.no-list {
320 list-style-type: none;
321 padding: 0;
322}
323.markdown-body ul li>:first-child, .markdown-body ul li ul:first-of-type, .markdown-body ul li ol:first-of-type, .markdown-body ol li>:first-child, .markdown-body ol li ul:first-of-type, .markdown-body ol li ol:first-of-type {
324 margin-top: 0px;
325}
326.markdown-body ul li p:last-of-type, .markdown-body ol li p:last-of-type {
327 margin-bottom: 0;
328}
329.markdown-body ul ul, .markdown-body ul ol, .markdown-body ol ol, .markdown-body ol ul {
330 margin-bottom: 0;
331}
332.markdown-body dl {
333 padding: 0;
334}
335.markdown-body dl dt {
336 font-size: 14px;
337 font-weight: bold;
338 font-style: italic;
339 padding: 0;
340 margin: 15px 0 5px;
341}
342.markdown-body dl dt:first-child {
343 padding: 0;
344}
345.markdown-body dl dt>:first-child {
346 margin-top: 0px;
347}
348.markdown-body dl dt>:last-child {
349 margin-bottom: 0px;
350}
351.markdown-body dl dd {
352 margin: 0 0 15px;
353 padding: 0 15px;
354}
355.markdown-body dl dd>:first-child {
356 margin-top: 0px;
357}
358.markdown-body dl dd>:last-child {
359 margin-bottom: 0px;
360}
361.markdown-body blockquote {
362 border-left: 4px solid #DDD;
363 padding: 0 15px;
364 color: #777;
365}
366.markdown-body blockquote>:first-child {
367 margin-top: 0px;
368}
369.markdown-body blockquote>:last-child {
370 margin-bottom: 0px;
371}
372.markdown-body table th {
373 font-weight: bold;
374}
375.markdown-body table th, .markdown-body table td {
376 border: 1px solid #ccc;
377 padding: 6px 13px;
378}
379.markdown-body table tr {
380 border-top: 1px solid #ccc;
381 background-color: #fff;
382}
383.markdown-body table tr:nth-child(2n) {
384 background-color: #f8f8f8;
385}
386.markdown-body img {
387 max-width: 100%;
388 -moz-box-sizing: border-box;
389 box-sizing: border-box;
390}
391.markdown-body span.frame {
392 display: block;
393 overflow: hidden;
394}
395.markdown-body span.frame>span {
396 border: 1px solid #ddd;
397 display: block;
398 float: left;
399 overflow: hidden;
400 margin: 13px 0 0;
401 padding: 7px;
402 width: auto;
403}
404.markdown-body span.frame span img {
405 display: block;
406 float: left;
407}
408.markdown-body span.frame span span {
409 clear: both;
410 color: #333;
411 display: block;
412 padding: 5px 0 0;
413}
414.markdown-body span.align-center {
415 display: block;
416 overflow: hidden;
417 clear: both;
418}
419.markdown-body span.align-center>span {
420 display: block;
421 overflow: hidden;
422 margin: 13px auto 0;
423 text-align: center;
424}
425.markdown-body span.align-center span img {
426 margin: 0 auto;
427 text-align: center;
428}
429.markdown-body span.align-right {
430 display: block;
431 overflow: hidden;
432 clear: both;
433}
434.markdown-body span.align-right>span {
435 display: block;
436 overflow: hidden;
437 margin: 13px 0 0;
438 text-align: right;
439}
440.markdown-body span.align-right span img {
441 margin: 0;
442 text-align: right;
443}
444.markdown-body span.float-left {
445 display: block;
446 margin-right: 13px;
447 overflow: hidden;
448 float: left;
449}
450.markdown-body span.float-left span {
451 margin: 13px 0 0;
452}
453.markdown-body span.float-right {
454 display: block;
455 margin-left: 13px;
456 overflow: hidden;
457 float: right;
458}
459.markdown-body span.float-right>span {
460 display: block;
461 overflow: hidden;
462 margin: 13px auto 0;
463 text-align: right;
464}
465.markdown-body code, .markdown-body tt {
466 margin: 0 2px;
467 padding: 0px 5px;
468 border: 1px solid #eaeaea;
469 background-color: #f8f8f8;
470 border-radius: 3px;
471}
472.markdown-body code {
473 white-space: nowrap;
474}
475.markdown-body pre>code {
476 margin: 0;
477 padding: 0;
478 white-space: pre;
479 border: none;
480 background: transparent;
481}
482.markdown-body .highlight pre, .markdown-body pre {
483 background-color: #f8f8f8;
484 border: 1px solid #ccc;
485 font-size: 13px;
486 line-height: 19px;
487 overflow: auto;
488 padding: 6px 10px;
489 border-radius: 3px;
490}
491.markdown-body pre code, .markdown-body pre tt {
492 margin: 0;
493 padding: 0;
494 background-color: transparent;
495 border: none;
496}
497</style>
498EOT
499 print "<div class='markdown-body'>";
500 print Markdown($text);
501 print "</div>";
502 }
503}
504
505
506
507sub Markdown {
508#
509# Main function. The order in which other subs are called here is
510# essential. Link and image substitutions need to happen before
511# _EscapeSpecialChars(), so that any *'s or _'s in the <a>
512# and <img> tags get encoded.
513#
514 my $text = shift;
515
516 # Clear the global hashes. If we don't clear these, you get conflicts
517 # from other articles when generating a page which contains more than
518 # one article (e.g. an index page that shows the N most recent
519 # articles):
520 %g_urls = ();
521 %g_titles = ();
522 %g_html_blocks = ();
523
524
525 # Standardize line endings:
526 $text =~ s{\r\n}{\n}g; # DOS to Unix
527 $text =~ s{\r}{\n}g; # Mac to Unix
528
529 # Make sure $text ends with a couple of newlines:
530 $text .= "\n\n";
531
532 # Convert all tabs to spaces.
533 $text = _Detab($text);
534
535 # Strip any lines consisting only of spaces and tabs.
536 # This makes subsequent regexen easier to write, because we can
537 # match consecutive blank lines with /\n+/ instead of something
538 # contorted like /[ \t]*\n+/ .
539 $text =~ s/^[ \t]+$//mg;
540
541 # Turn block-level HTML blocks into hash entries
542 $text = _HashHTMLBlocks($text);
543
544 # Strip link definitions, store in hashes.
545 $text = _StripLinkDefinitions($text);
546
547 $text = _RunBlockGamut($text);
548
549 $text = _UnescapeSpecialChars($text);
550
551 return $text . "\n";
552}
553
554
555sub _StripLinkDefinitions {
556#
557# Strips link definitions from text, stores the URLs and titles in
558# hash references.
559#
560 my $text = shift;
561 my $less_than_tab = $g_tab_width - 1;
562
563 # Link defs are in the form: ^[id]: url "optional title"
564 while ($text =~ s{
565 ^[ ]{0,$less_than_tab}\[(.+)\]: # id = $1
566 [ \t]*
567 \n? # maybe *one* newline
568 [ \t]*
569 <?(\S+?)>? # url = $2
570 [ \t]*
571 \n? # maybe one newline
572 [ \t]*
573 (?:
574 (?<=\s) # lookbehind for whitespace
575 ["(]
576 (.+?) # title = $3
577 [")]
578 [ \t]*
579 )? # title is optional
580 (?:\n+|\Z)
581 }
582 {}mx) {
583 $g_urls{lc $1} = _EncodeAmpsAndAngles( $2 ); # Link IDs are case-insensitive
584 if ($3) {
585 $g_titles{lc $1} = $3;
586 $g_titles{lc $1} =~ s/"/&quot;/g;
587 }
588 }
589
590 return $text;
591}
592
593
594sub _HashHTMLBlocks {
595 my $text = shift;
596 my $less_than_tab = $g_tab_width - 1;
597
598 # Hashify HTML blocks:
599 # We only want to do this for block-level HTML tags, such as headers,
600 # lists, and tables. That's because we still want to wrap <p>s around
601 # "paragraphs" that are wrapped in non-block-level tags, such as anchors,
602 # phrase emphasis, and spans. The list of tags we're looking for is
603 # hard-coded:
604 my $block_tags_a = qr/p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del/;
605 my $block_tags_b = qr/p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math/;
606
607 # First, look for nested blocks, e.g.:
608 # <div>
609 # <div>
610 # tags for inner block must be indented.
611 # </div>
612 # </div>
613 #
614 # The outermost tags must start at the left margin for this to match, and
615 # the inner nested divs must be indented.
616 # We need to do this before the next, more liberal match, because the next
617 # match will start at the first `<div>` and stop at the first `</div>`.
618 $text =~ s{
619 ( # save in $1
620 ^ # start of line (with /m)
621 <($block_tags_a) # start tag = $2
622 \b # word break
623 (.*\n)*? # any number of lines, minimally matching
624 </\2> # the matching end tag
625 [ \t]* # trailing spaces/tabs
626 (?=\n+|\Z) # followed by a newline or end of document
627 )
628 }{
629 my $key = md5_hex($1);
630 $g_html_blocks{$key} = $1;
631 "\n\n" . $key . "\n\n";
632 }egmx;
633
634
635 #
636 # Now match more liberally, simply from `\n<tag>` to `</tag>\n`
637 #
638 $text =~ s{
639 ( # save in $1
640 ^ # start of line (with /m)
641 <($block_tags_b) # start tag = $2
642 \b # word break
643 (.*\n)*? # any number of lines, minimally matching
644 .*</\2> # the matching end tag
645 [ \t]* # trailing spaces/tabs
646 (?=\n+|\Z) # followed by a newline or end of document
647 )
648 }{
649 my $key = md5_hex($1);
650 $g_html_blocks{$key} = $1;
651 "\n\n" . $key . "\n\n";
652 }egmx;
653 # Special case just for <hr />. It was easier to make a special case than
654 # to make the other regex more complicated.
655 $text =~ s{
656 (?:
657 (?<=\n\n) # Starting after a blank line
658 | # or
659 \A\n? # the beginning of the doc
660 )
661 ( # save in $1
662 [ ]{0,$less_than_tab}
663 <(hr) # start tag = $2
664 \b # word break
665 ([^<>])*? #
666 /?> # the matching end tag
667 [ \t]*
668 (?=\n{2,}|\Z) # followed by a blank line or end of document
669 )
670 }{
671 my $key = md5_hex($1);
672 $g_html_blocks{$key} = $1;
673 "\n\n" . $key . "\n\n";
674 }egx;
675
676 # Special case for standalone HTML comments:
677 $text =~ s{
678 (?:
679 (?<=\n\n) # Starting after a blank line
680 | # or
681 \A\n? # the beginning of the doc
682 )
683 ( # save in $1
684 [ ]{0,$less_than_tab}
685 (?s:
686 <!
687 (--.*?--\s*)+
688 >
689 )
690 [ \t]*
691 (?=\n{2,}|\Z) # followed by a blank line or end of document
692 )
693 }{
694 my $key = md5_hex($1);
695 $g_html_blocks{$key} = $1;
696 "\n\n" . $key . "\n\n";
697 }egx;
698
699
700 return $text;
701}
702
703
704sub _RunBlockGamut {
705#
706# These are all the transformations that form block-level
707# tags like paragraphs, headers, and list items.
708#
709 my $text = shift;
710
711 $text = _DoHeaders($text);
712
713 # Do Horizontal Rules:
714 $text =~ s{^[ ]{0,2}([ ]?\*[ ]?){3,}[ \t]*$}{\n<hr$g_empty_element_suffix\n}gmx;
715 $text =~ s{^[ ]{0,2}([ ]? -[ ]?){3,}[ \t]*$}{\n<hr$g_empty_element_suffix\n}gmx;
716 $text =~ s{^[ ]{0,2}([ ]? _[ ]?){3,}[ \t]*$}{\n<hr$g_empty_element_suffix\n}gmx;
717
718 $text = _DoLists($text);
719
720 $text = _DoCodeBlocks($text);
721
722 $text = _DoBlockQuotes($text);
723
724 # We already ran _HashHTMLBlocks() before, in Markdown(), but that
725 # was to escape raw HTML in the original Markdown source. This time,
726 # we're escaping the markup we've just created, so that we don't wrap
727 # <p> tags around block-level tags.
728 $text = _HashHTMLBlocks($text);
729
730 $text = _FormParagraphs($text);
731
732 return $text;
733}
734
735
736sub _RunSpanGamut {
737#
738# These are all the transformations that occur *within* block-level
739# tags like paragraphs, headers, and list items.
740#
741 my $text = shift;
742
743 $text = _DoCodeSpans($text);
744
745 $text = _EscapeSpecialChars($text);
746
747 # Process anchor and image tags. Images must come first,
748 # because ![foo][f] looks like an anchor.
749 $text = _DoImages($text);
750 $text = _DoAnchors($text);
751
752 # Make links out of things like `<http://example.com/>`
753 # Must come after _DoAnchors(), because you can use < and >
754 # delimiters in inline links like [this](<url>).
755 $text = _DoAutoLinks($text);
756
757 $text = _EncodeAmpsAndAngles($text);
758
759 $text = _DoItalicsAndBold($text);
760
761 # Do hard breaks:
762 $text =~ s/ {2,}\n/ <br$g_empty_element_suffix\n/g;
763
764 return $text;
765}
766
767
768sub _EscapeSpecialChars {
769 my $text = shift;
770 my $tokens ||= _TokenizeHTML($text);
771
772 $text = ''; # rebuild $text from the tokens
773# my $in_pre = 0; # Keep track of when we're inside <pre> or <code> tags.
774# my $tags_to_skip = qr!<(/?)(?:pre|code|kbd|script|math)[\s>]!;
775
776 foreach my $cur_token (@$tokens) {
777 if ($cur_token->[0] eq "tag") {
778 # Within tags, encode * and _ so they don't conflict
779 # with their use in Markdown for italics and strong.
780 # We're replacing each such character with its
781 # corresponding MD5 checksum value; this is likely
782 # overkill, but it should prevent us from colliding
783 # with the escape values by accident.
784 $cur_token->[1] =~ s! \* !$g_escape_table{'*'}!gx;
785 $cur_token->[1] =~ s! _ !$g_escape_table{'_'}!gx;
786 $text .= $cur_token->[1];
787 } else {
788 my $t = $cur_token->[1];
789 $t = _EncodeBackslashEscapes($t);
790 $text .= $t;
791 }
792 }
793 return $text;
794}
795
796
797sub _DoAnchors {
798#
799# Turn Markdown link shortcuts into XHTML <a> tags.
800#
801 my $text = shift;
802
803 #
804 # First, handle reference-style links: [link text] [id]
805 #
806 $text =~ s{
807 ( # wrap whole match in $1
808 \[
809 ($g_nested_brackets) # link text = $2
810 \]
811
812 [ ]? # one optional space
813 (?:\n[ ]*)? # one optional newline followed by spaces
814
815 \[
816 (.*?) # id = $3
817 \]
818 )
819 }{
820 my $result;
821 my $whole_match = $1;
822 my $link_text = $2;
823 my $link_id = lc $3;
824
825 if ($link_id eq "") {
826 $link_id = lc $link_text; # for shortcut links like [this][].
827 }
828
829 if (defined $g_urls{$link_id}) {
830 my $url = $g_urls{$link_id};
831 $url =~ s! \* !$g_escape_table{'*'}!gx; # We've got to encode these to avoid
832 $url =~ s! _ !$g_escape_table{'_'}!gx; # conflicting with italics/bold.
833 $result = "<a href=\"$url\"";
834 if ( defined $g_titles{$link_id} ) {
835 my $title = $g_titles{$link_id};
836 $title =~ s! \* !$g_escape_table{'*'}!gx;
837 $title =~ s! _ !$g_escape_table{'_'}!gx;
838 $result .= " title=\"$title\"";
839 }
840 $result .= ">$link_text</a>";
841 }
842 else {
843 $result = $whole_match;
844 }
845 $result;
846 }xsge;
847
848 #
849 # Next, inline-style links: [link text](url "optional title")
850 #
851 $text =~ s{
852 ( # wrap whole match in $1
853 \[
854 ($g_nested_brackets) # link text = $2
855 \]
856 \( # literal paren
857 [ \t]*
858 <?(.*?)>? # href = $3
859 [ \t]*
860 ( # $4
861 (['"]) # quote char = $5
862 (.*?) # Title = $6
863 \5 # matching quote
864 )? # title is optional
865 \)
866 )
867 }{
868 my $result;
869 my $whole_match = $1;
870 my $link_text = $2;
871 my $url = $3;
872 my $title = $6;
873
874 $url =~ s! \* !$g_escape_table{'*'}!gx; # We've got to encode these to avoid
875 $url =~ s! _ !$g_escape_table{'_'}!gx; # conflicting with italics/bold.
876 $result = "<a href=\"$url\"";
877
878 if (defined $title) {
879 $title =~ s/"/&quot;/g;
880 $title =~ s! \* !$g_escape_table{'*'}!gx;
881 $title =~ s! _ !$g_escape_table{'_'}!gx;
882 $result .= " title=\"$title\"";
883 }
884
885 $result .= ">$link_text</a>";
886
887 $result;
888 }xsge;
889
890 return $text;
891}
892
893
894sub _DoImages {
895#
896# Turn Markdown image shortcuts into <img> tags.
897#
898 my $text = shift;
899
900 #
901 # First, handle reference-style labeled images: ![alt text][id]
902 #
903 $text =~ s{
904 ( # wrap whole match in $1
905 !\[
906 (.*?) # alt text = $2
907 \]
908
909 [ ]? # one optional space
910 (?:\n[ ]*)? # one optional newline followed by spaces
911
912 \[
913 (.*?) # id = $3
914 \]
915
916 )
917 }{
918 my $result;
919 my $whole_match = $1;
920 my $alt_text = $2;
921 my $link_id = lc $3;
922
923 if ($link_id eq "") {
924 $link_id = lc $alt_text; # for shortcut links like ![this][].
925 }
926
927 $alt_text =~ s/"/&quot;/g;
928 if (defined $g_urls{$link_id}) {
929 my $url = $g_urls{$link_id};
930 $url =~ s! \* !$g_escape_table{'*'}!gx; # We've got to encode these to avoid
931 $url =~ s! _ !$g_escape_table{'_'}!gx; # conflicting with italics/bold.
932 $result = "<img src=\"$url\" alt=\"$alt_text\"";
933 if (defined $g_titles{$link_id}) {
934 my $title = $g_titles{$link_id};
935 $title =~ s! \* !$g_escape_table{'*'}!gx;
936 $title =~ s! _ !$g_escape_table{'_'}!gx;
937 $result .= " title=\"$title\"";
938 }
939 $result .= $g_empty_element_suffix;
940 }
941 else {
942 # If there's no such link ID, leave intact:
943 $result = $whole_match;
944 }
945
946 $result;
947 }xsge;
948
949 #
950 # Next, handle inline images: ![alt text](url "optional title")
951 # Don't forget: encode * and _
952
953 $text =~ s{
954 ( # wrap whole match in $1
955 !\[
956 (.*?) # alt text = $2
957 \]
958 \( # literal paren
959 [ \t]*
960 <?(\S+?)>? # src url = $3
961 [ \t]*
962 ( # $4
963 (['"]) # quote char = $5
964 (.*?) # title = $6
965 \5 # matching quote
966 [ \t]*
967 )? # title is optional
968 \)
969 )
970 }{
971 my $result;
972 my $whole_match = $1;
973 my $alt_text = $2;
974 my $url = $3;
975 my $title = '';
976 if (defined($6)) {
977 $title = $6;
978 }
979
980 $alt_text =~ s/"/&quot;/g;
981 $title =~ s/"/&quot;/g;
982 $url =~ s! \* !$g_escape_table{'*'}!gx; # We've got to encode these to avoid
983 $url =~ s! _ !$g_escape_table{'_'}!gx; # conflicting with italics/bold.
984 $result = "<img src=\"$url\" alt=\"$alt_text\"";
985 if (defined $title) {
986 $title =~ s! \* !$g_escape_table{'*'}!gx;
987 $title =~ s! _ !$g_escape_table{'_'}!gx;
988 $result .= " title=\"$title\"";
989 }
990 $result .= $g_empty_element_suffix;
991
992 $result;
993 }xsge;
994
995 return $text;
996}
997
998
999sub _DoHeaders {
1000 my $text = shift;
1001
1002 # Setext-style headers:
1003 # Header 1
1004 # ========
1005 #
1006 # Header 2
1007 # --------
1008 #
1009 $text =~ s{ ^(.+)[ \t]*\n=+[ \t]*\n+ }{
1010 "<h1>" . _RunSpanGamut($1) . "</h1>\n\n";
1011 }egmx;
1012
1013 $text =~ s{ ^(.+)[ \t]*\n-+[ \t]*\n+ }{
1014 "<h2>" . _RunSpanGamut($1) . "</h2>\n\n";
1015 }egmx;
1016
1017
1018 # atx-style headers:
1019 # # Header 1
1020 # ## Header 2
1021 # ## Header 2 with closing hashes ##
1022 # ...
1023 # ###### Header 6
1024 #
1025 $text =~ s{
1026 ^(\#{1,6}) # $1 = string of #'s
1027 [ \t]*
1028 (.+?) # $2 = Header text
1029 [ \t]*
1030 \#* # optional closing #'s (not counted)
1031 \n+
1032 }{
1033 my $h_level = length($1);
1034 "<h$h_level>" . _RunSpanGamut($2) . "</h$h_level>\n\n";
1035 }egmx;
1036
1037 return $text;
1038}
1039
1040
1041sub _DoLists {
1042#
1043# Form HTML ordered (numbered) and unordered (bulleted) lists.
1044#
1045 my $text = shift;
1046 my $less_than_tab = $g_tab_width - 1;
1047
1048 # Re-usable patterns to match list item bullets and number markers:
1049 my $marker_ul = qr/[*+-]/;
1050 my $marker_ol = qr/\d+[.]/;
1051 my $marker_any = qr/(?:$marker_ul|$marker_ol)/;
1052
1053 # Re-usable pattern to match any entirel ul or ol list:
1054 my $whole_list = qr{
1055 ( # $1 = whole list
1056 ( # $2
1057 [ ]{0,$less_than_tab}
1058 (${marker_any}) # $3 = first list item marker
1059 [ \t]+
1060 )
1061 (?s:.+?)
1062 ( # $4
1063 \z
1064 |
1065 \n{2,}
1066 (?=\S)
1067 (?! # Negative lookahead for another list item marker
1068 [ \t]*
1069 ${marker_any}[ \t]+
1070 )
1071 )
1072 )
1073 }mx;
1074
1075 # We use a different prefix before nested lists than top-level lists.
1076 # See extended comment in _ProcessListItems().
1077 #
1078 # Note: There's a bit of duplication here. My original implementation
1079 # created a scalar regex pattern as the conditional result of the test on
1080 # $g_list_level, and then only ran the $text =~ s{...}{...}egmx
1081 # substitution once, using the scalar as the pattern. This worked,
1082 # everywhere except when running under MT on my hosting account at Pair
1083 # Networks. There, this caused all rebuilds to be killed by the reaper (or
1084 # perhaps they crashed, but that seems incredibly unlikely given that the
1085 # same script on the same server ran fine *except* under MT. I've spent
1086 # more time trying to figure out why this is happening than I'd like to
1087 # admit. My only guess, backed up by the fact that this workaround works,
1088 # is that Perl optimizes the substition when it can figure out that the
1089 # pattern will never change, and when this optimization isn't on, we run
1090 # afoul of the reaper. Thus, the slightly redundant code to that uses two
1091 # static s/// patterns rather than one conditional pattern.
1092
1093 if ($g_list_level) {
1094 $text =~ s{
1095 ^
1096 $whole_list
1097 }{
1098 my $list = $1;
1099 my $list_type = ($3 =~ m/$marker_ul/) ? "ul" : "ol";
1100 # Turn double returns into triple returns, so that we can make a
1101 # paragraph for the last item in a list, if necessary:
1102 $list =~ s/\n{2,}/\n\n\n/g;
1103 my $result = _ProcessListItems($list, $marker_any);
1104 $result = "<$list_type>\n" . $result . "</$list_type>\n";
1105 $result;
1106 }egmx;
1107 }
1108 else {
1109 $text =~ s{
1110 (?:(?<=\n\n)|\A\n?)
1111 $whole_list
1112 }{
1113 my $list = $1;
1114 my $list_type = ($3 =~ m/$marker_ul/) ? "ul" : "ol";
1115 # Turn double returns into triple returns, so that we can make a
1116 # paragraph for the last item in a list, if necessary:
1117 $list =~ s/\n{2,}/\n\n\n/g;
1118 my $result = _ProcessListItems($list, $marker_any);
1119 $result = "<$list_type>\n" . $result . "</$list_type>\n";
1120 $result;
1121 }egmx;
1122 }
1123
1124
1125 return $text;
1126}
1127
1128
1129sub _ProcessListItems {
1130#
1131# Process the contents of a single ordered or unordered list, splitting it
1132# into individual list items.
1133#
1134
1135 my $list_str = shift;
1136 my $marker_any = shift;
1137
1138
1139 # The $g_list_level global keeps track of when we're inside a list.
1140 # Each time we enter a list, we increment it; when we leave a list,
1141 # we decrement. If it's zero, we're not in a list anymore.
1142 #
1143 # We do this because when we're not inside a list, we want to treat
1144 # something like this:
1145 #
1146 # I recommend upgrading to version
1147 # 8. Oops, now this line is treated
1148 # as a sub-list.
1149 #
1150 # As a single paragraph, despite the fact that the second line starts
1151 # with a digit-period-space sequence.
1152 #
1153 # Whereas when we're inside a list (or sub-list), that line will be
1154 # treated as the start of a sub-list. What a kludge, huh? This is
1155 # an aspect of Markdown's syntax that's hard to parse perfectly
1156 # without resorting to mind-reading. Perhaps the solution is to
1157 # change the syntax rules such that sub-lists must start with a
1158 # starting cardinal number; e.g. "1." or "a.".
1159
1160 $g_list_level++;
1161
1162 # trim trailing blank lines:
1163 $list_str =~ s/\n{2,}\z/\n/;
1164
1165
1166 $list_str =~ s{
1167 (\n)? # leading line = $1
1168 (^[ \t]*) # leading whitespace = $2
1169 ($marker_any) [ \t]+ # list marker = $3
1170 ((?s:.+?) # list item text = $4
1171 (\n{1,2}))
1172 (?= \n* (\z | \2 ($marker_any) [ \t]+))
1173 }{
1174 my $item = $4;
1175 my $leading_line = $1;
1176 my $leading_space = $2;
1177
1178 if ($leading_line or ($item =~ m/\n{2,}/)) {
1179 $item = _RunBlockGamut(_Outdent($item));
1180 }
1181 else {
1182 # Recursion for sub-lists:
1183 $item = _DoLists(_Outdent($item));
1184 chomp $item;
1185 $item = _RunSpanGamut($item);
1186 }
1187
1188 "<li>" . $item . "</li>\n";
1189 }egmx;
1190
1191 $g_list_level--;
1192 return $list_str;
1193}
1194
1195
1196
1197sub _DoCodeBlocks {
1198#
1199# Process Markdown `<pre><code>` blocks.
1200#
1201
1202 my $text = shift;
1203
1204 $text =~ s{
1205 (?:\n\n|\A)
1206 ( # $1 = the code block -- one or more lines, starting with a space/tab
1207 (?:
1208 (?:[ ]{$g_tab_width} | \t) # Lines must start with a tab or a tab-width of spaces
1209 .*\n+
1210 )+
1211 )
1212 ((?=^[ ]{0,$g_tab_width}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
1213 }{
1214 my $codeblock = $1;
1215 my $result; # return value
1216
1217 $codeblock = _EncodeCode(_Outdent($codeblock));
1218 $codeblock = _Detab($codeblock);
1219 $codeblock =~ s/\A\n+//; # trim leading newlines
1220 $codeblock =~ s/\s+\z//; # trim trailing whitespace
1221
1222 $result = "\n\n<pre><code>" . $codeblock . "\n</code></pre>\n\n";
1223
1224 $result;
1225 }egmx;
1226
1227 return $text;
1228}
1229
1230
1231sub _DoCodeSpans {
1232#
1233# * Backtick quotes are used for <code></code> spans.
1234#
1235# * You can use multiple backticks as the delimiters if you want to
1236# include literal backticks in the code span. So, this input:
1237#
1238# Just type ``foo `bar` baz`` at the prompt.
1239#
1240# Will translate to:
1241#
1242# <p>Just type <code>foo `bar` baz</code> at the prompt.</p>
1243#
1244# There's no arbitrary limit to the number of backticks you
1245# can use as delimters. If you need three consecutive backticks
1246# in your code, use four for delimiters, etc.
1247#
1248# * You can use spaces to get literal backticks at the edges:
1249#
1250# ... type `` `bar` `` ...
1251#
1252# Turns to:
1253#
1254# ... type <code>`bar`</code> ...
1255#
1256
1257 my $text = shift;
1258
1259 $text =~ s@
1260 (`+) # $1 = Opening run of `
1261 (.+?) # $2 = The code block
1262 (?<!`)
1263 \1 # Matching closer
1264 (?!`)
1265 @
1266 my $c = "$2";
1267 $c =~ s/^[ \t]*//g; # leading whitespace
1268 $c =~ s/[ \t]*$//g; # trailing whitespace
1269 $c = _EncodeCode($c);
1270 "<code>$c</code>";
1271 @egsx;
1272
1273 return $text;
1274}
1275
1276
1277sub _EncodeCode {
1278#
1279# Encode/escape certain characters inside Markdown code runs.
1280# The point is that in code, these characters are literals,
1281# and lose their special Markdown meanings.
1282#
1283 local $_ = shift;
1284
1285 # Encode all ampersands; HTML entities are not
1286 # entities within a Markdown code span.
1287 s/&/&amp;/g;
1288
1289 # Encode $'s, but only if we're running under Blosxom.
1290 # (Blosxom interpolates Perl variables in article bodies.)
1291 {
1292 no warnings 'once';
1293 if (defined($blosxom::version)) {
1294 s/\$/&#036;/g;
1295 }
1296 }
1297
1298
1299 # Do the angle bracket song and dance:
1300 s! < !&lt;!gx;
1301 s! > !&gt;!gx;
1302
1303 # Now, escape characters that are magic in Markdown:
1304 s! \* !$g_escape_table{'*'}!gx;
1305 s! _ !$g_escape_table{'_'}!gx;
1306 s! { !$g_escape_table{'{'}!gx;
1307 s! } !$g_escape_table{'}'}!gx;
1308 s! \[ !$g_escape_table{'['}!gx;
1309 s! \] !$g_escape_table{']'}!gx;
1310 s! \\ !$g_escape_table{'\\'}!gx;
1311
1312 return $_;
1313}
1314
1315
1316sub _DoItalicsAndBold {
1317 my $text = shift;
1318
1319 # <strong> must go first:
1320 $text =~ s{ (\*\*|__) (?=\S) (.+?[*_]*) (?<=\S) \1 }
1321 {<strong>$2</strong>}gsx;
1322
1323 $text =~ s{ (\*|_) (?=\S) (.+?) (?<=\S) \1 }
1324 {<em>$2</em>}gsx;
1325
1326 return $text;
1327}
1328
1329
1330sub _DoBlockQuotes {
1331 my $text = shift;
1332
1333 $text =~ s{
1334 ( # Wrap whole match in $1
1335 (
1336 ^[ \t]*>[ \t]? # '>' at the start of a line
1337 .+\n # rest of the first line
1338 (.+\n)* # subsequent consecutive lines
1339 \n* # blanks
1340 )+
1341 )
1342 }{
1343 my $bq = $1;
1344 $bq =~ s/^[ \t]*>[ \t]?//gm; # trim one level of quoting
1345 $bq =~ s/^[ \t]+$//mg; # trim whitespace-only lines
1346 $bq = _RunBlockGamut($bq); # recurse
1347
1348 $bq =~ s/^/ /g;
1349 # These leading spaces screw with <pre> content, so we need to fix that:
1350 $bq =~ s{
1351 (\s*<pre>.+?</pre>)
1352 }{
1353 my $pre = $1;
1354 $pre =~ s/^ //mg;
1355 $pre;
1356 }egsx;
1357
1358 "<blockquote>\n$bq\n</blockquote>\n\n";
1359 }egmx;
1360
1361
1362 return $text;
1363}
1364
1365
1366sub _FormParagraphs {
1367#
1368# Params:
1369# $text - string to process with html <p> tags
1370#
1371 my $text = shift;
1372
1373 # Strip leading and trailing lines:
1374 $text =~ s/\A\n+//;
1375 $text =~ s/\n+\z//;
1376
1377 my @grafs = split(/\n{2,}/, $text);
1378
1379 #
1380 # Wrap <p> tags.
1381 #
1382 foreach (@grafs) {
1383 unless (defined( $g_html_blocks{$_} )) {
1384 $_ = _RunSpanGamut($_);
1385 s/^([ \t]*)/<p>/;
1386 $_ .= "</p>";
1387 }
1388 }
1389
1390 #
1391 # Unhashify HTML blocks
1392 #
1393 foreach (@grafs) {
1394 if (defined( $g_html_blocks{$_} )) {
1395 $_ = $g_html_blocks{$_};
1396 }
1397 }
1398
1399 return join "\n\n", @grafs;
1400}
1401
1402
1403sub _EncodeAmpsAndAngles {
1404# Smart processing for ampersands and angle brackets that need to be encoded.
1405
1406 my $text = shift;
1407
1408 # Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin:
1409 # http://bumppo.net/projects/amputator/
1410 $text =~ s/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/&amp;/g;
1411
1412 # Encode naked <'s
1413 $text =~ s{<(?![a-z/?\$!])}{&lt;}gi;
1414
1415 return $text;
1416}
1417
1418
1419sub _EncodeBackslashEscapes {
1420#
1421# Parameter: String.
1422# Returns: The string, with after processing the following backslash
1423# escape sequences.
1424#
1425 local $_ = shift;
1426
1427 s! \\\\ !$g_escape_table{'\\'}!gx; # Must process escaped backslashes first.
1428 s! \\` !$g_escape_table{'`'}!gx;
1429 s! \\\* !$g_escape_table{'*'}!gx;
1430 s! \\_ !$g_escape_table{'_'}!gx;
1431 s! \\\{ !$g_escape_table{'{'}!gx;
1432 s! \\\} !$g_escape_table{'}'}!gx;
1433 s! \\\[ !$g_escape_table{'['}!gx;
1434 s! \\\] !$g_escape_table{']'}!gx;
1435 s! \\\( !$g_escape_table{'('}!gx;
1436 s! \\\) !$g_escape_table{')'}!gx;
1437 s! \\> !$g_escape_table{'>'}!gx;
1438 s! \\\# !$g_escape_table{'#'}!gx;
1439 s! \\\+ !$g_escape_table{'+'}!gx;
1440 s! \\\- !$g_escape_table{'-'}!gx;
1441 s! \\\. !$g_escape_table{'.'}!gx;
1442 s{ \\! }{$g_escape_table{'!'}}gx;
1443
1444 return $_;
1445}
1446
1447
1448sub _DoAutoLinks {
1449 my $text = shift;
1450
1451 $text =~ s{<((https?|ftp):[^'">\s]+)>}{<a href="$1">$1</a>}gi;
1452
1453 # Email addresses: <address@domain.foo>
1454 $text =~ s{
1455 <
1456 (?:mailto:)?
1457 (
1458 [-.\w]+
1459 \@
1460 [-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+
1461 )
1462 >
1463 }{
1464 _EncodeEmailAddress( _UnescapeSpecialChars($1) );
1465 }egix;
1466
1467 return $text;
1468}
1469
1470
1471sub _EncodeEmailAddress {
1472#
1473# Input: an email address, e.g. "foo@example.com"
1474#
1475# Output: the email address as a mailto link, with each character
1476# of the address encoded as either a decimal or hex entity, in
1477# the hopes of foiling most address harvesting spam bots. E.g.:
1478#
1479# <a href="&#x6D;&#97;&#105;&#108;&#x74;&#111;:&#102;&#111;&#111;&#64;&#101;
1480# x&#x61;&#109;&#x70;&#108;&#x65;&#x2E;&#99;&#111;&#109;">&#102;&#111;&#111;
1481# &#64;&#101;x&#x61;&#109;&#x70;&#108;&#x65;&#x2E;&#99;&#111;&#109;</a>
1482#
1483# Based on a filter by Matthew Wickline, posted to the BBEdit-Talk
1484# mailing list: <http://tinyurl.com/yu7ue>
1485#
1486
1487 my $addr = shift;
1488
1489 srand;
1490 my @encode = (
1491 sub { '&#' . ord(shift) . ';' },
1492 sub { '&#x' . sprintf( "%X", ord(shift) ) . ';' },
1493 sub { shift },
1494 );
1495
1496 $addr = "mailto:" . $addr;
1497
1498 $addr =~ s{(.)}{
1499 my $char = $1;
1500 if ( $char eq '@' ) {
1501 # this *must* be encoded. I insist.
1502 $char = $encode[int rand 1]->($char);
1503 } elsif ( $char ne ':' ) {
1504 # leave ':' alone (to spot mailto: later)
1505 my $r = rand;
1506 # roughly 10% raw, 45% hex, 45% dec
1507 $char = (
1508 $r > .9 ? $encode[2]->($char) :
1509 $r < .45 ? $encode[1]->($char) :
1510 $encode[0]->($char)
1511 );
1512 }
1513 $char;
1514 }gex;
1515
1516 $addr = qq{<a href="$addr">$addr</a>};
1517 $addr =~ s{">.+?:}{">}; # strip the mailto: from the visible part
1518
1519 return $addr;
1520}
1521
1522
1523sub _UnescapeSpecialChars {
1524#
1525# Swap back in all the special characters we've hidden.
1526#
1527 my $text = shift;
1528
1529 while( my($char, $hash) = each(%g_escape_table) ) {
1530 $text =~ s/$hash/$char/g;
1531 }
1532 return $text;
1533}
1534
1535
1536sub _TokenizeHTML {
1537#
1538# Parameter: String containing HTML markup.
1539# Returns: Reference to an array of the tokens comprising the input
1540# string. Each token is either a tag (possibly with nested,
1541# tags contained therein, such as <a href="<MTFoo>">, or a
1542# run of text between tags. Each element of the array is a
1543# two-element array; the first is either 'tag' or 'text';
1544# the second is the actual value.
1545#
1546#
1547# Derived from the _tokenize() subroutine from Brad Choate's MTRegex plugin.
1548# <http://www.bradchoate.com/past/mtregex.php>
1549#
1550
1551 my $str = shift;
1552 my $pos = 0;
1553 my $len = length $str;
1554 my @tokens;
1555
1556 my $depth = 6;
1557 my $nested_tags = join('|', ('(?:<[a-z/!$](?:[^<>]') x $depth) . (')*>)' x $depth);
1558 my $match = qr/(?s: <! ( -- .*? -- \s* )+ > ) | # comment
1559 (?s: <\? .*? \?> ) | # processing instruction
1560 $nested_tags/ix; # nested tags
1561
1562 while ($str =~ m/($match)/g) {
1563 my $whole_tag = $1;
1564 my $sec_start = pos $str;
1565 my $tag_start = $sec_start - length $whole_tag;
1566 if ($pos < $tag_start) {
1567 push @tokens, ['text', substr($str, $pos, $tag_start - $pos)];
1568 }
1569 push @tokens, ['tag', $whole_tag];
1570 $pos = pos $str;
1571 }
1572 push @tokens, ['text', substr($str, $pos, $len - $pos)] if $pos < $len;
1573 \@tokens;
1574}
1575
1576
1577sub _Outdent {
1578#
1579# Remove one level of line-leading tabs or spaces
1580#
1581 my $text = shift;
1582
1583 $text =~ s/^(\t|[ ]{1,$g_tab_width})//gm;
1584 return $text;
1585}
1586
1587
1588sub _Detab {
1589#
1590# Cribbed from a post by Bart Lateur:
1591# <http://www.nntp.perl.org/group/perl.macperl.anyperl/154>
1592#
1593 my $text = shift;
1594
1595 $text =~ s{(.*?)\t}{$1.(' ' x ($g_tab_width - length($1) % $g_tab_width))}ge;
1596 return $text;
1597}
1598
1599
16001;
1601
1602__END__
1603
1604
1605=pod
1606
1607=head1 NAME
1608
1609B<Markdown>
1610
1611
1612=head1 SYNOPSIS
1613
1614B<Markdown.pl> [ B<--html4tags> ] [ B<--version> ] [ B<-shortversion> ]
1615 [ I<file> ... ]
1616
1617
1618=head1 DESCRIPTION
1619
1620Markdown is a text-to-HTML filter; it translates an easy-to-read /
1621easy-to-write structured text format into HTML. Markdown's text format
1622is most similar to that of plain text email, and supports features such
1623as headers, *emphasis*, code blocks, blockquotes, and links.
1624
1625Markdown's syntax is designed not as a generic markup language, but
1626specifically to serve as a front-end to (X)HTML. You can use span-level
1627HTML tags anywhere in a Markdown document, and you can use block level
1628HTML tags (like <div> and <table> as well).
1629
1630For more information about Markdown's syntax, see:
1631
1632 http://daringfireball.net/projects/markdown/
1633
1634
1635=head1 OPTIONS
1636
1637Use "--" to end switch parsing. For example, to open a file named "-z", use:
1638
1639 Markdown.pl -- -z
1640
1641=over 4
1642
1643
1644=item B<--html4tags>
1645
1646Use HTML 4 style for empty element tags, e.g.:
1647
1648 <br>
1649
1650instead of Markdown's default XHTML style tags, e.g.:
1651
1652 <br />
1653
1654
1655=item B<-v>, B<--version>
1656
1657Display Markdown's version number and copyright information.
1658
1659
1660=item B<-s>, B<--shortversion>
1661
1662Display the short-form version number.
1663
1664
1665=back
1666
1667
1668
1669=head1 BUGS
1670
1671To file bug reports or feature requests (other than topics listed in the
1672Caveats section above) please send email to:
1673
1674 support@daringfireball.net
1675
1676Please include with your report: (1) the example input; (2) the output
1677you expected; (3) the output Markdown actually produced.
1678
1679
1680=head1 VERSION HISTORY
1681
1682See the readme file for detailed release notes for this version.
1683
16841.0.1 - 14 Dec 2004
1685
16861.0 - 28 Aug 2004
1687
1688
1689=head1 AUTHOR
1690
1691 John Gruber
1692 http://daringfireball.net
1693
1694 PHP port and other contributions by Michel Fortin
1695 http://michelf.com
1696
1697
1698=head1 COPYRIGHT AND LICENSE
1699
1700Copyright (c) 2003-2004 John Gruber
1701<http://daringfireball.net/>
1702All rights reserved.
1703
1704Redistribution and use in source and binary forms, with or without
1705modification, are permitted provided that the following conditions are
1706met:
1707
1708* Redistributions of source code must retain the above copyright notice,
1709 this list of conditions and the following disclaimer.
1710
1711* Redistributions in binary form must reproduce the above copyright
1712 notice, this list of conditions and the following disclaimer in the
1713 documentation and/or other materials provided with the distribution.
1714
1715* Neither the name "Markdown" nor the names of its contributors may
1716 be used to endorse or promote products derived from this software
1717 without specific prior written permission.
1718
1719This software is provided by the copyright holders and contributors "as
1720is" and any express or implied warranties, including, but not limited
1721to, the implied warranties of merchantability and fitness for a
1722particular purpose are disclaimed. In no event shall the copyright owner
1723or contributors be liable for any direct, indirect, incidental, special,
1724exemplary, or consequential damages (including, but not limited to,
1725procurement of substitute goods or services; loss of use, data, or
1726profits; or business interruption) however caused and on any theory of
1727liability, whether in contract, strict liability, or tort (including
1728negligence or otherwise) arising in any way out of the use of this
1729software, even if advised of the possibility of such damage.
1730
1731=cut
diff --git a/filters/html-converters/resources/rst-template.txt b/filters/html-converters/resources/rst-template.txt new file mode 100644 index 0000000..43cde42 --- /dev/null +++ b/filters/html-converters/resources/rst-template.txt
@@ -0,0 +1,4 @@
1%(stylesheet)s
2%(body_pre_docinfo)s
3%(docinfo)s
4%(body)s
diff --git a/filters/html-converters/rst2html b/filters/html-converters/rst2html new file mode 100755 index 0000000..c51f5be --- /dev/null +++ b/filters/html-converters/rst2html
@@ -0,0 +1,2 @@
1#!/bin/sh
2rst2html.py --template="$(dirname $0)/resources/rst-template.txt"
diff --git a/filters/html-converters/txt2html b/filters/html-converters/txt2html new file mode 100755 index 0000000..a795995 --- /dev/null +++ b/filters/html-converters/txt2html
@@ -0,0 +1,4 @@
1#!/bin/sh
2echo "<pre>"
3cat
4echo "</pre>"