diff options
Diffstat (limited to 'filters/html-converters')
-rwxr-xr-x | filters/html-converters/md2html | 283 | ||||
-rwxr-xr-x | filters/html-converters/resources/markdown.pl | 1727 | ||||
-rw-r--r-- | filters/html-converters/resources/rst-template.txt | 4 | ||||
-rwxr-xr-x | filters/html-converters/rst2html | 4 |
4 files changed, 284 insertions, 1734 deletions
diff --git a/filters/html-converters/md2html b/filters/html-converters/md2html index 5cab749..138713d 100755 --- a/filters/html-converters/md2html +++ b/filters/html-converters/md2html | |||
@@ -1,2 +1,283 @@ | |||
1 | #!/bin/sh | 1 | #!/bin/sh |
2 | exec "$(dirname "$0")/resources/markdown.pl" | 2 | cat <<_EOF |
3 | <style> | ||
4 | .markdown-body { | ||
5 | font-size: 14px; | ||
6 | line-height: 1.6; | ||
7 | overflow: hidden; | ||
8 | } | ||
9 | .markdown-body>*:first-child { | ||
10 | margin-top: 0 !important; | ||
11 | } | ||
12 | .markdown-body>*:last-child { | ||
13 | margin-bottom: 0 !important; | ||
14 | } | ||
15 | .markdown-body a.absent { | ||
16 | color: #c00; | ||
17 | } | ||
18 | .markdown-body a.anchor { | ||
19 | display: block; | ||
20 | padding-left: 30px; | ||
21 | margin-left: -30px; | ||
22 | cursor: pointer; | ||
23 | position: absolute; | ||
24 | top: 0; | ||
25 | left: 0; | ||
26 | bottom: 0; | ||
27 | } | ||
28 | .markdown-body h1, .markdown-body h2, .markdown-body h3, .markdown-body h4, .markdown-body h5, .markdown-body h6 { | ||
29 | margin: 20px 0 10px; | ||
30 | padding: 0; | ||
31 | font-weight: bold; | ||
32 | -webkit-font-smoothing: antialiased; | ||
33 | cursor: text; | ||
34 | position: relative; | ||
35 | } | ||
36 | .markdown-body h1 .mini-icon-link, .markdown-body h2 .mini-icon-link, .markdown-body h3 .mini-icon-link, .markdown-body h4 .mini-icon-link, .markdown-body h5 .mini-icon-link, .markdown-body h6 .mini-icon-link { | ||
37 | display: none; | ||
38 | color: #000; | ||
39 | } | ||
40 | .markdown-body h1:hover a.anchor, .markdown-body h2:hover a.anchor, .markdown-body h3:hover a.anchor, .markdown-body h4:hover a.anchor, .markdown-body h5:hover a.anchor, .markdown-body h6:hover a.anchor { | ||
41 | text-decoration: none; | ||
42 | line-height: 1; | ||
43 | padding-left: 0; | ||
44 | margin-left: -22px; | ||
45 | top: 15%} | ||
46 | .markdown-body h1:hover a.anchor .mini-icon-link, .markdown-body h2:hover a.anchor .mini-icon-link, .markdown-body h3:hover a.anchor .mini-icon-link, .markdown-body h4:hover a.anchor .mini-icon-link, .markdown-body h5:hover a.anchor .mini-icon-link, .markdown-body h6:hover a.anchor .mini-icon-link { | ||
47 | display: inline-block; | ||
48 | } | ||
49 | .markdown-body h1 tt, .markdown-body h1 code, .markdown-body h2 tt, .markdown-body h2 code, .markdown-body h3 tt, .markdown-body h3 code, .markdown-body h4 tt, .markdown-body h4 code, .markdown-body h5 tt, .markdown-body h5 code, .markdown-body h6 tt, .markdown-body h6 code { | ||
50 | font-size: inherit; | ||
51 | } | ||
52 | .markdown-body h1 { | ||
53 | font-size: 28px; | ||
54 | color: #000; | ||
55 | } | ||
56 | .markdown-body h2 { | ||
57 | font-size: 24px; | ||
58 | border-bottom: 1px solid #ccc; | ||
59 | color: #000; | ||
60 | } | ||
61 | .markdown-body h3 { | ||
62 | font-size: 18px; | ||
63 | } | ||
64 | .markdown-body h4 { | ||
65 | font-size: 16px; | ||
66 | } | ||
67 | .markdown-body h5 { | ||
68 | font-size: 14px; | ||
69 | } | ||
70 | .markdown-body h6 { | ||
71 | color: #777; | ||
72 | font-size: 14px; | ||
73 | } | ||
74 | .markdown-body p, .markdown-body blockquote, .markdown-body ul, .markdown-body ol, .markdown-body dl, .markdown-body table, .markdown-body pre { | ||
75 | margin: 15px 0; | ||
76 | } | ||
77 | .markdown-body hr { | ||
78 | background: transparent url("/dirty-shade.png") repeat-x 0 0; | ||
79 | border: 0 none; | ||
80 | color: #ccc; | ||
81 | height: 4px; | ||
82 | padding: 0; | ||
83 | } | ||
84 | .markdown-body>h2:first-child, .markdown-body>h1:first-child, .markdown-body>h1:first-child+h2, .markdown-body>h3:first-child, .markdown-body>h4:first-child, .markdown-body>h5:first-child, .markdown-body>h6:first-child { | ||
85 | margin-top: 0; | ||
86 | padding-top: 0; | ||
87 | } | ||
88 | .markdown-body a:first-child h1, .markdown-body a:first-child h2, .markdown-body a:first-child h3, .markdown-body a:first-child h4, .markdown-body a:first-child h5, .markdown-body a:first-child h6 { | ||
89 | margin-top: 0; | ||
90 | padding-top: 0; | ||
91 | } | ||
92 | .markdown-body h1+p, .markdown-body h2+p, .markdown-body h3+p, .markdown-body h4+p, .markdown-body h5+p, .markdown-body h6+p { | ||
93 | margin-top: 0; | ||
94 | } | ||
95 | .markdown-body li p.first { | ||
96 | display: inline-block; | ||
97 | } | ||
98 | .markdown-body ul, .markdown-body ol { | ||
99 | padding-left: 30px; | ||
100 | } | ||
101 | .markdown-body ul.no-list, .markdown-body ol.no-list { | ||
102 | list-style-type: none; | ||
103 | padding: 0; | ||
104 | } | ||
105 | .markdown-body ul li>:first-child, .markdown-body ul li ul:first-of-type, .markdown-body ul li ol:first-of-type, .markdown-body ol li>:first-child, .markdown-body ol li ul:first-of-type, .markdown-body ol li ol:first-of-type { | ||
106 | margin-top: 0px; | ||
107 | } | ||
108 | .markdown-body ul li p:last-of-type, .markdown-body ol li p:last-of-type { | ||
109 | margin-bottom: 0; | ||
110 | } | ||
111 | .markdown-body ul ul, .markdown-body ul ol, .markdown-body ol ol, .markdown-body ol ul { | ||
112 | margin-bottom: 0; | ||
113 | } | ||
114 | .markdown-body dl { | ||
115 | padding: 0; | ||
116 | } | ||
117 | .markdown-body dl dt { | ||
118 | font-size: 14px; | ||
119 | font-weight: bold; | ||
120 | font-style: italic; | ||
121 | padding: 0; | ||
122 | margin: 15px 0 5px; | ||
123 | } | ||
124 | .markdown-body dl dt:first-child { | ||
125 | padding: 0; | ||
126 | } | ||
127 | .markdown-body dl dt>:first-child { | ||
128 | margin-top: 0px; | ||
129 | } | ||
130 | .markdown-body dl dt>:last-child { | ||
131 | margin-bottom: 0px; | ||
132 | } | ||
133 | .markdown-body dl dd { | ||
134 | margin: 0 0 15px; | ||
135 | padding: 0 15px; | ||
136 | } | ||
137 | .markdown-body dl dd>:first-child { | ||
138 | margin-top: 0px; | ||
139 | } | ||
140 | .markdown-body dl dd>:last-child { | ||
141 | margin-bottom: 0px; | ||
142 | } | ||
143 | .markdown-body blockquote { | ||
144 | border-left: 4px solid #DDD; | ||
145 | padding: 0 15px; | ||
146 | color: #777; | ||
147 | } | ||
148 | .markdown-body blockquote>:first-child { | ||
149 | margin-top: 0px; | ||
150 | } | ||
151 | .markdown-body blockquote>:last-child { | ||
152 | margin-bottom: 0px; | ||
153 | } | ||
154 | .markdown-body table th { | ||
155 | font-weight: bold; | ||
156 | } | ||
157 | .markdown-body table th, .markdown-body table td { | ||
158 | border: 1px solid #ccc; | ||
159 | padding: 6px 13px; | ||
160 | } | ||
161 | .markdown-body table tr { | ||
162 | border-top: 1px solid #ccc; | ||
163 | background-color: #fff; | ||
164 | } | ||
165 | .markdown-body table tr:nth-child(2n) { | ||
166 | background-color: #f8f8f8; | ||
167 | } | ||
168 | .markdown-body img { | ||
169 | max-width: 100%; | ||
170 | -moz-box-sizing: border-box; | ||
171 | box-sizing: border-box; | ||
172 | } | ||
173 | .markdown-body span.frame { | ||
174 | display: block; | ||
175 | overflow: hidden; | ||
176 | } | ||
177 | .markdown-body span.frame>span { | ||
178 | border: 1px solid #ddd; | ||
179 | display: block; | ||
180 | float: left; | ||
181 | overflow: hidden; | ||
182 | margin: 13px 0 0; | ||
183 | padding: 7px; | ||
184 | width: auto; | ||
185 | } | ||
186 | .markdown-body span.frame span img { | ||
187 | display: block; | ||
188 | float: left; | ||
189 | } | ||
190 | .markdown-body span.frame span span { | ||
191 | clear: both; | ||
192 | color: #333; | ||
193 | display: block; | ||
194 | padding: 5px 0 0; | ||
195 | } | ||
196 | .markdown-body span.align-center { | ||
197 | display: block; | ||
198 | overflow: hidden; | ||
199 | clear: both; | ||
200 | } | ||
201 | .markdown-body span.align-center>span { | ||
202 | display: block; | ||
203 | overflow: hidden; | ||
204 | margin: 13px auto 0; | ||
205 | text-align: center; | ||
206 | } | ||
207 | .markdown-body span.align-center span img { | ||
208 | margin: 0 auto; | ||
209 | text-align: center; | ||
210 | } | ||
211 | .markdown-body span.align-right { | ||
212 | display: block; | ||
213 | overflow: hidden; | ||
214 | clear: both; | ||
215 | } | ||
216 | .markdown-body span.align-right>span { | ||
217 | display: block; | ||
218 | overflow: hidden; | ||
219 | margin: 13px 0 0; | ||
220 | text-align: right; | ||
221 | } | ||
222 | .markdown-body span.align-right span img { | ||
223 | margin: 0; | ||
224 | text-align: right; | ||
225 | } | ||
226 | .markdown-body span.float-left { | ||
227 | display: block; | ||
228 | margin-right: 13px; | ||
229 | overflow: hidden; | ||
230 | float: left; | ||
231 | } | ||
232 | .markdown-body span.float-left span { | ||
233 | margin: 13px 0 0; | ||
234 | } | ||
235 | .markdown-body span.float-right { | ||
236 | display: block; | ||
237 | margin-left: 13px; | ||
238 | overflow: hidden; | ||
239 | float: right; | ||
240 | } | ||
241 | .markdown-body span.float-right>span { | ||
242 | display: block; | ||
243 | overflow: hidden; | ||
244 | margin: 13px auto 0; | ||
245 | text-align: right; | ||
246 | } | ||
247 | .markdown-body code, .markdown-body tt { | ||
248 | margin: 0 2px; | ||
249 | padding: 0px 5px; | ||
250 | border: 1px solid #eaeaea; | ||
251 | background-color: #f8f8f8; | ||
252 | border-radius: 3px; | ||
253 | } | ||
254 | .markdown-body code { | ||
255 | white-space: nowrap; | ||
256 | } | ||
257 | .markdown-body pre>code { | ||
258 | margin: 0; | ||
259 | padding: 0; | ||
260 | white-space: pre; | ||
261 | border: none; | ||
262 | background: transparent; | ||
263 | } | ||
264 | .markdown-body .highlight pre, .markdown-body pre { | ||
265 | background-color: #f8f8f8; | ||
266 | border: 1px solid #ccc; | ||
267 | font-size: 13px; | ||
268 | line-height: 19px; | ||
269 | overflow: auto; | ||
270 | padding: 6px 10px; | ||
271 | border-radius: 3px; | ||
272 | } | ||
273 | .markdown-body pre code, .markdown-body pre tt { | ||
274 | margin: 0; | ||
275 | padding: 0; | ||
276 | background-color: transparent; | ||
277 | border: none; | ||
278 | } | ||
279 | </style> | ||
280 | _EOF | ||
281 | echo "<div class='markdown-body'>" | ||
282 | markdown_py -o html5 | ||
283 | echo "</div>" | ||
diff --git a/filters/html-converters/resources/markdown.pl b/filters/html-converters/resources/markdown.pl deleted file mode 100755 index 4c39808..0000000 --- a/filters/html-converters/resources/markdown.pl +++ /dev/null | |||
@@ -1,1727 +0,0 @@ | |||
1 | #!/usr/bin/perl | ||
2 | |||
3 | # | ||
4 | # Markdown -- A text-to-HTML conversion tool for web writers | ||
5 | # | ||
6 | # Copyright (c) 2004 John Gruber | ||
7 | # <http://daringfireball.net/projects/markdown/> | ||
8 | # | ||
9 | |||
10 | |||
11 | package Markdown; | ||
12 | require 5.006_000; | ||
13 | use strict; | ||
14 | use warnings; | ||
15 | |||
16 | use Digest::MD5 qw(md5_hex); | ||
17 | use vars qw($VERSION); | ||
18 | $VERSION = '1.0.1'; | ||
19 | # Tue 14 Dec 2004 | ||
20 | |||
21 | |||
22 | # | ||
23 | # Global default settings: | ||
24 | # | ||
25 | my $g_empty_element_suffix = " />"; # Change to ">" for HTML output | ||
26 | my $g_tab_width = 4; | ||
27 | |||
28 | |||
29 | # | ||
30 | # Globals: | ||
31 | # | ||
32 | |||
33 | # Regex to match balanced [brackets]. See Friedl's | ||
34 | # "Mastering Regular Expressions", 2nd Ed., pp. 328-331. | ||
35 | my $g_nested_brackets; | ||
36 | $g_nested_brackets = qr{ | ||
37 | (?> # Atomic matching | ||
38 | [^\[\]]+ # Anything other than brackets | ||
39 | | | ||
40 | \[ | ||
41 | (??{ $g_nested_brackets }) # Recursive set of nested brackets | ||
42 | \] | ||
43 | )* | ||
44 | }x; | ||
45 | |||
46 | |||
47 | # Table of hash values for escaped characters: | ||
48 | my %g_escape_table; | ||
49 | foreach my $char (split //, '\\`*_{}[]()>#+-.!') { | ||
50 | $g_escape_table{$char} = md5_hex($char); | ||
51 | } | ||
52 | |||
53 | |||
54 | # Global hashes, used by various utility routines | ||
55 | my %g_urls; | ||
56 | my %g_titles; | ||
57 | my %g_html_blocks; | ||
58 | |||
59 | # Used to track when we're inside an ordered or unordered list | ||
60 | # (see _ProcessListItems() for details): | ||
61 | my $g_list_level = 0; | ||
62 | |||
63 | |||
64 | #### Blosxom plug-in interface ########################################## | ||
65 | |||
66 | # Set $g_blosxom_use_meta to 1 to use Blosxom's meta plug-in to determine | ||
67 | # which posts Markdown should process, using a "meta-markup: markdown" | ||
68 | # header. If it's set to 0 (the default), Markdown will process all | ||
69 | # entries. | ||
70 | my $g_blosxom_use_meta = 0; | ||
71 | |||
72 | sub start { 1; } | ||
73 | sub story { | ||
74 | my($pkg, $path, $filename, $story_ref, $title_ref, $body_ref) = @_; | ||
75 | |||
76 | if ( (! $g_blosxom_use_meta) or | ||
77 | (defined($meta::markup) and ($meta::markup =~ /^\s*markdown\s*$/i)) | ||
78 | ){ | ||
79 | $$body_ref = Markdown($$body_ref); | ||
80 | } | ||
81 | 1; | ||
82 | } | ||
83 | |||
84 | |||
85 | #### Movable Type plug-in interface ##################################### | ||
86 | eval {require MT}; # Test to see if we're running in MT. | ||
87 | unless ($@) { | ||
88 | require MT; | ||
89 | import MT; | ||
90 | require MT::Template::Context; | ||
91 | import MT::Template::Context; | ||
92 | |||
93 | eval {require MT::Plugin}; # Test to see if we're running >= MT 3.0. | ||
94 | unless ($@) { | ||
95 | require MT::Plugin; | ||
96 | import MT::Plugin; | ||
97 | my $plugin = new MT::Plugin({ | ||
98 | name => "Markdown", | ||
99 | description => "A plain-text-to-HTML formatting plugin. (Version: $VERSION)", | ||
100 | doc_link => 'http://daringfireball.net/projects/markdown/' | ||
101 | }); | ||
102 | MT->add_plugin( $plugin ); | ||
103 | } | ||
104 | |||
105 | MT::Template::Context->add_container_tag(MarkdownOptions => sub { | ||
106 | my $ctx = shift; | ||
107 | my $args = shift; | ||
108 | my $builder = $ctx->stash('builder'); | ||
109 | my $tokens = $ctx->stash('tokens'); | ||
110 | |||
111 | if (defined ($args->{'output'}) ) { | ||
112 | $ctx->stash('markdown_output', lc $args->{'output'}); | ||
113 | } | ||
114 | |||
115 | defined (my $str = $builder->build($ctx, $tokens) ) | ||
116 | or return $ctx->error($builder->errstr); | ||
117 | $str; # return value | ||
118 | }); | ||
119 | |||
120 | MT->add_text_filter('markdown' => { | ||
121 | label => 'Markdown', | ||
122 | docs => 'http://daringfireball.net/projects/markdown/', | ||
123 | on_format => sub { | ||
124 | my $text = shift; | ||
125 | my $ctx = shift; | ||
126 | my $raw = 0; | ||
127 | if (defined $ctx) { | ||
128 | my $output = $ctx->stash('markdown_output'); | ||
129 | if (defined $output && $output =~ m/^html/i) { | ||
130 | $g_empty_element_suffix = ">"; | ||
131 | $ctx->stash('markdown_output', ''); | ||
132 | } | ||
133 | elsif (defined $output && $output eq 'raw') { | ||
134 | $raw = 1; | ||
135 | $ctx->stash('markdown_output', ''); | ||
136 | } | ||
137 | else { | ||
138 | $raw = 0; | ||
139 | $g_empty_element_suffix = " />"; | ||
140 | } | ||
141 | } | ||
142 | $text = $raw ? $text : Markdown($text); | ||
143 | $text; | ||
144 | }, | ||
145 | }); | ||
146 | |||
147 | # If SmartyPants is loaded, add a combo Markdown/SmartyPants text filter: | ||
148 | my $smartypants; | ||
149 | |||
150 | { | ||
151 | no warnings "once"; | ||
152 | $smartypants = $MT::Template::Context::Global_filters{'smarty_pants'}; | ||
153 | } | ||
154 | |||
155 | if ($smartypants) { | ||
156 | MT->add_text_filter('markdown_with_smartypants' => { | ||
157 | label => 'Markdown With SmartyPants', | ||
158 | docs => 'http://daringfireball.net/projects/markdown/', | ||
159 | on_format => sub { | ||
160 | my $text = shift; | ||
161 | my $ctx = shift; | ||
162 | if (defined $ctx) { | ||
163 | my $output = $ctx->stash('markdown_output'); | ||
164 | if (defined $output && $output eq 'html') { | ||
165 | $g_empty_element_suffix = ">"; | ||
166 | } | ||
167 | else { | ||
168 | $g_empty_element_suffix = " />"; | ||
169 | } | ||
170 | } | ||
171 | $text = Markdown($text); | ||
172 | $text = $smartypants->($text, '1'); | ||
173 | }, | ||
174 | }); | ||
175 | } | ||
176 | } | ||
177 | else { | ||
178 | #### BBEdit/command-line text filter interface ########################## | ||
179 | # Needs to be hidden from MT (and Blosxom when running in static mode). | ||
180 | |||
181 | # We're only using $blosxom::version once; tell Perl not to warn us: | ||
182 | no warnings 'once'; | ||
183 | unless ( defined($blosxom::version) ) { | ||
184 | use warnings; | ||
185 | |||
186 | #### Check for command-line switches: ################# | ||
187 | my %cli_opts; | ||
188 | use Getopt::Long; | ||
189 | Getopt::Long::Configure('pass_through'); | ||
190 | GetOptions(\%cli_opts, | ||
191 | 'version', | ||
192 | 'shortversion', | ||
193 | 'html4tags', | ||
194 | ); | ||
195 | if ($cli_opts{'version'}) { # Version info | ||
196 | print "\nThis is Markdown, version $VERSION.\n"; | ||
197 | print "Copyright 2004 John Gruber\n"; | ||
198 | print "http://daringfireball.net/projects/markdown/\n\n"; | ||
199 | exit 0; | ||
200 | } | ||
201 | if ($cli_opts{'shortversion'}) { # Just the version number string. | ||
202 | print $VERSION; | ||
203 | exit 0; | ||
204 | } | ||
205 | if ($cli_opts{'html4tags'}) { # Use HTML tag style instead of XHTML | ||
206 | $g_empty_element_suffix = ">"; | ||
207 | } | ||
208 | |||
209 | |||
210 | #### Process incoming text: ########################### | ||
211 | my $text; | ||
212 | { | ||
213 | local $/; # Slurp the whole file | ||
214 | $text = <>; | ||
215 | } | ||
216 | print <<'EOT'; | ||
217 | <style> | ||
218 | .markdown-body { | ||
219 | font-size: 14px; | ||
220 | line-height: 1.6; | ||
221 | overflow: hidden; | ||
222 | } | ||
223 | .markdown-body>*:first-child { | ||
224 | margin-top: 0 !important; | ||
225 | } | ||
226 | .markdown-body>*:last-child { | ||
227 | margin-bottom: 0 !important; | ||
228 | } | ||
229 | .markdown-body a.absent { | ||
230 | color: #c00; | ||
231 | } | ||
232 | .markdown-body a.anchor { | ||
233 | display: block; | ||
234 | padding-left: 30px; | ||
235 | margin-left: -30px; | ||
236 | cursor: pointer; | ||
237 | position: absolute; | ||
238 | top: 0; | ||
239 | left: 0; | ||
240 | bottom: 0; | ||
241 | } | ||
242 | .markdown-body h1, .markdown-body h2, .markdown-body h3, .markdown-body h4, .markdown-body h5, .markdown-body h6 { | ||
243 | margin: 20px 0 10px; | ||
244 | padding: 0; | ||
245 | font-weight: bold; | ||
246 | -webkit-font-smoothing: antialiased; | ||
247 | cursor: text; | ||
248 | position: relative; | ||
249 | } | ||
250 | .markdown-body h1 .mini-icon-link, .markdown-body h2 .mini-icon-link, .markdown-body h3 .mini-icon-link, .markdown-body h4 .mini-icon-link, .markdown-body h5 .mini-icon-link, .markdown-body h6 .mini-icon-link { | ||
251 | display: none; | ||
252 | color: #000; | ||
253 | } | ||
254 | .markdown-body h1:hover a.anchor, .markdown-body h2:hover a.anchor, .markdown-body h3:hover a.anchor, .markdown-body h4:hover a.anchor, .markdown-body h5:hover a.anchor, .markdown-body h6:hover a.anchor { | ||
255 | text-decoration: none; | ||
256 | line-height: 1; | ||
257 | padding-left: 0; | ||
258 | margin-left: -22px; | ||
259 | top: 15%} | ||
260 | .markdown-body h1:hover a.anchor .mini-icon-link, .markdown-body h2:hover a.anchor .mini-icon-link, .markdown-body h3:hover a.anchor .mini-icon-link, .markdown-body h4:hover a.anchor .mini-icon-link, .markdown-body h5:hover a.anchor .mini-icon-link, .markdown-body h6:hover a.anchor .mini-icon-link { | ||
261 | display: inline-block; | ||
262 | } | ||
263 | .markdown-body h1 tt, .markdown-body h1 code, .markdown-body h2 tt, .markdown-body h2 code, .markdown-body h3 tt, .markdown-body h3 code, .markdown-body h4 tt, .markdown-body h4 code, .markdown-body h5 tt, .markdown-body h5 code, .markdown-body h6 tt, .markdown-body h6 code { | ||
264 | font-size: inherit; | ||
265 | } | ||
266 | .markdown-body h1 { | ||
267 | font-size: 28px; | ||
268 | color: #000; | ||
269 | } | ||
270 | .markdown-body h2 { | ||
271 | font-size: 24px; | ||
272 | border-bottom: 1px solid #ccc; | ||
273 | color: #000; | ||
274 | } | ||
275 | .markdown-body h3 { | ||
276 | font-size: 18px; | ||
277 | } | ||
278 | .markdown-body h4 { | ||
279 | font-size: 16px; | ||
280 | } | ||
281 | .markdown-body h5 { | ||
282 | font-size: 14px; | ||
283 | } | ||
284 | .markdown-body h6 { | ||
285 | color: #777; | ||
286 | font-size: 14px; | ||
287 | } | ||
288 | .markdown-body p, .markdown-body blockquote, .markdown-body ul, .markdown-body ol, .markdown-body dl, .markdown-body table, .markdown-body pre { | ||
289 | margin: 15px 0; | ||
290 | } | ||
291 | .markdown-body hr { | ||
292 | background: transparent url("/dirty-shade.png") repeat-x 0 0; | ||
293 | border: 0 none; | ||
294 | color: #ccc; | ||
295 | height: 4px; | ||
296 | padding: 0; | ||
297 | } | ||
298 | .markdown-body>h2:first-child, .markdown-body>h1:first-child, .markdown-body>h1:first-child+h2, .markdown-body>h3:first-child, .markdown-body>h4:first-child, .markdown-body>h5:first-child, .markdown-body>h6:first-child { | ||
299 | margin-top: 0; | ||
300 | padding-top: 0; | ||
301 | } | ||
302 | .markdown-body a:first-child h1, .markdown-body a:first-child h2, .markdown-body a:first-child h3, .markdown-body a:first-child h4, .markdown-body a:first-child h5, .markdown-body a:first-child h6 { | ||
303 | margin-top: 0; | ||
304 | padding-top: 0; | ||
305 | } | ||
306 | .markdown-body h1+p, .markdown-body h2+p, .markdown-body h3+p, .markdown-body h4+p, .markdown-body h5+p, .markdown-body h6+p { | ||
307 | margin-top: 0; | ||
308 | } | ||
309 | .markdown-body li p.first { | ||
310 | display: inline-block; | ||
311 | } | ||
312 | .markdown-body ul, .markdown-body ol { | ||
313 | padding-left: 30px; | ||
314 | } | ||
315 | .markdown-body ul.no-list, .markdown-body ol.no-list { | ||
316 | list-style-type: none; | ||
317 | padding: 0; | ||
318 | } | ||
319 | .markdown-body ul li>:first-child, .markdown-body ul li ul:first-of-type, .markdown-body ul li ol:first-of-type, .markdown-body ol li>:first-child, .markdown-body ol li ul:first-of-type, .markdown-body ol li ol:first-of-type { | ||
320 | margin-top: 0px; | ||
321 | } | ||
322 | .markdown-body ul li p:last-of-type, .markdown-body ol li p:last-of-type { | ||
323 | margin-bottom: 0; | ||
324 | } | ||
325 | .markdown-body ul ul, .markdown-body ul ol, .markdown-body ol ol, .markdown-body ol ul { | ||
326 | margin-bottom: 0; | ||
327 | } | ||
328 | .markdown-body dl { | ||
329 | padding: 0; | ||
330 | } | ||
331 | .markdown-body dl dt { | ||
332 | font-size: 14px; | ||
333 | font-weight: bold; | ||
334 | font-style: italic; | ||
335 | padding: 0; | ||
336 | margin: 15px 0 5px; | ||
337 | } | ||
338 | .markdown-body dl dt:first-child { | ||
339 | padding: 0; | ||
340 | } | ||
341 | .markdown-body dl dt>:first-child { | ||
342 | margin-top: 0px; | ||
343 | } | ||
344 | .markdown-body dl dt>:last-child { | ||
345 | margin-bottom: 0px; | ||
346 | } | ||
347 | .markdown-body dl dd { | ||
348 | margin: 0 0 15px; | ||
349 | padding: 0 15px; | ||
350 | } | ||
351 | .markdown-body dl dd>:first-child { | ||
352 | margin-top: 0px; | ||
353 | } | ||
354 | .markdown-body dl dd>:last-child { | ||
355 | margin-bottom: 0px; | ||
356 | } | ||
357 | .markdown-body blockquote { | ||
358 | border-left: 4px solid #DDD; | ||
359 | padding: 0 15px; | ||
360 | color: #777; | ||
361 | } | ||
362 | .markdown-body blockquote>:first-child { | ||
363 | margin-top: 0px; | ||
364 | } | ||
365 | .markdown-body blockquote>:last-child { | ||
366 | margin-bottom: 0px; | ||
367 | } | ||
368 | .markdown-body table th { | ||
369 | font-weight: bold; | ||
370 | } | ||
371 | .markdown-body table th, .markdown-body table td { | ||
372 | border: 1px solid #ccc; | ||
373 | padding: 6px 13px; | ||
374 | } | ||
375 | .markdown-body table tr { | ||
376 | border-top: 1px solid #ccc; | ||
377 | background-color: #fff; | ||
378 | } | ||
379 | .markdown-body table tr:nth-child(2n) { | ||
380 | background-color: #f8f8f8; | ||
381 | } | ||
382 | .markdown-body img { | ||
383 | max-width: 100%; | ||
384 | -moz-box-sizing: border-box; | ||
385 | box-sizing: border-box; | ||
386 | } | ||
387 | .markdown-body span.frame { | ||
388 | display: block; | ||
389 | overflow: hidden; | ||
390 | } | ||
391 | .markdown-body span.frame>span { | ||
392 | border: 1px solid #ddd; | ||
393 | display: block; | ||
394 | float: left; | ||
395 | overflow: hidden; | ||
396 | margin: 13px 0 0; | ||
397 | padding: 7px; | ||
398 | width: auto; | ||
399 | } | ||
400 | .markdown-body span.frame span img { | ||
401 | display: block; | ||
402 | float: left; | ||
403 | } | ||
404 | .markdown-body span.frame span span { | ||
405 | clear: both; | ||
406 | color: #333; | ||
407 | display: block; | ||
408 | padding: 5px 0 0; | ||
409 | } | ||
410 | .markdown-body span.align-center { | ||
411 | display: block; | ||
412 | overflow: hidden; | ||
413 | clear: both; | ||
414 | } | ||
415 | .markdown-body span.align-center>span { | ||
416 | display: block; | ||
417 | overflow: hidden; | ||
418 | margin: 13px auto 0; | ||
419 | text-align: center; | ||
420 | } | ||
421 | .markdown-body span.align-center span img { | ||
422 | margin: 0 auto; | ||
423 | text-align: center; | ||
424 | } | ||
425 | .markdown-body span.align-right { | ||
426 | display: block; | ||
427 | overflow: hidden; | ||
428 | clear: both; | ||
429 | } | ||
430 | .markdown-body span.align-right>span { | ||
431 | display: block; | ||
432 | overflow: hidden; | ||
433 | margin: 13px 0 0; | ||
434 | text-align: right; | ||
435 | } | ||
436 | .markdown-body span.align-right span img { | ||
437 | margin: 0; | ||
438 | text-align: right; | ||
439 | } | ||
440 | .markdown-body span.float-left { | ||
441 | display: block; | ||
442 | margin-right: 13px; | ||
443 | overflow: hidden; | ||
444 | float: left; | ||
445 | } | ||
446 | .markdown-body span.float-left span { | ||
447 | margin: 13px 0 0; | ||
448 | } | ||
449 | .markdown-body span.float-right { | ||
450 | display: block; | ||
451 | margin-left: 13px; | ||
452 | overflow: hidden; | ||
453 | float: right; | ||
454 | } | ||
455 | .markdown-body span.float-right>span { | ||
456 | display: block; | ||
457 | overflow: hidden; | ||
458 | margin: 13px auto 0; | ||
459 | text-align: right; | ||
460 | } | ||
461 | .markdown-body code, .markdown-body tt { | ||
462 | margin: 0 2px; | ||
463 | padding: 0px 5px; | ||
464 | border: 1px solid #eaeaea; | ||
465 | background-color: #f8f8f8; | ||
466 | border-radius: 3px; | ||
467 | } | ||
468 | .markdown-body code { | ||
469 | white-space: nowrap; | ||
470 | } | ||
471 | .markdown-body pre>code { | ||
472 | margin: 0; | ||
473 | padding: 0; | ||
474 | white-space: pre; | ||
475 | border: none; | ||
476 | background: transparent; | ||
477 | } | ||
478 | .markdown-body .highlight pre, .markdown-body pre { | ||
479 | background-color: #f8f8f8; | ||
480 | border: 1px solid #ccc; | ||
481 | font-size: 13px; | ||
482 | line-height: 19px; | ||
483 | overflow: auto; | ||
484 | padding: 6px 10px; | ||
485 | border-radius: 3px; | ||
486 | } | ||
487 | .markdown-body pre code, .markdown-body pre tt { | ||
488 | margin: 0; | ||
489 | padding: 0; | ||
490 | background-color: transparent; | ||
491 | border: none; | ||
492 | } | ||
493 | </style> | ||
494 | EOT | ||
495 | print "<div class='markdown-body'>"; | ||
496 | print Markdown($text); | ||
497 | print "</div>"; | ||
498 | } | ||
499 | } | ||
500 | |||
501 | |||
502 | |||
503 | sub Markdown { | ||
504 | # | ||
505 | # Main function. The order in which other subs are called here is | ||
506 | # essential. Link and image substitutions need to happen before | ||
507 | # _EscapeSpecialChars(), so that any *'s or _'s in the <a> | ||
508 | # and <img> tags get encoded. | ||
509 | # | ||
510 | my $text = shift; | ||
511 | |||
512 | # Clear the global hashes. If we don't clear these, you get conflicts | ||
513 | # from other articles when generating a page which contains more than | ||
514 | # one article (e.g. an index page that shows the N most recent | ||
515 | # articles): | ||
516 | %g_urls = (); | ||
517 | %g_titles = (); | ||
518 | %g_html_blocks = (); | ||
519 | |||
520 | |||
521 | # Standardize line endings: | ||
522 | $text =~ s{\r\n}{\n}g; # DOS to Unix | ||
523 | $text =~ s{\r}{\n}g; # Mac to Unix | ||
524 | |||
525 | # Make sure $text ends with a couple of newlines: | ||
526 | $text .= "\n\n"; | ||
527 | |||
528 | # Convert all tabs to spaces. | ||
529 | $text = _Detab($text); | ||
530 | |||
531 | # Strip any lines consisting only of spaces and tabs. | ||
532 | # This makes subsequent regexen easier to write, because we can | ||
533 | # match consecutive blank lines with /\n+/ instead of something | ||
534 | # contorted like /[ \t]*\n+/ . | ||
535 | $text =~ s/^[ \t]+$//mg; | ||
536 | |||
537 | # Turn block-level HTML blocks into hash entries | ||
538 | $text = _HashHTMLBlocks($text); | ||
539 | |||
540 | # Strip link definitions, store in hashes. | ||
541 | $text = _StripLinkDefinitions($text); | ||
542 | |||
543 | $text = _RunBlockGamut($text); | ||
544 | |||
545 | $text = _UnescapeSpecialChars($text); | ||
546 | |||
547 | return $text . "\n"; | ||
548 | } | ||
549 | |||
550 | |||
551 | sub _StripLinkDefinitions { | ||
552 | # | ||
553 | # Strips link definitions from text, stores the URLs and titles in | ||
554 | # hash references. | ||
555 | # | ||
556 | my $text = shift; | ||
557 | my $less_than_tab = $g_tab_width - 1; | ||
558 | |||
559 | # Link defs are in the form: ^[id]: url "optional title" | ||
560 | while ($text =~ s{ | ||
561 | ^[ ]{0,$less_than_tab}\[(.+)\]: # id = $1 | ||
562 | [ \t]* | ||
563 | \n? # maybe *one* newline | ||
564 | [ \t]* | ||
565 | <?(\S+?)>? # url = $2 | ||
566 | [ \t]* | ||
567 | \n? # maybe one newline | ||
568 | [ \t]* | ||
569 | (?: | ||
570 | (?<=\s) # lookbehind for whitespace | ||
571 | ["(] | ||
572 | (.+?) # title = $3 | ||
573 | [")] | ||
574 | [ \t]* | ||
575 | )? # title is optional | ||
576 | (?:\n+|\Z) | ||
577 | } | ||
578 | {}mx) { | ||
579 | $g_urls{lc $1} = _EncodeAmpsAndAngles( $2 ); # Link IDs are case-insensitive | ||
580 | if ($3) { | ||
581 | $g_titles{lc $1} = $3; | ||
582 | $g_titles{lc $1} =~ s/"/&quot;/g; | ||
583 | } | ||
584 | } | ||
585 | |||
586 | return $text; | ||
587 | } | ||
588 | |||
589 | |||
sub _HashHTMLBlocks {
#
# Swap every raw block-level HTML chunk in the text for a placeholder key
# (the MD5 hex digest of the chunk) and remember the chunk in
# %g_html_blocks under that key. The placeholder is padded with blank
# lines so it stands as its own paragraph.
#
# Only block-level tags are handled; text wrapped in span-level tags
# (anchors, emphasis, spans) must still end up inside <p> elements.
#
# Parameter: the text to scan.
# Returns:   the text with the HTML blocks replaced by their keys.
#
    my $text = shift;
    my $max_indent = $g_tab_width - 1;

    # Tag names accepted by the strict (nested) pass and by the liberal pass.
    # The liberal list is the strict one without ins and del.
    my $nestable_tags = qr/p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del/;
    my $liberal_tags = qr/p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math/;

    # Store one block and hand back its padded placeholder.
    my $stash_block = sub {
        my ($block) = @_;
        my $key = md5_hex($block);
        $g_html_blocks{$key} = $block;
        return "\n\n" . $key . "\n\n";
    };

    # Pass 1: nested blocks, e.g.
    #
    #   <div>
    #       <div>
    #       inner tags must be indented
    #       </div>
    #   </div>
    #
    # The outer tags must sit at the left margin and the closing tag must
    # start a line. This has to run before the liberal pass, which would
    # otherwise stop at the first closing tag it meets.
    $text =~ s{
        (                       # whole block
            ^                   # start of line (with /m)
            <($nestable_tags)   # opening tag name
            \b                  # word break
            (.*\n)*?            # any number of lines, minimally matching
            </\2>               # the matching end tag
            [ \t]*              # trailing spaces or tabs
            (?=\n+|\Z)          # followed by a newline or end of document
        )
    }{ $stash_block->($1) }egmx;

    # Pass 2: match more liberally, from a tag at line start to the first
    # line that contains its closing tag.
    $text =~ s{
        (                       # whole block
            ^                   # start of line (with /m)
            <($liberal_tags)    # opening tag name
            \b                  # word break
            (.*\n)*?            # any number of lines, minimally matching
            .*</\2>             # the matching end tag
            [ \t]*              # trailing spaces or tabs
            (?=\n+|\Z)          # followed by a newline or end of document
        )
    }{ $stash_block->($1) }egmx;

    # Pass 3: a standalone <hr />, handled separately to keep the patterns
    # above simple.
    $text =~ s{
        (?:
            (?<=\n\n)           # starting after a blank line
            |                   # or
            \A\n?               # at the beginning of the document
        )
        (                       # whole block
            [ ]{0,$max_indent}
            <(hr)               # opening tag name
            \b                  # word break
            ([^<>])*?           # attributes
            /?>                 # end of the tag
            [ \t]*
            (?=\n{2,}|\Z)       # followed by a blank line or end of document
        )
    }{ $stash_block->($1) }egx;

    # Pass 4: standalone HTML comments.
    $text =~ s{
        (?:
            (?<=\n\n)           # starting after a blank line
            |                   # or
            \A\n?               # at the beginning of the document
        )
        (                       # whole block
            [ ]{0,$max_indent}
            (?s:
                <!
                (--.*?--\s*)+
                >
            )
            [ \t]*
            (?=\n{2,}|\Z)       # followed by a blank line or end of document
        )
    }{ $stash_block->($1) }egx;

    return $text;
}
698 | |||
699 | |||
sub _RunBlockGamut {
#
# Apply, in order, every transformation that produces block-level markup:
# headers, horizontal rules, lists, code blocks, blockquotes and finally
# paragraphs.
#
# Parameter: the text to transform.
# Returns:   the transformed text.
#
    my $text = shift;

    $text = _DoHeaders($text);

    # Horizontal rules: three or more of the same marker (*, - or _) on a
    # line of their own, optionally separated by single spaces.
    foreach my $rule_char (qr/\*/, qr/-/, qr/_/) {
        $text =~ s{^[ ]{0,2}([ ]?${rule_char}[ ]?){3,}[ \t]*$}{\n<hr$g_empty_element_suffix\n}gmx;
    }

    $text = _DoLists($text);
    $text = _DoCodeBlocks($text);
    $text = _DoBlockQuotes($text);

    # Hash the block-level markup generated by the steps above so that
    # _FormParagraphs() does not wrap it in <p> tags.
    $text = _HashHTMLBlocks($text);

    return _FormParagraphs($text);
}
730 | |||
731 | |||
sub _RunSpanGamut {
#
# Apply every transformation that happens *inside* block-level elements
# such as paragraphs, headers and list items.
#
# Parameter: the text of one block.
# Returns:   the text with span-level markup converted.
#
    my $text = shift;

    # The order is significant:
    #  - images run before anchors, because ![foo][f] looks like an anchor;
    #  - auto-links run after anchors, because < and > may delimit the URL
    #    of an inline link such as [this](<url>).
    my @span_passes = (
        \&_DoCodeSpans,
        \&_EscapeSpecialChars,
        \&_DoImages,
        \&_DoAnchors,
        \&_DoAutoLinks,
        \&_EncodeAmpsAndAngles,
        \&_DoItalicsAndBold,
    );
    foreach my $pass (@span_passes) {
        $text = $pass->($text);
    }

    # Hard line breaks: two or more spaces at the end of a line.
    $text =~ s/ {2,}\n/ <br$g_empty_element_suffix\n/g;

    return $text;
}
762 | |||
763 | |||
sub _EscapeSpecialChars {
#
# Protect characters that must not be treated as Markdown syntax.
#
# The text is split into HTML tags and the text between them:
#  - inside tags, * and _ are replaced by their %g_escape_table values so
#    they cannot be mistaken for emphasis markers;
#  - between tags, backslash escapes are encoded.
#
# Parameter: the text to process.
# Returns:   the text rebuilt from the processed tokens.
#
    my $text = shift;
    my $tokens = _TokenizeHTML($text);

    my @pieces;
    foreach my $token (@$tokens) {
        my ($type, $chunk) = @$token;
        if ($type eq "tag") {
            $chunk =~ s! \* !$g_escape_table{'*'}!gx;
            $chunk =~ s! _ !$g_escape_table{'_'}!gx;
        }
        else {
            $chunk = _EncodeBackslashEscapes($chunk);
        }
        push @pieces, $chunk;
    }

    return join '', @pieces;
}
791 | |||
792 | |||
sub _DoAnchors {
#
# Turn Markdown link shortcuts into XHTML <a> tags.
#
# Two forms are handled, in this order:
#   reference style:  [link text] [id]
#   inline style:     [link text](url "optional title")
#
# In URLs and titles, * and _ are replaced by their %g_escape_table values
# so that they are not later mistaken for emphasis markers.
#
# Parameter: the text to process.
# Returns:   the text with links converted.
#
    my $text = shift;

    #
    # First, handle reference-style links: [link text] [id]
    #
    $text =~ s{
        (                           # whole match
            \[
                ($g_nested_brackets)    # link text
            \]

            [ ]?                    # one optional space
            (?:\n[ ]*)?             # one optional newline followed by spaces

            \[
                (.*?)               # link id
            \]
        )
    }{
        my $result;
        my $whole_match = $1;
        my $link_text   = $2;
        my $link_id     = lc $3;    # link ids are looked up in lower case

        if ($link_id eq "") {
            $link_id = lc $link_text;   # for shortcut links like [this][].
        }

        if (defined $g_urls{$link_id}) {
            my $url = $g_urls{$link_id};
            $url =~ s! \* !$g_escape_table{'*'}!gx;     # encode these to avoid
            $url =~ s!  _ !$g_escape_table{'_'}!gx;     # conflicts with italics/bold
            $result = "<a href=\"$url\"";
            if ( defined $g_titles{$link_id} ) {
                my $title = $g_titles{$link_id};
                $title =~ s! \* !$g_escape_table{'*'}!gx;
                $title =~ s!  _ !$g_escape_table{'_'}!gx;
                $result .= " title=\"$title\"";
            }
            $result .= ">$link_text</a>";
        }
        else {
            # Unknown link id: leave the source text untouched.
            $result = $whole_match;
        }
        $result;
    }xsge;

    #
    # Next, inline-style links: [link text](url "optional title")
    #
    $text =~ s{
        (                           # whole match
            \[
                ($g_nested_brackets)    # link text
            \]
            \(                      # literal paren
                [ \t]*
                <?(.*?)>?           # href
                [ \t]*
                (                   # optional title, with its quotes
                    (['"])          # quote char
                    (.*?)           # title text
                    \5              # matching quote
                )?
            \)
        )
    }{
        my $result;
        my $whole_match = $1;
        my $link_text   = $2;
        my $url         = $3;
        my $title       = $6;

        $url =~ s! \* !$g_escape_table{'*'}!gx;     # encode these to avoid
        $url =~ s!  _ !$g_escape_table{'_'}!gx;     # conflicts with italics/bold
        $result = "<a href=\"$url\"";

        if (defined $title) {
            # The title lands inside a double-quoted attribute, so literal
            # double quotes must become character references.
            $title =~ s/"/&quot;/g;
            $title =~ s! \* !$g_escape_table{'*'}!gx;
            $title =~ s!  _ !$g_escape_table{'_'}!gx;
            $result .= " title=\"$title\"";
        }

        $result .= ">$link_text</a>";

        $result;
    }xsge;

    return $text;
}
888 | |||
889 | |||
sub _DoImages {
#
# Turn Markdown image shortcuts into <img> tags.
#
# Two forms are handled, in this order:
#   reference style:  ![alt text][id]
#   inline style:     ![alt text](url "optional title")
#
# Alt text and inline titles are placed inside double-quoted attributes,
# so literal double quotes in them are written as &quot;. In URLs and
# titles, * and _ are replaced by their %g_escape_table values so that
# they are not later mistaken for emphasis markers.
#
# Parameter: the text to process.
# Returns:   the text with images converted.
#
    my $text = shift;

    #
    # First, handle reference-style labeled images: ![alt text][id]
    #
    $text =~ s{
        (                       # whole match
            !\[
                (.*?)           # alt text
            \]

            [ ]?                # one optional space
            (?:\n[ ]*)?         # one optional newline followed by spaces

            \[
                (.*?)           # link id
            \]
        )
    }{
        my $result;
        my $whole_match = $1;
        my $alt_text    = $2;
        my $link_id     = lc $3;

        if ($link_id eq "") {
            $link_id = lc $alt_text;    # for shortcut links like ![this][].
        }

        # Escape only after the id fallback above, which needs the raw text.
        $alt_text =~ s/"/&quot;/g;
        if (defined $g_urls{$link_id}) {
            my $url = $g_urls{$link_id};
            $url =~ s! \* !$g_escape_table{'*'}!gx;     # encode these to avoid
            $url =~ s!  _ !$g_escape_table{'_'}!gx;     # conflicts with italics/bold
            $result = "<img src=\"$url\" alt=\"$alt_text\"";
            if (defined $g_titles{$link_id}) {
                my $title = $g_titles{$link_id};
                $title =~ s! \* !$g_escape_table{'*'}!gx;
                $title =~ s!  _ !$g_escape_table{'_'}!gx;
                $result .= " title=\"$title\"";
            }
            $result .= $g_empty_element_suffix;
        }
        else {
            # If there's no such link ID, leave intact:
            $result = $whole_match;
        }

        $result;
    }xsge;

    #
    # Next, handle inline images: ![alt text](url "optional title")
    #
    $text =~ s{
        (                       # whole match
            !\[
                (.*?)           # alt text
            \]
            \(                  # literal paren
                [ \t]*
                <?(\S+?)>?      # src url
                [ \t]*
                (               # optional title, with its quotes
                    (['"])      # quote char
                    (.*?)       # title text
                    \5          # matching quote
                    [ \t]*
                )?
            \)
        )
    }{
        my $alt_text = $2;
        my $url      = $3;
        my $title    = defined($6) ? $6 : '';

        $alt_text =~ s/"/&quot;/g;
        $title    =~ s/"/&quot;/g;
        $url =~ s! \* !$g_escape_table{'*'}!gx;     # encode these to avoid
        $url =~ s!  _ !$g_escape_table{'_'}!gx;     # conflicts with italics/bold
        $title =~ s! \* !$g_escape_table{'*'}!gx;
        $title =~ s!  _ !$g_escape_table{'_'}!gx;

        # The title attribute is always written for inline images (empty
        # when no title was given); this keeps the output unchanged.
        "<img src=\"$url\" alt=\"$alt_text\" title=\"$title\"" . $g_empty_element_suffix;
    }xsge;

    return $text;
}
993 | |||
994 | |||
sub _DoHeaders {
#
# Convert Markdown headers into <h1>..<h6> elements. The header text is
# run through _RunSpanGamut().
#
# Setext style:
#     Header 1
#     ========
#
#     Header 2
#     --------
#
# atx style:
#     # Header 1
#     ## Header 2
#     ## Header 2 with closing hashes ##
#     ...
#     ###### Header 6
#
# Parameter: the text to process.
# Returns:   the text with headers converted.
#
    my $text = shift;

    # Build one header element of the given level.
    my $heading = sub {
        my ($level, $content) = @_;
        return "<h$level>" . _RunSpanGamut($content) . "</h$level>\n\n";
    };

    # Setext style: a line underlined with = is level 1, with - is level 2.
    $text =~ s{ ^(.+)[ \t]*\n=+[ \t]*\n+ }{ $heading->(1, $1) }egmx;
    $text =~ s{ ^(.+)[ \t]*\n-+[ \t]*\n+ }{ $heading->(2, $1) }egmx;

    # atx style: the number of leading hashes gives the level.
    $text =~ s{
        ^(\#{1,6})  # leading run of hashes
        [ \t]*
        (.+?)       # header text
        [ \t]*
        \#*         # optional closing hashes (not counted)
        \n+
    }{ $heading->(length($1), $2) }egmx;

    return $text;
}
1035 | |||
1036 | |||
sub _DoLists {
#
# Form HTML ordered (numbered) and unordered (bulleted) lists.
#
# Parameter: the text to process.
# Returns:   the text with every list replaced by <ul>/<ol> markup.
#
    my $text = shift;
    my $less_than_tab = $g_tab_width - 1;

    # Re-usable patterns to match list item bullets and number markers:
    my $marker_ul  = qr/[*+-]/;
    my $marker_ol  = qr/\d+[.]/;
    my $marker_any = qr/(?:$marker_ul|$marker_ol)/;

    # Re-usable pattern to match an entire ul or ol list:
    my $whole_list = qr{
        (                               # whole list
            (                           # indent, marker and following space
                [ ]{0,$less_than_tab}
                (${marker_any})         # first list item marker
                [ \t]+
            )
            (?s:.+?)
            (                           # what ends the list
                \z
                |
                \n{2,}
                (?=\S)
                (?!                     # not followed by another list item marker
                    [ \t]*
                    ${marker_any}[ \t]+
                )
            )
        )
    }mx;

    # Turn one matched list (and its first marker) into list markup.
    my $render_list = sub {
        my ($list, $first_marker) = @_;
        my $list_type = ($first_marker =~ m/$marker_ul/) ? "ul" : "ol";
        # Turn double returns into triple returns, so that we can make a
        # paragraph for the last item in a list, if necessary:
        $list =~ s/\n{2,}/\n\n\n/g;
        my $items = _ProcessListItems($list, $marker_any);
        return "<$list_type>\n" . $items . "</$list_type>\n";
    };

    # Nested lists may start on any line; top-level lists must follow a
    # blank line or start the document. See the extended comment in
    # _ProcessListItems().
    #
    # Two static substitutions are used on purpose instead of a single one
    # with a conditionally built pattern: the original author reports that
    # the single-pattern form caused rebuilds to be killed in one hosting
    # environment, and that this form avoids the problem.
    if ($g_list_level) {
        $text =~ s{
            ^
            $whole_list
        }{ $render_list->($1, $3) }egmx;
    }
    else {
        $text =~ s{
            (?:(?<=\n\n)|\A\n?)
            $whole_list
        }{ $render_list->($1, $3) }egmx;
    }

    return $text;
}
1123 | |||
1124 | |||
sub _ProcessListItems {
#
# Process the contents of a single ordered or unordered list, splitting it
# into individual <li> items.
#
# Parameters: the list text, and a pattern matching any list item marker.
# Returns:    the list text with each item wrapped in <li>...</li>.
#
    my ($list_str, $marker_any) = @_;

    # $g_list_level counts how deeply we are nested inside lists: it is
    # incremented here on entry and decremented again before returning, so
    # zero means "not in a list".
    #
    # _DoLists() needs this because outside a list, text such as
    #
    #       I recommend upgrading to version
    #       8. Oops, now this line is treated
    #       as a sub-list.
    #
    # should stay a single paragraph even though its second line starts
    # with a digit-period-space sequence, whereas inside a list (or
    # sub-list) such a line starts a sub-list.
    $g_list_level++;

    # trim trailing blank lines:
    $list_str =~ s/\n{2,}\z/\n/;

    $list_str =~ s{
        (\n)?                           # blank line before the item, if any
        (^[ \t]*)                       # leading whitespace
        ($marker_any) [ \t]+            # list marker
        ((?s:.+?)                       # list item text
        (\n{1,2}))
        (?= \n* (\z | \2 ($marker_any) [ \t]+))
    }{
        my ($blank_before, $body) = ($1, $4);

        if ($blank_before or ($body =~ m/\n{2,}/)) {
            # Items set off by blank lines get full block-level treatment.
            $body = _RunBlockGamut(_Outdent($body));
        }
        else {
            # Recursion for sub-lists:
            $body = _DoLists(_Outdent($body));
            chomp $body;
            $body = _RunSpanGamut($body);
        }

        "<li>" . $body . "</li>\n";
    }egmx;

    $g_list_level--;
    return $list_str;
}
1190 | |||
1191 | |||
1192 | |||
sub _DoCodeBlocks {
#
# Process Markdown `<pre><code>` blocks: runs of lines that each start
# with a tab or a tab-width of spaces.
#
# Parameter: the text to process.
# Returns:   the text with code blocks converted.
#
    my $text = shift;

    $text =~ s{
        (?:\n\n|\A)
        (                                   # the code block: one or more indented lines
            (?:
                (?:[ ]{$g_tab_width} | \t)  # a tab or a tab-width of spaces
                .*\n+
            )+
        )
        ((?=^[ ]{0,$g_tab_width}\S)|\Z)     # lookahead for non-space at line start, or end of doc
    }{
        # Remove one indent level, escape the code, then expand tabs.
        my $code = _Detab(_EncodeCode(_Outdent($1)));
        $code =~ s/\A\n+//;     # trim leading newlines
        $code =~ s/\s+\z//;     # trim trailing whitespace

        "\n\n<pre><code>" . $code . "\n</code></pre>\n\n";
    }egmx;

    return $text;
}
1225 | |||
1226 | |||
sub _DoCodeSpans {
#
# Convert backtick-quoted text into <code></code> spans.
#
#   * Any number of backticks may open a span; the same number closes it.
#     This allows literal backticks inside the code, so this input:
#
#         Just type ``foo `bar` baz`` at the prompt.
#
#     becomes:
#
#         <p>Just type <code>foo `bar` baz</code> at the prompt.</p>
#
#     There is no limit on the number of backticks used as delimiters:
#     to include three consecutive backticks, delimit with four, etc.
#
#   * Spaces next to the delimiters are dropped, which allows literal
#     backticks at the edges of the code:
#
#         ... type `` `bar` `` ...
#
#     becomes:
#
#         ... type <code>`bar`</code> ...
#
# Parameter: the text to process.
# Returns:   the text with code spans converted.
#
    my $text = shift;

    $text =~ s{
        (`+)        # opening run of backticks
        (.+?)       # the code
        (?<!`)
        \1          # matching closer
        (?!`)
    }{
        my $code = "$2";
        $code =~ s/^[ \t]*//g;  # leading whitespace
        $code =~ s/[ \t]*$//g;  # trailing whitespace
        "<code>" . _EncodeCode($code) . "</code>";
    }egsx;

    return $text;
}
1271 | |||
1272 | |||
sub _EncodeCode {
#
# Encode/escape certain characters inside Markdown code runs.
# The point is that in code, these characters are literals,
# and lose their special Markdown meanings.
#
# Parameter: the raw code text.
# Returns:   the text with &, < and > written as HTML entities and with
#            Markdown's magic characters replaced by their
#            %g_escape_table values.
#
    local $_ = shift;

    # Encode all ampersands; HTML entities are not
    # entities within a Markdown code span. This must run first so that
    # the entities produced below are not encoded a second time.
    s/&/&amp;/g;

    # Encode $'s, but only if we're running under Blosxom.
    # (Blosxom interpolates Perl variables in article bodies.)
    {
        no warnings 'once';
        if (defined($blosxom::version)) {
            s/\$/&#036;/g;
        }
    }

    # Do the angle bracket song and dance:
    s! < !&lt;!gx;
    s! > !&gt;!gx;

    # Now, escape characters that are magic in Markdown:
    foreach my $magic ('*', '_', '{', '}', '[', ']', '\\') {
        s/\Q$magic\E/$g_escape_table{$magic}/g;
    }

    return $_;
}
1310 | |||
1311 | |||
sub _DoItalicsAndBold {
#
# Convert emphasis markers into <strong> and <em> elements:
# **text** or __text__ becomes <strong>, *text* or _text_ becomes <em>.
# The marked text must not begin or end with whitespace.
#
# Parameter: the text to process.
# Returns:   the text with emphasis converted.
#
    my $text = shift;

    # <strong> must go first, otherwise its doubled markers would be
    # consumed as two <em> markers.
    my @emphasis_rules = (
        [ qr{ (\*\*|__) (?=\S) (.+?[*_]*) (?<=\S) \1 }sx, 'strong' ],
        [ qr{ (\*|_) (?=\S) (.+?) (?<=\S) \1 }sx,         'em'     ],
    );

    foreach my $rule (@emphasis_rules) {
        my ($pattern, $tag) = @$rule;
        $text =~ s{$pattern}{<$tag>$2</$tag>}g;
    }

    return $text;
}
1324 | |||
1325 | |||
sub _DoBlockQuotes {
#
# Convert email-style quoted text (lines introduced by '>') into
# <blockquote> elements. The quoted text is processed recursively with
# _RunBlockGamut(), so block quotes can contain any block-level markup,
# including nested block quotes.
#
# Parameter: the text to process.
# Returns:   the text with block quotes converted.
#
    my $text = shift;

    $text =~ s{
        (                       # whole quoted region
            (
                ^[ \t]*>[ \t]?  # '>' at the start of a line
                .+\n            # rest of the first line
                (.+\n)*         # subsequent consecutive lines
                \n*             # blanks
            )+
        )
    }{
        my $quoted = $1;
        $quoted =~ s/^[ \t]*>[ \t]?//gm;    # trim one level of quoting
        $quoted =~ s/^[ \t]+$//mg;          # trim whitespace-only lines
        $quoted = _RunBlockGamut($quoted);  # recurse

        $quoted =~ s/^/ /g;
        # These leading spaces screw with <pre> content, so we need to fix that:
        $quoted =~ s{(\s*<pre>.+?</pre>)}{
            (my $pre = $1) =~ s/^ //mg;
            $pre;
        }egsx;

        "<blockquote>\n$quoted\n</blockquote>\n\n";
    }egmx;

    return $text;
}
1360 | |||
1361 | |||
sub _FormParagraphs {
#
# Split the text into chunks at blank lines, wrap every chunk that is not
# a hashed HTML block in <p> tags, and put the original HTML back in place
# of the hash keys.
#
# Parameter: the text to process.
# Returns:   the processed chunks, joined by blank lines.
#
    my $text = shift;

    # Strip leading and trailing lines:
    $text =~ s/\A\n+//;
    $text =~ s/\n+\z//;

    my @grafs = split(/\n{2,}/, $text);

    # Wrap <p> tags around everything that is not a key of %g_html_blocks.
    for my $i (0 .. $#grafs) {
        next if defined $g_html_blocks{ $grafs[$i] };
        my $para = _RunSpanGamut($grafs[$i]);
        $para =~ s/^([ \t]*)/<p>/;      # the tag replaces any leading whitespace
        $grafs[$i] = $para . "</p>";
    }

    # Unhashify HTML blocks.
    @grafs = map { defined $g_html_blocks{$_} ? $g_html_blocks{$_} : $_ } @grafs;

    return join "\n\n", @grafs;
}
1397 | |||
1398 | |||
sub _EncodeAmpsAndAngles {
#
# Smart processing for ampersands and angle brackets that need to be
# encoded.
#
#   * An ampersand becomes &amp; unless it already starts something that
#     looks like an entity (&name; &#123; &#x1F;).
#   * A < becomes &lt; unless it is followed by a letter or one of / ? $ !
#     and so may start a tag, processing instruction or comment.
#
# Parameter: the text to process.
# Returns:   the text with naked & and < encoded.
#
    my $text = shift;

    # Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin:
    #   http://bumppo.net/projects/amputator/
    $text =~ s/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/&amp;/g;

    # Encode naked <'s
    $text =~ s{<(?![a-z/?\$!])}{&lt;}gi;

    return $text;
}
1413 | |||
1414 | |||
sub _EncodeBackslashEscapes {
#
# Parameter: String.
# Returns:   The string, with each backslash escape sequence (a backslash
#            followed by one of the characters listed below) replaced by
#            the %g_escape_table value of the escaped character.
#
    my $text = shift;

    # The backslash itself must be processed first, so that an escaped
    # backslash is not read as the start of another escape sequence.
    my @escapable = ('\\', '`', '*', '_', '{', '}', '[', ']',
                     '(', ')', '>', '#', '+', '-', '.', '!');

    foreach my $char (@escapable) {
        $text =~ s/\\\Q$char\E/$g_escape_table{$char}/g;
    }

    return $text;
}
1442 | |||
1443 | |||
sub _DoAutoLinks {
#
# Turn <http://example.com/> style URLs and <address@domain.foo> style
# email addresses into links. Email links are produced by
# _EncodeEmailAddress().
#
# Parameter: the text to process.
# Returns:   the text with automatic links converted.
#
    my $text = shift;

    # http, https and ftp URLs in angle brackets:
    my $bracketed_url = qr{<((https?|ftp):[^'">\s]+)>}i;
    $text =~ s{$bracketed_url}{<a href="$1">$1</a>}g;

    # Email addresses: <address@domain.foo>, with an optional mailto: prefix
    my $bracketed_email = qr{
        <
        (?:mailto:)?
        (
            [-.\w]+
            \@
            [-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+
        )
        >
    }ix;
    $text =~ s{$bracketed_email}{
        _EncodeEmailAddress( _UnescapeSpecialChars($1) );
    }eg;

    return $text;
}
1465 | |||
1466 | |||
sub _EncodeEmailAddress {
#
# Input: an email address, e.g. "foo@example.com"
#
# Output: the email address as a mailto link, with the characters of the
# address randomly written as decimal entities, hex entities or (about
# one time in ten) left as they are, in the hopes of foiling most address
# harvesting spam bots. The '@' is always written as an entity and ':' is
# never encoded.
#
# Based on a filter by Matthew Wickline, posted to the BBEdit-Talk
# mailing list: <http://tinyurl.com/yu7ue>
#
    my $addr = shift;

    # No srand() here: Perl seeds the generator itself on the first call
    # to rand(), and re-seeding on every call is discouraged.
    my @encode = (
        sub { '&#' . ord(shift) . ';' },                    # decimal entity
        sub { '&#x' . sprintf( "%X", ord(shift) ) . ';' },  # hex entity
        sub { shift },                                      # unchanged
    );

    $addr = "mailto:" . $addr;

    $addr =~ s{(.)}{
        my $char = $1;
        if ( $char eq '@' ) {
            # this *must* be encoded. I insist.
            # Pick one of the two entity encoders at random.
            $char = $encode[ int(rand(2)) ]->($char);
        } elsif ( $char ne ':' ) {
            # leave ':' alone (to spot mailto: later)
            my $r = rand;
            # roughly 10% raw, 45% hex, 45% dec
            $char = (
                $r > .9   ? $encode[2]->($char) :
                $r < .45  ? $encode[1]->($char) :
                            $encode[0]->($char)
            );
        }
        $char;
    }gex;

    $addr = qq{<a href="$addr">$addr</a>};
    $addr =~ s{">.+?:}{">};     # strip the mailto: from the visible part

    return $addr;
}
1517 | |||
1518 | |||
sub _UnescapeSpecialChars {
#
# Swap back in all the special characters we've hidden: every occurrence
# of a %g_escape_table value in the text is replaced by the character it
# stands for.
#
# Parameter: the text to process.
# Returns:   the text with the original characters restored.
#
    my $text = shift;

    foreach my $char (keys %g_escape_table) {
        my $hash = $g_escape_table{$char};
        $text =~ s/$hash/$char/g;
    }

    return $text;
}
1530 | |||
1531 | |||
sub _TokenizeHTML {
#
# Parameter: String containing HTML markup.
# Returns:   Reference to an array of the tokens comprising the input
#            string. Each token is either a tag (possibly with nested
#            tags contained therein, such as <a href="<MTFoo>">), or a
#            run of text between tags. Each element of the array is a
#            two-element array; the first is either 'tag' or 'text';
#            the second is the actual value.
#
# Comments and processing instructions are returned as 'tag' tokens too.
#
# Derived from the _tokenize() subroutine from Brad Choate's MTRegex plugin.
#   <http://www.bradchoate.com/past/mtregex.php>
#
    my $str = shift;
    my @tokens;

    # A tag pattern that tolerates tags nested inside tags, up to $depth levels.
    my $depth = 6;
    my $nested_tags = join('|', ('(?:<[a-z/!$](?:[^<>]') x $depth) . (')*>)' x $depth);
    my $match = qr/(?s: <! ( -- .*? -- \s* )+ > ) |  # comment
                   (?s: <\? .*? \?> ) |              # processing instruction
                   $nested_tags/ix;                  # nested tags

    my $consumed = 0;   # offset just past the last token emitted
    while ($str =~ /$match/g) {
        my ($tag_start, $tag_end) = ($-[0], $+[0]);

        # Any text between the previous tag and this one comes first.
        if ($consumed < $tag_start) {
            push @tokens, ['text', substr($str, $consumed, $tag_start - $consumed)];
        }
        push @tokens, ['tag', substr($str, $tag_start, $tag_end - $tag_start)];
        $consumed = $tag_end;
    }

    # Whatever follows the last tag is a final run of text.
    my $total = length $str;
    if ($consumed < $total) {
        push @tokens, ['text', substr($str, $consumed, $total - $consumed)];
    }

    return \@tokens;
}
1571 | |||
1572 | |||
sub _Outdent {
#
# Remove one level of line-leading tabs or spaces: at the start of every
# line, either one tab or up to $g_tab_width spaces.
#
# Parameter: the text to outdent.
# Returns:   the outdented text.
#
    (my $outdented = shift) =~ s/^(\t|[ ]{1,$g_tab_width})//gm;
    return $outdented;
}
1582 | |||
1583 | |||
sub _Detab {
#
# Replace every tab by the number of spaces needed to reach the next
# multiple of $g_tab_width, measured from the text preceding the tab.
#
# Cribbed from a post by Bart Lateur:
# <http://www.nntp.perl.org/group/perl.macperl.anyperl/154>
#
# Parameter: the text to process.
# Returns:   the text with tabs expanded to spaces.
#
    my $text = shift;

    $text =~ s{(.*?)\t}{
        my $lead = $1;
        $lead . (' ' x ($g_tab_width - length($lead) % $g_tab_width));
    }ge;

    return $text;
}
1594 | |||
1595 | |||
1596 | 1; | ||
1597 | |||
1598 | __END__ | ||
1599 | |||
1600 | |||
1601 | =pod | ||
1602 | |||
1603 | =head1 NAME | ||
1604 | |||
1605 | B<Markdown> | ||
1606 | |||
1607 | |||
1608 | =head1 SYNOPSIS | ||
1609 | |||
1610 | B<Markdown.pl> [ B<--html4tags> ] [ B<--version> ] [ B<--shortversion> ] | ||
1611 | [ I<file> ... ] | ||
1612 | |||
1613 | |||
1614 | =head1 DESCRIPTION | ||
1615 | |||
1616 | Markdown is a text-to-HTML filter; it translates an easy-to-read / | ||
1617 | easy-to-write structured text format into HTML. Markdown's text format | ||
1618 | is most similar to that of plain text email, and supports features such | ||
1619 | as headers, *emphasis*, code blocks, blockquotes, and links. | ||
1620 | |||
1621 | Markdown's syntax is designed not as a generic markup language, but | ||
1622 | specifically to serve as a front-end to (X)HTML. You can use span-level | ||
1623 | HTML tags anywhere in a Markdown document, and you can use block level | ||
1624 | HTML tags (like <div> and <table>) as well. | ||
1625 | |||
1626 | For more information about Markdown's syntax, see: | ||
1627 | |||
1628 | http://daringfireball.net/projects/markdown/ | ||
1629 | |||
1630 | |||
1631 | =head1 OPTIONS | ||
1632 | |||
1633 | Use "--" to end switch parsing. For example, to open a file named "-z", use: | ||
1634 | |||
1635 | Markdown.pl -- -z | ||
1636 | |||
1637 | =over 4 | ||
1638 | |||
1639 | |||
1640 | =item B<--html4tags> | ||
1641 | |||
1642 | Use HTML 4 style for empty element tags, e.g.: | ||
1643 | |||
1644 | <br> | ||
1645 | |||
1646 | instead of Markdown's default XHTML style tags, e.g.: | ||
1647 | |||
1648 | <br /> | ||
1649 | |||
1650 | |||
1651 | =item B<-v>, B<--version> | ||
1652 | |||
1653 | Display Markdown's version number and copyright information. | ||
1654 | |||
1655 | |||
1656 | =item B<-s>, B<--shortversion> | ||
1657 | |||
1658 | Display the short-form version number. | ||
1659 | |||
1660 | |||
1661 | =back | ||
1662 | |||
1663 | |||
1664 | |||
1665 | =head1 BUGS | ||
1666 | |||
1667 | To file bug reports or feature requests (other than topics listed in the | ||
1668 | Caveats section above) please send email to: | ||
1669 | |||
1670 | support@daringfireball.net | ||
1671 | |||
1672 | Please include with your report: (1) the example input; (2) the output | ||
1673 | you expected; (3) the output Markdown actually produced. | ||
1674 | |||
1675 | |||
1676 | =head1 VERSION HISTORY | ||
1677 | |||
1678 | See the readme file for detailed release notes for this version. | ||
1679 | |||
1680 | 1.0.1 - 14 Dec 2004 | ||
1681 | |||
1682 | 1.0 - 28 Aug 2004 | ||
1683 | |||
1684 | |||
1685 | =head1 AUTHOR | ||
1686 | |||
1687 | John Gruber | ||
1688 | http://daringfireball.net | ||
1689 | |||
1690 | PHP port and other contributions by Michel Fortin | ||
1691 | http://michelf.com | ||
1692 | |||
1693 | |||
1694 | =head1 COPYRIGHT AND LICENSE | ||
1695 | |||
1696 | Copyright (c) 2003-2004 John Gruber | ||
1697 | <http://daringfireball.net/> | ||
1698 | All rights reserved. | ||
1699 | |||
1700 | Redistribution and use in source and binary forms, with or without | ||
1701 | modification, are permitted provided that the following conditions are | ||
1702 | met: | ||
1703 | |||
1704 | * Redistributions of source code must retain the above copyright notice, | ||
1705 | this list of conditions and the following disclaimer. | ||
1706 | |||
1707 | * Redistributions in binary form must reproduce the above copyright | ||
1708 | notice, this list of conditions and the following disclaimer in the | ||
1709 | documentation and/or other materials provided with the distribution. | ||
1710 | |||
1711 | * Neither the name "Markdown" nor the names of its contributors may | ||
1712 | be used to endorse or promote products derived from this software | ||
1713 | without specific prior written permission. | ||
1714 | |||
1715 | This software is provided by the copyright holders and contributors "as | ||
1716 | is" and any express or implied warranties, including, but not limited | ||
1717 | to, the implied warranties of merchantability and fitness for a | ||
1718 | particular purpose are disclaimed. In no event shall the copyright owner | ||
1719 | or contributors be liable for any direct, indirect, incidental, special, | ||
1720 | exemplary, or consequential damages (including, but not limited to, | ||
1721 | procurement of substitute goods or services; loss of use, data, or | ||
1722 | profits; or business interruption) however caused and on any theory of | ||
1723 | liability, whether in contract, strict liability, or tort (including | ||
1724 | negligence or otherwise) arising in any way out of the use of this | ||
1725 | software, even if advised of the possibility of such damage. | ||
1726 | |||
1727 | =cut | ||
diff --git a/filters/html-converters/resources/rst-template.txt b/filters/html-converters/resources/rst-template.txt deleted file mode 100644 index 43cde42..0000000 --- a/filters/html-converters/resources/rst-template.txt +++ /dev/null | |||
@@ -1,4 +0,0 @@ | |||
1 | %(stylesheet)s | ||
2 | %(body_pre_docinfo)s | ||
3 | %(docinfo)s | ||
4 | %(body)s | ||
diff --git a/filters/html-converters/rst2html b/filters/html-converters/rst2html index c51f5be..756a4e1 100755 --- a/filters/html-converters/rst2html +++ b/filters/html-converters/rst2html | |||
@@ -1,2 +1,2 @@ | |||
1 | #!/bin/sh | 1 | #!/bin/bash |
2 | rst2html.py --template="$(dirname $0)/resources/rst-template.txt" | 2 | rst2html.py --template <(echo -e "%(stylesheet)s\n%(body_pre_docinfo)s\n%(docinfo)s\n%(body)s") |