diff options
Diffstat (limited to 'ht.awk')
-rwxr-xr-x | ht.awk | 176 |
1 files changed, 108 insertions, 68 deletions
diff --git a/ht.awk b/ht.awk index a382ae7..9288060 100755 --- a/ht.awk +++ b/ht.awk | |||
@@ -3,96 +3,136 @@ | |||
3 | # (C) 2022 C. Duckworth | 3 | # (C) 2022 C. Duckworth |
4 | 4 | ||
5 | # ht.awk converts mostly-html (with some conveniences) to actual html | 5 | # ht.awk converts mostly-html (with some conveniences) to actual html |
6 | 6 | /^;/ { | |
7 | function bufpush(s) { | 7 | sub(/^;/, "", $0) |
8 | BUF = BUF (BUF ? "\n" : "") s; | 8 | print "<!--", esc($0), "-->" |
9 | next | ||
9 | } | 10 | } |
10 | 11 | ||
11 | function buflush() { | 12 | /^</ { # Raw HTML |
12 | if (BUF) print BUF; | 13 | if (! (tag == "html")) { |
13 | BUF = ""; | 14 | tag = "html" |
14 | if (tag && (tag != "html") && (tag != "raw")) | 15 | } |
15 | print "</" tag ">"; | 16 | bufpush($0) |
17 | next | ||
16 | } | 18 | } |
17 | 19 | ||
18 | function esc(t) { | 20 | /^```$/ { # Raw block |
19 | # This is of much more limited utility than I initially realized. | 21 | if (! (tag == "raw")) { |
20 | gsub(/&/, "\\&", t); | 22 | tag = "raw" |
21 | gsub(/</, "\\<", t); | 23 | getline |
22 | gsub(/>/, "\\>", t); | 24 | bufpush("<pre><code>" $0) |
23 | sub(/^ /, "\\ ", t); | 25 | } else { |
24 | return t; | 26 | bufpush("</code></pre>") |
27 | buflush() | ||
28 | tag = "" | ||
29 | } | ||
30 | next | ||
25 | } | 31 | } |
26 | 32 | ||
27 | /^;/ { sub(/^;/,""); print "<!--", esc($0), "-->"; next; } | 33 | /^=>/ { # Links (Gemini-style) |
34 | if (tag == "raw") { | ||
35 | next | ||
36 | } | ||
37 | link = "<a href=\"" esc($2) "\">" $3 | ||
38 | for (i = 4; i <= NF; i++) { | ||
39 | link = link " " $i | ||
40 | } | ||
41 | link = link "</a>" | ||
42 | bufpush(link) | ||
43 | next | ||
44 | } | ||
28 | 45 | ||
29 | /^</ { # Raw HTML | 46 | /^-/ { # Unordered lists |
30 | if (! (tag == "html")) tag = "html"; | 47 | if (tag == "raw") { |
31 | bufpush($0); | 48 | next |
32 | next; | 49 | } |
50 | if (! (tag == "ul")) { | ||
51 | tag = "ul" | ||
52 | } | ||
53 | sub(/^-[ \t]*/, "<li>", $0) | ||
33 | } | 54 | } |
34 | 55 | ||
35 | /^```$/ { # Raw block | 56 | /^[0-9]+\./ { # Ordered lists |
36 | if (! (tag == "raw")) { | 57 | if (tag == "raw") { |
37 | tag = "raw"; | 58 | next |
38 | getline; | 59 | } |
39 | bufpush("<pre><code>" $0); | 60 | if (! (tag == "ol")) { |
40 | } else { | 61 | tag = "ol" |
41 | bufpush("</code></pre>"); | 62 | } |
42 | buflush(); | 63 | sub(/^[0-9]+\.[ \t]/, "<li>", $0) |
43 | tag = ""; | ||
44 | } | ||
45 | next; | ||
46 | } | 64 | } |
47 | 65 | ||
48 | /^=>/ { # Links (Gemini-style) | 66 | /^>/ { # Blockquotes |
49 | if (tag == "raw") next; | 67 | if (tag == "raw") { |
50 | link = "<a href=\"" esc($2) "\">" $3; | 68 | next |
51 | for (i=4;i<=NF;i++) link = link " " $i; | 69 | } |
52 | link = link "</a>"; | 70 | if (! (tag == "blockquote")) { |
53 | bufpush(link); | 71 | tag = "blockquote" |
54 | next; | 72 | } |
73 | sub(/^>[ \t]*/, "", $0) | ||
55 | } | 74 | } |
56 | 75 | ||
57 | /^-/ { # Unordered lists | 76 | /^#+/ { # Headers |
58 | if (tag == "raw") next; | 77 | if (tag == "raw") { |
59 | if (! (tag == "ul")) tag = "ul"; | 78 | next |
60 | sub(/^-[ \t]*/, "<li>"); | 79 | } |
80 | match($0, /^#+/) | ||
81 | if (! (tag == "h" RLENGTH)) { | ||
82 | buflush() | ||
83 | tag = "h" RLENGTH | ||
84 | } | ||
85 | sub(/^#+[ \t]*/, "", $0) | ||
61 | } | 86 | } |
62 | 87 | ||
63 | /^[0-9]+\./ { # Ordered lists | 88 | /^$/ { |
64 | if (tag == "raw") next; | 89 | if (tag == "raw") { |
65 | if (! (tag == "ol")) tag = "ol"; | 90 | next |
66 | sub(/^[0-9]+\.[ \t]/, "<li>"); | 91 | } |
92 | buflush() | ||
93 | tag = "" | ||
67 | } | 94 | } |
68 | 95 | ||
69 | /^>/ { # Blockquotes | 96 | /./ { |
70 | if (tag == "raw") next; | 97 | if (! tag) { |
71 | if (! (tag == "blockquote")) tag = "blockquote"; | 98 | tag = "p" |
72 | sub(/^>[ \t]*/,""); | 99 | } |
100 | if (! BUF) { | ||
101 | bufpush("<" tag ">") | ||
102 | } | ||
103 | if (tag == "raw") { | ||
104 | $0 = esc($0) | ||
105 | } | ||
106 | bufpush($0) | ||
73 | } | 107 | } |
74 | 108 | ||
75 | /^#+/ { # Headers | 109 | END { |
76 | if (tag == "raw") next; | 110 | buflush() |
77 | match($0, /^#+/); | ||
78 | if (! (tag == "h" RLENGTH)) { | ||
79 | buflush(); | ||
80 | tag = "h" RLENGTH; | ||
81 | } | ||
82 | sub(/^#+[ \t]*/,""); | ||
83 | } | 111 | } |
84 | 112 | ||
85 | /^$/ { | 113 | |
86 | if (tag == "raw") next; | 114 | function buflush() |
87 | buflush(); | 115 | { |
88 | tag = ""; | 116 | if (BUF) { |
117 | print BUF | ||
118 | } | ||
119 | BUF = "" | ||
120 | if (tag && (tag != "html") && (tag != "raw")) { | ||
121 | print "</" tag ">" | ||
122 | } | ||
89 | } | 123 | } |
90 | 124 | ||
91 | /./ { | 125 | function bufpush(s) |
92 | if (! tag) tag = "p"; | 126 | { |
93 | if (! BUF) bufpush("<" tag ">"); | 127 | BUF = BUF (BUF ? "\n" : "") s |
94 | if (tag == "raw") $0 = esc($0); | ||
95 | bufpush($0); | ||
96 | } | 128 | } |
97 | 129 | ||
98 | END { buflush(); } | 130 | function esc(t) |
131 | { | ||
132 | # This is of much more limited utility than I initially realized. | ||
133 | gsub(/&/, "\\&", t) | ||
134 | gsub(/</, "\\<", t) | ||
135 | gsub(/>/, "\\>", t) | ||
136 | sub(/^ /, "\\ ", t) | ||
137 | return t | ||
138 | } | ||