diff options
Diffstat (limited to 'ht3.awk')
-rwxr-xr-x | ht3.awk | 168 |
1 files changed, 168 insertions, 0 deletions
diff --git a/ht3.awk b/ht3.awk new file mode 100755 index 0000000..996cb1a --- /dev/null +++ b/ht3.awk | |||
@@ -0,0 +1,168 @@ | |||
1 | #!/usr/bin/awk -f | ||
2 | # -*- indent-tabs-mode: t; -*- | ||
3 | # HAT TRICK | ||
4 | # (C) 2022 C. Duckworth | ||
5 | |||
6 | ### Commentary: | ||
7 | |||
8 | ### Code: | ||
BEGIN {
	# Parser state: BUFFER collects the text of the block being built and
	# BLOCK holds the current block type; we start in a standard paragraph.
	BLOCK = "p"
	BUFFER = ""

	split("html,gemini,gopher", HT_FORMATS_AVAILABLE, ",")
	process_arguments()
	normalize_ht_formats()

	# Fall back to the built-in tag:markup pairs when HT_TAGCHARS[1] is
	# still empty or zero after argument processing.
	if (! HT_TAGCHARS[1])
		split("b:**,i://,code:``", HT_TAGCHARS, ",")
}
21 | |||
### RAW TEXT
# Every action below is still an empty stub.  An explicit empty action
# also suppresses awk's default print, so matching lines produce nothing.
# NOTE(review): which delimiter opens and which closes a raw section is
# not visible here -- confirm before filling these in.
/^>>>/ { }

/^<<</ { }

### BLOCKS
# No rule calls `next', so every rule whose pattern matches a line runs,
# in file order.  Several patterns overlap: /^>/ also matches ">>>" lines,
# /^-/ also matches "---", and /^</ also matches "<<<".

# Headers
/^#+/ { }

# Block quote
/^>/ { }

# Unordered list
/^-/ { }

# Ordered list (pattern accepts exactly one digit before the dot)
/^[0-9]\./ { }

# Section break
/^---$/ { }

### LINES
# Link
/^=>/ { }

# HTML tag
/^</ { }

# Comment
/^;/ { }

### EVERYTHING ELSE
# Pattern-less rule: runs for every input line.
{ }
58 | |||
59 | ### FINISH | ||
END {
	# Input is exhausted: write out whatever buflush() still has pending.
	buflush()
}
63 | |||
64 | ### FUNCTIONS | ||
function buflush()
{
	# Write out the pending BUFFER text, empty the buffer, and then close
	# the current BLOCK with its HTML end tag.
	if (BUFFER)
		ht_print(BUFFER)
	BUFFER = ""

	# No end tag for raw blocks, or when "html" is not a key of HT_FORMATS.
	if (BLOCK == "raw" || !("html" in HT_FORMATS))
		return
	ht_print("</" BLOCK ">")
}
77 | |||
function bufpush(str)
{
	# Append STR to BUFFER; a newline separator is inserted first when
	# BUFFER already holds text.
	if (BUFFER)
		BUFFER = BUFFER "\n"
	BUFFER = BUFFER str
}
83 | |||
function ht_print(str,    arr, n, format, line)
{
	# Print STR to standard output.
	#
	# With exactly one output format (HT_FORMATS_COUNT == 1) STR is
	# printed unchanged.  Otherwise every line of STR is printed once per
	# key of HT_FORMATS, prefixed with that key and a tab.
	#
	# ARR, N, FORMAT and LINE are locals (awk's extra-parameter idiom);
	# callers pass STR only.
	if (HT_FORMATS_COUNT == 1) {
		print str
		return
	}

	# Iterate up to the count returned by split().  Testing arr[line] for
	# truth instead would stop at the first blank (or "0") line and drop
	# the remainder of STR.
	n = split(str, arr, "\n")
	for (format in HT_FORMATS) {
		for (line = 1; line <= n; line++) {
			printf "%s\t%s\n", format, arr[line]
		}
	}
}
98 | |||
function html_escape(str)
{
	# Return STR with HTML-special characters replaced by entities:
	#   &  ->  &amp;     <  ->  &lt;     >  ->  &gt;
	# and a single space at the very start of STR replaced by &nbsp;.
	#
	# "&" is handled first so the ampersands introduced by the other
	# entities are not escaped a second time.  In a sub()/gsub()
	# replacement a bare "&" stands for the matched text, hence the
	# "\\&" spelling for a literal ampersand.
	#
	# NOTE(review): the entity names were reconstructed; the replacement
	# strings in the reviewed copy had lost them -- confirm against the
	# upstream file.
	gsub(/&/, "\\&amp;", str)
	gsub(/</, "\\&lt;", str)
	gsub(/>/, "\\&gt;", str)
	sub(/^ /, "\\&nbsp;", str)
	return str
}
108 | |||
function normalize_ht_formats(    want, name, i, k)
{
	# Convert the requested output formats into a set.
	#
	# On entry HT_FORMATS is an index -> entry list as produced by
	# split(); process_arguments() leaves HT_FORMATS[1] = 0 when no list
	# was given.  On exit HT_FORMATS is keyed by format name (the form
	# buflush() and ht_print() test and iterate over) and
	# HT_FORMATS_COUNT is the number of enabled formats.
	#
	# An entry is a format name, "all" (every value of
	# HT_FORMATS_AVAILABLE), or "-name" to exclude a format.  Exclusions
	# are applied after all inclusions, so the result does not depend on
	# entry order or on awk's unspecified `for (k in array)' order.
	# A list made only of exclusions enables nothing.
	#
	# WANT, NAME, I and K are locals; call with no arguments.

	# An empty or zero first entry means "no request": enable everything.
	if (! HT_FORMATS[1]) {
		HT_FORMATS[1] = "all"
	}

	# Pass 1: inclusions.  The loop variable is the index; the format
	# name is the element value.
	for (i in HT_FORMATS) {
		name = HT_FORMATS[i]
		if (name == "all") {
			for (k in HT_FORMATS_AVAILABLE) {
				want[HT_FORMATS_AVAILABLE[k]] = 1
			}
		} else if (name != "" && name !~ /^-/) {
			want[name] = 1
		}
	}

	# Pass 2: exclusions.
	for (i in HT_FORMATS) {
		name = HT_FORMATS[i]
		if (name ~ /^-/) {
			delete want[substr(name, 2)]
		}
	}

	# Rebuild HT_FORMATS keyed by name.  split("", A) is the portable way
	# to empty an array; without it the numeric keys would remain and be
	# counted and printed as formats.
	split("", HT_FORMATS)
	HT_FORMATS_COUNT = 0
	for (name in want) {
		HT_FORMATS[name] = name
		HT_FORMATS_COUNT++
	}
}
140 | |||
function process_arguments(    a, arg, val, kind)
{
	# Scan ARGV for this program's options and remove them from ARGV so
	# that awk does not try to open them as input files.
	#
	#   -c LIST, --chars=LIST    HTML tag <-> markup character
	#                            correspondence, stored in HT_TAGCHARS
	#   -f LIST, --format=LIST   output formats, stored in HT_FORMATS
	#
	# LIST is comma-separated and stored with split(), i.e. indexed from
	# 1.  HT_FORMATS[1] and HT_TAGCHARS[1] are preset to 0 so the caller
	# can tell that an option was not given.  A trailing -c/-f with no
	# value stores an empty list.
	#
	# NOTE(review): awk parses its own options before the operands, so a
	# leading "-f" may be taken by awk itself unless the caller passes
	# "--" first -- confirm how this script is meant to be invoked.
	#
	# A, ARG, VAL and KIND are locals; call with no arguments.
	HT_FORMATS[1] = 0
	HT_TAGCHARS[1] = 0

	# Walk by ARGC rather than until a false element, so an empty or "0"
	# argument does not end the scan early.
	for (a = 1; a < ARGC; a++) {
		arg = ARGV[a]
		if (arg == "-c" || arg ~ /^--chars=/) {
			kind = "chars"
		} else if (arg == "-f" || arg ~ /^--format=/) {
			kind = "format"
		} else {
			continue
		}
		delete ARGV[a]

		if (arg ~ /^--/) {
			# Long form: the value follows the first "=".
			val = arg
			sub(/^[^=]*=/, "", val)
		} else {
			# Short form: the value is the next argument, if any.
			a++
			val = (a < ARGC) ? ARGV[a] : ""
			delete ARGV[a]
		}

		if (kind == "chars") {
			split(val, HT_TAGCHARS, ",")
		} else {
			split(val, HT_FORMATS, ",")
		}
	}
}