diff options
author | Case Duckworth | 2024-05-09 22:28:47 -0500 |
---|---|---|
committer | Case Duckworth | 2024-05-09 22:28:47 -0500 |
commit | 6b63dad736d44ef0e61f4ca1aa95e07430b89af3 (patch) | |
tree | 37889195ba326025273a26501fee18d63b39e32f | |
parent | Begin an sh impl (diff) | |
download | jimmy-6b63dad736d44ef0e61f4ca1aa95e07430b89af3.tar.gz jimmy-6b63dad736d44ef0e61f4ca1aa95e07430b89af3.zip |
Finish shell impl
This version includes:
- customizable format specifiers for all elements
- line and buffer filtering, which makes all sorts of things possible
- and more!
-rwxr-xr-x | jimmy | 373 | ||||
-rwxr-xr-x | jimmy.sh | 137 | ||||
-rw-r--r-- | test.gmi | 8 |
3 files changed, 194 insertions, 324 deletions
diff --git a/jimmy b/jimmy index aa79183..5632b5e 100755 --- a/jimmy +++ b/jimmy | |||
@@ -1,208 +1,209 @@ | |||
1 | #!/bin/sh | 1 | #!/bin/sh |
2 | { dummy=":" "exec" "awk" "-f" "$0" "$@"; } # -*- awk -*- | ||
3 | |||
4 | #: # h1 | ||
5 | #: ## h2 | ||
6 | #: ### h3 | ||
7 | #: => link | ||
8 | #: * list | ||
9 | #: > quote | ||
10 | #: ``` ... ``` verbatim | ||
11 | |||
12 | BEGIN { # configuration | ||
13 | if (!to) to = "html" # default: html | ||
14 | |||
15 | if (to == "html") { | ||
16 | # blocks | ||
17 | isblock["paragraph"] = 1 | ||
18 | opener["paragraph"] = "<p>" | ||
19 | closer["paragraph"] = "</p>\n" | ||
20 | linefmt["paragraph"] = "%s\n" | ||
21 | isblock["verbatim"] = 1 | ||
22 | opener["verbatim"] = "<pre><code>" | ||
23 | closer["verbatim"] = "</code></pre>\n" | ||
24 | linefmt["verbatim"] = "%s\n" | ||
25 | isblock["quote"] = 1 | ||
26 | opener["quote"] = "<blockquote>" | ||
27 | closer["quote"] = "</blockquote>\n" | ||
28 | linefmt["quote"] = "%s\n" | ||
29 | isblock["list"] = 1 | ||
30 | opener["list"] = "<ul>\n" | ||
31 | closer["list"] = "</ul>\n" | ||
32 | linefmt["list"] = "<li>%s</li>\n" | ||
33 | isblock["linklist"] = 1 | ||
34 | opener["linklist"] = "<ul class=\"linklist\">\n" | ||
35 | closer["linklist"] = "</ul>\n" | ||
36 | linefmt["linklist"] = "<li><a href=\"%s\">%s</a></li>\n" | ||
37 | # lines | ||
38 | isblock["header"] = 1 | ||
39 | linefmt["header", 1] = "<h1>%s</h1>\n" | ||
40 | linefmt["header", 2] = "<h2>%s</h2>\n" | ||
41 | linefmt["header", 3] = "<h3>%s</h3>\n" | ||
42 | isblock["link"] = 0 | ||
43 | linefmt["link"] = "<a href=\"%s\">%s</a>\n" | ||
44 | # escapes -- TODO: rethink these. | ||
45 | ## I think the best solution is to have a pair of arrays -- | ||
46 | ## esc_orig and esc_repl. These will have keys in the | ||
47 | ## [n, block] format to keep them straight. | ||
48 | esc["verbatim", 0, "&"] = "\\&" | ||
49 | esc["verbatim", 8, "<"] = "\\<" | ||
50 | esc["verbatim", 9, ">"] = "\\>" | ||
51 | # fill -- # fill[BLOCK] can be a width, 0, or -1. | ||
52 | ## 0 => do nothing . -1 => all one line | ||
53 | fill["paragraph"] = 0 | ||
54 | fill["quote"] = 0 | ||
55 | # collapse whitespace | ||
56 | collapse_blanks = 1 | ||
57 | # collapse_blanks is a boolean: collapse >1 blank lines? | ||
58 | } else if (to == "gemini") { | ||
59 | # blocks | ||
60 | isblock["paragraph"] = 1 | ||
61 | opener["paragraph"] = "" | ||
62 | closer["paragraph"] = "\n" | ||
63 | linefmt["paragraph"] = "%s\n" | ||
64 | isblock["verbatim"] = 1 | ||
65 | opener["verbatim"] = "```\n" | ||
66 | closer["verbatim"] = "```\n" | ||
67 | linefmt["verbatim"] = "%s\n" | ||
68 | isblock["quote"] = 1 | ||
69 | opener["quote"] = "> " | ||
70 | closer["quote"] = "\n" | ||
71 | linefmt["quote"] = "%s" | ||
72 | isblock["list"] = 1 | ||
73 | opener["list"] = "" | ||
74 | closer["list"] = "" | ||
75 | linefmt["list"] = "* %s\n" | ||
76 | isblock["linklist"] = 1 | ||
77 | opener["linklist"] = "" | ||
78 | closer["linklist"] = "" | ||
79 | linefmt["linklist"] = "=> %s %s\n" | ||
80 | # lines | ||
81 | isblock["header"] = 1 | ||
82 | linefmt["header", 1] = "# %s\n" | ||
83 | linefmt["header", 2] = "## %s\n" | ||
84 | linefmt["header", 3] = "### %s\n" | ||
85 | isblock["link"] = 0 | ||
86 | linefmt["link"] = "@NL@=> %s %s@NL@" | ||
87 | # escapes | ||
88 | # fill -- # fill[BLOCK] can be a width, 0, or -1. | ||
89 | ## 0 => do nothing . -1 => all one line | ||
90 | fill["paragraph"] = -1 | ||
91 | fill["quote"] = -1 | ||
92 | # collapse whitespace | ||
93 | collapse_blanks = 1 | ||
94 | } else die("Unknown `to' type: `" to "'") | ||
95 | } | ||
96 | 2 | ||
97 | /^```/ { | 3 | ### Initialize |
98 | bl = BLOCK | 4 | # init buffers |
99 | close_block() | 5 | buff="$(mktemp)" |
100 | if (bl != "verbatim") BLOCK = "verbatim" | 6 | lbuf="$(mktemp)" |
101 | bl = "" | 7 | trap 'rm "$buff" "$lbuf"' EXIT INT KILL |
102 | next | 8 | |
9 | # init state | ||
10 | prev= | ||
11 | curr= | ||
12 | verbatim=false | ||
13 | # (tuneables) | ||
14 | nl='::NL::' | ||
15 | sp='::SP::' | ||
16 | to=html | ||
17 | |||
18 | ### Formats | ||
19 | ## HTML and GMI are given here. Other formats can be defined in their | ||
20 | ## own files and they'll be sourced. | ||
21 | |||
22 | ## NOTES | ||
23 | # should we allow modifying variables from the environment ? | ||
24 | |||
25 | html() { | ||
26 | fmtbuff_hd_1="<h1>%s</h1>$nl" | ||
27 | fmtline_hd_1="%s" | ||
28 | fmtbuff_hd_2="<h2>%s</h2>$nl" | ||
29 | fmtline_hd_2="%s" | ||
30 | fmtbuff_hd_3="<h3>%s</h3>$nl" | ||
31 | fmtline_hd_3="%s" | ||
32 | fmtbuff_quot="<blockquote>$nl%s</blockquote>$nl" | ||
33 | fmtline_quot="%s$nl" | ||
34 | fmtbuff_list="<ul>$nl%s</ul>$nl" | ||
35 | fmtline_list="<li>%s</li>$nl" | ||
36 | fmtbuff_para="<p>%s</p>$nl" | ||
37 | fmtline_para="%s$nl" | ||
38 | fmtline_plnk="<a href=\"%s\">%s</a>$nl" | ||
39 | fmtbuff_link="<ul class=\"links\">$nl%s</ul>$nl" | ||
40 | fmtline_link="<li><a href=\"%s\">%s</li>$nl" | ||
41 | fmtbuff_verb="<pre><code>%s</code></pre>$nl" | ||
42 | fmtline_verb="%s$nl" | ||
43 | fmtbuff_blank="$nl" | ||
44 | fmtline_blank="$nl" | ||
103 | } | 45 | } |
104 | 46 | ||
105 | BLOCK == "verbatim" { | 47 | gmi() { |
106 | for (s in verbatim_esc) gsub(verbatim_esc[s], verbatim_repl[s]) | 48 | fmtbuff_hd_1="# %s$nl" |
107 | bufpush($0 "\n") | 49 | fmtline_hd_1="%s" |
108 | next | 50 | fmtbuff_hd_2="## %s$nl" |
51 | fmtline_hd_2="%s" | ||
52 | fmtbuff_hd_3="### %s$nl" | ||
53 | fmtline_hd_3="%s" | ||
54 | fmtbuff_quot="> %s$nl" | ||
55 | fmtline_quot="%s$sp" | ||
56 | fmtbuff_list="%s$nl" | ||
57 | fmtline_list="* %s$nl" | ||
58 | fmtbuff_para="%s$nl" | ||
59 | fmtline_para="%s$sp" | ||
60 | fmtline_plnk="$nl=> %s %s$nl" | ||
61 | fmtbuff_link="%s$nl" | ||
62 | fmtline_link="=> %s %s" | ||
63 | fmtbuff_verb="\`\`\`$nl%s\`\`\`$nl" | ||
64 | fmtline_verb="%s$nl" | ||
65 | fmtbuff_blank="$nl" | ||
66 | fmtline_blank="$nl" | ||
109 | } | 67 | } |
110 | 68 | ||
111 | /^#/ { | 69 | ### Filters |
112 | close_block() | 70 | |
113 | match($0, /^#+/) | 71 | filter_buff() { |
114 | bufpush(sprintf(linefmt["header", RLENGTH], collect(2))) | 72 | case "$1" in |
115 | BLOCK = "header" | 73 | (html) |
116 | next | 74 | sed -e "s/$nl/\n/g" -e "s/$sp/ /g" | # fix whitespace |
75 | sed 's#\*\([^*]*\)\*#<b>\1</b>#g' | # *strong* | ||
76 | sed 's#_\([^_]*\)_#<i>\1</i>#g' | # _emph_ | ||
77 | sed 's#`\([^`]*\)`#<code>\1</code>#' # `code` | ||
78 | cat | ||
79 | ;; | ||
80 | (*) cat ;; | ||
81 | esac | ||
117 | } | 82 | } |
118 | 83 | ||
119 | /^=>/ { | 84 | filter_line() { |
120 | if (BLOCK == "paragraph" && !isblock["link"]) { | 85 | case "$1" in |
121 | bufpush(sprintf(linefmt["link"], $2, collect(3))) | 86 | (*) cat ;; |
122 | next | 87 | esac |
123 | } | ||
124 | if (BLOCK != "linklist") close_block() | ||
125 | BLOCK = "linklist" | ||
126 | bufpush(sprintf(linefmt[BLOCK], $2, collect(3))) | ||
127 | next | ||
128 | } | 88 | } |
129 | 89 | ||
130 | /^\*/ { | 90 | ### Processing |
131 | if (BLOCK != "list") close_block() | ||
132 | BLOCK = "list" | ||
133 | bufpush(sprintf(linefmt[BLOCK], collect(2))) | ||
134 | next | ||
135 | } | ||
136 | 91 | ||
137 | /^>/ { | 92 | ## Utility functions |
138 | if (BLOCK != "quote") close_block() | ||
139 | BLOCK = "quote" | ||
140 | bufpush(sprintf(linefmt[BLOCK], collect(2))) | ||
141 | next | ||
142 | } | ||
143 | 93 | ||
144 | /^$/ { | 94 | buffpush() { |
145 | if (BLOCK == "verbatim") bufpush("\n") | 95 | tag="$1"; shift |
146 | else close_block() | 96 | printf "$(eval echo "\$fmtline_$tag")" "$@" | |
147 | next | 97 | filter_line "$to" >> "$buff" |
148 | } | 98 | } |
149 | 99 | ||
150 | { | 100 | buffclose() { |
151 | if (BLOCK != "paragraph") close_block() | 101 | b="$(cat<"$buff")" |
152 | BLOCK = "paragraph" | 102 | test -n "$b" || return |
153 | bufpush(sprintf(linefmt[BLOCK], $0)) | 103 | printf "$(eval echo "\$fmtbuff_$1")" "$b" | filter_buff "$to" |
154 | next | 104 | :>"$buff" |
155 | } | 105 | } |
156 | 106 | ||
157 | END { close_block(); printf "\n" } | 107 | ## Where the magic happens |
158 | 108 | process() { | |
159 | function close_block () { | 109 | set -f |
160 | if (!BLOCK) { | 110 | while read -r sigil line |
161 | if (collapse_blanks) return | 111 | do |
162 | else printf "\n" | 112 | if $verbatim && test "$sigil" != '```' |
163 | } | 113 | then |
164 | 114 | buffpush verb "$sigil $line" | |
165 | if (!isblock[BLOCK]) return | 115 | continue |
166 | if (fill[BLOCK]) BUFFER = buffill(BUFFER, fill[BLOCK]) | 116 | fi |
167 | 117 | ||
168 | for (e in esc) { | 118 | case "$sigil" in |
169 | if (BLOCK && match(e, "^" BLOCK ".[0-9]")) { | 119 | ('```') |
170 | gsub(substr(e, RLENGTH+2), esc[e], BUFFER) | 120 | if $verbatim |
171 | } | 121 | then |
172 | } | 122 | buffclose verb |
173 | 123 | verbatim=false | |
174 | printf("%s%s%s\n", opener[BLOCK], BUFFER, closer[BLOCK]) | 124 | prev= |
175 | BUFFER = BLOCK = "" | 125 | else |
126 | buffclose "$prev" | ||
127 | verbatim=true | ||
128 | fi | ||
129 | continue | ||
130 | ;; | ||
131 | ('=>') | ||
132 | printf '%s\n' "$line" > "$lbuf" | ||
133 | read -r url title < "$lbuf" | ||
134 | if test "$curr" = para | ||
135 | then | ||
136 | buffpush plnk "$url" "$title" | ||
137 | continue | ||
138 | else curr=link | ||
139 | fi | ||
140 | ;; | ||
141 | ('#'*) curr=hd_${#sigil} ;; | ||
142 | ('>') curr=quot ;; | ||
143 | ('*') curr=list ;; | ||
144 | ('') curr=blank ;; | ||
145 | (*) | ||
146 | curr=para | ||
147 | line="$sigil $line" | ||
148 | ;; | ||
149 | esac | ||
150 | |||
151 | test "$curr" = "$prev" || buffclose "$prev" | ||
152 | prev="$curr" | ||
153 | |||
154 | if test "$curr" = verb | ||
155 | then | ||
156 | buffpush "$curr" "$line" | ||
157 | continue | ||
158 | fi | ||
159 | |||
160 | if test "$curr" = link | ||
161 | then buffpush "$curr" "$url" "$title" | ||
162 | else buffpush "$curr" "$line" | ||
163 | fi | ||
164 | done | ||
165 | |||
166 | buffclose "$curr" | ||
176 | } | 167 | } |
177 | 168 | ||
178 | function collect (begin, end, out) { | 169 | ### Entry point |
179 | for (f = (begin?begin:1); f <= (end?end:NF); f++) | 170 | |
180 | out = out (out?" ":"") $f | 171 | usage() { |
181 | return out | 172 | cat <<EOF >&2 |
173 | jimmy: convert gmi to other formats | ||
174 | usage: jimmy [-h] [-t FORMAT] [FILE...] | ||
175 | If no FILE is given on the command line, jimmy reads standard input. | ||
176 | options: | ||
177 | -h show this help and exit | ||
178 | -t FORMAT | ||
179 | convert gmi to FORMAT. html is default, gmi is built-in. | ||
180 | you can also pass the name of a file that will be sourced. | ||
181 | EOF | ||
182 | } | 182 | } |
183 | 183 | ||
184 | function bufpush (str) { | 184 | main() { |
185 | BUFFER = BUFFER str | 185 | while getopts ht:x OPT |
186 | do | ||
187 | case "$OPT" in | ||
188 | (h) usage 0 ;; | ||
189 | (t) to="$OPTARG" ;; | ||
190 | (x) set -x ;; | ||
191 | (*) usage 1 ;; | ||
192 | esac | ||
193 | done | ||
194 | shift $((OPTIND - 1)) | ||
195 | |||
196 | case "$to" in | ||
197 | (html|gmi) "$to" ;; | ||
198 | (*) . "$to" || { | ||
199 | echo >&2 "Can't find file: '$to'" | ||
200 | exit 2 | ||
201 | } | ||
202 | ;; | ||
203 | esac | ||
204 | |||
205 | # while read requires a final newline | ||
206 | (cat "${@:--}"; echo) | process | ||
186 | } | 207 | } |
187 | 208 | ||
188 | function buffill(buf, width, out) { | 209 | main "$@" |
189 | if (width < 0) { | ||
190 | gsub("\n", " ", buf) | ||
191 | out = buf | ||
192 | } | ||
193 | else { | ||
194 | split(buf, arr) | ||
195 | nline = 0 | ||
196 | for (w=1;w<=length(arr);w++) { | ||
197 | if (nline + length(arr[w]) >= width) { | ||
198 | out = out (out?"\n":"") arr[w] | ||
199 | nline = length(arr[w]) | ||
200 | } else { | ||
201 | out = out (out?" ":"") arr[w] | ||
202 | nline += length(arr[w]) + 1 | ||
203 | } | ||
204 | } | ||
205 | } | ||
206 | gsub("@NL@", "\n", out) | ||
207 | return out | ||
208 | } | ||
diff --git a/jimmy.sh b/jimmy.sh deleted file mode 100755 index f039f4f..0000000 --- a/jimmy.sh +++ /dev/null | |||
@@ -1,137 +0,0 @@ | |||
1 | #!/bin/sh | ||
2 | |||
3 | BLOCK= | ||
4 | BUFFER= | ||
5 | |||
6 | process() { | ||
7 | while read -r LINE | ||
8 | do | ||
9 | if test "$BLOCK" = verbatim | ||
10 | then | ||
11 | printf '%s\n' "$LINE" | ||
12 | continue | ||
13 | fi | ||
14 | |||
15 | set -- $LINE | ||
16 | |||
17 | case "$LINE" in | ||
18 | ('```') | ||
19 | if test "$BLOCK" = verbatim | ||
20 | then BLOCK= | ||
21 | else BLOCK=verbatim | ||
22 | fi | ||
23 | ;; | ||
24 | ('') | ||
25 | if test "$BLOCK" = verbatim | ||
26 | then bufpush $'\n' | ||
27 | else bufclose | ||
28 | fi | ||
29 | ;; | ||
30 | ('=>'*) link "$@" ;; | ||
31 | ('#'*) header "$@" ;; | ||
32 | ('*'*) shift; blknew list "$*" ;; | ||
33 | ('>'*) shift; blknew quote "$*" ;; | ||
34 | (*) shift; blknew paragraph "$*" ;; | ||
35 | esac | ||
36 | done | ||
37 | bufclose | ||
38 | } | ||
39 | |||
40 | blknew() { | ||
41 | test "$BLOCK" = "$1" || bufclose | ||
42 | bufpush "$(printf "$(eval echo "\$format_$BLOCK")" "$@")" | ||
43 | BLOCK="$1" | ||
44 | } | ||
45 | |||
46 | bufclose() { | ||
47 | if test -z "$BLOCK" | ||
48 | then "$COLLAPSE_BLANKS" && return | ||
49 | else newline; return | ||
50 | fi | ||
51 | |||
52 | # blockp "$BLOCK" || return | ||
53 | # fillp $BLOCK && buffill "$(fillp $BLOCK)" | ||
54 | |||
55 | # TODO: escape shit | ||
56 | |||
57 | printf '%s%s%s\n' \ | ||
58 | "$(echo eval "\$opener_$BLOCK")" \ | ||
59 | "$BUFFER" \ | ||
60 | "$(echo eval "\$closer_$BLOCK")" | ||
61 | |||
62 | BLOCK= | ||
63 | } | ||
64 | |||
65 | buffill() { # buffill WIDTH | ||
66 | if test $1 -lt 0 | ||
67 | then BUFFER="$(printf '%s\n' "$BUFFER" | tr '\n' ' ')" | ||
68 | else | ||
69 | out= | ||
70 | nline=0 | ||
71 | printf '%s\n' "$BUFFER" | sed 's/[ \t]\+/\n/g' | | ||
72 | while read -r word | ||
73 | do | ||
74 | if test $((nline + ${#word})) -ge "$1" | ||
75 | then | ||
76 | out="${out}"${out:+$'\n'}"${word}" | ||
77 | nline=${#word} | ||
78 | else | ||
79 | out="${out}${out:+ }${word}" | ||
80 | nline=$((nline + ${#word} + 1)) | ||
81 | fi | ||
82 | done | ||
83 | BUFFER="$out" | ||
84 | fi | ||
85 | } | ||
86 | |||
87 | bufpush() { BUFFER="${BUFFER}$@"; } | ||
88 | |||
89 | fillp() { | ||
90 | : | ||
91 | } | ||
92 | |||
93 | header() { | ||
94 | bufclose | ||
95 | lvl=${#1}; shift | ||
96 | bufpush "$(printf "$(eval echo "\$format_h$lvl")" "$*")" | ||
97 | BLOCK=header | ||
98 | } | ||
99 | |||
100 | link() { | ||
101 | url="$2"; shift 2 | ||
102 | if test "$BLOCK" = paragraph #&& ! blockp link | ||
103 | then bufpush "$(printf "$format_link" "$url" "$*")" | ||
104 | else blknew linklist "$url" "$*" | ||
105 | fi | ||
106 | } | ||
107 | |||
108 | newline() { printf '\n'; } | ||
109 | |||
110 | html() { | ||
111 | format_link='<a href="%s">%s</a>\n' | ||
112 | format_h1='<h1>%s</h1>\n' | ||
113 | format_h2='<h2>%s</h2>\n' | ||
114 | format_h3='<h3>%s</h3>\n' | ||
115 | opener_verbatim='<pre><code>' | ||
116 | closer_verbatim='</code></pre>\n' | ||
117 | format_verbatim='%s\n' | ||
118 | opener_paragraph='<p>' | ||
119 | closer_paragraph='</p>\n' | ||
120 | format_paragraph='%s\n' | ||
121 | opener_quote='<blockquote>' | ||
122 | closer_quote='</blockquote>\n' | ||
123 | format_quote='%s\n' | ||
124 | opener_list='<ul>\n' | ||
125 | closer_list='</ul>\n' | ||
126 | format_list='<li>%s</li>' | ||
127 | opener_linklist='<ul class="linklist">' | ||
128 | closer_linklist='</ul>' | ||
129 | format_linklist="$(printf "$format_list" "$format_link")" | ||
130 | } | ||
131 | |||
132 | main() { | ||
133 | html | ||
134 | process "$@" | ||
135 | } | ||
136 | |||
137 | main "$@" | ||
diff --git a/test.gmi b/test.gmi index 8d8e178..f2dc0dc 100644 --- a/test.gmi +++ b/test.gmi | |||
@@ -24,4 +24,10 @@ for (a=1;a<=4;a++) { | |||
24 | => example.com link list 2 | 24 | => example.com link list 2 |
25 | => example.com link list 3 | 25 | => example.com link list 3 |
26 | 26 | ||
27 | ok | 27 | ok, now for another test: |
28 | will *strong* in-line text be converted? | ||
29 | as well as `code`, _emph_ and such? | ||
30 | what if *i _nest_ them* | ||
31 | what if *i _nest them* wrong_ ? | ||
32 | what about *breaking them | ||
33 | over two lines?* \ No newline at end of file | ||