From 6b63dad736d44ef0e61f4ca1aa95e07430b89af3 Mon Sep 17 00:00:00 2001
From: Case Duckworth
Date: Thu, 9 May 2024 22:28:47 -0500
Subject: Finish shell impl
This version includes
- customizable format specifiers for all elements
- line and buffer filtering, which makes all sorts of things possible
- and more!
---
jimmy | 373 ++++++++++++++++++++++++++++++++-------------------------------
jimmy.sh | 137 -----------------------
test.gmi | 8 +-
3 files changed, 194 insertions(+), 324 deletions(-)
delete mode 100755 jimmy.sh
diff --git a/jimmy b/jimmy
index aa79183..5632b5e 100755
--- a/jimmy
+++ b/jimmy
@@ -1,208 +1,209 @@
#!/bin/sh
-{ dummy=":" "exec" "awk" "-f" "$0" "$@"; } # -*- awk -*-
-
-#: # h1
-#: ## h2
-#: ### h3
-#: => link
-#: * list
-#: > quote
-#: ``` ... ``` verbatim
-
-BEGIN { # configuration
- if (!to) to = "html" # default: html
-
- if (to == "html") {
- # blocks
- isblock["paragraph"] = 1
- opener["paragraph"] = "
"
- closer["paragraph"] = "
\n"
- linefmt["paragraph"] = "%s\n"
- isblock["verbatim"] = 1
- opener["verbatim"] = ""
- closer["verbatim"] = "
\n"
- linefmt["verbatim"] = "%s\n"
- isblock["quote"] = 1
- opener["quote"] = ""
- closer["quote"] = "
\n"
- linefmt["quote"] = "%s\n"
- isblock["list"] = 1
- opener["list"] = "\n"
- linefmt["list"] = "%s\n"
- isblock["linklist"] = 1
- opener["linklist"] = "\n"
- closer["linklist"] = "
\n"
- linefmt["linklist"] = "%s\n"
- # lines
- isblock["header"] = 1
- linefmt["header", 1] = "%s
\n"
- linefmt["header", 2] = "%s
\n"
- linefmt["header", 3] = "%s
\n"
- isblock["link"] = 0
- linefmt["link"] = "%s\n"
- # escapes -- TODO: rethink these.
- ## I think the best solution is to have a pair of arrays --
- ## esc_orig and esc_repl. These will have keys in the
- ## [n, block] format to keep them straight.
- esc["verbatim", 0, "&"] = "\\&"
- esc["verbatim", 8, "<"] = "\\<"
- esc["verbatim", 9, ">"] = "\\>"
- # fill -- # fill[BLOCK] can be a width, 0, or -1.
- ## 0 => do nothing . -1 => all one line
- fill["paragraph"] = 0
- fill["quote"] = 0
- # collapse whitespace
- collapse_blanks = 1
- # collapse_blanks is a boolean: collapse >1 blank lines?
- } else if (to == "gemini") {
- # blocks
- isblock["paragraph"] = 1
- opener["paragraph"] = ""
- closer["paragraph"] = "\n"
- linefmt["paragraph"] = "%s\n"
- isblock["verbatim"] = 1
- opener["verbatim"] = "```\n"
- closer["verbatim"] = "```\n"
- linefmt["verbatim"] = "%s\n"
- isblock["quote"] = 1
- opener["quote"] = "> "
- closer["quote"] = "\n"
- linefmt["quote"] = "%s"
- isblock["list"] = 1
- opener["list"] = ""
- closer["list"] = ""
- linefmt["list"] = "* %s\n"
- isblock["linklist"] = 1
- opener["linklist"] = ""
- closer["linklist"] = ""
- linefmt["linklist"] = "=> %s %s\n"
- # lines
- isblock["header"] = 1
- linefmt["header", 1] = "# %s\n"
- linefmt["header", 2] = "## %s\n"
- linefmt["header", 3] = "### %s\n"
- isblock["link"] = 0
- linefmt["link"] = "@NL@=> %s %s@NL@"
- # escapes
- # fill -- # fill[BLOCK] can be a width, 0, or -1.
- ## 0 => do nothing . -1 => all one line
- fill["paragraph"] = -1
- fill["quote"] = -1
- # collapse whitespace
- collapse_blanks = 1
- } else die("Unknown `to' type: `" to "'")
-}
-/^```/ {
- bl = BLOCK
- close_block()
- if (bl != "verbatim") BLOCK = "verbatim"
- bl = ""
- next
+### Initialize
+# init buffers
+buff="$(mktemp)"
+lbuf="$(mktemp)"
+trap 'rm "$buff" "$lbuf"' EXIT INT KILL
+
+# init state
+prev=
+curr=
+verbatim=false
+# (tuneables)
+nl='::NL::'
+sp='::SP::'
+to=html
+
+### Formats
+## HTML and GMI are given here. Other formats can be defined in their
+## own files and they'll be sourced.
+
+## NOTES
+# should we allow modifying variables from the environment ?
+
+# html: set the format strings for HTML output.
+# fmtline_TAG is the printf format applied to each line pushed into the
+# buffer; fmtbuff_TAG is the printf format wrapped around the whole buffer
+# when the block is closed.  Every fmtbuff_* that should emit the buffer
+# must contain a %s, otherwise the collected lines are dropped.
+# NOTE(review): the element names below are the conventional HTML
+# equivalents of each gemtext line type -- confirm against the intended
+# markup before relying on exact tags or attributes.
+html() {
+    fmtbuff_hd_1="<h1>%s</h1>$nl"
+    fmtline_hd_1="%s"
+    fmtbuff_hd_2="<h2>%s</h2>$nl"
+    fmtline_hd_2="%s"
+    fmtbuff_hd_3="<h3>%s</h3>$nl"
+    fmtline_hd_3="%s"
+    fmtbuff_quot="<blockquote>$nl%s</blockquote>$nl"
+    fmtline_quot="%s$nl"
+    fmtbuff_list="<ul>$nl%s</ul>$nl"
+    fmtline_list="<li>%s</li>$nl"
+    fmtbuff_para="<p>%s</p>$nl"
+    fmtline_para="%s$nl"
+    # plnk: a link line met inside a paragraph (url, title)
+    fmtline_plnk="<a href=\"%s\">%s</a>$nl"
+    fmtbuff_link="<ul>$nl%s</ul>$nl"
+    fmtline_link="<li><a href=\"%s\">%s</a></li>$nl"
+    fmtbuff_verb="<pre><code>%s</code></pre>$nl"
+    fmtline_verb="%s$nl"
+    fmtbuff_blank="$nl"
+    fmtline_blank="$nl"
}
-BLOCK == "verbatim" {
- for (s in verbatim_esc) gsub(verbatim_esc[s], verbatim_repl[s])
- bufpush($0 "\n")
- next
+# gmi: set the format strings for gemtext output.
+# fmtline_TAG formats each line pushed into the buffer; fmtbuff_TAG wraps
+# the whole buffer when the block is closed.  Paragraph and quote lines
+# are joined with $sp so a block comes out as one long line.
+gmi() {
+    fmtbuff_hd_1="# %s$nl"
+    fmtline_hd_1="%s"
+    fmtbuff_hd_2="## %s$nl"
+    fmtline_hd_2="%s"
+    fmtbuff_hd_3="### %s$nl"
+    fmtline_hd_3="%s"
+    fmtbuff_quot="> %s$nl"
+    fmtline_quot="%s$sp"
+    fmtbuff_list="%s$nl"
+    fmtline_list="* %s$nl"
+    fmtbuff_para="%s$nl"
+    fmtline_para="%s$sp"
+    # plnk: a link line met inside a paragraph; it is put on its own line
+    fmtline_plnk="$nl=> %s %s$nl"
+    fmtbuff_link="%s$nl"
+    # Each link needs its own line terminator (as list items have),
+    # otherwise consecutive links in one block run together.
+    fmtline_link="=> %s %s$nl"
+    fmtbuff_verb="\`\`\`$nl%s\`\`\`$nl"
+    fmtline_verb="%s$nl"
+    fmtbuff_blank="$nl"
+    fmtline_blank="$nl"
}
-/^#/ {
- close_block()
- match($0, /^#+/)
- bufpush(sprintf(linefmt["header", RLENGTH], collect(2)))
- BLOCK = "header"
- next
+### Filters
+
+# filter_buff FORMAT
+# Post-process a closed block read from stdin and write it to stdout.
+# The $nl / $sp placeholders are expanded for every format, because every
+# format string is written in terms of them; format-specific inline
+# markup is then applied on top.
+filter_buff() {
+    sed -e "s/$nl/\n/g" -e "s/$sp/ /g" | # fix whitespace
+    case "$1" in
+    (html)
+        # sed works line by line, so markup spanning two lines is left alone
+        sed -e 's#\*\([^*]*\)\*#<strong>\1</strong>#g' \
+            -e 's#_\([^_]*\)_#<em>\1</em>#g' \
+            -e 's#`\([^`]*\)`#<code>\1</code>#g'
+        # the expressions above are, in order: *strong* _emph_ `code`
+        ;;
+    (*) cat ;;
+    esac
}
-/^=>/ {
- if (BLOCK == "paragraph" && !isblock["link"]) {
- bufpush(sprintf(linefmt["link"], $2, collect(3)))
- next
- }
- if (BLOCK != "linklist") close_block()
- BLOCK = "linklist"
- bufpush(sprintf(linefmt[BLOCK], $2, collect(3)))
- next
+# filter_line FORMAT
+# Per-line hook applied to each formatted line before it is appended to
+# the buffer.  No format rewrites individual lines yet, so whatever the
+# value of $1, stdin is copied to stdout unchanged.
+filter_line() {
+    cat
}
-/^\*/ {
- if (BLOCK != "list") close_block()
- BLOCK = "list"
- bufpush(sprintf(linefmt[BLOCK], collect(2)))
- next
-}
+### Processing
-/^>/ {
- if (BLOCK != "quote") close_block()
- BLOCK = "quote"
- bufpush(sprintf(linefmt[BLOCK], collect(2)))
- next
-}
+## Utility functions
-/^$/ {
- if (BLOCK == "verbatim") bufpush("\n")
- else close_block()
- next
+# buffpush TAG ARG...
+# Format ARG... with the line format for TAG (the variable fmtline_TAG),
+# pass the result through filter_line and append it to the block buffer.
+buffpush() {
+    tag="$1"; shift
+    # Read the format by assignment instead of `eval echo`: an unquoted
+    # expansion handed to echo collapses runs of whitespace and leaves
+    # the text open to echo's own option/escape handling.
+    eval "fmt=\$fmtline_$tag"
+    printf "$fmt" "$@" |
+        filter_line "$to" >> "$buff"
}
-{
- if (BLOCK != "paragraph") close_block()
- BLOCK = "paragraph"
- bufpush(sprintf(linefmt[BLOCK], $0))
- next
+# buffclose TAG
+# Emit the buffered block: wrap the buffer contents in the block format
+# for TAG (the variable fmtbuff_TAG), pass it through filter_buff to
+# stdout, then empty the buffer.  Does nothing when the buffer is empty.
+buffclose() {
+    b="$(cat<"$buff")"
+    test -n "$b" || return
+    # Read the format by assignment instead of `eval echo`, which would
+    # word-split the format and collapse its whitespace.
+    eval "fmt=\$fmtbuff_$1"
+    printf "$fmt" "$b" | filter_buff "$to"
+    :>"$buff"
}
-END { close_block(); printf "\n" }
-
-function close_block () {
- if (!BLOCK) {
- if (collapse_blanks) return
- else printf "\n"
- }
-
- if (!isblock[BLOCK]) return
- if (fill[BLOCK]) BUFFER = buffill(BUFFER, fill[BLOCK])
-
- for (e in esc) {
- if (BLOCK && match(e, "^" BLOCK ".[0-9]")) {
- gsub(substr(e, RLENGTH+2), esc[e], BUFFER)
- }
- }
-
- printf("%s%s%s\n", opener[BLOCK], BUFFER, closer[BLOCK])
- BUFFER = BLOCK = ""
+## Where the magic happens
+# process
+# Read gemtext from stdin line by line, classify each line by its leading
+# sigil, collect consecutive lines of the same type in the buffer and emit
+# each block through buffclose when the type changes.
+# State lives in the globals prev, curr and verbatim.
+process() {
+    set -f
+    while IFS= read -r raw
+    do
+        # Keep the untouched line for verbatim output, and let `read`
+        # split a copy into sigil + rest (trimming surrounding blanks).
+        printf '%s\n' "$raw" > "$lbuf"
+        read -r sigil line < "$lbuf"
+
+        if $verbatim && test "$sigil" != '```'
+        then
+            # preformatted text: indentation and spacing must survive
+            buffpush verb "$raw"
+            continue
+        fi
+
+        case "$sigil" in
+        ('```')
+            if $verbatim
+            then
+                buffclose verb
+                verbatim=false
+                prev=
+            else
+                buffclose "$prev"
+                verbatim=true
+            fi
+            continue
+            ;;
+        ('=>')
+            printf '%s\n' "$line" > "$lbuf"
+            read -r url title < "$lbuf"
+            # prev, not curr, says what the buffer holds right now:
+            # curr is stale after a verbatim block has been closed.
+            if test "$prev" = para
+            then
+                buffpush plnk "$url" "$title"
+                continue
+            else curr=link
+            fi
+            ;;
+        ('#'*) curr=hd_${#sigil} ;;
+        ('>') curr=quot ;;
+        ('*') curr=list ;;
+        ('') curr=blank ;;
+        (*)
+            # no sigil: the first word belongs to the paragraph text
+            curr=para
+            line="$sigil $line"
+            ;;
+        esac
+
+        test "$curr" = "$prev" || buffclose "$prev"
+        prev="$curr"
+
+        if test "$curr" = link
+        then buffpush "$curr" "$url" "$title"
+        else buffpush "$curr" "$line"
+        fi
+    done
+
+    # An unterminated fence still holds verbatim text in the buffer.
+    if $verbatim
+    then buffclose verb
+    else buffclose "$curr"
+    fi
}
-function collect (begin, end, out) {
- for (f = (begin?begin:1); f <= (end?end:NF); f++)
- out = out (out?" ":"") $f
- return out
+### Entry point
+
+# usage [STATUS]
+# Print the help text on stderr and exit with STATUS (default 0).
+usage() {
+    cat <<EOF >&2
+jimmy: convert gmi to other formats
+usage: jimmy [-h] [-t FORMAT] [FILE...]
+If no FILE is given on the command line, jimmy reads standard input.
+options:
+ -h show this help and exit
+ -t FORMAT
+ convert gmi to FORMAT. html is default, gmi is built-in.
+ you can also pass the name of a file that will be sourced.
+EOF
+    exit "${1:-0}"
}
-function bufpush (str) {
- BUFFER = BUFFER str
+# main [-h] [-t FORMAT] [FILE...]
+# Parse options, load the output format (a built-in function or a file to
+# source), then feed the input files -- or stdin when none are given --
+# through process.
+main() {
+    while getopts ht:x OPT
+    do
+        case "$OPT" in
+        (h) usage 0 ;;
+        (t) to="$OPTARG" ;;
+        (x) set -x ;;
+        (*) usage 1 ;;
+        esac
+    done
+    shift $((OPTIND - 1))
+
+    case "$to" in
+    (html|gmi) "$to" ;;
+    (*)
+        # Check before sourcing: `.` is a special builtin, and a
+        # non-interactive shell may abort when it cannot find the file,
+        # so an `|| { ... }` handler after it would never run.
+        if ! test -f "$to" || ! test -r "$to"
+        then
+            echo >&2 "Can't find file: '$to'"
+            exit 2
+        fi
+        # A name without a slash is looked up on PATH by `.`; anchor it
+        # to the current directory so the file tested above is the one
+        # that gets sourced.
+        case "$to" in
+        (*/*) . "$to" ;;
+        (*) . "./$to" ;;
+        esac
+        ;;
+    esac
+
+    # while read requires a final newline
+    (cat "${@:--}"; echo) | process
}
-function buffill(buf, width, out) {
- if (width < 0) {
- gsub("\n", " ", buf)
- out = buf
- }
- else {
- split(buf, arr)
- nline = 0
- for (w=1;w<=length(arr);w++) {
- if (nline + length(arr[w]) >= width) {
- out = out (out?"\n":"") arr[w]
- nline = length(arr[w])
- } else {
- out = out (out?" ":"") arr[w]
- nline += length(arr[w]) + 1
- }
- }
- }
- gsub("@NL@", "\n", out)
- return out
-}
+main "$@"
diff --git a/jimmy.sh b/jimmy.sh
deleted file mode 100755
index f039f4f..0000000
--- a/jimmy.sh
+++ /dev/null
@@ -1,137 +0,0 @@
-#!/bin/sh
-
-BLOCK=
-BUFFER=
-
-process() {
- while read -r LINE
- do
- if test "$BLOCK" = verbatim
- then
- printf '%s\n' "$LINE"
- continue
- fi
-
- set -- $LINE
-
- case "$LINE" in
- ('```')
- if test "$BLOCK" = verbatim
- then BLOCK=
- else BLOCK=verbatim
- fi
- ;;
- ('')
- if test "$BLOCK" = verbatim
- then bufpush $'\n'
- else bufclose
- fi
- ;;
- ('=>'*) link "$@" ;;
- ('#'*) header "$@" ;;
- ('*'*) shift; blknew list "$*" ;;
- ('>'*) shift; blknew quote "$*" ;;
- (*) shift; blknew paragraph "$*" ;;
- esac
- done
- bufclose
-}
-
-blknew() {
- test "$BLOCK" = "$1" || bufclose
- bufpush "$(printf "$(eval echo "\$format_$BLOCK")" "$@")"
- BLOCK="$1"
-}
-
-bufclose() {
- if test -z "$BLOCK"
- then "$COLLAPSE_BLANKS" && return
- else newline; return
- fi
-
- # blockp "$BLOCK" || return
- # fillp $BLOCK && buffill "$(fillp $BLOCK)"
-
- # TODO: escape shit
-
- printf '%s%s%s\n' \
- "$(echo eval "\$opener_$BLOCK")" \
- "$BUFFER" \
- "$(echo eval "\$closer_$BLOCK")"
-
- BLOCK=
-}
-
-buffill() { # buffill WIDTH
- if test $1 -lt 0
- then BUFFER="$(printf '%s\n' "$BUFFER" | tr '\n' ' ')"
- else
- out=
- nline=0
- printf '%s\n' "$BUFFER" | sed 's/[ \t]\+/\n/g' |
- while read -r word
- do
- if test $((nline + ${#word})) -ge "$1"
- then
- out="${out}"${out:+$'\n'}"${word}"
- nline=${#word}
- else
- out="${out}${out:+ }${word}"
- nline=$((nline + ${#word} + 1))
- fi
- done
- BUFFER="$out"
- fi
-}
-
-bufpush() { BUFFER="${BUFFER}$@"; }
-
-fillp() {
- :
-}
-
-header() {
- bufclose
- lvl=${#1}; shift
- bufpush "$(printf "$(eval echo "\$format_h$lvl")" "$*")"
- BLOCK=header
-}
-
-link() {
- url="$2"; shift 2
- if test "$BLOCK" = paragraph #&& ! blockp link
- then bufpush "$(printf "$format_link" "$url" "$*")"
- else blknew linklist "$url" "$*"
- fi
-}
-
-newline() { printf '\n'; }
-
-html() {
- format_link='%s\n'
- format_h1='%s
\n'
- format_h2='%s
\n'
- format_h3='%s
\n'
- opener_verbatim=''
- closer_verbatim='
\n'
- format_verbatim='%s\n'
- opener_paragraph=''
- closer_paragraph='
\n'
- format_paragraph='%s\n'
- opener_quote=''
- closer_quote='
\n'
- format_quote='%s\n'
- opener_list='\n'
- format_list='%s'
- opener_linklist=''
- format_linklist="$(printf "$format_list" "$format_link")"
-}
-
-main() {
- html
- process "$@"
-}
-
-main "$@"
diff --git a/test.gmi b/test.gmi
index 8d8e178..f2dc0dc 100644
--- a/test.gmi
+++ b/test.gmi
@@ -24,4 +24,10 @@ for (a=1;a<=4;a++) {
=> example.com link list 2
=> example.com link list 3
-ok
+ok, now for another test:
+will *strong* in-line text be converted?
+as well as `code`, _emph_ and such?
+what if *i _nest_ them*
+what if *i _nest them* wrong_ ?
+what about *breaking them
+over two lines?*
\ No newline at end of file
--
cgit 1.4.1-21-gabe81