From 6b63dad736d44ef0e61f4ca1aa95e07430b89af3 Mon Sep 17 00:00:00 2001
From: Case Duckworth
Date: Thu, 9 May 2024 22:28:47 -0500
Subject: Finish shell impl

This version includes
- customizable format specifiers for all elements
- line and buffer filtering, which makes all sorts of things possible
- and more!
---
 jimmy    | 373 ++++++++++++++++++++++++++++++++-------------------------------
 jimmy.sh | 137 -----------------------
 test.gmi |   8 +-
 3 files changed, 194 insertions(+), 324 deletions(-)
 delete mode 100755 jimmy.sh
diff --git a/jimmy b/jimmy
index aa79183..5632b5e 100755
--- a/jimmy
+++ b/jimmy
@@ -1,208 +1,209 @@
 #!/bin/sh
-{ dummy=":" "exec" "awk" "-f" "$0" "$@"; } # -*- awk -*-
-
-#: # h1
-#: ## h2
-#: ### h3
-#: => link
-#: * list
-#: > quote
-#: ``` ... ```	verbatim
-
-BEGIN { # configuration
-	if (!to) to = "html" # default: html
-
-	if (to == "html") {
-		# blocks
-		isblock["paragraph"] = 1
-		opener["paragraph"] = "<p>"
-		closer["paragraph"] = "</p>\n"
-		linefmt["paragraph"] = "%s\n"
-		isblock["verbatim"] = 1
-		opener["verbatim"] = "<pre><code>"
-		closer["verbatim"] = "</code></pre>\n"
-		linefmt["verbatim"] = "%s\n"
-		isblock["quote"] = 1
-		opener["quote"] = "<blockquote>"
-		closer["quote"] = "</blockquote>\n"
-		linefmt["quote"] = "%s\n"
-		isblock["list"] = 1
-		opener["list"] = "<ul>\n"
-		closer["list"] = "</ul>\n"
-		linefmt["list"] = "<li>%s</li>\n"
-		isblock["linklist"] = 1
-		opener["linklist"] = "<ul class=\"linklist\">\n"
-		closer["linklist"] = "</ul>\n"
-		linefmt["linklist"] = "<li><a href=\"%s\">%s</a></li>\n"
-		# lines
-		isblock["header"] = 1
-		linefmt["header", 1] = "<h1>%s</h1>\n"
-		linefmt["header", 2] = "<h2>%s</h2>\n"
-		linefmt["header", 3] = "<h3>%s</h3>\n"
-		isblock["link"] = 0
-		linefmt["link"] = "<a href=\"%s\">%s</a>\n"
-		# escapes -- TODO: rethink these.
-		## I think the best solution is to have a pair of arrays --
-		## esc_orig and esc_repl.  These will have keys in the
-		## [n, block] format to keep them straight.
-		esc["verbatim", 0, "&"] = "\\&amp;"
-		esc["verbatim", 8, "<"] = "\\&lt;"
-		esc["verbatim", 9, ">"] = "\\&gt;"
-		# fill -- # fill[BLOCK] can be a width, 0, or -1.
-		## 0 => do nothing . -1 => all one line
-		fill["paragraph"] = 0
-		fill["quote"] = 0
-		# collapse whitespace
-		collapse_blanks = 1
-		# collapse_blanks is a boolean: collapse >1 blank lines?
-	} else if (to == "gemini") {
-		# blocks
-		isblock["paragraph"] = 1
-		opener["paragraph"] = ""
-		closer["paragraph"] = "\n"
-		linefmt["paragraph"] = "%s\n"
-		isblock["verbatim"] = 1
-		opener["verbatim"] = "```\n"
-		closer["verbatim"] = "```\n"
-		linefmt["verbatim"] = "%s\n"
-		isblock["quote"] = 1
-		opener["quote"] = "> "
-		closer["quote"] = "\n"
-		linefmt["quote"] = "%s"
-		isblock["list"] = 1
-		opener["list"] = ""
-		closer["list"] = ""
-		linefmt["list"] = "* %s\n"
-		isblock["linklist"] = 1
-		opener["linklist"] = ""
-		closer["linklist"] = ""
-		linefmt["linklist"] = "=> %s %s\n"
-		# lines
-		isblock["header"] = 1
-		linefmt["header", 1] = "# %s\n"
-		linefmt["header", 2] = "## %s\n"
-		linefmt["header", 3] = "### %s\n"
-		isblock["link"] = 0
-		linefmt["link"] = "@NL@=> %s %s@NL@"
-		# escapes
-		# fill -- # fill[BLOCK] can be a width, 0, or -1.
-		## 0 => do nothing . -1 => all one line
-		fill["paragraph"] = -1
-		fill["quote"] = -1
-		# collapse whitespace
-		collapse_blanks = 1
-	} else die("Unknown `to' type: `" to "'")
-}
 
-/^```/ {
-	bl = BLOCK
-	close_block()
-	if (bl != "verbatim") BLOCK = "verbatim"
-	bl = ""
-	next
+### Initialize
+# init buffers ($buff: text of the block being built; $lbuf: scratch file for re-splitting a line)
+buff="$(mktemp)" || exit 1
+lbuf="$(mktemp)" || exit 1
+trap 'rm -f "$buff" "$lbuf"' EXIT; trap 'exit 1' INT TERM # KILL cannot be trapped; exit runs the EXIT trap
+
+# init state
+prev=
+curr=
+verbatim=false
+# (tuneables)
+nl='::NL::'
+sp='::SP::'
+to=html
+
+### Formats
+## HTML and GMI are given here. Other formats can be defined in their
+## own files and they'll be sourced.
+
+## NOTES
+# should we allow modifying variables from the environment ?
+
+html() { # HTML templates: fmtline_* formats one source line, fmtbuff_* wraps a finished block
+	fmtbuff_hd_1="<h1>%s</h1>$nl"
+	fmtline_hd_1="%s"
+	fmtbuff_hd_2="<h2>%s</h2>$nl"
+	fmtline_hd_2="%s"
+	fmtbuff_hd_3="<h3>%s</h3>$nl"
+	fmtline_hd_3="%s"
+	fmtbuff_quot="<blockquote>$nl%s</blockquote>$nl"
+	fmtline_quot="%s$nl"
+	fmtbuff_list="<ul>$nl%s</ul>$nl"
+	fmtline_list="<li>%s</li>$nl"
+	fmtbuff_para="<p>%s</p>$nl"
+	fmtline_para="%s$nl"
+	fmtline_plnk="<a href=\"%s\">%s</a>$nl"
+	fmtbuff_link="<ul class=\"links\">$nl%s</ul>$nl"
+	fmtline_link="<li><a href=\"%s\">%s</a></li>$nl" # the anchor must be closed before </li>
+	fmtbuff_verb="<pre><code>%s</code></pre>$nl"
+	fmtline_verb="%s$nl"
+	fmtbuff_blank="$nl"
+	fmtline_blank="$nl"
 }
 
-BLOCK == "verbatim" {
-	for (s in verbatim_esc) gsub(verbatim_esc[s], verbatim_repl[s])
-	bufpush($0 "\n")
-	next
+gmi() { # gemtext templates: fmtline_* formats one source line, fmtbuff_* wraps a finished block
+	fmtbuff_hd_1="# %s$nl"
+	fmtline_hd_1="%s"
+	fmtbuff_hd_2="## %s$nl"
+	fmtline_hd_2="%s"
+	fmtbuff_hd_3="### %s$nl"
+	fmtline_hd_3="%s"
+	fmtbuff_quot="> %s$nl"
+	fmtline_quot="%s$sp"
+	fmtbuff_list="%s$nl"
+	fmtline_list="* %s$nl"
+	fmtbuff_para="%s$nl"
+	fmtline_para="%s$sp"
+	fmtline_plnk="$nl=> %s %s$nl"
+	fmtbuff_link="%s$nl"
+	fmtline_link="=> %s %s$nl" # one link per output line, like fmtline_list
+	fmtbuff_verb="\`\`\`$nl%s\`\`\`$nl"
+	fmtline_verb="%s$nl"
+	fmtbuff_blank="$nl"
+	fmtline_blank="$nl"
 }
 
-/^#/ {
-	close_block()
-	match($0, /^#+/)
-	bufpush(sprintf(linefmt["header", RLENGTH], collect(2)))
-	BLOCK = "header"
-	next
+### Filters
+
+filter_buff() {
+	# Expand the $nl/$sp placeholders for every format (not only html), then
+	# apply the format's inline markup ($1): *strong*, _emph_, `code` for html.
+	sed -e "s/$nl/\n/g" -e "s/$sp/ /g" |
+		case "$1" in
+			(html)
+				sed -e 's#\*\([^*]*\)\*#<b>\1</b>#g' \
+				    -e 's#_\([^_]*\)_#<i>\1</i>#g' \
+				    -e 's#`\([^`]*\)`#<code>\1</code>#g' ;;
+			(*) cat ;;
+		esac
 }
 
-/^=>/ {
-	if (BLOCK == "paragraph" && !isblock["link"]) {
-		bufpush(sprintf(linefmt["link"], $2, collect(3)))
-		next
-	}
-	if (BLOCK != "linklist") close_block()
-	BLOCK = "linklist"
-	bufpush(sprintf(linefmt[BLOCK], $2, collect(3)))
-	next
+filter_line() { # per-line hook: stdin is one formatted line, $1 is the target format
+	case "$1" in
+		(*) cat ;; # no format rewrites single lines yet: pass through unchanged
+	esac
 }
 
-/^\*/ {
-	if (BLOCK != "list") close_block()
-	BLOCK = "list"
-	bufpush(sprintf(linefmt[BLOCK], collect(2)))
-	next
-}
+### Processing
 
-/^>/ {
-	if (BLOCK != "quote") close_block()
-	BLOCK = "quote"
-	bufpush(sprintf(linefmt[BLOCK], collect(2)))
-	next
-}
+## Utility functions
 
-/^$/ {
-	if (BLOCK == "verbatim") bufpush("\n")
-	else close_block()
-	next
+buffpush() { # buffpush TAG ARG...: format ARGs with $fmtline_TAG and append the result to $buff
+	tag="$1"; shift
+	eval "fmt=\$fmtline_$tag" # direct lookup: no echo, so the format is never word-split
+	printf "$fmt" "$@" | filter_line "$to" >> "$buff"
 }
 
-{
-	if (BLOCK != "paragraph") close_block()
-	BLOCK = "paragraph"
-	bufpush(sprintf(linefmt[BLOCK], $0))
-	next
+buffclose() { # buffclose TAG: wrap the buffered text in $fmtbuff_TAG, print it, empty $buff
+	b="$(cat<"$buff")"
+	test -n "$b" || return 0 # an empty buffer is not an error
+	eval "fmt=\$fmtbuff_$1"; printf "$fmt" "$b" | filter_buff "$to"
+	:>"$buff"
 }
 
-END { close_block(); printf "\n" }
-
-function close_block () {
-	if (!BLOCK) {
-		if (collapse_blanks) return
-		else printf "\n"
-	}
-
-	if (!isblock[BLOCK]) return
-	if (fill[BLOCK]) BUFFER = buffill(BUFFER, fill[BLOCK])
-
-	for (e in esc) {
-		if (BLOCK && match(e, "^" BLOCK ".[0-9]")) {
-			gsub(substr(e, RLENGTH+2), esc[e], BUFFER)
-		}
-	}
-
-	printf("%s%s%s\n", opener[BLOCK], BUFFER, closer[BLOCK])
-	BUFFER = BLOCK = ""
+## Where the magic happens
+process() {
+	# Read gemtext on stdin, group lines into blocks and print each finished block.
+	set -f
+	while IFS= read -r raw
+	do
+		# Keep $raw intact for verbatim text; split a copy into sigil + rest.
+		printf '%s\n' "$raw" > "$lbuf"
+		read -r sigil line < "$lbuf"
+
+		if $verbatim && test "$sigil" != '```'
+		then
+			buffpush verb "$raw"
+			continue
+		fi
+
+		case "$sigil" in
+			('```')
+				if $verbatim
+				then
+					buffclose verb
+					verbatim=false
+					prev= curr=
+				else
+					buffclose "$prev"
+					verbatim=true
+				fi
+				continue
+				;;
+			('=>')
+				printf '%s\n' "$line" > "$lbuf"
+				read -r url title < "$lbuf"
+				if test "$curr" = para
+				then
+					buffpush plnk "$url" "$title"
+					continue
+				else curr=link
+				fi
+				;;
+			('#'*) curr=hd_${#sigil} ;;
+			('>') curr=quot ;;
+			('*') curr=list ;;
+			('') curr=blank ;;
+			(*)
+				curr=para
+				line="$sigil $line"
+				;;
+		esac
+
+		test "$curr" = "$prev" || buffclose "$prev"
+		prev="$curr"
+
+		if test "$curr" = link
+		then buffpush "$curr" "$url" "$title"
+		else buffpush "$curr" "$line"
+		fi
+	done
+
+	# An unterminated ``` block is still flushed as verbatim.
+	if $verbatim; then buffclose verb; else buffclose "$curr"; fi
 }
 
-function collect (begin, end,	out) {
-	for (f = (begin?begin:1); f <= (end?end:NF); f++)
-		out = out (out?" ":"") $f
-	return out
+### Entry point
+
+usage() { # print the help text on stderr; arguments are ignored and it returns (no exit)
+	cat <<EOF >&2
+jimmy: convert gmi to other formats
+usage: jimmy [-h] [-t FORMAT] [FILE...]
+If no FILE is given on the command line, jimmy reads standard input.
+options:
+ -h	show this help and exit
+ -t FORMAT
+	convert gmi to FORMAT. html is default, gmi is built-in.
+	you can also pass the name of a file that will be sourced.
+EOF
 }
 
-function bufpush (str) {
-	BUFFER = BUFFER str
+main() { # parse options, load the format definitions, then convert FILEs (or stdin)
+	while getopts ht:x OPT
+	do
+		case "$OPT" in
+			(h) usage; exit 0 ;; # usage only prints, so exit here
+			(t) to="$OPTARG" ;;
+			(x) set -x ;;
+			(*) usage; exit 1 ;;
+		esac
+	done
+	shift $((OPTIND - 1))
+
+	case "$to" in
+		(html|gmi) "$to" ;;
+		(*) . "$to" || {
+				  echo >&2 "Can't find file: '$to'"
+				  exit 2
+			  }
+		    ;;
+	esac
+
+	# while read requires a final newline
+	(cat "${@:--}"; echo) | process
 }
 
-function buffill(buf, width,	out) {
-	if (width < 0) {
-		gsub("\n", " ", buf)
-		out = buf
-	}
-	else {
-		split(buf, arr)
-		nline = 0
-		for (w=1;w<=length(arr);w++) {
-			if (nline + length(arr[w]) >= width) {
-				out = out (out?"\n":"") arr[w]
-				nline = length(arr[w])
-			} else {
-				out = out (out?" ":"") arr[w]
-				nline += length(arr[w]) + 1
-			}
-		}
-	}
-	gsub("@NL@", "\n", out)
-	return out
-}
+main "$@"
diff --git a/jimmy.sh b/jimmy.sh
deleted file mode 100755
index f039f4f..0000000
--- a/jimmy.sh
+++ /dev/null
@@ -1,137 +0,0 @@
-#!/bin/sh
-
-BLOCK=
-BUFFER=
-
-process() {
-	while read -r LINE
-	do
-		if test "$BLOCK" = verbatim
-		then
-			printf '%s\n' "$LINE"
-			continue
-		fi
-
-		set -- $LINE
-
-		case "$LINE" in
-			('```')
-				if test "$BLOCK" = verbatim
-				then BLOCK=
-				else BLOCK=verbatim
-				fi
-				;;
-			('')
-				if test "$BLOCK" = verbatim
-				then bufpush $'\n'
-				else bufclose
-				fi
-				;;
-			('=>'*) link "$@" ;;
-			('#'*) header "$@" ;;
-			('*'*) shift; blknew list "$*" ;;
-			('>'*) shift; blknew quote "$*" ;;
-			(*) shift; blknew paragraph "$*" ;;
-		esac
-	done
-	bufclose
-}
-
-blknew() {
-	test "$BLOCK" = "$1" || bufclose
-	bufpush "$(printf "$(eval echo "\$format_$BLOCK")" "$@")"
-	BLOCK="$1"
-}
-
-bufclose() {
-	if test -z "$BLOCK"
-	then "$COLLAPSE_BLANKS" && return
-	else newline; return
-	fi
-
-	# blockp "$BLOCK" || return
-	# fillp $BLOCK && buffill "$(fillp $BLOCK)"
-
-	# TODO: escape shit
-
-	printf '%s%s%s\n' \
-	       "$(echo eval "\$opener_$BLOCK")" \
-	       "$BUFFER" \
-	       "$(echo eval "\$closer_$BLOCK")"
-
-	BLOCK=
-}
-
-buffill() { # buffill WIDTH
-	if test $1 -lt 0
-	then BUFFER="$(printf '%s\n' "$BUFFER" | tr '\n' ' ')"
-	else
-		out=
-		nline=0
-		printf '%s\n' "$BUFFER" | sed 's/[ \t]\+/\n/g' |
-			while read -r word
-			do
-				if test $((nline + ${#word})) -ge "$1"
-				then
-					out="${out}"${out:+$'\n'}"${word}"
-					nline=${#word}
-				else
-					out="${out}${out:+ }${word}"
-					nline=$((nline + ${#word} + 1))
-				fi
-			done
-		BUFFER="$out"
-	fi
-}
-
-bufpush() { BUFFER="${BUFFER}$@"; }
-
-fillp() {
-	:
-}
-
-header() {
-	bufclose
-	lvl=${#1}; shift
-	bufpush "$(printf "$(eval echo "\$format_h$lvl")" "$*")"
-	BLOCK=header
-}
-
-link() {
-	url="$2"; shift 2
-	if test "$BLOCK" = paragraph #&& ! blockp link
-	then bufpush "$(printf "$format_link" "$url" "$*")"
-	else blknew linklist "$url" "$*"
-	fi
-}
-
-newline() { printf '\n'; }
-
-html() {
-	format_link='<a href="%s">%s</a>\n'
-	format_h1='<h1>%s</h1>\n'
-	format_h2='<h2>%s</h2>\n'
-	format_h3='<h3>%s</h3>\n'
-	opener_verbatim='<pre><code>'
-	closer_verbatim='</code></pre>\n'
-	format_verbatim='%s\n'
-	opener_paragraph='<p>'
-	closer_paragraph='</p>\n'
-	format_paragraph='%s\n'
-	opener_quote='<blockquote>'
-	closer_quote='</blockquote>\n'
-	format_quote='%s\n'
-	opener_list='<ul>\n'
-	closer_list='</ul>\n'
-	format_list='<li>%s</li>'
-	opener_linklist='<ul class="linklist">'
-	closer_linklist='</ul>'
-	format_linklist="$(printf "$format_list" "$format_link")"
-}
-
-main() {
-	html
-	process "$@"
-}
-
-main "$@"
diff --git a/test.gmi b/test.gmi
index 8d8e178..f2dc0dc 100644
--- a/test.gmi
+++ b/test.gmi
@@ -24,4 +24,10 @@ for (a=1;a<=4;a++) {
 => example.com link list 2
 => example.com link list 3
 
-ok
+ok, now for another test:
+will *strong* in-line text be converted?
+as well as `code`, _emph_ and such?
+what if *i _nest_ them*
+what if *i _nest them* wrong_ ?
+what about *breaking them
+over two lines?*
\ No newline at end of file
-- 
cgit 1.4.1-21-gabe81