From 75f5c10088f77adece45bf0d1cc6868d4fff95d2 Mon Sep 17 00:00:00 2001
From: Case Duckworth
Date: Mon, 13 May 2024 22:40:58 -0500
Subject: Add readme stub

---
 README.gmi |   4 ++
 jimmy      | 240 ++++++++++++++++++++++++++++++++++++++-----------------------
 test.gmi   |   6 +-
 3 files changed, 160 insertions(+), 90 deletions(-)
 create mode 100644 README.gmi
diff --git a/README.gmi b/README.gmi
new file mode 100644
index 0000000..3abdd2a
--- /dev/null
+++ b/README.gmi
@@ -0,0 +1,4 @@
+# jimmy --- convert .gmi to .html and other formats
+
+[to be documented]
+
diff --git a/jimmy b/jimmy
index ed6c2b7..f5dfa0f 100755
--- a/jimmy
+++ b/jimmy
@@ -4,16 +4,22 @@
 # init buffers
 buff="$(mktemp)"
 lbuf="$(mktemp)"
-trap 'rm "$buff" "$lbuf"' EXIT INT KILL
+meta="$(mktemp)"
+cleanup() { rm "$buff" "$lbuf" "$meta" 2>/dev/null; }	# remove all three temp files; rm errors (e.g. file already gone) are silenced
+trap cleanup EXIT INT	# SIGKILL cannot be caught by a trap, so it is not listed
 
 # init state
-prev=
-curr=
-verbatim=false
+prev=				# previous linetype
+curr=				# current linetype
+tmpl=				# template (optional)
+verbatimp=false		# in verbatim block?
+metap=true			# in metadata block?
+IPATH="$PWD"			# inclusion path
 # (tuneables)
-nl='::NL::'
-sp='::SP::'
-to=html
+: "${nl:=::NL::}"		# newline
+: "${sp:=::SP::}"		# space
+: "${te:=::END::}"		# template end
+: "${to:=html}"			# output format
 
 ### Formats
 ## HTML and GMI are given here. Other formats can be defined in their
@@ -23,64 +29,62 @@ to=html
 # should we allow modifying variables from the environment ?
 
 html() {
-	fmtbuff_hd_1="<h1>%s</h1>$nl"
-	fmtline_hd_1="%s"
-	fmtbuff_hd_2="<h2>%s</h2>$nl"
-	fmtline_hd_2="%s"
-	fmtbuff_hd_3="<h3>%s</h3>$nl"
-	fmtline_hd_3="%s"
-	fmtbuff_quot="<blockquote>$nl%s</blockquote>$nl"
-	fmtline_quot="%s$nl"
-	fmtbuff_list="<ul>$nl%s</ul>$nl"
-	fmtline_list="<li>%s</li>$nl"
-	fmtbuff_para="<p>%s</p>$nl"
-	fmtline_para="%s$nl"
-	fmtline_plnk="<a href=\"%s\">%s</a>$nl"
-	fmtbuff_link="<ul class=\"links\">$nl%s</ul>$nl"
-	fmtline_link="<li><a href=\"%s\">%s</li>$nl"
-	fmtbuff_verb="<pre><code>%s</code></pre>$nl"
-	fmtline_verb="%s$nl"
-	fmtbuff_blank="$nl"
-	fmtline_blank="$nl"
+	: "${fmtbuff_hd_1:=<h1>%s</h1>$nl}"	# HTML defaults; := leaves any format that is already set and non-empty untouched
+	: "${fmtline_hd_1:=%s}"
+	: "${fmtbuff_hd_2:=<h2>%s</h2>$nl}"
+	: "${fmtline_hd_2:=%s}"
+	: "${fmtbuff_hd_3:=<h3>%s</h3>$nl}"
+	: "${fmtline_hd_3:=%s}"
+	: "${fmtbuff_quot:=<blockquote>$nl%s</blockquote>$nl}"
+	: "${fmtline_quot:=%s$nl}"
+	: "${fmtbuff_list:=<ul>$nl%s</ul>$nl}"
+	: "${fmtline_list:=<li>%s</li>$nl}"
+	: "${fmtbuff_para:=<p>%s</p>$nl}"
+	: "${fmtline_para:=%s$nl}"
+	: "${fmtline_plnk:=<a href=\"%s\">%s</a>$nl}"
+	: "${fmtbuff_link:=<ul class=\"links\">$nl%s</ul>$nl}"
+	: "${fmtline_link:=<li><a href=\"%s\">%s</a></li>$nl}"	# the anchor must be closed before the list item
+	: "${fmtbuff_verb:=<pre><code>%s</code></pre>$nl}"
+	: "${fmtline_verb:=%s$nl}"
+	: "${fmtbuff_blank:=$nl}"
+	: "${fmtline_blank:=$nl}"
 }
 
 gmi() {
-	fmtbuff_hd_1="# %s$nl"
-	fmtline_hd_1="%s"
-	fmtbuff_hd_2="## %s$nl"
-	fmtline_hd_2="%s"
-	fmtbuff_hd_3="### %s$nl"
-	fmtline_hd_3="%s"
-	fmtbuff_quot="> %s$nl"
-	fmtline_quot="%s$sp"
-	fmtbuff_list="%s$nl"
-	fmtline_list="* %s$nl"
-	fmtbuff_para="%s$nl"
-	fmtline_para="%s$sp"
-	fmtline_plnk="$nl=> %s %s$nl"
-	fmtbuff_link="%s$nl"
-	fmtline_link="=> %s %s"
-	fmtbuff_verb="\`\`\`$nl%s\`\`\`$nl"
-	fmtline_verb="%s$nl"
-	fmtbuff_blank="$nl"
-	fmtline_blank="$nl"
+	: "${fmtbuff_hd_1:=# %s$nl}"	# gemtext defaults; := leaves any format that is already set and non-empty untouched
+	: "${fmtline_hd_1:=%s}"
+	: "${fmtbuff_hd_2:=## %s$nl}"
+	: "${fmtline_hd_2:=%s}"
+	: "${fmtbuff_hd_3:=### %s$nl}"
+	: "${fmtline_hd_3:=%s}"
+	: "${fmtbuff_quot:=> %s$nl}"
+	: "${fmtline_quot:=%s$sp}"
+	: "${fmtbuff_list:=%s$nl}"
+	: "${fmtline_list:=* %s$nl}"
+	: "${fmtbuff_para:=%s$nl}"
+	: "${fmtline_para:=%s$sp}"
+	: "${fmtline_plnk:=$nl=> %s %s$nl}"
+	: "${fmtbuff_link:=%s$nl}"
+	: "${fmtline_link:==> %s %s}"	# first = belongs to the := operator; the default value is "=> %s %s"
+	: "${fmtbuff_verb:=\`\`\`$nl%s\`\`\`$nl}"	# backslashes keep the backticks literal inside the double quotes
+	: "${fmtline_verb:=%s$nl}"
+	: "${fmtbuff_blank:=$nl}"
+	: "${fmtline_blank:=$nl}"
 }
 
 ### Filters
 
-filter_buff_html(){ cat; }
-
 filter_buff() {
 	f="filter_buff_$to"
-	if type "$f" | grep -q function
+	if type "$f" 2>/dev/null | grep -q function	# use filter_buff_<format> when type reports it as a function; type's error output is discarded
 	then "$f"
 	else cat
-	fi | sed -e "s/$nl/\n/g" -e "s/$sp/ /g" # fix whitespace
+	fi	# no placeholder substitution here: output is the filtered (or unchanged) stdin
 }
 
 filter_line() {
 	f="filter_line_$to"
-	if type "$f" | grep -q function
+	if type "$f" 2>/dev/null | grep -q function
 	then printf '%s\n' "$*" | "$f"
 	else printf '%s\n' "$*"
 	fi
@@ -89,12 +93,16 @@ filter_line() {
 filter_line_html() {
 	# s/// : escape <, >, & from html
 	# s### : *bold*, _italic_, `code`
-	sed -e 's/&/\&amp;/g' \
-	    -e 's/</\&lt;/g' \
-	    -e 's/>/\&gt;/g' \
-	    -e 's#\*\([^*]*\)\*#<b>\1</b>#g' \
-	    -e 's#_\([^_]*\)_#<i>\1</i>#g' \
-	    -e 's#`\([^`]*\)`#<code>\1</code>#'
+	# s@@@ : typographic dashes (--- to &mdash; first, then -- to &ndash;); every rule is global per line
+	sed \
+		-e 's/&/\&amp;/g' \
+		-e 's/</\&lt;/g' \
+		-e 's/>/\&gt;/g' \
+		-e 's#\*\([^*]*\)\*#<b>\1</b>#g' \
+		-e 's#_\([^_]*\)_#<i>\1</i>#g' \
+		-e 's#`\([^`]*\)`#<code>\1</code>#g' \
+		-e 's@---@\&mdash;@g' \
+		-e 's@--@\&ndash;@g'
 
 }
 
@@ -108,46 +116,55 @@ pushline() {
 }
 
 bufprint() {
-	b="$(cat<"$buff")"
-	printf "$(eval echo "\$fmtbuff_$1")" "$b" | filter_buff
+	b="$(filter_buff<"$buff")"	# buffer file contents after the format's buffer filter
+	printf "$(eval echo "\$fmtbuff_$1")" "$b" |	# look up fmtbuff_<$1> by name and use it as the printf format
+		sed -e "s/$nl/\n/g" -e "s/$sp/ /g" # fix whitespace: $nl/$sp placeholders become newline/space. NOTE(review): \n in a sed replacement is not portable to every sed -- confirm target
 	: > "$buff"
 }
 
-## Where the magic happens
+### Where the magic happens
 process() {
 	set -f
 	while read -r sigil line
 	do
-		if $verbatim && test "$sigil" != '```'
+		if $verbatimp && test "$sigil" != '```'
 		then
 			pushline verb "$(filter_line "$sigil $line")"
 			continue
 		fi
 
 		case "$sigil" in
-			('```')
-				if $verbatim
+			(*':') # metadata
+				if $metap
+				then printf 'export %s="%s"\n' \
+					    "${sigil%:}" "$line" >>"$meta"
+				fi
+				;;
+			('```') # verbatim
+				# CONSIDER: "types" of verbatim
+				# designated by extra fields after the
+				# sigil
+				## ``` class_of_content
+				# ^--- change the class of the content,
+				# eg. in html do <pre class="type">
+				# other formats might do other things
+				## ``` | some_program
+				# ^--- pipe the buffer to some_program
+				metap=false
+				if $verbatimp
 				then
 					bufprint verb
-					verbatim=false
+					verbatimp=false
 					prev=
 				else
-					# CONSIDER: "types" of verbatim
-					# designated by extra fields after the
-					# sigil
-					## ``` class_of_content
-					# ^- change the class of the content,
-					# eg. in html do <pre class="type">
-					# other formats might do other things
-					## ``` | some_program
-					# ^- pipe the buffer to some_program
-					## others?
 					bufprint "$prev"
-					verbatim=true
+					verbatimp=true
 				fi
 				continue
 				;;
-			('=>')
+			('=>') # link
+				metap=false
+
 				printf '%s\n' "$line" > "$lbuf"
 				read -r url title < "$lbuf"
 				if test "$curr" = para
@@ -157,11 +174,20 @@ process() {
 				else curr=link
 				fi
 				;;
-			('#'*) curr=hd_${#sigil} ;;
-			('>') curr=quot ;;
-			('*') curr=list ;;
-			('') curr=blank ;;
-			(*)
+			('#'*) # header
+				metap=false
+				curr=hd_${#sigil} ;;
+			('>') # quote
+				metap=false
+				curr=quot ;;
+			('*') # list
+				metap=false
+				curr=list ;;
+			('') # blank line
+				metap=false
+				curr=blank ;;
+			(*) # paragraph
+				metap=false
 				curr=para
 				line="$sigil $line"
 				;;
@@ -185,28 +211,43 @@ process() {
 	bufprint "$curr"
 }
 
+templatize() {	# print the files named in "$@" as a here-document, so $var and $(cmd) in them are expanded; templates run as code, use trusted files only
+	eval "cat<<$te
+$(cat -- "$@")
+$te"
+}
+
 ### Entry point
 
 usage() {
 	cat <<EOF >&2
 jimmy: convert gmi to other formats
-usage: jimmy [-h] [-t FORMAT] [FILE...]
+usage: jimmy [-hx] [-t FORMAT] [-I DIRECTORY] [-T FILE] [FILE...]
 If no FILE is given on the command line, jimmy reads standard input.
 options:
  -h	show this help and exit
+ -x	enable xtrace (set -x)
  -t FORMAT
 	convert gmi to FORMAT. html is default, gmi is built-in.
 	you can also pass the name of a file that will be sourced.
+ -I DIRECTORY
+	add DIRECTORY to the include path for -t.  the current
+	directory is always in the include path.
+ -T FILE
+	use FILE as a template for the output text.
 EOF
+	exit "${1:-0}"	# help goes to stderr, then exit with the status given as $1 (0 when omitted)
 }
 
 main() {
-	while getopts ht:x OPT
+	while getopts hxI:t:T: OPT
 	do
 		case "$OPT" in
 			(h) usage 0 ;;
-			(t) to="$OPTARG" ;;
 			(x) set -x ;;
+			(I) IPATH="$OPTARG:$IPATH" ;;
+			(t) to="$OPTARG" ;;
+			(T) tmpl="$OPTARG" ;;
 			(*) usage 1 ;;
 		esac
 	done
@@ -214,15 +255,36 @@ main() {
 
 	case "$to" in
 		(html|gmi) "$to" ;;
-		(*) . "$to" || {
-				  echo >&2 "Can't find file: '$to'"
-				  exit 2
-			  }
-		    ;;
+		(*)
+			found=false
+			for p in $(echo "$IPATH"|tr : ' ')
+			do
+				if test -f "$p/$to"
+				then . "$p/$to"; found=true
+				elif test -f "$p/$to.sh"
+				then . "$p/$to.sh"; found=true
+				fi
+			done
+			if ! $found
+			then
+				echo >&2 "Can't find file: '$to'"
+				echo >&2 "Looked in $IPATH"
+				exit 2
+			fi
+			;;
 	esac
 
 	# while read requires a final newline
-	(cat "${@:--}"; echo) | process
+	(cat "${@:--}"; echo) |
+		process |
+		if test -n "$tmpl"
+		then
+			# use eval cat instead of source for pipe sequencing
+			# reasons
+			eval "$(cat "$meta")"
+			templatize "$tmpl"
+		else cat
+		fi
 }
 
 main "$@"
diff --git a/test.gmi b/test.gmi
index f2dc0dc..62bed2e 100644
--- a/test.gmi
+++ b/test.gmi
@@ -1,3 +1,7 @@
+title: a test document
+date: 2024-05-13T03:02:45Z
+uuid: b3daebf1-440b-4828-a4d9-9089c7bd7c61
+
 # a test document of some kind
 
 here is a test document.
@@ -30,4 +34,4 @@ as well as `code`, _emph_ and such?
 what if *i _nest_ them*
 what if *i _nest them* wrong_ ?
 what about *breaking them
-over two lines?*
\ No newline at end of file
+over two lines?*
-- 
cgit 1.4.1-21-gabe81