From 6b63dad736d44ef0e61f4ca1aa95e07430b89af3 Mon Sep 17 00:00:00 2001 From: Case Duckworth Date: Thu, 9 May 2024 22:28:47 -0500 Subject: Finish shell impl This version includes - customizable format specifiers for all elements - line and buffer filtering, which makes all sorts of things possible - and more! --- jimmy | 373 ++++++++++++++++++++++++++++++++------------------------------- jimmy.sh | 137 ----------------------- test.gmi | 8 +- 3 files changed, 194 insertions(+), 324 deletions(-) delete mode 100755 jimmy.sh diff --git a/jimmy b/jimmy index aa79183..5632b5e 100755 --- a/jimmy +++ b/jimmy @@ -1,208 +1,209 @@ #!/bin/sh -{ dummy=":" "exec" "awk" "-f" "$0" "$@"; } # -*- awk -*- - -#: # h1 -#: ## h2 -#: ### h3 -#: => link -#: * list -#: > quote -#: ``` ... ``` verbatim - -BEGIN { # configuration - if (!to) to = "html" # default: html - - if (to == "html") { - # blocks - isblock["paragraph"] = 1 - opener["paragraph"] = "

" - closer["paragraph"] = "

\n" - linefmt["paragraph"] = "%s\n" - isblock["verbatim"] = 1 - opener["verbatim"] = "
"
-		closer["verbatim"] = "
\n" - linefmt["verbatim"] = "%s\n" - isblock["quote"] = 1 - opener["quote"] = "
" - closer["quote"] = "
\n" - linefmt["quote"] = "%s\n" - isblock["list"] = 1 - opener["list"] = "\n" - linefmt["list"] = "
  • %s
  • \n" - isblock["linklist"] = 1 - opener["linklist"] = "\n" - linefmt["linklist"] = "
  • %s
  • \n" - # lines - isblock["header"] = 1 - linefmt["header", 1] = "

    %s

    \n" - linefmt["header", 2] = "

    %s

    \n" - linefmt["header", 3] = "

    %s

    \n" - isblock["link"] = 0 - linefmt["link"] = "%s\n" - # escapes -- TODO: rethink these. - ## I think the best solution is to have a pair of arrays -- - ## esc_orig and esc_repl. These will have keys in the - ## [n, block] format to keep them straight. - esc["verbatim", 0, "&"] = "\\&" - esc["verbatim", 8, "<"] = "\\<" - esc["verbatim", 9, ">"] = "\\>" - # fill -- # fill[BLOCK] can be a width, 0, or -1. - ## 0 => do nothing . -1 => all one line - fill["paragraph"] = 0 - fill["quote"] = 0 - # collapse whitespace - collapse_blanks = 1 - # collapse_blanks is a boolean: collapse >1 blank lines? - } else if (to == "gemini") { - # blocks - isblock["paragraph"] = 1 - opener["paragraph"] = "" - closer["paragraph"] = "\n" - linefmt["paragraph"] = "%s\n" - isblock["verbatim"] = 1 - opener["verbatim"] = "```\n" - closer["verbatim"] = "```\n" - linefmt["verbatim"] = "%s\n" - isblock["quote"] = 1 - opener["quote"] = "> " - closer["quote"] = "\n" - linefmt["quote"] = "%s" - isblock["list"] = 1 - opener["list"] = "" - closer["list"] = "" - linefmt["list"] = "* %s\n" - isblock["linklist"] = 1 - opener["linklist"] = "" - closer["linklist"] = "" - linefmt["linklist"] = "=> %s %s\n" - # lines - isblock["header"] = 1 - linefmt["header", 1] = "# %s\n" - linefmt["header", 2] = "## %s\n" - linefmt["header", 3] = "### %s\n" - isblock["link"] = 0 - linefmt["link"] = "@NL@=> %s %s@NL@" - # escapes - # fill -- # fill[BLOCK] can be a width, 0, or -1. - ## 0 => do nothing . -1 => all one line - fill["paragraph"] = -1 - fill["quote"] = -1 - # collapse whitespace - collapse_blanks = 1 - } else die("Unknown `to' type: `" to "'") -} -/^```/ { - bl = BLOCK - close_block() - if (bl != "verbatim") BLOCK = "verbatim" - bl = "" - next +### Initialize +# init buffers +buff="$(mktemp)" +lbuf="$(mktemp)" +trap 'rm "$buff" "$lbuf"' EXIT INT KILL + +# init state +prev= +curr= +verbatim=false +# (tuneables) +nl='::NL::' +sp='::SP::' +to=html + +### Formats +## HTML and GMI are given here. Other formats can be defined in their +## own files and they'll be sourced. + +## NOTES +# should we allow modifying variables from the environment ? + +html() { + fmtbuff_hd_1="

    %s

    $nl" + fmtline_hd_1="%s" + fmtbuff_hd_2="

    %s

    $nl" + fmtline_hd_2="%s" + fmtbuff_hd_3="

    %s

    $nl" + fmtline_hd_3="%s" + fmtbuff_quot="
    $nl%s
    $nl" + fmtline_quot="%s$nl" + fmtbuff_list="$nl" + fmtline_list="
  • %s
  • $nl" + fmtbuff_para="

    %s

    $nl" + fmtline_para="%s$nl" + fmtline_plnk="%s$nl" + fmtbuff_link="$nl" + fmtline_link="
  • %s
  • $nl" + fmtbuff_verb="
    %s
    $nl" + fmtline_verb="%s$nl" + fmtbuff_blank="$nl" + fmtline_blank="$nl" } -BLOCK == "verbatim" { - for (s in verbatim_esc) gsub(verbatim_esc[s], verbatim_repl[s]) - bufpush($0 "\n") - next +gmi() { + fmtbuff_hd_1="# %s$nl" + fmtline_hd_1="%s" + fmtbuff_hd_2="## %s$nl" + fmtline_hd_2="%s" + fmtbuff_hd_3="### %s$nl" + fmtline_hd_3="%s" + fmtbuff_quot="> %s$nl" + fmtline_quot="%s$sp" + fmtbuff_list="%s$nl" + fmtline_list="* %s$nl" + fmtbuff_para="%s$nl" + fmtline_para="%s$sp" + fmtline_plnk="$nl=> %s %s$nl" + fmtbuff_link="%s$nl" + fmtline_link="=> %s %s" + fmtbuff_verb="\`\`\`$nl%s\`\`\`$nl" + fmtline_verb="%s$nl" + fmtbuff_blank="$nl" + fmtline_blank="$nl" } -/^#/ { - close_block() - match($0, /^#+/) - bufpush(sprintf(linefmt["header", RLENGTH], collect(2))) - BLOCK = "header" - next +### Filters + +filter_buff() { + case "$1" in + (html) + sed -e "s/$nl/\n/g" -e "s/$sp/ /g" | # fix whitespace + sed 's#\*\([^*]*\)\*#\1#g' | # *strong* + sed 's#_\([^_]*\)_#\1#g' | # _emph_ + sed 's#`\([^`]*\)`#\1#' # `code` + cat + ;; + (*) cat ;; + esac } -/^=>/ { - if (BLOCK == "paragraph" && !isblock["link"]) { - bufpush(sprintf(linefmt["link"], $2, collect(3))) - next - } - if (BLOCK != "linklist") close_block() - BLOCK = "linklist" - bufpush(sprintf(linefmt[BLOCK], $2, collect(3))) - next +filter_line() { + case "$1" in + (*) cat ;; + esac } -/^\*/ { - if (BLOCK != "list") close_block() - BLOCK = "list" - bufpush(sprintf(linefmt[BLOCK], collect(2))) - next -} +### Processing -/^>/ { - if (BLOCK != "quote") close_block() - BLOCK = "quote" - bufpush(sprintf(linefmt[BLOCK], collect(2))) - next -} +## Utility functions -/^$/ { - if (BLOCK == "verbatim") bufpush("\n") - else close_block() - next +buffpush() { + tag="$1"; shift + printf "$(eval echo "\$fmtline_$tag")" "$@" | + filter_line "$to" >> "$buff" } -{ - if (BLOCK != "paragraph") close_block() - BLOCK = "paragraph" - bufpush(sprintf(linefmt[BLOCK], $0)) - next +buffclose() { + b="$(cat<"$buff")" + test -n "$b" || return + printf "$(eval echo "\$fmtbuff_$1")" "$b" | filter_buff "$to" + :>"$buff" } -END { close_block(); printf "\n" } - -function close_block () { - if (!BLOCK) { - if (collapse_blanks) return - else printf "\n" - } - - if (!isblock[BLOCK]) return - if (fill[BLOCK]) BUFFER = buffill(BUFFER, fill[BLOCK]) - - for (e in esc) { - if (BLOCK && match(e, "^" BLOCK ".[0-9]")) { - gsub(substr(e, RLENGTH+2), esc[e], BUFFER) - } - } - - printf("%s%s%s\n", opener[BLOCK], BUFFER, closer[BLOCK]) - BUFFER = BLOCK = "" +## Where the magic happens +process() { + set -f + while read -r sigil line + do + if $verbatim && test "$sigil" != '```' + then + buffpush verb "$sigil $line" + continue + fi + + case "$sigil" in + ('```') + if $verbatim + then + buffclose verb + verbatim=false + prev= + else + buffclose "$prev" + verbatim=true + fi + continue + ;; + ('=>') + printf '%s\n' "$line" > "$lbuf" + read -r url title < "$lbuf" + if test "$curr" = para + then + buffpush plnk "$url" "$title" + continue + else curr=link + fi + ;; + ('#'*) curr=hd_${#sigil} ;; + ('>') curr=quot ;; + ('*') curr=list ;; + ('') curr=blank ;; + (*) + curr=para + line="$sigil $line" + ;; + esac + + test "$curr" = "$prev" || buffclose "$prev" + prev="$curr" + + if test "$curr" = verb + then + buffpush "$curr" "$line" + continue + fi + + if test "$curr" = link + then buffpush "$curr" "$url" "$title" + else buffpush "$curr" "$line" + fi + done + + buffclose "$curr" } -function collect (begin, end, out) { - for (f = (begin?begin:1); f <= (end?end:NF); f++) - out = out (out?" ":"") $f - return out +### Entry point + +usage() { + cat <&2 +jimmy: convert gmi to other formats +usage: jimmy [-h] [-t FORMAT] [FILE...] +If no FILE is given on the command line, jimmy reads standard input. +options: + -h show this help and exit + -t FORMAT + convert gmi to FORMAT. html is default, gmi is built-in. + you can also pass the name of a file that will be sourced. +EOF } -function bufpush (str) { - BUFFER = BUFFER str +main() { + while getopts ht:x OPT + do + case "$OPT" in + (h) usage 0 ;; + (t) to="$OPTARG" ;; + (x) set -x ;; + (*) usage 1 ;; + esac + done + shift $((OPTIND - 1)) + + case "$to" in + (html|gmi) "$to" ;; + (*) . "$to" || { + echo >&2 "Can't find file: '$to'" + exit 2 + } + ;; + esac + + # while read requires a final newline + (cat "${@:--}"; echo) | process } -function buffill(buf, width, out) { - if (width < 0) { - gsub("\n", " ", buf) - out = buf - } - else { - split(buf, arr) - nline = 0 - for (w=1;w<=length(arr);w++) { - if (nline + length(arr[w]) >= width) { - out = out (out?"\n":"") arr[w] - nline = length(arr[w]) - } else { - out = out (out?" ":"") arr[w] - nline += length(arr[w]) + 1 - } - } - } - gsub("@NL@", "\n", out) - return out -} +main "$@" diff --git a/jimmy.sh b/jimmy.sh deleted file mode 100755 index f039f4f..0000000 --- a/jimmy.sh +++ /dev/null @@ -1,137 +0,0 @@ -#!/bin/sh - -BLOCK= -BUFFER= - -process() { - while read -r LINE - do - if test "$BLOCK" = verbatim - then - printf '%s\n' "$LINE" - continue - fi - - set -- $LINE - - case "$LINE" in - ('```') - if test "$BLOCK" = verbatim - then BLOCK= - else BLOCK=verbatim - fi - ;; - ('') - if test "$BLOCK" = verbatim - then bufpush $'\n' - else bufclose - fi - ;; - ('=>'*) link "$@" ;; - ('#'*) header "$@" ;; - ('*'*) shift; blknew list "$*" ;; - ('>'*) shift; blknew quote "$*" ;; - (*) shift; blknew paragraph "$*" ;; - esac - done - bufclose -} - -blknew() { - test "$BLOCK" = "$1" || bufclose - bufpush "$(printf "$(eval echo "\$format_$BLOCK")" "$@")" - BLOCK="$1" -} - -bufclose() { - if test -z "$BLOCK" - then "$COLLAPSE_BLANKS" && return - else newline; return - fi - - # blockp "$BLOCK" || return - # fillp $BLOCK && buffill "$(fillp $BLOCK)" - - # TODO: escape shit - - printf '%s%s%s\n' \ - "$(echo eval "\$opener_$BLOCK")" \ - "$BUFFER" \ - "$(echo eval "\$closer_$BLOCK")" - - BLOCK= -} - -buffill() { # buffill WIDTH - if test $1 -lt 0 - then BUFFER="$(printf '%s\n' "$BUFFER" | tr '\n' ' ')" - else - out= - nline=0 - printf '%s\n' "$BUFFER" | sed 's/[ \t]\+/\n/g' | - while read -r word - do - if test $((nline + ${#word})) -ge "$1" - then - out="${out}"${out:+$'\n'}"${word}" - nline=${#word} - else - out="${out}${out:+ }${word}" - nline=$((nline + ${#word} + 1)) - fi - done - BUFFER="$out" - fi -} - -bufpush() { BUFFER="${BUFFER}$@"; } - -fillp() { - : -} - -header() { - bufclose - lvl=${#1}; shift - bufpush "$(printf "$(eval echo "\$format_h$lvl")" "$*")" - BLOCK=header -} - -link() { - url="$2"; shift 2 - if test "$BLOCK" = paragraph #&& ! blockp link - then bufpush "$(printf "$format_link" "$url" "$*")" - else blknew linklist "$url" "$*" - fi -} - -newline() { printf '\n'; } - -html() { - format_link='
    %s\n' - format_h1='

    %s

    \n' - format_h2='

    %s

    \n' - format_h3='

    %s

    \n' - opener_verbatim='
    '
    -	closer_verbatim='
    \n' - format_verbatim='%s\n' - opener_paragraph='

    ' - closer_paragraph='

    \n' - format_paragraph='%s\n' - opener_quote='
    ' - closer_quote='
    \n' - format_quote='%s\n' - opener_list='\n' - format_list='
  • %s
  • ' - opener_linklist='' - format_linklist="$(printf "$format_list" "$format_link")" -} - -main() { - html - process "$@" -} - -main "$@" diff --git a/test.gmi b/test.gmi index 8d8e178..f2dc0dc 100644 --- a/test.gmi +++ b/test.gmi @@ -24,4 +24,10 @@ for (a=1;a<=4;a++) { => example.com link list 2 => example.com link list 3 -ok +ok, now for another test: +will *strong* in-line text be converted? +as well as `code`, _emph_ and such? +what if *i _nest_ them* +what if *i _nest them* wrong_ ? +what about *breaking them +over two lines?* \ No newline at end of file -- cgit 1.4.1-21-gabe81