From 155d920801d1881525ceae872430346b27772e57 Mon Sep 17 00:00:00 2001
From: Case Duckworth
Date: Wed, 15 Jun 2022 09:53:23 -0500
Subject: First commit

---
 ht.awk   | 268 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 test.ht  |  27 +++++++
 test.txt |  24 +++++++
 3 files changed, 319 insertions(+)
 create mode 100755 ht.awk
 create mode 100644 test.ht
 create mode 100644 test.txt

diff --git a/ht.awk b/ht.awk
new file mode 100755
index 0000000..60e042b
--- /dev/null
+++ b/ht.awk
@@ -0,0 +1,268 @@
+#!/usr/bin/awk -f
+# -*- indent-tabs-mode: t; -*-
+# HAT TRICK
+# (C) 2022 C. Duckworth
+
+### Commentary:
+
+# Renders one source document three ways.  Every output line is
+# prefixed with "html", "gemini" or "gopher" and a tab.
+#
+# OLDIFS=$IFS; IFS=$'\n';
+# for line in `cat testfile`; do
+# test=`echo "$line" | grep -E '[\]$'`;
+# if [ $test ]; then
+# newline=`echo $line | rev | cut -c 2- | rev`;
+# echo -n "$newline"; else echo "$line";
+# fi; done;
+# IFS=$OLDIFS
+
+### Code:
+BEGIN {
+	width = 72	# wrap column used by fill()
+	default_htag = "p"
+	default_gtag = ""
+	default_ftag = ""
+}
+
+### Raw formatting
+# ">>>" opens a raw block; an optional second field picks the formats
+# that receive it (see raw_fmt_p).
+/^>>>/ {
+	raw_html = raw_fmt_p("html")
+	raw_gemini = raw_fmt_p("gemini")
+	raw_gopher = raw_fmt_p("gopher")
+	if (raw_gemini) {
+		gemini[++gpar] = "```"
+	}
+	# "<pre>" shares a line with the first raw line, so it is emitted
+	# lazily.  Fetching that line here with getline would swallow a
+	# "<<<" that closes an empty block.
+	pre_pending = raw_html
+	raw = 1
+	next
+}
+
+# "<<<" closes the raw block.
+/^<<</ {
+	if (raw_html) {
+		html[++hpar] = (pre_pending ? "<pre>" : "") "</pre>"
+	}
+	if (raw_gemini) {
+		gemini[++gpar] = "```"
+		gemini[++gpar] = ""
+	}
+	if (raw_gopher) {
+		gopher[++fpar] = ""
+	}
+	pre_pending = 0
+	raw_html = 0
+	raw_gemini = 0
+	raw_gopher = 0
+	raw = 0
+	next
+}
+
+# Inside a raw block lines are copied through; only html is escaped.
+raw {
+	if (raw_html) {
+		html_empty = 0
+		html[++hpar] = (pre_pending ? "<pre>" : "") html_escape($0)
+		pre_pending = 0
+	}
+	if (raw_gemini) {
+		gemini_empty = 0
+		gemini[++gpar] = $0
+	}
+	if (raw_gopher) {
+		gopher_empty = 0
+		gopher[++fpar] = $0
+	}
+	next
+}
+
+# Block types
+# Headings: html levels are capped at h6, gemini markers at "###".
+/^#/ {
+	match($0, /#+/)
+	htag = "h" (RLENGTH > 6 ? 6 : RLENGTH)
+	gtag = substr($0, RSTART, (RLENGTH > 3 ? 3 : RLENGTH)) " "
+	ftag = substr($0, RSTART, RLENGTH) " "
+	sub(/^#+[ \t]*/, "", $0)
+}
+
+# Line types
+/^=>/ {
+	title = ""
+	for (i = 3; i <= NF; i++) {
+		title = title (title ? " " : "") $i
+	}
+	hbuf[++hline] = "<a href=\"" $2 "\">" title "</a>"
+	gbuf[++gline] = "\ngemini\t" $0
+	# TODO: gopher
+	next
+}
+
+### Everything else
+/./ {
+	html_empty = 0
+	gemini_empty = 0
+	gopher_empty = 0
+	hbuf[++hline] = $0
+	gbuf[++gline] = $0
+	fbuf[++fline] = $0
+}
+
+/^$/ {
+	bufput()
+}
+
+END {
+	bufput()
+	printarr(html, "html")
+	printarr(gemini, "gemini")
+	printarr(gopher, "gopher")
+}
+
+
+# Flush the buffered paragraph to all three outputs.
+function bufput()
+{
+	hbufput()
+	gbufput()
+	fbufput()
+}
+
+# Delete every element of ARR.
+function clear(arr,    x)
+{
+	for (x in arr) {
+		delete arr[x]
+	}
+}
+
+# Flush fbuf to gopher as a filled paragraph, then a blank line.
+# Uses return, not next: bufput() also runs from END, where POSIX
+# leaves next undefined.
+function fbufput(    i, n, paragraph)
+{
+	if (fline == 0) {
+		return
+	}
+	for (i = 1; i <= fline; i++) { # XXX: gopher line types
+		paragraph = paragraph (paragraph ? " " : "") fbuf[i]
+	}
+	n = fill(paragraph)
+	for (i = 1; i <= n; i++) {
+		gopher[++fpar] = ((i == 1) ? ftag : "") fp[i]
+	}
+	gopher[++fpar] = ""
+	ftag = default_ftag
+	fline = 0
+	clear(fp)
+	clear(fbuf)
+}
+
+# Split PARAGRAPH on FS into fp[1..n] and return n.  A word starts a new
+# line once the word lengths on the current line exceed width.
+# NOTE(review): separating spaces are not counted, so a line can be
+# longer than width; test.txt records that behaviour.
+function fill(paragraph,    i, n, nwords, used, words)
+{
+	nwords = split(paragraph, words, FS)
+	n = 0
+	used = 0
+	for (i = 1; i <= nwords; i++) {
+		used += length(words[i])
+		if (n > 0 && used <= width) {
+			fp[n] = fp[n] " " words[i]
+		} else {
+			fp[++n] = words[i]
+			used = length(words[i])
+		}
+	}
+	return n
+}
+
+# Flush gbuf to gemini as one unwrapped line, then a blank line.
+function gbufput(    i, paragraph)
+{
+	if (gline == 0) {
+		return
+	}
+	for (i = 1; i <= gline; i++) {
+		paragraph = paragraph (paragraph ? " " : "") gbuf[i]
+	}
+	gemini[++gpar] = gtag paragraph
+	gemini[++gpar] = ""
+	gtag = default_gtag
+	gline = 0
+	clear(gbuf)
+}
+
+# Build one tab-separated gopher menu line.
+function gopher_line(type, display, selector, hostname, port)
+{
+	return (type display "\t" selector "\t" hostname "\t" port)
+}
+
+# Flush hbuf to html as a filled paragraph opened by htag (default
+# default_htag).
+function hbufput(    i, n, paragraph)
+{
+	if (hline == 0) {
+		return
+	}
+	for (i = 1; i <= hline; i++) {
+		paragraph = paragraph (paragraph ? " " : "") hbuf[i]
+	}
+	n = fill(paragraph)
+	for (i = 1; i <= n; i++) {
+		html[++hpar] = ((i == 1) ? "<" (htag ? htag : default_htag) ">" : "") fp[i]
+	}
+	if (n > 0) {
+		# htag_end is never assigned in this file, so nothing is added.
+		html[hpar] = html[hpar] (htag_end ? htag_end : "")
+	}
+	htag = default_htag
+	hline = 0
+	clear(fp)
+	clear(hbuf)
+}
+
+# Return TEXT with &, < and > replaced by html entities.
+function html_escape(text)
+{
+	gsub(/&/, "\\&amp;", text)
+	gsub(/</, "\\&lt;", text)
+	gsub(/>/, "\\&gt;", text)
+	return text
+}
+
+# Print arr[1..n] in order, each line as PREFIX, a tab and the element.
+# for-in order is unspecified, so it is only used to count.
+function printarr(arr, prefix,    fmt, i, n, x)
+{
+	fmt = prefix ? "%s\t%s\n" : "%s%s\n"
+	n = 0
+	for (x in arr) {
+		n++
+	}
+	for (i = 1; i <= n; i++) {
+		printf fmt, prefix, arr[i]
+	}
+}
+
+# Return 1 when the raw block opened by the current record applies to
+# FORMAT: no second field means every format, "-name" excludes a
+# format, a bare name selects it.
+function raw_fmt_p(format)
+{
+	if (NF < 2) {
+		return 1
+	}
+	if ($2 ~ /-/) {
+		return ($2 ~ ("-" format)) ? 0 : 1
+	}
+	return ($2 ~ format) ? 1 : 0
+}
diff --git a/test.ht b/test.ht
new file mode 100644
index 0000000..0208568
--- /dev/null
+++ b/test.ht
@@ -0,0 +1,27 @@
+# a test
+
+here's a test for ht.awk.
+it's got paragraphs (these bad boys), long lines and such, and also raw blocks.
+=> https://example.com and links!
+
+>>>
+rawblock example1: all of them, & more
+## fee fi fo fum
+<<<
+
+## just html
+but over two lines
+
+>>> html
+rawblock example2: just html
+hey adora
+<<<
+
+### not html
+
+>>> -html
+rawblock example3: everything /but/ html
+# with a header inside, blah
+<<<
+
+and finally, the end of the file.
diff --git a/test.txt b/test.txt
new file mode 100644
index 0000000..8c47543
--- /dev/null
+++ b/test.txt
@@ -0,0 +1,24 @@
+html

+html here's a test for ht.awk. it's got paragraphs (these bad boys), long lines and such, +html and also raw blocks. +html

+html
+html +html

+html and finally, the end of the file. +html

+gemini here's a test for ht.awk. it's got paragraphs (these bad boys), long lines and such, and also raw blocks. +gemini +gemini ``` +gemini rawblock example1: all of them. +gemini fee fi fo fum +gemini ``` +gemini +gemini and finally, the end of the file. +gemini +gopher here's a test for ht.awk. it's got paragraphs (these bad boys), long lines and such, +gopher and also raw blocks. +gopher +gopher +gopher and finally, the end of the file. +gopher -- cgit 1.4.1-21-gabe81