From 155d920801d1881525ceae872430346b27772e57 Mon Sep 17 00:00:00 2001
From: Case Duckworth
Date: Wed, 15 Jun 2022 09:53:23 -0500
Subject: First commit

---
 ht.awk   | 246 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 test.ht  |  27 +++++++
 test.txt |  24 +++++++
 3 files changed, 297 insertions(+)
 create mode 100755 ht.awk
 create mode 100644 test.ht
 create mode 100644 test.txt

diff --git a/ht.awk b/ht.awk
new file mode 100755
index 0000000..60e042b
--- /dev/null
+++ b/ht.awk
@@ -0,0 +1,246 @@
+#!/usr/bin/awk -f
+# -*- indent-tabs-mode: t; -*-
+# HAT TRICK
+# (C) 2022 C. Duckworth
+
+### Commentary:
+
+# OLDIFS=$IFS; IFS=$'\n';
+# for line in `cat testfile`; do
+# test=`echo "$line" | grep -E '[\]$'`;
+# if [ $test ]; then
+# newline=`echo $line | rev | cut -c 2- | rev`;
+# echo -n "$newline"; else echo "$line";
+# fi; done;
+# IFS=$OLDIFS
+
+### Code:
+# Defaults: fill width used by fill(), and the per-format block tags that
+# the flush functions fall back to after each paragraph.
+BEGIN {
+	width = 72
+	default_htag = "p"
+	default_gtag = ""
+	default_ftag = ""
+}
+
+### Raw formatting
+# A line starting with ">>>" opens a raw block.  raw_fmt_p() inspects the
+# second field of this line to decide which formats receive the block.
+# The first raw line is read here so the HTML opening tag can share a
+# line with it.
+/^>>>/ {
+	# `getline var` leaves $0 and NF alone, so raw_fmt_p() below still
+	# sees the fields of the ">>>" line.  On EOF or error the variable
+	# would keep a stale value, so blank it explicitly.
+	if ((getline first_raw) <= 0) {
+		first_raw = ""
+	}
+	if (raw_fmt_p("html")) {
+		raw_html = 1
+		html[++hpar] = "<pre>" html_escape(first_raw)
+	}
+	if (raw_fmt_p("gemini")) {
+		raw_gemini = 1
+		gemini[++gpar] = "```"
+		gemini[++gpar] = first_raw
+	}
+	if (raw_fmt_p("gopher")) {
+		raw_gopher = 1
+		gopher[++fpar] = first_raw
+	}
+	raw = 1
+	next
+}
+
+# A line starting with "<<<" closes the raw block: terminate the block in
+# each format that was receiving it, then clear all raw-mode flags.
+/^<<</ {
+	if (raw_html) {
+		# Close on the last raw line so no blank line ends the block.
+		html[hpar] = html[hpar] "</pre>"
+	}
+	if (raw_gemini) {
+		gemini[++gpar] = "```"
+		gemini[++gpar] = ""
+	}
+	if (raw_gopher) {
+		gopher[++fpar] = ""
+	}
+	raw_html = 0
+	raw_gemini = 0
+	raw_gopher = 0
+	raw = 0
+	next
+}
+
+# Inside an open raw block: append the line, unwrapped, to every format
+# that is receiving the block.  Only the HTML copy is escaped.
+raw {
+	if (raw_gopher) {
+		gopher[++fpar] = $0
+		gopher_empty = 0
+	}
+	if (raw_gemini) {
+		gemini[++gpar] = $0
+		gemini_empty = 0
+	}
+	if (raw_html) {
+		html[++hpar] = html_escape($0)
+		html_empty = 0
+	}
+	next
+}
+
+# Block types
+# A leading run of "#" marks a heading.  Record the block tag for each
+# format, then strip the marker so the remaining text is buffered by the
+# later rules: HTML gets h1..h6, gemini keeps at most three "#", gopher
+# keeps the whole run.
+/^#/ {
+	match($0, /#+/)
+	hdr_len = RLENGTH
+	hdr_marks = substr($0, RSTART, hdr_len)
+	htag = "h" (hdr_len < 6 ? hdr_len : 6)
+	gtag = substr(hdr_marks, 1, 3) " "
+	ftag = hdr_marks " "
+	sub(/^#+[ \t]*/, "")
+}
+
+# Line types
+# Link line: "=> URL [label words...]".  $2 is the URL, and the remaining
+# fields form the label.
+/^=>/ {
+	title = ""
+	for (i = 3; i <= NF; i++) {
+		title = title (i > 3 ? " " : "") $i
+	}
+	if (title == "") {
+		# No label given: show the URL rather than an empty anchor.
+		title = $2
+	}
+	hbuf[++hline] = "<a href=\"" $2 "\">" title "</a>"
+	# The embedded newline and "gemini\t" prefix presumably put the
+	# link on its own output line once printarr() adds its prefix.
+	gbuf[++gline] = "\ngemini\t" $0
+	# TODO: gopher
+	next
+}
+
+### Everything else
+# Any non-empty line not consumed above is paragraph text: queue it in
+# the pending buffer of each output format.
+/./ {
+	hbuf[++hline] = $0
+	html_empty = 0
+
+	gbuf[++gline] = $0
+	gemini_empty = 0
+
+	fbuf[++fline] = $0
+	gopher_empty = 0
+}
+
+# An empty line ends the current paragraph: flush the pending buffers of
+# every output format via bufput().
+/^$/ {
+ bufput()
+}
+
+# At end of input, flush whatever paragraph is still buffered, then print
+# each format's accumulated lines through printarr(), passing the format
+# name as the prefix.
+END {
+ bufput()
+ printarr(html, "html")
+ printarr(gemini, "gemini")
+ printarr(gopher, "gopher")
+}
+
+
+# bufput(): flush the pending paragraph buffers by calling the HTML,
+# gemini and gopher flush functions in that order.
+function bufput()
+{
+ hbufput()
+ gbufput()
+ fbufput()
+}
+
+# clear(arr): remove every element of ARR.
+#
+# split() deletes all elements of its target before splitting, so
+# splitting the empty string empties the array portably and without a
+# loop variable leaking into the global namespace.
+function clear(arr)
+{
+	split("", arr)
+}
+
+# fbufput(): flush the pending gopher paragraph.
+#
+# Joins the buffered lines in input order, wraps them with fill(), and
+# appends the wrapped lines plus a blank separator to gopher[].  The
+# block tag (ftag) is prepended to the first wrapped line only, then
+# reset to default_ftag.  Does nothing if the buffer is empty.
+#
+# Names after the extra spaces in the parameter list are locals.
+function fbufput(    i, n, first, text)
+{
+	if (! length(fbuf)) {
+		# `return`, not `next`: `next` inside a function abandons the
+		# whole record and is undefined when reached from END.
+		return
+	}
+	# Indexed loop: `for (k in arr)` visits keys in unspecified order.
+	text = ""
+	for (i = 1; i <= fline; i++) { # XXX: gopher line types
+		if (i in fbuf) {
+			text = text (text == "" ? "" : " ") fbuf[i]
+		}
+	}
+	fill(text)
+	n = length(fp)
+	first = 1
+	# Scan one index past n so a gap at fp[1] cannot hide the last line.
+	for (i = 1; i <= n + 1; i++) {
+		if (i in fp) {
+			gopher[++fpar] = (first ? ftag : "") fp[i]
+			first = 0
+		}
+	}
+	gopher[++fpar] = ""
+	ftag = default_ftag
+	# Restart numbering so the next paragraph occupies 1..fline again.
+	fline = 0
+	clear(fp)
+	clear(fbuf)
+}
+
+# fill(paragraph): word-wrap PARAGRAPH to the global `width`.
+#
+# Splits PARAGRAPH on FS and packs the words, in order, into the global
+# array fp[1..n], one element per output line.  A line's length counts
+# the single spaces between its words.  A word longer than `width` is
+# placed on a line of its own.  fp[] is emptied first.
+#
+# Returns the number of lines stored in fp[].
+#
+# Names after the extra spaces in the parameter list are locals.
+function fill(paragraph,    words, nwords, i, nlines, used, wlen)
+{
+	split("", fp)
+	# Keep split()'s count and walk by index: `for (k in arr)` visits
+	# keys in unspecified order and could scramble the words.
+	nwords = split(paragraph, words, FS)
+	nlines = 0
+	used = 0
+	for (i = 1; i <= nwords; i++) {
+		wlen = length(words[i])
+		if (nlines > 0 && used + 1 + wlen <= width) {
+			fp[nlines] = fp[nlines] " " words[i]
+			used += 1 + wlen
+		} else {
+			fp[++nlines] = words[i]
+			used = wlen
+		}
+	}
+	return nlines
+}
+
+# gbufput(): flush the pending gemini paragraph.
+#
+# Joins the buffered lines in input order into one unwrapped line,
+# prefixes it with the block tag (gtag), and appends it plus a blank
+# separator to gemini[].  gtag is then reset to default_gtag.  Does
+# nothing if the buffer is empty.
+#
+# Names after the extra spaces in the parameter list are locals.
+function gbufput(    i, text)
+{
+	if (! length(gbuf)) {
+		# `return`, not `next`: `next` inside a function abandons the
+		# whole record and is undefined when reached from END.
+		return
+	}
+	# Indexed loop: `for (k in arr)` visits keys in unspecified order.
+	text = ""
+	for (i = 1; i <= gline; i++) {
+		if (i in gbuf) {
+			text = text (text == "" ? "" : " ") gbuf[i]
+		}
+	}
+	gemini[++gpar] = gtag text
+	gemini[++gpar] = ""
+	gtag = default_gtag
+	# Restart numbering so the next paragraph occupies 1..gline again.
+	gline = 0
+	clear(gbuf)
+}
+
+# gopher_line(type, display, selector, hostname, port)
+#
+# Build one tab-separated menu line: TYPE is glued directly to DISPLAY,
+# followed by SELECTOR, HOSTNAME and PORT, each after a tab.
+# Returns the assembled string without a trailing newline.
+function gopher_line(type, display, selector, hostname, port)
+{
+	# Scalar parameters are local copies, so reusing one is safe.
+	display = type display
+	return display "\t" selector "\t" hostname "\t" port
+}
+
+# hbufput(): flush the pending HTML paragraph.
+#
+# Joins the buffered lines in input order, wraps them with fill(), and
+# appends the wrapped lines to html[].  The first line gets the opening
+# tag (htag, or default_htag when htag is empty).  The last line gets
+# htag_end if set, otherwise the matching closing tag.  htag is then
+# reset to default_htag.  Does nothing if the buffer is empty.
+#
+# Names after the extra spaces in the parameter list are locals.
+function hbufput(    i, n, emitted, text, tag)
+{
+	if (! length(hbuf)) {
+		# `return`, not `next`: `next` inside a function abandons the
+		# whole record and is undefined when reached from END.
+		return
+	}
+	# Indexed loop: `for (k in arr)` visits keys in unspecified order.
+	text = ""
+	for (i = 1; i <= hline; i++) {
+		if (i in hbuf) {
+			text = text (text == "" ? "" : " ") hbuf[i]
+		}
+	}
+	tag = (htag ? htag : default_htag)
+	fill(text)
+	n = length(fp)
+	emitted = 0
+	# Scan one index past n so a gap at fp[1] cannot hide the last line.
+	for (i = 1; i <= n + 1; i++) {
+		if (i in fp) {
+			html[++hpar] = (emitted ? "" : "<" tag ">") fp[i]
+			emitted++
+		}
+	}
+	if (emitted) {
+		# Close only what was opened: a whitespace-only paragraph
+		# yields no words, and must not tag the previous entry.
+		html[hpar] = html[hpar] (htag_end ? htag_end : "</" tag ">")
+	}
+	htag = default_htag
+	# Restart numbering so the next paragraph occupies 1..hline again.
+	hline = 0
+	clear(fp)
+	clear(hbuf)
+}
+
+# html_escape(text): return TEXT with the HTML-significant characters
+# &, < and > replaced by their character entities.
+#
+# "&" is handled first so the ampersands introduced by the later
+# replacements are not escaped again.  In a gsub() replacement a bare
+# "&" means "the matched text", so each literal ampersand is written
+# as "\\&".
+function html_escape(text)
+{
+	gsub(/&/, "\\&amp;", text)
+	gsub(/</, "\\&lt;", text)
+	gsub(/>/, "\\&gt;", text)
+	return text
+}
+
+# printarr(arr, prefix): print the elements of ARR, one per line, in
+# ascending index order.
+#
+# With a non-empty PREFIX each line is "PREFIX<TAB>element".  Otherwise
+# the element is printed alone.  Integer keys in 0..length(arr) are
+# visited, which covers arrays filled with arr[++n].
+#
+# Names after the extra spaces in the parameter list are locals.
+function printarr(arr, prefix,    i, n, fmt)
+{
+	fmt = (prefix ? "%s\t%s\n" : "%s%s\n")
+	# Indexed loop: `for (k in arr)` visits keys in unspecified order,
+	# which would shuffle the output lines.
+	n = length(arr)
+	for (i = 0; i <= n; i++) {
+		if (i in arr) {
+			printf fmt, prefix, arr[i]
+		}
+	}
+}
+
+# raw_fmt_p(format): should the raw block opened by the current record
+# be emitted for FORMAT?  Returns 1 for yes, 0 for no.
+#
+#   - no second field:          every format is selected
+#   - second field contains "-": exclusion list, so FORMAT is selected
+#                                unless "-FORMAT" appears in it
+#   - otherwise:                 inclusion list, so FORMAT is selected
+#                                only if it appears in the field
+function raw_fmt_p(format)
+{
+	if (NF < 2) {
+		return 1
+	}
+	if ($2 ~ /-/) {
+		return ($2 ~ ("-" format)) ? 0 : 1
+	}
+	return ($2 ~ format) ? 1 : 0
+}
diff --git a/test.ht b/test.ht
new file mode 100644
index 0000000..0208568
--- /dev/null
+++ b/test.ht
@@ -0,0 +1,27 @@
+# a test
+
+here's a test for ht.awk.
+it's got paragraphs (these bad boys), long lines and such, and also raw blocks.
+=> https://example.com and links!
+
+>>>
+rawblock example1: all of them, & more +html here's a test for ht.awk. it's got paragraphs (these bad boys), long lines and such, +html and also raw blocks. +html
+html +html +html+html and finally, the end of the file. +html
+gemini here's a test for ht.awk. it's got paragraphs (these bad boys), long lines and such, and also raw blocks. +gemini +gemini ``` +gemini rawblock example1: all of them. +gemini fee fi fo fum +gemini ``` +gemini +gemini and finally, the end of the file. +gemini +gopher here's a test for ht.awk. it's got paragraphs (these bad boys), long lines and such, +gopher and also raw blocks. +gopher +gopher +gopher and finally, the end of the file. +gopher -- cgit 1.4.1-21-gabe81