From 1c8e5f1ca9bf0eb6dad8a62bc87428610d96c350 Mon Sep 17 00:00:00 2001 From: Case Duckworth Date: Wed, 3 Aug 2022 23:36:07 -0500 Subject: Begin anew I think this might really be something! --- ht.awk | 385 ++++++++++++++++++++++++++++++++--------------------------------- 1 file changed, 187 insertions(+), 198 deletions(-) (limited to 'ht.awk') diff --git a/ht.awk b/ht.awk index 60e042b..b9ae377 100755 --- a/ht.awk +++ b/ht.awk @@ -1,246 +1,235 @@ -#!/usr/bin/awk -f -# -*- indent-tabs-mode: t; -*- +#!/bin/awk -f # HAT TRICK -# (C) 2022 C. Duckworth - -### Commentary: - -# OLDIFS=$IFS; IFS=$'\n'; -# for line in `cat testfile`; do -# test=`echo "$line" | grep -E '[\]$'`; -# if [ $test ]; then -# newline=`echo $line | rev | cut -c 2- | rev`; -# echo -n "$newline"; else echo "$line"; -# fi; done; -# IFS=$OLDIFS - -### Code: +# Copyright (C) 2022 Case Duckworth +# BEGIN { - width = 72 - default_htag = "p" - default_gtag = "" - default_ftag = "" -} - -### Raw formatting -/^>>>/ { - getline first_raw - if (raw_fmt_p("html")) { - raw_html = 1 - html[++hpar] = "
" html_escape(first_raw)
-	}
-	if (raw_fmt_p("gemini")) {
-		raw_gemini = 1
-		gemini[++gpar] = "```"
-		gemini[++gpar] = first_raw
-	}
-	if (raw_fmt_p("gopher")) {
-		raw_gopher = 1
-		gopher[++fpar] = first_raw
+	# Configuration
+	DEFAULT_CONFIG_MODE = "config"
+	config_initialize()
+	config_parse(ENVIRON["HT_CONFIG"] ? ENVIRON["HT_CONFIG"] : "ht.conf")
+	# State
+	DEFTAG = CONFIG["default_tag"]
+	DEFATTR = CONFIG["default_attr"]
+	TAG = DEFTAG
+	ATTR = DEFATTR
+}
+
+# Mutliple-file awareness
+FNR == 1 {
+	fileflush()
+}
+
+# Handle raw sections
+$0 ~ CONFIG["raw_delim"] {
+	RAW = ! RAW
+	if (RAW) {
+		buflush()
+		bufpush(CONFIG["raw_beg"], -1)
+	} else {
+		bufpush(CONFIG["raw_end"], -1)
+		print BUFFER
+		BUFFER = ""
 	}
-	raw = 1
 	next
 }
 
-/^<<
" - } - if (raw_gemini) { - gemini[++gpar] = "```" - gemini[++gpar] = "" - } - if (raw_gopher) { - gopher[++fpar] = "" - } - raw_html = 0 - raw_gemini = 0 - raw_gopher = 0 - raw = 0 +RAW { + bufpush($0) next } -raw { - if (raw_html) { - html_empty = 0 - html[++hpar] = html_escape($0) - } - if (raw_gemini) { - gemini_empty = 0 - gemini[++gpar] = $0 - } - if (raw_gopher) { - gopher_empty = 0 - gopher[++fpar] = $0 - } +# Comments +$0 ~ ("^" COMMENT_DELIM) { next } -# Block types -/^#/ { - match($0, /#+/) - htag = "h" (RLENGTH > 6 ? 6 : RLENGTH) - gtag = substr($0, RSTART, (RLENGTH > 3 ? 3 : RLENGTH)) " " - ftag = substr($0, RSTART, RLENGTH) " " - sub(/^#+[ \t]*/, "", $0) +# HTML escape hatch +/^/ { - title = "" - for (i = 3; i <= NF; i++) { - title = title (title ? " " : "") $i - } - hbuf[++hline] = "" title "" - gbuf[++gline] = "\ngemini\t" $0 - # TODO: gopher - next +# Sure, let's do templating! This makes it less... weird. +/\$/ { + # XXX: This is probably the dumbest way to do it. + gsub(/\$\$/, "$\a", $0) + gsub(/\$[^\a]/, "\\\\&", $0) + gsub(/\$\a/, "$", $0) } -### Everything else +# Blocks of text /./ { - html_empty = 0 - gemini_empty = 0 - gopher_empty = 0 - hbuf[++hline] = $0 - gbuf[++gline] = $0 - fbuf[++fline] = $0 + # EOL escape + if (match($0, /\\$/)) { + sep = -1 + $0 = substr($0, 1, RSTART - 1) + } else { + sep = "\n" + } + # Loop through BLOCK_TYPES + for (bt in BLOCK_TYPES) { + if (match($0, "^" bt "[ \t]*")) { + $0 = substr($0, RSTART + RLENGTH) + if (match(BLOCK_TYPES[bt], "[ \t]*>[ \t]*")) { + parent = substr(BLOCK_TYPES[bt], 1, RSTART - 1) + child = substr(BLOCK_TYPES[bt], RSTART + RLENGTH) + } + if (parent) { + split(parent, pa, FS) + split(child, bl, FS) + if (! IN_PARENT) { + IN_PARENT = pa[1] + } + TAG = IN_PARENT + ATTR = "" + for (i = 2; i <= length(pa); i++) { + ATTR = ATTR (ATTR ? " " : "") pa[i] + } + bufpush("<" child ">" $0 "") + next # XXX: This is messy. + } else { + split(BLOCK_TYPES[bt], bl, FS) + if (IN_PARENT) { + bufpush("") + IN_PARENT = "" + } + if (! BUFFER) { + TAG = bl[1] + for (b = 2; b <= length(bl); b++) { + ATTR = ATTR (ATTR ? " " : "") bl[b] + } + } else { + $0 = "<" BLOCK_TYPES[bt] ">" $0 "" + } + } + } + } + # Loop through LINE_TYPES + for (lt in LINE_TYPES) { + if (match($0, "^" lt "[ \t]*")) { + $0 = substr($0, RSTART + RLENGTH) + templ = LINE_TYPES[lt] + while (match(templ, /\$[0-9]+/)) { + sub(/\$[0-9]+/, $(substr(templ, RSTART + 1, RLENGTH - 1)), templ) + } + $0 = templ + } + } + # Push to buffer + bufpush($0, sep) } +# Blank lines end blocks /^$/ { - bufput() + if (HTML) { + html_end() + } + if (! RAW) { + buflush() + } } +# Clean up END { - bufput() - printarr(html, "html") - printarr(gemini, "gemini") - printarr(gopher, "gopher") -} - - -function bufput() -{ - hbufput() - gbufput() - fbufput() -} - -function clear(arr) -{ - for (x in arr) { - delete arr[x] + if (HTML) { + html_end() + } else if (RAW) { + bufpush(CONFIG["raw_end"], -1) + print BUFFER + } else { + buflush() } } -function fbufput() -{ - if (! length(fbuf)) { - next - } - for (ln in fbuf) { # XXX: gopher line types - paragraph = paragraph (paragraph ? " " : "") fbuf[ln] - } - fill(paragraph) - for (ln in fp) { - gopher[++fpar] = ((ln == 1) ? ftag : "") fp[ln] - } - gopher[++fpar] = "" - paragraph = "" - ftag = default_ftag - clear(fp) - clear(fbuf) -} -function fill(paragraph) +### Buffer-y functions +function buflush() { - char = 0 - ln = 1 - split(paragraph, words, FS) - for (word in words) { - char += length(words[word]) - if (char <= width) { - fp[ln] = fp[ln] (fp[ln] ? " " : "") words[word] - } else { - fp[++ln] = words[word] - char = length(words[word]) + buftrim() + if (BUFFER) { + if (TAG) { + TAG_BEG = "<" TAG (ATTR ? " " ATTR : "") ">" + TAG_END = "" } + print TAG_BEG BUFFER TAG_END + BUFFER = "" + TAG = DEFTAG + ATTR = DEFATTR + IN_PARENT = "" } } -function gbufput() +function bufpush(text, separator) { - if (! length(gbuf)) { - next + if (! separator) { + separator = "\n" } - for (ln in gbuf) { - paragraph = paragraph (paragraph ? " " : "") gbuf[ln] + if (separator == -1) { + separator = "" } - gemini[++gpar] = gtag paragraph - gemini[++gpar] = "" - gtag = default_gtag - paragraph = "" - clear(gbuf) + BUFFER = BUFFER text (separator ? separator : "") } -function gopher_line(type, display, selector, hostname, port) +function buftrim() { - return (type display "\t" selector "\t" hostname "\t" port) -} - -function hbufput() -{ - if (! length(hbuf)) { - next - } - for (ln in hbuf) { - paragraph = paragraph (paragraph ? " " : "") hbuf[ln] - } - fill(paragraph) - for (ln in fp) { - html[++hpar] = ((ln == 1) ? "<" (htag ? htag : default_htag) ">" : "") fp[ln] + if (match(BUFFER, "\n+$")) { + BUFFER = substr(BUFFER, 1, RSTART - 1) } - html[hpar] = html[hpar] (htag_end ? htag_end : "") - paragraph = "" - htag = default_htag - clear(fp) - clear(hbuf) } -function html_escape(text) +### Config functions +function config_initialize() { - gsub(/&/, "\\&", text) - gsub(//, "\\>", text) - return text -} - -function printarr(arr, prefix) + COMMENT_DELIM = ";" + CONFIG["raw_delim"] = "```" + CONFIG["raw_beg"] = "
"
+	CONFIG["raw_end"] = "
" + CONFIG["default_tag"] = "p" + CONFIG["default_attr"] = "" + LINE_TYPES["@"] = "$2" + LINE_TYPES["`"] = "$0" + BLOCK_TYPES["#"] = "h1" + BLOCK_TYPES["##"] = "h2" + BLOCK_TYPES["###"] = "h3" + BLOCK_TYPES["-"] = "ul>li" +} + +function config_parse(file) { - if (prefix) { - fmt = "%s\t%s\n" - } else { - fmt = "%s%s\n" - } - for (x in arr) { - printf fmt, prefix, arr[x] + mode = DEFAULT_CONFIG_MODE + while ((getline < file) > 0) { + if (match($0, /^#/) || ! $0) { + continue + } + if (match($0, /^\\/)) { + $0 = substr($0, 2) + } + if (match($0, /\[[^\]]+\]/)) { + mode = substr($0, RSTART + 1, RLENGTH - 2) + continue + } else { + var = $1 + val = "" + for (i = 2; i <= NF; i++) { + val = val (val ? " " : "") $i + } + if (mode == "config") { + CONFIG[var] = val + } else if (mode == "block") { + BLOCK_TYPES[var] = val + } else if (mode == "line") { + LINE_TYPES[var] = val + } + } } } -function raw_fmt_p(format) +### Other functions +function html_end() { - if (NF < 2) { - return 1 - } - if ($2 ~ /-/) { - if ($2 ~ ("-" format)) { - return 0 - } else { - return 1 - } - } - if ($2 ~ format) { - return 1 - } - return 0 + buftrim() + print BUFFER + BUFFER = "" + HTML = 0 } -- cgit 1.4.1-21-gabe81