1 files changed, 213 insertions, 0 deletions
diff --git a/doc.awk b/doc.awk
new file mode 100755
index 0000000..4735f9d
--- /dev/null
+++ b/doc.awk

@@ -0,0 +1,213 @@
+#!/bin/awk -f
+# DOC AWK
+# ======
+#
+# A quick-and-dirty literate-programming-style documentation generator
+# inspired by [docco][].
+#
+# by Case Duckworth <acdw@acdw.net>
+#
+# Source available under the [Good Choices License][gcl].
+#
+# [gcl]: https://acdw.casa/gcl Good Choices License
+#
+# There's a lot of quick-and-dirty "literate programming tools" out there, many
+# of which were inspired by, and also borrowed from, docco.  I was particularly
+# interested in [shocco][], written in POSIX shell (of which I am a fan).
+#
+# Notably missing, however, was a converter of some kind written in AWK.  Thus,
+# DOC AWK was born.
+#
+# This page is the result of DOC AWK working on itself.  Not bad for < 250 lines
+# including commentary!  You can pick up the raw source code of doc.awk [in its
+# git repository][git] to use it yourself.
+#
+# [docco]: https://ashkenas.com/docco/
+# [shocco]: https://rtomayko.github.io/shocco/
+# [git]: https://git.acdw.net/docawk
+#
+# Code
+# ----
+BEGIN {
+        # All the best awk scripts start with a `BEGIN` block.  In this one, we
+        # set a few variables from the environment, with defaults.  I use the
+        # convenience function `getenv`, further down this script, to make it
+        # easier: for each setting, the environment variable wins, then a
+        # variable passed with `-v`, and finally the built-in default.
+        #
+        # First, the comment regex.  This regex detects a comment *line*, not an
+        # inline comment.  By default, it's set up for awk, shell, and other
+        # languages that use `#` as a comment delimiter, but you can make it
+        # whatever you want.
+        COMMENT = getenv("DOCAWK_COMMENT", COMMENT, "^[ \t]*#+[ \t]*")
+        # You can set `DOCAWK_TEXTPROC` to any text processor you want, but the
+        # default is the vendored `mdown.awk` script in this repo.  It's from
+        # [d.awk](https://github.com/wernsey/d.awk).
+        TEXTPROC = getenv("DOCAWK_TEXTPROC", TEXTPROC, "./mdown.awk")
+        # You can also set the processor for code sections of the source file;
+        # the included `htmlsafe.awk` simply escapes <, &, and >.
+        CODEPROC = getenv("DOCAWK_CODEPROC", CODEPROC, "./htmlsafe.awk")
+        # Usually, a file header and footer are enough for most documents.  The
+        # defaults here are the included header.html and footer.html, since the
+        # default output type is html.
+        #
+        # Each of these documents is actually a *template*, with keys of the
+        # form `@@VARIABLE@@` that expand to variables.  This is mostly used
+        # for title expansion.
+        HEADER = getenv("DOCAWK_HEADER", HEADER, "./header.html")
+        FOOTER = getenv("DOCAWK_FOOTER", FOOTER, "./footer.html")
+}
+# Because `FILENAME` is unset during `BEGIN`, template expansion that attempts
+# to view the filename doesn't work.  Thus, I need a state variable to track
+# whether we've started or not (so that I don't print a header with every new
+# file).
+! begun {
+        # The template array `TV` is initialized with the document's title.
+        # When no title was configured, `get_title` falls back to `FILENAME`,
+        # which is why this happens here rather than in `BEGIN`.
+        TV["TITLE"] = get_title()
+        # Print the header here, since if multiple files are passed to DOC AWK
+        # they'll all be concatenated anyway.  The `FNR == 1` rule below sets
+        # `begun`, so this rule stops firing once the first line has been seen.
+        file_print(HEADER)
+}
+# `doc.awk` is multi-file aware.  The first line of every input file gets a
+# little extra handling: it marks the run as begun (so the header is printed
+# only once), and a shebang line is dropped, because you probably don't want
+# that in the output.
+#
+# Any other first line falls through to the main rules below, so it is
+# classified, stripped of its comment prefix, and flushes the opposite buffer
+# exactly like every other line.  Buffering it here directly would keep the
+# comment delimiter in the text, and could emit a new file's first code line
+# ahead of commentary still pending from the previous file.
+#
+# It wouldn't be a *bad* idea to make a heuristic for determining the type of
+# source file we're converting here.
+FNR == 1 {
+        begun = 1
+        if ($0 ~ /^#!/) {
+                next
+        }
+}
+# The main logic is quite simple: if a given line is a comment as defined by
+# `DOCAWK_COMMENT`, it's in a text block and should be treated as such;
+# otherwise, it's in a code block.  Accumulate each part in a dedicated buffer,
+# and on a switch-over between code and text, print the buffer and reset.
+{
+        if ($0 ~ COMMENT) {
+                # A comment line: flush any pending code, then strip the
+                # comment delimiter so only the prose is buffered.
+                lt = "text"
+                bufprint("code")
+                sub(COMMENT, "", $0)
+        } else {
+                # Anything else is code: flush any pending text first.
+                lt = "code"
+                bufprint("text")
+        }
+        bufadd(lt)
+}
+# Of course, at the end there might be something left in either buffer, so
+# print that out too.  I've decided to put text last for the possibility of
+# ending commentary.  The footer template is printed after both buffers.
+END {
+        bufprint("code")
+        bufprint("text")
+        file_print(FOOTER)
+}
+# Functions
+# ---------
+#
+# *bufadd*: Add a STR to buffer TYPE, followed by a newline.  STR defaults to
+# $0, the input record, when it is omitted or empty.  The test is against the
+# empty string rather than plain truthiness, so a STR of 0 is stored instead
+# of being silently replaced by the input record.
+function bufadd(type, str)
+{
+        buf[type] = buf[type] (str != "" ? str : $0) "\n"
+}
+# *bufprint*: Print a buffer of TYPE and reset it.  The buffer is trimmed
+# first; if nothing is left, nothing is printed.  Code is piped through
+# `CODEPROC` and wrapped in a little HTML code block; text is piped through
+# `TEXTPROC`.  Each pipe is closed right away so the processor finishes before
+# anything else is printed.  A buffer of any other TYPE is cleared without
+# being printed.
+#
+# The code is sent with an explicit `%s` format.  Handing the buffer to
+# `printf` as the format itself would make awk interpret every `%` in the
+# source being documented as a conversion specifier.
+#
+# I could maybe have a DOCAWK_CODE_PRE/POST and maybe even one for text too, to
+# make it more extensible (to other markup languages, for example).
+function bufprint(type)
+{
+        buf[type] = trim(buf[type])
+        if (buf[type] != "") {
+                if (type == "code") {
+                        printf "<pre><code>"
+                        printf("%s", buf[type]) | CODEPROC
+                        close(CODEPROC)
+                        print "</code></pre>"
+                } else if (type == "text") {
+                        print(buf[type]) | TEXTPROC
+                        close(TEXTPROC)
+                }
+                buf[type] = ""
+        }
+}
+# *file_print*: Print FILE line-by-line, running each line through
+# `template_expand`.  Nothing happens when FILE is the empty string.  The `> 0`
+# check here ensures that it bails on error (-1) as well as at end of file.
+# LINE is a local variable (awk's extra-parameter idiom), so reading a
+# template doesn't clobber a global.
+function file_print(file,    line)
+{
+        if (file == "") {
+                return
+        }
+        while ((getline line < file) > 0) {
+                print template_expand(line)
+        }
+        close(file)
+}
+# *get_title*: work out the title for the expanded document.  A title given
+# through `DOCAWK_TITLE` or the `TITLE` variable is used as-is; failing that,
+# the title is the basename of the file currently being read.
+function get_title()
+{
+        title = getenv("DOCAWK_TITLE", TITLE)
+        if (title) {
+                return title
+        }
+        title = FILENAME
+        sub(/.*\//, "", title)
+        return title
+}
+# *getenv*: a convenience function for pulling values out of the environment.
+# If the environment variable ENV is set to a non-empty value, return it.
+# Otherwise, test if VAR is set (i.e., `doc.awk -v var=foo`) and return it if
+# it's set.  Otherwise, return the default value DEF.
+#
+# "Set" means "not the empty string".  Values are compared against "" instead
+# of being tested for truth, so a value of `0` counts as set.  The `in` test
+# keeps the lookup from creating an empty entry in `ENVIRON`.
+function getenv(env, var, def)
+{
+        if ((env in ENVIRON) && ENVIRON[env] != "") {
+                return ENVIRON[env]
+        }
+        if (var != "") {
+                return var
+        }
+        return def
+}
+# *template_expand*: expand templates of the form `@@template@@` in the text.
+# Currently it only does variables, and works by line.  Every placeholder on
+# the line is expanded, not just the first one.  Expanded values are not
+# rescanned, so a value that itself contains `@@` is emitted literally.  An
+# unknown key expands to the empty string.
+#
+# Due to the way awk works, template variables need to live in their own special
+# array, `TV`.  I'd love it if awk had some kind of `eval` functionality, but at
+# least POSIX awk doesn't.
+#
+# OUT and KEY are local variables (awk's extra-parameter idiom).
+function template_expand(text,    out, key)
+{
+        out = ""
+        while (match(text, /@@[^@]*@@/)) {
+                key = substr(text, RSTART + 2, RLENGTH - 4)
+                out = out substr(text, 1, RSTART - 1) TV[key]
+                text = substr(text, RSTART + RLENGTH)
+        }
+        return out text
+}
+# *trim*: strip leading and trailing runs of spaces and newlines from a string
+# and return the result.  Tabs are left alone.
+function trim(str)
+{
+        sub(/^[ \n]+/, "", str)
+        sub(/[ \n]+$/, "", str)
+        return str
+}

diff --git a/doc.awk b/doc.awk new file mode 100755 index 0000000..4735f9d --- /dev/null +++ b/doc.awk
@@ -0,0 +1,213 @@
	1	#!/bin/awk -f
	2	# DOC AWK
	3	# ======
	4	#
	5	# A quick-and-dirty literate-programming-style documentation generator
	6	# inspired by [docco][].
	7	#
	8	# by Case Duckworth <acdw@acdw.net>
	9	#
	10	# Source available under the [Good Choices License][gcl].
	11	#
	12	# [gcl]: https://acdw.casa/gcl Good Choices License
	13	#
	14	# There's a lot of quick-and-dirty "literate programming tools" out there, many
	15	# of which were inspired by, and also borrowed from, docco. I was particularly
	16	# interested in [shocco][], written in POSIX shell (of which I am a fan).
	17	#
	18	# Notably missing, however, was a converter of some kind written in AWK. Thus,
	19	# DOC AWK was born.
	20	#
	21	# This page is the result of DOC AWK working on itself. Not bad for < 250 lines
	22	# including commentary! You can pick up the raw source code of doc.awk [in its
	23	# git repository][git] to use it yourself.
	24	#
	25	# [docco]: https://ashkenas.com/docco/
	26	# [shocco]: https://rtomayko.github.io/shocco/
	27	# [git]: https://git.acdw.net/docawk
	28	#
	29	# Code
	30	# ----
	31	BEGIN {
	32	# All the best awk scripts start with a `BEGIN` block. In this one, we
	33	# set a few variables from the environment, with defaults. I use the
	34	# convenience function `getenv`, further down this script, to make it
	35	# easier.
	36	#
	37	# First, the comment regex. This regex detects a comment line, not an
	38	# inline comment. By default, it's set up for awk, shell, and other
	39	# languages that use `#` as a comment delimiter, but you can make it
	40	# whatever you want.
	41	COMMENT = getenv("DOCAWK_COMMENT", COMMENT, "^[ \t]*#+[ \t]*")
	42	# You can set `DOCAWK_TEXTPROC` to any text processor you want, but the
	43	# default is the vendored `mdown.awk` script in this repo. It's from
	44	# [d.awk](https://github.com/wernsey/d.awk).
	45	TEXTPROC = getenv("DOCAWK_TEXTPROC", TEXTPROC, "./mdown.awk")
	46	# You can also set the processor for code sections of the source file;
	47	# the included `htmlsafe.awk` simply escapes <, &, and >.
	48	CODEPROC = getenv("DOCAWK_CODEPROC", CODEPROC, "./htmlsafe.awk")
	49	# Usually, a file header and footer are enough for most documents. The
	50	# defaults here are the included header.html and footer.html, since the
	51	# default output type is html.
	52	#
	53	# Each of these documents are actually templates, with keys that can
	54	# expand to variables inside of `@@VARIABLE@@`. This is mostly
	55	# for title expansion.
	56	HEADER = getenv("DOCAWK_HEADER", HEADER, "./header.html")
	57	FOOTER = getenv("DOCAWK_FOOTER", FOOTER, "./footer.html")
	58	}
	59
	60	# Because `FILENAME` is unset during `BEGIN`, template expansion that attempts
	61	# to view the filename doesn't work. Thus, I need a state variable to track
	62	# whether we've started or not (so that I don't print a header with every new
	63	# file).
	64	! begun {
	65	# The template array is initialized with the document's title.
	66	TV["TITLE"] = get_title()
	67	# Print the header here, since if multiple files are passed to DOC AWK
	68	# they'll all be concatenated anyway.
	69	file_print(HEADER)
	70	}
	71
	72	# `doc.awk` is multi-file aware. It also removes the shebang line from the
	73	# script if it exists, because you probably don't want that in the output.
	74	#
	75	# It wouldn't be a bad idea to make a heuristic for determining the type of
	76	# source file we're converting here.
	77	FNR == 1 {
	78	begun = 1
	79	if ($0 ~ COMMENT) {
	80	lt = "text"
	81	} else {
	82	lt = "code"
	83	}
	84	if ($0 !~ /^#!/) {
	85	bufadd(lt)
	86	}
	87	next
	88	}
	89
	90	# The main logic is quite simple: if a given line is a comment as defined by
	91	# `DOCAWK_COMMENT`, it's in a text block and should be treated as such;
	92	# otherwise, it's in a code block. Accumulate each part in a dedicated buffer,
	93	# and on a switch-over between code and text, print the buffer and reset.
	94	$0 !~ COMMENT {
	95	lt = "code"
	96	bufprint("text")
	97	}
	98
	99	$0 ~ COMMENT {
	100	lt = "text"
	101	bufprint("code")
	102	sub(COMMENT, "", $0)
	103	}
	104
	105	{
	106	bufadd(lt)
	107	}
	108
	109	# Of course, at the end there might be something in either buffer, so print that
	110	# out too. I've decided to put text last for the possibility of ending commentary.
	111	END {
	112	bufprint("code")
	113	bufprint("text")
	114	file_print(FOOTER)
	115	}
	116
	117
	118	# Functions
	119	# ---------
	120	#
	121	# bufadd: Add a STR to buffer TYPE. STR defaults to $0, the input record.
	122	function bufadd(type, str)
	123	{
	124	buf[type] = buf[type] (str ? str : $0) "\n"
	125	}
	126
	127	# bufprint: Print a buffer of TYPE. Automatically wrap the code blocks in a
	128	# little HTML code block. I could maybe have a DOCAWK_CODE_PRE/POST and maybe
	129	# even one for text too, to make it more extensible (to other markup languages,
	130	# for example).
	131	function bufprint(type)
	132	{
	133	buf[type] = trim(buf[type])
	134	if (buf[type]) {
	135	if (type == "code") {
	136	printf "<pre><code>"
	137	printf(buf[type]) | CODEPROC
	138	close(CODEPROC)
	139	print "</code></pre>"
	140	} else if (type == "text") {
	141	print(buf[type]) | TEXTPROC
	142	close(TEXTPROC)
	143	}
	144	buf[type] = ""
	145	}
	146	}
	147
	148	# file_print: Print FILE line-by-line. The `> 0` check here ensures that it
	149	# bails on error (-1).
	150	function file_print(file)
	151	{
	152	if (file) {
	153	while ((getline l < file) > 0) {
	154	print template_expand(l)
	155	}
	156	close(file)
	157	}
	158	}
	159
	160	# get_title: get the title of the current script, for the expanded document.
	161	# If variables are set, use those; otherwise try to figure out the title from
	162	# the document's basename.
	163	function get_title()
	164	{
	165	title = getenv("DOCAWK_TITLE", TITLE)
	166	if (! title) {
	167	title = FILENAME
	168	sub(/.*\//, "", title)
	169	}
	170	return title
	171	}
	172
	173	# getenv: a convenience function for pulling values out of the environment.
	174	# If an environment variable ENV isn't found, test if VAR is set (i.e., `doc.awk
	175	# -v var=foo`.) and return it if it's set. Otherwise, return the default value
	176	# DEF.
	177	function getenv(env, var, def)
	178	{
	179	if (ENVIRON[env]) {
	180	return ENVIRON[env]
	181	} else if (var) {
	182	return var
	183	} else {
	184	return def
	185	}
	186	}
	187
	188	# template_expand: expand templates of the form `@@template@@` in the text.
	189	# Currently it only does variables, and works by line.
	190	#
	191	# Due to the way awk works, template variables need to live in their own special
	192	# array, `TV`. I'd love it if awk had some kind of `eval` functionality, but at
	193	# least POSIX awk doesn't.
	194	function template_expand(text)
	195	{
	196	if (match(text, /@@[^@]*@@/)) {
	197	var = substr(text, RSTART + 2, RLENGTH - 4)
	198	new = substr(text, 1, RSTART - 1)
	199	new = new TV[var]
	200	new = new substr(text, RSTART + RLENGTH)
	201	} else {
	202	new = text
	203	}
	204	return new
	205	}
	206
	207	# trim: remove whitespace from either end of a string.
	208	function trim(str)
	209	{
	210	sub(/^[ \n]*/, "", str)
	211	sub(/[ \n]*$/, "", str)
	212	return str
	213	}