#!/bin/awk -f
# DOC AWK
# ======
#
# A quick-and-dirty literate-programming-style documentation generator
# inspired by [docco][].
#
# by Case Duckworth
#
# Source available under the [Good Choices License][gcl].
#
# [gcl]: https://acdw.casa/gcl Good Choices License
#
# There's a lot of quick-and-dirty "literate programming tools" out there, many
# of which were inspired by, and also borrowed from, docco. I was particularly
# interested in [shocco][], written in POSIX shell (of which I am a fan).
#
# Notably missing, however, was a converter of some kind written in AWK. Thus,
# DOC AWK was born.
#
# This page is the result of DOC AWK working on itself. Not bad for < 250 lines
# including commentary! You can pick up the raw source code of doc.awk [in its
# git repository][git] to use it yourself.
#
# [docco]: https://ashkenas.com/docco/
# [shocco]: https://rtomayko.github.io/shocco/
# [git]: https://git.acdw.net/docawk
#
# Code
# ----

BEGIN {
	# All the best awk scripts start with a `BEGIN` block. In this one, we
	# set a few variables from the environment, with defaults. I use the
	# convenience function `getenv`, further down this script, to make it
	# easier.
	#
	# First, the comment regex. This regex detects a comment *line*, not an
	# inline comment. By default, it's set up for awk, shell, and other
	# languages that use `#` as a comment delimiter, but you can make it
	# whatever you want.
	COMMENT = getenv("DOCAWK_COMMENT", COMMENT, "^[ \t]*#+[ \t]*")
	# You can set `DOCAWK_TEXTPROC` to any text processor you want, but the
	# default is the vendored `mdown.awk` script in this repo. It's from
	# [d.awk](https://github.com/wernsey/d.awk).
	TEXTPROC = getenv("DOCAWK_TEXTPROC", TEXTPROC, "./mdown.awk")
	# You can also set the processor for code sections of the source file;
	# the included `htmlsafe.awk` simply escapes <, &, and >.
	CODEPROC = getenv("DOCAWK_CODEPROC", CODEPROC, "./htmlsafe.awk")
	# Usually, a file header and footer are enough for most documents. The
	# defaults here are the included header.html and footer.html, since the
	# default output type is html.
	#
	# Each of these documents is actually a *template*, with keys that can
	# expand to variables inside of `@@VARIABLE@@`. This is mostly
	# for title expansion.
	HEADER = getenv("DOCAWK_HEADER", HEADER, "./header.html")
	FOOTER = getenv("DOCAWK_FOOTER", FOOTER, "./footer.html")
}

# Because `FILENAME` is unset during `BEGIN`, template expansion that attempts
# to view the filename doesn't work. Thus, I need a state variable to track
# whether we've started or not (so that I don't print a header with every new
# file).
! begun {
	# The template array is initialized with the document's title.
	TV["TITLE"] = get_title()
	# Print the header here, since if multiple files are passed to DOC AWK
	# they'll all be concatenated anyway.
	file_print(HEADER)
}

# `doc.awk` is multi-file aware. It also removes the shebang line from the
# script if it exists, because you probably don't want that in the output.
# Any other first line simply falls through to the rules below, so it has its
# comment prefix stripped and flushes the opposite buffer exactly like every
# other line of the file.
#
# It wouldn't be a *bad* idea to make a heuristic for determining the type of
# source file we're converting here.
FNR == 1 {
	begun = 1
	if ($0 ~ /^#!/) {
		next
	}
}

# The main logic is quite simple: if a given line is a comment as defined by
# `DOCAWK_COMMENT`, it's in a text block and should be treated as such;
# otherwise, it's in a code block. Accumulate each part in a dedicated buffer,
# and on a switch-over between code and text, print the buffer and reset.
$0 !~ COMMENT {
	lt = "code"
	bufprint("text")
}

$0 ~ COMMENT {
	lt = "text"
	bufprint("code")
	sub(COMMENT, "", $0)
}

{
	bufadd(lt)
}

# Of course, at the end there might be something in either buffer, so print that
# out too. I've decided to put text last for the possibility of ending
# commentary.
END {
	bufprint("code")
	bufprint("text")
	file_print(FOOTER)
}

# Functions
# ---------
#
# *bufadd*: Add a STR to buffer TYPE. STR defaults to $0, the input record.
function bufadd(type, str) {
	# Compare against the empty string instead of testing truthiness, so
	# an explicit STR that is numerically zero (0, or a field holding "0")
	# is stored rather than silently replaced by $0.
	buf[type] = buf[type] (str == "" ? $0 : str) "\n"
}

# *bufprint*: Print a buffer of TYPE. Automatically wrap the code blocks in a
# little HTML code block. I could maybe have a DOCAWK_CODE_PRE/POST and maybe
# even one for text too, to make it more extensible (to other markup languages,
# for example).
#
# The buffer is trimmed first; an empty buffer prints nothing. After printing,
# the buffer is reset so the next block starts fresh.
function bufprint(type) {
	buf[type] = trim(buf[type])
	if (buf[type] != "") {
		if (type == "code") {
			# NOTE(review): the wrapper tags were unreadable in the
			# reviewed copy; restored as a pre/code pair to match
			# the description above. Confirm against upstream.
			printf "<pre><code>"
			# Always go through "%s": source code routinely contains
			# `%`, which must not be interpreted as a format string.
			printf("%s", buf[type]) | CODEPROC
			close(CODEPROC)
			print "</code></pre>"
		} else if (type == "text") {
			print(buf[type]) | TEXTPROC
			close(TEXTPROC)
		}
		buf[type] = ""
	}
}

# *file_print*: Print FILE line-by-line, expanding templates in each line. The
# `> 0` check here ensures that it bails on error (-1). `line` is a local
# variable (awk's extra-parameter idiom), not an argument.
function file_print(file,    line) {
	if (file) {
		while ((getline line < file) > 0) {
			print template_expand(line)
		}
		close(file)
	}
}

# *get_title*: get the title of the current script, for the expanded document.
# If variables are set, use those; otherwise try to figure out the title from
# the document's basename. `title` is a local variable, not an argument.
function get_title(    title) {
	title = getenv("DOCAWK_TITLE", TITLE)
	# An empty-string test (rather than `! title`) keeps a title of "0".
	if (title == "") {
		title = FILENAME
		sub(/.*\//, "", title)
	}
	return title
}

# *getenv*: a convenience function for pulling values out of the environment.
# If an environment variable ENV isn't found (or is empty), test if VAR is set
# (i.e., `doc.awk -v var=foo`.) and return it if it's set. Otherwise, return the
# default value DEF.
#
# "Set" means "not the empty string": values that merely look like the number
# zero are still honoured.
function getenv(env, var, def) {
	# `in` avoids creating an empty ENVIRON entry as a side effect.
	if ((env in ENVIRON) && ENVIRON[env] != "") {
		return ENVIRON[env]
	} else if (var != "") {
		return var
	} else {
		return def
	}
}

# *template_expand*: expand templates of the form `@@template@@` in the text.
# Currently it only does variables, and works by line. Every template on the
# line is expanded; unknown variables expand to the empty string. Replacement
# text is not rescanned, so a value containing `@@` cannot loop forever.
#
# Due to the way awk works, template variables need to live in their own special
# array, `TV`. I'd love it if awk had some kind of `eval` functionality, but at
# least POSIX awk doesn't.
#
# `out` and `key` are local variables, not arguments.
function template_expand(text,    out, key) {
	out = ""
	while (match(text, /@@[^@]*@@/)) {
		# Strip the two-character delimiters on each side of the key.
		key = substr(text, RSTART + 2, RLENGTH - 4)
		out = out substr(text, 1, RSTART - 1) TV[key]
		# Continue scanning only the part after this template.
		text = substr(text, RSTART + RLENGTH)
	}
	return out text
}

# *trim*: remove spaces and newlines from either end of a string. Tabs are
# deliberately left alone.
function trim(str) {
	sub(/^[ \n]*/, "", str)
	sub(/[ \n]*$/, "", str)
	return str
}