about summary refs log tree commit diff stats
path: root/doc.awk
diff options
context:
space:
mode:
Diffstat (limited to 'doc.awk')
-rwxr-xr-xdoc.awk213
1 files changed, 213 insertions, 0 deletions
diff --git a/doc.awk b/doc.awk new file mode 100755 index 0000000..4735f9d --- /dev/null +++ b/doc.awk
@@ -0,0 +1,213 @@
1#!/bin/awk -f
2# DOC AWK
3# ======
4#
5# A quick-and-dirty literate-programming-style documentation generator
6# inspired by [docco][].
7#
8# by Case Duckworth <acdw@acdw.net>
9#
10# Source available under the [Good Choices License][gcl].
11#
12# [gcl]: https://acdw.casa/gcl Good Choices License
13#
14# There's a lot of quick-and-dirty "literate programming tools" out there, many
15# of which were inspired by, and also borrowed from, docco. I was particularly
16# interested in [shocco][], written in POSIX shell (of which I am a fan).
17#
18# Notably missing, however, was a converter of some kind written in AWK. Thus,
19# DOC AWK was born.
20#
21# This page is the result of DOC AWK working on itself. Not bad for < 250 lines
22# including commentary! You can pick up the raw source code of doc.awk [in its
23# git repository][git] to use it yourself.
24#
25# [docco]: https://ashkenas.com/docco/
26# [shocco]: https://rtomayko.github.io/shocco/
27# [git]: https://git.acdw.net/docawk
28#
29# Code
30# ----
BEGIN {
	# Configuration happens up front. Every setting below is resolved by
	# the `getenv` helper defined later in this script: the environment
	# variable is consulted first, then an awk variable of the same short
	# name (e.g. `-v COMMENT=...`), and finally a built-in default.
	#
	# `COMMENT` is the regex that recognises a whole comment *line* (not a
	# trailing inline comment). The default suits awk, shell, and any other
	# language using `#` as its comment delimiter.
	COMMENT = getenv("DOCAWK_COMMENT", COMMENT, "^[ \t]*#+[ \t]*")

	# `TEXTPROC` is the command that commentary is piped through. The
	# default is the vendored `mdown.awk` script, which comes from
	# [d.awk](https://github.com/wernsey/d.awk).
	TEXTPROC = getenv("DOCAWK_TEXTPROC", TEXTPROC, "./mdown.awk")

	# `CODEPROC` is the command that code sections are piped through. The
	# bundled `htmlsafe.awk` simply escapes <, &, and >.
	CODEPROC = getenv("DOCAWK_CODEPROC", CODEPROC, "./htmlsafe.awk")

	# `HEADER` and `FOOTER` name the files printed before and after the
	# generated body. The defaults are the bundled html files, since html
	# is the default output type.
	#
	# Each of these files is actually a *template*: keys written as
	# `@@VARIABLE@@` are expanded on the way out, which is mostly useful
	# for filling in the title.
	HEADER = getenv("DOCAWK_HEADER", HEADER, "./header.html")
	FOOTER = getenv("DOCAWK_FOOTER", FOOTER, "./footer.html")
}
59
# `FILENAME` has no value yet while `BEGIN` runs, so a header whose template
# wants the file name cannot be printed there. Instead the header is emitted
# when the very first record arrives. The `begun` flag (set by the `FNR == 1`
# rule) stops it from being printed again for every subsequent record or file.
!begun {
	# Seed the template variables with the document title first, so the
	# header template can expand it.
	TV["TITLE"] = get_title()
	# One header for the whole run: when several files are given, their
	# output is concatenated into a single document anyway.
	file_print(HEADER)
}
71
# `doc.awk` is multi-file aware. It also removes the shebang line from the
# script if it exists, because you probably don't want that in the output.
#
# It wouldn't be a *bad* idea to make a heuristic for determining the type of
# source file we're converting here.
FNR == 1 {
	# Record that input has started, so the header is printed only once.
	begun = 1
	# Only a shebang is swallowed here. Any other first line falls through
	# to the ordinary rules below, so it is classified, has its comment
	# delimiter stripped, and flushes the opposite buffer exactly like
	# every other line. Buffering it here directly would leave the comment
	# marker in the text and, with several input files, could print the
	# end of one file and the start of the next out of order.
	if ($0 ~ /^#!/) {
		next
	}
}
89
# The heart of the script. A line matching `DOCAWK_COMMENT` is commentary and
# belongs to the text buffer; anything else belongs to the code buffer. Lines
# pile up in the buffer for their kind, and whenever the kind changes the
# *other* buffer is printed and emptied first, so output order follows source
# order.
{
	if ($0 ~ COMMENT) {
		lt = "text"
		bufprint("code")
		# Drop the comment delimiter so only the prose is buffered.
		sub(COMMENT, "", $0)
	} else {
		lt = "code"
		bufprint("text")
	}
	bufadd(lt)
}
108
# Once the input is exhausted either buffer may still hold something, so both
# are flushed. Text goes last to allow for closing commentary at the bottom of
# a file. The footer template finishes the document.
END {
	bufprint("code")
	bufprint("text")
	file_print(FOOTER)
}
116
117
118# Functions
119# ---------
120#
# *bufadd*: append STR, followed by a newline, to the buffer named TYPE. When
# STR is omitted (or otherwise false), the current input record `$0` is used.
function bufadd(type, str)
{
	if (!str) {
		str = $0
	}
	buf[type] = buf[type] str "\n"
}
126
# *bufprint*: print the buffer of TYPE and then empty it. The buffer is trimmed
# first; if nothing is left, nothing is printed and the buffer is kept as is.
#
# Code is piped through `CODEPROC` and wrapped in a little HTML code block;
# text is piped through `TEXTPROC`. Each pipe is closed straight away so the
# processor finishes its output before anything else is printed. I could maybe
# have a DOCAWK_CODE_PRE/POST and maybe even one for text too, to make it more
# extensible (to other markup languages, for example).
function bufprint(type)
{
	buf[type] = trim(buf[type])
	if (buf[type]) {
		if (type == "code") {
			printf "<pre><code>"
			# The buffer holds arbitrary source code, so it must
			# never be used as the format string itself: a `%` in
			# the code would be read as a conversion specifier and
			# mangle the output or abort awk. Pass it as data.
			printf("%s", buf[type]) | CODEPROC
			close(CODEPROC)
			print "</code></pre>"
		} else if (type == "text") {
			print(buf[type]) | TEXTPROC
			close(TEXTPROC)
		}
		buf[type] = ""
	}
}
147
# *file_print*: copy FILE to the output one line at a time, running each line
# through `template_expand` first. An empty FILE name prints nothing. The loop
# continues only while `getline` returns more than 0, so it stops at
# end-of-file (0) as well as on a read error (-1).
function file_print(file)
{
	if (!file) {
		return
	}
	while ((getline l < file) > 0) {
		print template_expand(l)
	}
	close(file)
}
159
# *get_title*: work out the title for the generated document. A title given
# through `DOCAWK_TITLE` (or the awk variable `TITLE`) wins; failing that, the
# basename of the file currently being read is used.
function get_title()
{
	title = getenv("DOCAWK_TITLE", TITLE)
	if (title) {
		return title
	}
	# No explicit title: strip everything up to the last slash from the
	# current file name.
	title = FILENAME
	sub(/.*\//, "", title)
	return title
}
172
# *getenv*: a convenience lookup with two fallbacks. The environment variable
# named ENV is returned if it has a value. Otherwise VAR is returned if it is
# set (this is how `doc.awk -v var=foo` takes effect). Otherwise the default
# DEF is returned.
function getenv(env, var, def)
{
	if (ENVIRON[env]) {
		return ENVIRON[env]
	}
	if (var) {
		return var
	}
	return def
}
187
# *template_expand*: expand templates of the form `@@template@@` in TEXT and
# return the result. It only does variables, and works by line, but every
# placeholder on the line is expanded, not just the first one.
#
# Due to the way awk works, template variables need to live in their own special
# array, `TV`. I'd love it if awk had some kind of `eval` functionality, but at
# least POSIX awk doesn't. A key that is missing from `TV` expands to nothing.
#
# `out` and `key` are locals, declared the awk way as extra parameters; callers
# pass only TEXT.
function template_expand(text,    out, key)
{
	out = ""
	while (match(text, /@@[^@]*@@/)) {
		# The name sits between the two pairs of `@` delimiters.
		key = substr(text, RSTART + 2, RLENGTH - 4)
		out = out substr(text, 1, RSTART - 1) TV[key]
		# Carry on with the remainder only, so an expanded value is
		# never scanned again for further placeholders.
		text = substr(text, RSTART + RLENGTH)
	}
	return out text
}
206
# *trim*: strip leading and trailing spaces and newlines from STR and return
# what remains. Tabs are left alone.
function trim(str)
{
	sub(/^[ \n]+/, "", str)
	sub(/[ \n]+$/, "", str)
	return str
}