diff options
Diffstat (limited to 'doc.awk')
-rwxr-xr-x | doc.awk | 213 |
1 files changed, 213 insertions, 0 deletions
diff --git a/doc.awk b/doc.awk new file mode 100755 index 0000000..4735f9d --- /dev/null +++ b/doc.awk | |||
@@ -0,0 +1,213 @@ | |||
1 | #!/bin/awk -f | ||
2 | # DOC AWK | ||
3 | # ====== | ||
4 | # | ||
5 | # A quick-and-dirty literate-programming-style documentation generator | ||
6 | # inspired by [docco][]. | ||
7 | # | ||
8 | # by Case Duckworth <acdw@acdw.net> | ||
9 | # | ||
10 | # Source available under the [Good Choices License][gcl]. | ||
11 | # | ||
12 | # [gcl]: https://acdw.casa/gcl Good Choices License | ||
13 | # | ||
14 | # There's a lot of quick-and-dirty "literate programming tools" out there, many | ||
15 | # of which were inspired by, and also borrowed from, docco. I was particularly | ||
16 | # interested in [shocco][], written in POSIX shell (of which I am a fan). | ||
17 | # | ||
18 | # Notably missing, however, was a converter of some kind written in AWK. Thus, | ||
19 | # DOC AWK was born. | ||
20 | # | ||
21 | # This page is the result of DOC AWK working on itself. Not bad for < 250 lines | ||
22 | # including commentary! You can pick up the raw source code of doc.awk [in its | ||
23 | # git repository][git] to use it yourself. | ||
24 | # | ||
25 | # [docco]: https://ashkenas.com/docco/ | ||
26 | # [shocco]: https://rtomayko.github.io/shocco/ | ||
27 | # [git]: https://git.acdw.net/docawk | ||
28 | # | ||
29 | # Code | ||
30 | # ---- | ||
BEGIN {
	# Configuration is resolved up front.  Every setting goes through the
	# `getenv` helper defined further down this script, which prefers the
	# environment variable, then an awk variable of the same name, and
	# finally the built-in default given here.
	#
	# `COMMENT` is the regex that recognizes a comment *line* (not an
	# inline comment).  The default suits awk, shell, and other languages
	# that use `#` as a comment delimiter, but it can be anything.
	COMMENT = getenv("DOCAWK_COMMENT", COMMENT, "^[ \t]*#+[ \t]*")
	# `TEXTPROC` is the command that text (comment) blocks are piped
	# through.  The default is the vendored `mdown.awk` script in this
	# repo, which comes from [d.awk](https://github.com/wernsey/d.awk).
	TEXTPROC = getenv("DOCAWK_TEXTPROC", TEXTPROC, "./mdown.awk")
	# `CODEPROC` is the command that code blocks are piped through; the
	# included `htmlsafe.awk` simply escapes <, &, and >.
	CODEPROC = getenv("DOCAWK_CODEPROC", CODEPROC, "./htmlsafe.awk")
	# A file header and footer are enough for most documents.  The
	# defaults are the included header.html and footer.html, since the
	# default output type is html.
	#
	# Each of these files is actually a *template*: keys written as
	# `@@VARIABLE@@` are expanded to variables.  This is mostly used for
	# title expansion.
	HEADER = getenv("DOCAWK_HEADER", HEADER, "./header.html")
	FOOTER = getenv("DOCAWK_FOOTER", FOOTER, "./footer.html")
}
59 | |||
# `FILENAME` has no value while `BEGIN` runs, so a header template that wants
# the file name cannot be expanded there.  The header is therefore printed from
# a main rule instead, guarded by the state variable `begun` so that it is not
# printed again for later records or later files.
!begun {
	# Seed the template array with the document's title, then print the
	# header once: if multiple files are passed to DOC AWK they are all
	# concatenated under it anyway.
	TV["TITLE"] = get_title()
	file_print(HEADER)
}
71 | |||
# `doc.awk` is multi-file aware: this rule fires on the first line of every
# input file and marks processing as begun.  A shebang line is dropped, because
# you probably don't want that in the output.  Any other first line is treated
# exactly like a later line: it is classified as text or code, the opposite
# buffer is flushed (so blocks left over from a previous file keep their
# order), and the comment prefix is stripped from text lines before buffering.
#
# It wouldn't be a *bad* idea to make a heuristic for determining the type of
# source file we're converting here.
FNR == 1 {
	begun = 1
	if ($0 ~ /^#!/) {
		next
	}
	if ($0 ~ COMMENT) {
		lt = "text"
		bufprint("code")
		# Without this, a leading `# Title` comment would reach the
		# text processor with its comment marker still attached.
		sub(COMMENT, "", $0)
	} else {
		lt = "code"
		bufprint("text")
	}
	bufadd(lt)
	next
}
89 | |||
# The main logic is quite simple.  A line matching `DOCAWK_COMMENT` belongs to
# a text block; anything else belongs to a code block.  Each kind accumulates
# in its own buffer, and whenever the kind switches, the other buffer is
# printed and reset.  Text lines lose their comment prefix before being stored.
{
	if ($0 ~ COMMENT) {
		lt = "text"
		bufprint("code")
		sub(COMMENT, "", $0)
	} else {
		lt = "code"
		bufprint("text")
	}
	bufadd(lt)
}
108 | |||
# Of course, at the end there might be something in either buffer, so print that
# out too. I've decided to put text last for the possibility of ending commentary.
END {
	# With empty input no record ever triggered the header rule; print the
	# header here so the footer is never emitted on its own.
	if (! begun) {
		TV["TITLE"] = get_title()
		file_print(HEADER)
	}
	bufprint("code")
	bufprint("text")
	file_print(FOOTER)
}
116 | |||
117 | |||
118 | # Functions | ||
119 | # --------- | ||
120 | # | ||
# *bufadd*: Append STR, followed by a newline, to the buffer named TYPE.  When
# STR is omitted (or otherwise false), the current input record $0 is used.
function bufadd(type, str)
{
	if (! str) {
		str = $0
	}
	buf[type] = buf[type] str "\n"
}
126 | |||
# *bufprint*: Print the buffer of TYPE ("code" or "text") and reset it.  The
# buffer is trimmed first, and nothing is printed if it ends up empty.  Code is
# piped through CODEPROC and wrapped in a little HTML code block; text is piped
# through TEXTPROC.  Each pipe is closed afterwards so the processor runs to
# completion before anything else is printed.  I could maybe have a
# DOCAWK_CODE_PRE/POST and maybe even one for text too, to make it more
# extensible (to other markup languages, for example).
function bufprint(type)
{
	buf[type] = trim(buf[type])
	if (! buf[type]) {
		return
	}
	if (type == "code") {
		printf "<pre><code>"
		# The buffer holds arbitrary source code, so it must be passed
		# as data: used as the format itself, any `%` in it would be
		# interpreted as a conversion specification.
		printf("%s", buf[type]) | CODEPROC
		close(CODEPROC)
		print "</code></pre>"
	} else if (type == "text") {
		print(buf[type]) | TEXTPROC
		close(TEXTPROC)
	}
	buf[type] = ""
}
147 | |||
# *file_print*: Print FILE line-by-line, passing every line through
# `template_expand`.  Does nothing when FILE is empty.  The `> 0` check ensures
# that the loop stops at end of file (0) and bails on error (-1), so an
# unreadable file simply produces no output.  LINE is a local variable; it is
# not meant to be passed by callers.
function file_print(file,    line)
{
	if (! file) {
		return
	}
	while ((getline line < file) > 0) {
		print template_expand(line)
	}
	close(file)
}
159 | |||
# *get_title*: work out the title for the expanded document.  A title supplied
# through `DOCAWK_TITLE` or the `TITLE` variable wins; failing that, the title
# is the basename of the file currently being read.
function get_title()
{
	title = getenv("DOCAWK_TITLE", TITLE)
	if (title) {
		return title
	}
	# No explicit title: strip everything up to the last slash of FILENAME.
	title = FILENAME
	sub(/.*\//, "", title)
	return title
}
172 | |||
# *getenv*: a convenience function for pulling values out of the environment.
# If the environment variable ENV is set to a non-empty value, return it.
# Otherwise, if VAR is non-empty (i.e., `doc.awk -v var=foo`), return VAR.
# Otherwise, return the default value DEF.
#
# Values are compared against the empty string rather than tested for truth, so
# a setting such as `0` is honoured instead of being mistaken for "unset".
function getenv(env, var, def)
{
	# The `in` test keeps the lookup from creating ENVIRON[env].
	if ((env in ENVIRON) && ENVIRON[env] != "") {
		return ENVIRON[env]
	}
	if (var != "") {
		return var
	}
	return def
}
187 | |||
# *template_expand*: expand every template of the form `@@template@@` in TEXT
# and return the result.  Currently it only does variables, and works by line.
# A key with no entry in `TV` expands to the empty string.  Replacement values
# are inserted as-is and are not scanned again for further templates.
#
# Due to the way awk works, template variables need to live in their own special
# array, `TV`. I'd love it if awk had some kind of `eval` functionality, but at
# least POSIX awk doesn't.
#
# OUT and KEY are local variables; they are not meant to be passed by callers.
function template_expand(text,    out, key)
{
	out = ""
	# Consume TEXT left to right: copy what precedes each template, append
	# the variable's value, then continue with the remainder of the line.
	while (match(text, /@@[^@]*@@/)) {
		key = substr(text, RSTART + 2, RLENGTH - 4)
		out = out substr(text, 1, RSTART - 1) TV[key]
		text = substr(text, RSTART + RLENGTH)
	}
	return out text
}
206 | |||
# *trim*: strip leading and trailing runs of spaces and newlines from STR and
# return the result.  Tabs are left alone.
function trim(str)
{
	sub(/^[ \n]+/, "", str)
	sub(/[ \n]+$/, "", str)
	return str
}