about summary refs log tree commit diff stats
path: root/trunk/hapax.lua
diff options
context:
space:
mode:
author: Case Duckworth, 2015-03-26 19:46:45 -0700
committer: Case Duckworth, 2015-03-26 19:46:45 -0700
commit: c654151582a77a0b459e3f55b687e43a32d4b67d (patch)
tree: 6d5d5d2b5f5650b494318e66cef6135b32c0cc72 /trunk/hapax.lua
parent: Flatten directory structure (diff)
download: autocento-c654151582a77a0b459e3f55b687e43a32d4b67d.tar.gz
autocento-c654151582a77a0b459e3f55b687e43a32d4b67d.zip
Add HAPAX LEGOMENA support & flatten structure
The list of hapax legomena for this project is available at /hapax.html.

In addition, the directory structure has been further flattened.
All assets (javascript, lua, images, fonts) are in /trunk/.

The makefile has also been updated:
it now compiles hapax.txt from the rivers.
Diffstat (limited to 'trunk/hapax.lua')
-rw-r--r--trunk/hapax.lua256
1 files changed, 256 insertions, 0 deletions
diff --git a/trunk/hapax.lua b/trunk/hapax.lua new file mode 100644 index 0000000..7e8410c --- /dev/null +++ b/trunk/hapax.lua
@@ -0,0 +1,256 @@
-- Pandoc hapax legomena writer
-- it takes out all formatting and keeps only the words that occur
-- exactly once in the document: one word per line
-- vim: fdm=marker
-- invoke with: pandoc -t hapax.lua

-- Select a UTF-8 English locale for the whole script; the return value
-- (nil when the locale is unavailable) is not checked.
os.setlocale("en_US.UTF-8")
8
--- Reduce a text to its hapax legomena (words that occur exactly once).
-- Words are the maximal runs of non-whitespace in `s` and are compared
-- byte-for-byte, so case and punctuation should be normalized beforehand.
-- @param s string of whitespace-separated words
-- @return string holding the once-only words in order of appearance,
--         joined with newlines ("" when there are none)
function hapax(s)
  -- First pass: remember every word in order and count its occurrences.
  local words, counts = {}, {}
  for word in s:gmatch("%S+") do
    words[#words + 1] = word
    counts[word] = (counts[word] or 0) + 1
  end
  -- Second pass: copy the words seen exactly once into a fresh list.
  -- Everything is local, so nothing leaks into the global environment,
  -- and no element ever has to be shifted out of the middle of a table.
  local unique = {}
  for i = 1, #words do
    local word = words[i]
    if counts[word] == 1 then
      unique[#unique + 1] = word
    end
  end
  return table.concat(unique, "\n")
end
37
--- Put every word of a text on its own line.
-- @param s string
-- @return string: `s` with each run of whitespace replaced by one newline
function flow(s)
  -- gsub returns (string, match count); the parentheses keep only the
  -- string so the count cannot leak into a caller's argument list.
  return (s:gsub("%s+", "\n"))
end
41
--- Strip a string down to bare, lowercase, space-separated words.
-- Expands a few English contractions, drops quotes, HTML entities and
-- tags, dashes and common punctuation, then collapses whitespace.
-- @param s string
-- @return string: the cleaned, lowercased text
function nude(s)
  -- Expand contractions. Only the letters after the apostrophe are
  -- captured, so the lookup key carries no trailing whitespace and the
  -- expansion also fires at end of string or before punctuation.
  -- Unknown suffixes look up as nil, which makes gsub keep the match.
  local expansions = { ll = " will", ve = " have", re = " are" }
  s = s:gsub("'(%a+)", function (suffix)
    return expansions[suffix:lower()]
  end)
  s = s:gsub("it's", "it is")
  s = s:gsub("n't", " not ")
  -- Get rid of quotes around words
  s = s:gsub('"', ' ')
  s = s:gsub("%s'", ' ')
  s = s:gsub("'%s", ' ')
  -- Remove HTML entities (shortest "&...;" span) and "<...>" tags
  s = s:gsub('&.-;', ' ')
  s = s:gsub('%b<>', ' ')
  -- Remove a backslash at the very end of the string
  -- ("$" anchors at the end of the subject, not at each line end)
  s = s:gsub('\\$', ' ')
  -- Remove dashes (not hyphens): a "-" followed by more "-" or whitespace
  s = s:gsub('%-[%-%s]+', ' ')
  -- Remove common punctuation and brackets
  s = s:gsub('[%.!%?:;,%[%]%(%)<>]', ' ')
  -- Remove extra spaces
  s = s:gsub('%s+', ' ')
  return s:lower()
end
74
-- This function is called once for the whole document. Parameters:
-- body is a string, metadata is a table, variables is a table.
-- The title and subtitle (when present) and the body are each cleaned
-- with nude, joined one per line, split into one word per line with
-- flow, and finally reduced by hapax to the words that occur only once.
function Doc(body, metadata, variables)
  local pieces = {}
  for _, field in ipairs({ "title", "subtitle" }) do
    local value = metadata[field]
    if value then
      pieces[#pieces + 1] = nude(value)
    end
  end
  pieces[#pieces + 1] = nude(body)
  -- Every piece, including the last one, is followed by a newline.
  local text = table.concat(pieces, "\n") .. "\n"
  return hapax(flow(text))
end
93
94-- Remove all formatting {{{
-- Note: keep only the cleaned text of the note.
function Note(s)
  local text = nude(s)
  return text
end
98
-- Blocks are separated by a single newline.
function Blocksep()
  local separator = "\n"
  return separator
end
-- Emph: drop the emphasis, keep the cleaned text.
function Emph(s)
  local text = nude(s)
  return text
end
105
-- Strong: drop the strong emphasis, keep the cleaned text.
function Strong(s)
  local text = nude(s)
  return text
end
109
-- Subscript: keep the cleaned text only.
function Subscript(s)
  local text = nude(s)
  return text
end
113
-- Superscript: keep the cleaned text only.
function Superscript(s)
  local text = nude(s)
  return text
end
117
-- SmallCaps: keep the cleaned text only.
function SmallCaps(s)
  local text = nude(s)
  return text
end
121
-- Strikeout: the struck-out text is kept, cleaned like any other text.
function Strikeout(s)
  local text = nude(s)
  return text
end
125
-- Code: the code text is cleaned like prose; attr is ignored.
function Code(s, attr)
  local text = nude(s)
  return text
end
129
-- CodeBlock: the code text is cleaned like prose; attr is ignored.
function CodeBlock(s, attr)
  local text = nude(s)
  return text
end
133
-- InlineMath: the math source is cleaned like any other text.
function InlineMath(s)
  local text = nude(s)
  return text
end
137
-- DisplayMath: the math source is cleaned like any other text.
function DisplayMath(s)
  local text = nude(s)
  return text
end
141
-- Span: keep the cleaned contents; attr is ignored.
function Span(s, attr)
  local text = nude(s)
  return text
end
145
-- Cite: keep the cleaned citation text.
function Cite(s)
  local text = nude(s)
  return text
end
149
-- Plain: keep the cleaned text.
function Plain(s)
  local text = nude(s)
  return text
end
153
-- Links contribute only their link text; the target (src) and the
-- title (tit) are ignored.
function Link(s, src, tit)
  local text = nude(s)
  return text
end
158
-- Images have nothing to give us
-- (but emit a newline so the words on either side stay separated)
function Image(s, src, tit)
  local separator = "\n"
  return separator
end
164
-- Captioned images contribute nothing either: just a newline.
function CaptionedImage(s, src, tit)
  local separator = "\n"
  return separator
end
168
-- Str: a run of text, cleaned.
function Str(s)
  local text = nude(s)
  return text
end
172
-- Div: keep the cleaned contents; attr is ignored.
function Div(s, attr)
  local text = nude(s)
  return text
end
176
-- Space: rendered as a newline; the argument is ignored.
function Space(s)
  local separator = "\n"
  return separator
end
180
-- LineBreak: rendered as a newline.
function LineBreak()
  local separator = "\n"
  return separator
end
184
-- Para: keep the cleaned paragraph text.
function Para(s)
  local text = nude(s)
  return text
end
188
-- Header: keep the cleaned heading text; the level (lev) and attr are
-- ignored.
function Header(lev, s, attr)
  local text = nude(s)
  return text
end
192
-- BlockQuote: keep the cleaned quoted text.
function BlockQuote(s)
  local text = nude(s)
  return text
end
196
-- HorizontalRule: rendered as a newline.
function HorizontalRule()
  local separator = "\n"
  return separator
end
200
--- Render a bullet list as its cleaned items, one per line.
-- @param items sequence of item strings
-- @return string: each cleaned item followed by a newline, plus one
--         closing newline (just "\n" for an empty list)
function BulletList(items)
  local lines = {}
  -- ipairs walks the sequence in index order; pairs gives no ordering
  -- guarantee, and the order of words matters to the final output.
  for i, item in ipairs(items) do
    lines[i] = nude(item) .. "\n"
  end
  return table.concat(lines) .. "\n"
end
208
--- Render an ordered list as its cleaned items, one per line.
-- The numbering itself is not emitted.
-- @param items sequence of item strings
-- @return string: each cleaned item followed by a newline, plus one
--         closing newline (just "\n" for an empty list)
function OrderedList(items)
  local lines = {}
  -- ipairs walks the sequence in index order; pairs gives no ordering
  -- guarantee, and the order of words matters to the final output.
  for i, item in ipairs(items) do
    lines[i] = nude(item) .. "\n"
  end
  return table.concat(lines) .. "\n"
end
216
--- Render a definition list as cleaned terms and definitions, one per line.
-- @param items sequence of tables mapping a term to its definitions.
--        The definitions may be a list of strings (the shape pandoc's
--        sample custom writer expects) or a single string.
-- @return string: every cleaned term and definition followed by a
--         newline, plus one closing newline
function DefinitionList(items)
  local lines = {}
  local function emit(text)
    lines[#lines + 1] = nude(text) .. "\n"
  end
  for _, item in ipairs(items) do
    -- Terms are the keys of the item table, so this level needs pairs.
    for term, definitions in pairs(item) do
      emit(term)
      if type(definitions) == "table" then
        -- A list of definitions: nude only accepts strings, so clean
        -- each definition separately instead of passing the table.
        for _, definition in ipairs(definitions) do
          emit(definition)
        end
      else
        emit(definitions)
      end
    end
  end
  return table.concat(lines) .. "\n"
end
226
--- Render a table as its cleaned caption, headers and cells, one per line.
-- Alignments and widths are ignored.
-- @param caption string ("" when the table has no caption)
-- @param aligns unused
-- @param widths unused
-- @param headers sequence of header cell strings
-- @param rows sequence of rows, each a sequence of cell strings
-- @return string: every cleaned piece followed by a newline
function Table(caption, aligns, widths, headers, rows)
  local lines = {}
  local function add(s)
    lines[#lines + 1] = nude(s) .. "\n"
  end
  if caption ~= "" then
    add(caption)
  end
  -- ipairs keeps headers, rows and cells in index order; pairs gives no
  -- ordering guarantee, and the order of words matters to the output.
  for _, header in ipairs(headers) do
    add(header)
  end
  for _, row in ipairs(rows) do
    for _, cell in ipairs(row) do
      add(cell)
    end
  end
  return table.concat(lines)
end
245-- }}}
246
-- Fallback for writer functions that have not been defined: looking up
-- any missing global prints a warning on stderr and yields a function
-- that returns the empty string, so an unhandled element is skipped
-- rather than raising an error. Useful while working on a writer.
setmetatable(_G, {
  __index = function(_, key)
    local warning = string.format("WARNING: Undefined function '%s'\n",key)
    io.stderr:write(warning)
    return function()
      return ""
    end
  end,
})