about summary refs log tree commit diff stats
path: root/scripts/hapax.lua
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/hapax.lua')
-rw-r--r--scripts/hapax.lua252
1 files changed, 252 insertions, 0 deletions
diff --git a/scripts/hapax.lua b/scripts/hapax.lua new file mode 100644 index 0000000..1cabbaa --- /dev/null +++ b/scripts/hapax.lua
@@ -0,0 +1,252 @@
-- Pandoc Hapax writer
-- Strips all formatting and leaves only a river of text running down the
-- page: one word per line, dropping every word that occurs more than once.
-- vim: fdm=marker
-- invoke with: pandoc -t hapax.lua

-- "all" is the default category; it is spelled out here for clarity.
os.setlocale("en_US.UTF-8", "all")
8
--- Reduce a text to the words that occur exactly once, one per line.
-- `s` is split on whitespace. Every word that appears more than once is
-- discarded entirely (all of its occurrences, not only the repeats); the
-- remaining words keep their original order and are joined with newlines.
-- @param s string of whitespace-separated words
-- @return string with one word per line ("" when no word survives)
function hapax(s)
  -- First pass: collect the words in order and count each one.
  local words, count = {}, {}
  for word in s:gmatch("%S+") do
    words[#words + 1] = word
    count[word] = (count[word] or 0) + 1
  end
  -- Second pass: build a fresh list of the words seen exactly once.
  -- Building a new list avoids shifting the array on every removal, and
  -- keeping everything local avoids leaking a global variable.
  local unique = {}
  for i = 1, #words do
    local word = words[i]
    if count[word] == 1 then
      unique[#unique + 1] = word
    end
  end
  return table.concat(unique, "\n")
end
37
--- Put every word on its own line.
-- Each run of whitespace in `s` is replaced by a single newline.
-- @param s input string
-- @return the rewritten string (and nothing else)
function flow(s)
  -- gsub returns (string, substitution count); the extra parentheses
  -- truncate that to the string so the count does not leak to callers.
  return (s:gsub("%s+", "\n"))
end
41
--- Normalise text into bare lowercase words.
-- Lowercases `s`, expands a handful of English contractions, removes
-- dashes and punctuation, and collapses whitespace.
-- @param s input string
-- @return the cleaned text, padded with one space on each side
function nude(s)
  -- Applied in this order; each rule sees the output of the previous one.
  local expansions = {
    { "'ll",  " will " },
    { "'ve",  " have " },
    { "'re",  " are " },
    { "i'm",  " i am " },
    { "it's", "it is" },
    { "n't",  " not " },
    { "'d",   " would " }, -- can be "had", but still no hapax
    { "&",    " and " },
  }
  local text = s:lower()
  for _, rule in ipairs(expansions) do
    text = text:gsub(rule[1], rule[2])
  end
  -- Dashes (a hyphen followed by more hyphens or whitespace) become a
  -- space; a hyphen inside a word is left alone.
  text = text:gsub('%-[%-%s]+', ' ')
  -- Anything other than ASCII letters, digits, "/", "'" and "-" becomes
  -- a space.
  text = text:gsub('[^A-Za-z0-9/\'-]', ' ')
  -- Squeeze every whitespace run down to a single space.
  text = text:gsub('%s+', ' ')
  return " " .. text .. " "
end
61
-- This function is called once for the whole document. Parameters:
-- body is a string, metadata is a table, variables is a table.
-- The title and subtitle (when present in metadata) and then the body are
-- each cleaned with nude() and followed by a newline; the combined text is
-- split into one word per line by flow() and filtered by hapax().
-- `variables` is not used.
function Doc(body, metadata, variables)
  local pieces = {}
  for _, key in ipairs({ 'title', 'subtitle' }) do
    local value = metadata[key]
    if value then
      pieces[#pieces + 1] = nude(value) .. "\n"
    end
  end
  pieces[#pieces + 1] = nude(body) .. "\n"
  -- For the unfiltered word stream, return flow(table.concat(pieces)).
  return hapax(flow(table.concat(pieces)))
end
81
82-- Remove all formatting {{{
--- Footnote: keep its text, but fence it off with newlines.
-- The note's text is concatenated straight onto whatever inline precedes
-- it, so returning `s` bare would glue the note's first word onto the
-- previous word (and its last word onto the next one).
-- @param s rendered footnote content
-- @return `s` surrounded by newlines
function Note(s)
  return "\n" .. s .. "\n"
end
86
--- Separator placed between blocks: a single newline.
function Blocksep() return "\n" end
--- Emphasis: the markup is dropped, the inner text `s` is returned as is.
function Emph(s) return s end
93
--- Strong emphasis: the markup is dropped, `s` is returned as is.
function Strong(s) return s end
97
--- Subscript: the markup is dropped, `s` is returned as is.
function Subscript(s) return s end
101
--- Superscript: the markup is dropped, `s` is returned as is.
function Superscript(s) return s end
105
--- Small caps: the markup is dropped, `s` is returned as is.
function SmallCaps(s) return s end
109
--- Struck-out text: the markup is dropped, `s` is returned as is.
function Strikeout(s) return s end
113
--- Inline code: `s` is returned as is; `attr` is ignored.
function Code(s, attr) return s end
117
--- Code block: `s` is returned as is; `attr` is ignored.
function CodeBlock(s, attr) return s end
121
--- Inline math: `s` is returned as is.
function InlineMath(s) return s end
125
--- Display math: `s` is returned as is.
function DisplayMath(s) return s end
129
--- Span: `s` is returned as is; `attr` is ignored.
function Span(s, attr) return s end
133
--- Citation: `s` is returned as is.
function Cite(s) return s end
137
--- Plain block: `s` is returned as is.
function Plain(s) return s end
141
--- Link: only the link text `s` is kept; `src` and `tit` are discarded.
function Link(s, src, tit) return s end
146
--- Image: contributes no words. All arguments are ignored and a newline
-- is returned, so the words on either side stay separated.
function Image(s, src, tit) return "\n" end
152
--- Raw block: `s` is returned as is.
function RawBlock(s) return s end
156
--- Raw inline: `s` is returned as is.
function RawInline(s) return s end
160
--- Captioned image: contributes no words; every argument is ignored and
-- a newline is returned.
-- NOTE(review): pandoc's sample writer names these arguments
-- (src, tit, caption, attr); the names here differ but none is used.
function CaptionedImage(s, src, tit) return "\n" end
164
--- Plain string: `s` is returned as is.
function Str(s) return s end
168
--- Div: `s` is returned as is; `attr` is ignored.
function Div(s, attr) return s end
172
--- Inter-word space: rendered as a newline; the argument is ignored.
function Space(s) return "\n" end
176
--- Hard line break: a newline, so the words on either side stay apart.
function LineBreak()
  return "\n"
end

--- Soft line break (a wrapped source line): also a word boundary.
-- Pandoc versions that emit SoftBreak call this handler by name. Without
-- it the call reaches the undefined-function fallback at the end of this
-- file, which returns "" and glues the last word of one source line onto
-- the first word of the next.
function SoftBreak()
  return "\n"
end
180
--- Paragraph: `s` is returned as is.
function Para(s) return s end
184
--- Heading: only the heading text `s` is kept; `lev` and `attr` are
-- ignored.
function Header(lev, s, attr) return s end
188
--- Block quote: `s` is returned as is.
function BlockQuote(s) return s end
192
--- Horizontal rule: contributes no words, only a newline.
function HorizontalRule() return "\n" end
196
--- Bullet list: emit each item on its own line, in list order.
-- @param items sequence of rendered item strings
-- @return every item as " <item>\n", followed by one extra "\n"
function BulletList(items)
  local lines = {}
  -- ipairs guarantees list order; pairs leaves the order unspecified.
  for i, item in ipairs(items) do
    lines[i] = " " .. item .. "\n"
  end
  return table.concat(lines) .. "\n"
end
204
--- Ordered list: emit each item on its own line, in list order.
-- The numbering is not reproduced.
-- @param items sequence of rendered item strings
-- @return every item as " <item>\n", followed by one extra "\n"
function OrderedList(items)
  local lines = {}
  -- ipairs guarantees list order; pairs leaves the order unspecified.
  for i, item in ipairs(items) do
    lines[i] = " " .. item .. "\n"
  end
  return table.concat(lines) .. "\n"
end
212
--- Definition list: emit each term followed by its definitions.
-- @param items sequence of tables mapping a term to its definitions.
--   Pandoc's sample writer treats the definitions as a list of strings;
--   a plain string is accepted too.
-- @return " <term>\n<definitions>\n" per entry, followed by one extra "\n"
function DefinitionList(items)
  local lines = {}
  for _, item in ipairs(items) do
    for term, definitions in pairs(item) do
      -- A list of definitions cannot be concatenated directly (that
      -- raises "attempt to concatenate a table value"); join it first.
      if type(definitions) == "table" then
        definitions = table.concat(definitions, "\n")
      end
      lines[#lines + 1] = " " .. term .. "\n" .. definitions .. "\n"
    end
  end
  return table.concat(lines) .. "\n"
end
222
--- Table: flatten caption, header cells and body cells into lines.
-- `aligns` and `widths` are ignored.
-- @param caption rendered caption; skipped when nil or ""
-- @param aligns unused
-- @param widths unused
-- @param headers sequence of rendered header cells
-- @param rows sequence of rows, each a sequence of rendered cells
-- @return every piece as " <text>\n", in reading order
function Table(caption, aligns, widths, headers, rows)
  local lines = {}
  local function add(s)
    lines[#lines + 1] = " " .. s .. "\n"
  end
  -- A missing caption is treated like an empty one instead of crashing.
  if caption and caption ~= "" then
    add(caption)
  end
  -- ipairs keeps cells in reading order; pairs does not promise any order.
  for _, h in ipairs(headers) do
    add(h)
  end
  for _, row in ipairs(rows) do
    for _, cell in ipairs(row) do
      add(cell)
    end
  end
  return table.concat(lines)
end
241-- }}}
242
-- Fallback for writer functions that are not defined in this file.
-- Reading any undefined global prints a warning on stderr and yields a
-- stand-in function that returns "", so the content handed to a missing
-- handler is silently dropped. Useful while working on a writer.
setmetatable(_G, {
  __index = function(_, key)
    io.stderr:write(string.format("WARNING: Undefined function '%s'\n",key))
    return function() return "" end
  end,
})