diff options
Diffstat (limited to 'trunk/hapax.lua')
-rw-r--r-- | trunk/hapax.lua | 256 |
1 files changed, 256 insertions, 0 deletions
diff --git a/trunk/hapax.lua b/trunk/hapax.lua new file mode 100644 index 0000000..7e8410c --- /dev/null +++ b/trunk/hapax.lua | |||
@@ -0,0 +1,256 @@ | |||
-- Pandoc hapax writer
-- it takes out all formatting, leaving only the words that occur
-- exactly once in the document (hapax legomena): one word per line
-- vim: fdm=marker
-- invoke with: pandoc -t hapax.lua
6 | |||
-- Ask for an English UTF-8 locale. os.setlocale returns nil when the request
-- cannot be honoured, so report that instead of failing silently; the writer
-- then keeps running under whatever locale was already active.
if not os.setlocale("en_US.UTF-8") then
  io.stderr:write("WARNING: locale 'en_US.UTF-8' is not available; keeping the current locale\n")
end
8 | |||
-- Reduce a whitespace-separated text to the words that occur exactly once.
--   s: string of words separated by any whitespace
--   returns: the once-only words, in order of appearance, joined by "\n"
--            (an empty string when no word qualifies)
-- Words are compared byte-for-byte; no case folding happens here.
function hapax(s)
  local words = {}
  local counts = {}
  -- First pass: remember every word in order and count its occurrences.
  for word in s:gmatch("%S+") do
    words[#words + 1] = word
    counts[word] = (counts[word] or 0) + 1
  end
  -- Second pass: keep only the words seen once. Building a fresh list avoids
  -- the repeated table.remove shifting and keeps all state local.
  local once = {}
  for i = 1, #words do
    local word = words[i]
    if counts[word] == 1 then
      once[#once + 1] = word
    end
  end
  return table.concat(once, "\n")
end
37 | |||
-- Turn every run of whitespace into a single newline, so each word ends up
-- on its own line.
--   s: string
--   returns: exactly one value, the rewritten string
function flow(s)
  -- string.gsub also returns the number of substitutions; assigning to a
  -- single local drops it so callers never receive a stray second value.
  local river = s:gsub("%s+", "\n")
  return river
end
41 | |||
-- Expansions for apostrophe suffixes, keyed by the letters after the
-- apostrophe. Suffixes that are not listed (such as 's or 't) are left alone.
local CONTRACTIONS = {
  ll = " will ",
  ve = " have ",
  re = " are ",
}

-- Strip a piece of text down to bare lower-case words.
--   s: string
--   returns: the lower-cased text with a fixed set of contractions expanded,
--            quotes, HTML entities/tags, dashes and common punctuation
--            replaced by spaces, and whitespace runs collapsed to one space
function nude(s)
  -- Lower-case first so capitalised contractions ("It's", "We'll") are
  -- recognised by the literal patterns below.
  s = s:lower()
  -- Expand contractions. The capture holds only the letters after the
  -- apostrophe, so the lookup succeeds whatever follows the word (space,
  -- punctuation or end of text); unknown suffixes keep the original match.
  s = s:gsub("'(%a+)", CONTRACTIONS)
  -- The frontier keeps words that merely end in "it's" (e.g. "bit's") intact.
  s = s:gsub("%f[%a]it's", "it is")
  s = s:gsub("n't", " not ")
  -- Get rid of quotes around words
  s = s:gsub('"', ' ')
  s = s:gsub("%s'", ' ')
  s = s:gsub("'%s", ' ')
  -- Remove HTML entities and tags
  s = s:gsub('&.-;', ' ')
  s = s:gsub('%b<>', ' ')
  -- Remove end-of-line backslashes. "$" only anchors at the end of the whole
  -- string, so backslashes before an embedded newline are handled separately.
  s = s:gsub('\\\n', ' ')
  s = s:gsub('\\$', ' ')
  -- Remove dashes (not hyphens)
  s = s:gsub('%-[%-%s]+', ' ')
  -- Remove common punctuation and brackets
  s = s:gsub('[%.!%?:;,%[%]%(%)<>]', ' ')
  -- Remove extra spaces
  s = s:gsub('%s+', ' ')
  return s
end
74 | |||
-- Called once for the whole document.
--   body: string holding the already-rendered document body
--   metadata: table; only its 'title' and 'subtitle' entries are read
--   variables: table; not used
-- Title, subtitle (when present) and body are each passed through nude(),
-- put on their own line, split into one word per line by flow(), and the
-- result is filtered by hapax().
function Doc(body, metadata, variables)
  local pieces = {}
  for _, key in ipairs({ 'title', 'subtitle' }) do
    local value = metadata[key]
    if value then
      pieces[#pieces + 1] = nude(value) .. "\n"
    end
  end
  pieces[#pieces + 1] = nude(body) .. "\n"
  return hapax(flow(table.concat(pieces)))
end
93 | |||
94 | -- Remove all formatting {{{ | ||
-- Note element: the content is kept and reduced to bare words by nude().
function Note(s)
  local text = nude(s)
  return text
end
98 | |||
-- Separator between blocks: a single newline.
function Blocksep()
  local separator = "\n"
  return separator
end
-- Emph element: the markup is dropped, the text goes through nude().
function Emph(s)
  local text = nude(s)
  return text
end
105 | |||
-- Strong element: the markup is dropped, the text goes through nude().
function Strong(s)
  local text = nude(s)
  return text
end
109 | |||
-- Subscript element: the markup is dropped, the text goes through nude().
function Subscript(s)
  local text = nude(s)
  return text
end
113 | |||
-- Superscript element: the markup is dropped, the text goes through nude().
function Superscript(s)
  local text = nude(s)
  return text
end
117 | |||
-- SmallCaps element: the markup is dropped, the text goes through nude().
function SmallCaps(s)
  local text = nude(s)
  return text
end
121 | |||
-- Strikeout element: the struck text is still kept, cleaned by nude().
function Strikeout(s)
  local text = nude(s)
  return text
end
125 | |||
-- Code element: the code text is treated like any other text and cleaned by
-- nude(); attr is ignored.
function Code(s, attr)
  local text = nude(s)
  return text
end
129 | |||
-- CodeBlock element: the code text is treated like any other text and
-- cleaned by nude(); attr is ignored.
function CodeBlock(s, attr)
  local text = nude(s)
  return text
end
133 | |||
-- InlineMath element: the math source is cleaned by nude() like plain text.
function InlineMath(s)
  local text = nude(s)
  return text
end
137 | |||
-- DisplayMath element: the math source is cleaned by nude() like plain text.
function DisplayMath(s)
  local text = nude(s)
  return text
end
141 | |||
-- Span element: the content goes through nude(); attr is ignored.
function Span(s, attr)
  local text = nude(s)
  return text
end
145 | |||
-- Cite element: the content goes through nude().
function Cite(s)
  local text = nude(s)
  return text
end
149 | |||
-- Plain element: the content goes through nude().
function Plain(s)
  local text = nude(s)
  return text
end
153 | |||
-- Link element: only the link text (s) is kept, cleaned by nude();
-- the src and tit arguments are ignored.
function Link(s, src, tit)
  local text = nude(s)
  return text
end
158 | |||
-- Image element: contributes no words; all arguments are ignored.
-- A newline is still returned so neighbouring words stay separated.
function Image(s, src, tit)
  local separator = "\n"
  return separator
end
164 | |||
-- CaptionedImage element: contributes no words; all arguments are ignored
-- and a single newline is returned.
function CaptionedImage(s, src, tit)
  local separator = "\n"
  return separator
end
168 | |||
-- Str element: the string goes through nude().
function Str(s)
  local text = nude(s)
  return text
end
172 | |||
-- Div element: the content goes through nude(); attr is ignored.
function Div(s, attr)
  local text = nude(s)
  return text
end
176 | |||
-- Space element: rendered as a newline; the argument is ignored.
function Space(s)
  local separator = "\n"
  return separator
end
180 | |||
-- LineBreak element: rendered as a newline.
function LineBreak()
  local separator = "\n"
  return separator
end
184 | |||
-- Para element: the paragraph text goes through nude().
function Para(s)
  local text = nude(s)
  return text
end
188 | |||
-- Header element: only the heading text (s) is kept, cleaned by nude();
-- lev and attr are ignored.
function Header(lev, s, attr)
  local text = nude(s)
  return text
end
192 | |||
-- BlockQuote element: the quoted text goes through nude().
function BlockQuote(s)
  local text = nude(s)
  return text
end
196 | |||
-- HorizontalRule element: contributes no words, only a newline.
function HorizontalRule()
  local separator = "\n"
  return separator
end
200 | |||
-- BulletList element.
--   items: sequence of strings, one per list item
--   returns: every item cleaned by nude() on its own line, followed by one
--            extra newline (just "\n" for an empty list)
function BulletList(items)
  local lines = {}
  -- ipairs walks the items in list order; pairs gives no ordering guarantee.
  for _, item in ipairs(items) do
    lines[#lines + 1] = nude(item) .. "\n"
  end
  lines[#lines + 1] = "\n"
  -- One table.concat instead of growing a string inside the loop.
  return table.concat(lines)
end
208 | |||
-- OrderedList element. The numbering itself is not reproduced.
--   items: sequence of strings, one per list item
--   returns: every item cleaned by nude() on its own line, followed by one
--            extra newline (just "\n" for an empty list)
function OrderedList(items)
  local lines = {}
  -- ipairs walks the items in list order; pairs gives no ordering guarantee.
  for _, item in ipairs(items) do
    lines[#lines + 1] = nude(item) .. "\n"
  end
  lines[#lines + 1] = "\n"
  -- One table.concat instead of growing a string inside the loop.
  return table.concat(lines)
end
216 | |||
-- DefinitionList element.
--   items: sequence of tables, each mapping a term (string) to its
--          definitions. Pandoc's classic custom writers pass the definitions
--          as a list of strings; a single string is accepted as well.
--   returns: each term and each of its definitions cleaned by nude() on its
--            own line, followed by one extra newline
function DefinitionList(items)
  local lines = {}
  local function add(s)
    lines[#lines + 1] = nude(s) .. "\n"
  end
  for _, item in ipairs(items) do
    for term, definitions in pairs(item) do
      add(term)
      if type(definitions) == "table" then
        -- nude() needs a string, so the definitions are cleaned one by one.
        for _, definition in ipairs(definitions) do
          add(definition)
        end
      else
        add(definitions)
      end
    end
  end
  lines[#lines + 1] = "\n"
  return table.concat(lines)
end
226 | |||
-- Table element: only the textual content is kept.
--   caption: string; skipped when empty or nil
--   aligns, widths: ignored
--   headers: sequence of header cell strings
--   rows: sequence of rows, each a sequence of cell strings
--   returns: caption, header cells and body cells, each cleaned by nude()
--            and placed on its own line
function Table(caption, aligns, widths, headers, rows)
  local lines = {}
  local function add(s)
    lines[#lines + 1] = nude(s) .. "\n"
  end
  -- The nil check keeps a missing caption from reaching nude().
  if caption and caption ~= "" then
    add(caption)
  end
  -- ipairs keeps headers, rows and cells in document order.
  for _, h in ipairs(headers) do
    add(h)
  end
  for _, row in ipairs(rows) do
    for _, cell in ipairs(row) do
      add(cell)
    end
  end
  return table.concat(lines)
end
245 | -- }}} | ||
246 | |||
-- Fallback for writer functions this file does not define: reading any
-- undefined global prints a warning on stderr and yields a function that
-- returns an empty string. Handy while the writer is still being developed.
setmetatable(_G, {
  __index = function(_, key)
    local message = string.format("WARNING: Undefined function '%s'\n", key)
    io.stderr:write(message)
    return function()
      return ""
    end
  end,
})