diff options
author | Case Duckworth | 2015-04-14 16:36:17 -0700 |
---|---|---|
committer | Case Duckworth | 2015-04-14 16:36:17 -0700 |
commit | 9fce418b46c9f0894f429384ef9e3dabaeffbeb4 (patch) | |
tree | b2339220ee50cf48b8887f0cc1fed4813a95901b /scripts/hapax.lua | |
parent | Add toc metadata (diff) | |
download | autocento-9fce418b46c9f0894f429384ef9e3dabaeffbeb4.tar.gz autocento-9fce418b46c9f0894f429384ef9e3dabaeffbeb4.zip |
Change file hierarchy and rewrite makefile
- File hierarchy is now as follows:
  - /
    - appendix/ < appendix source files
    - backlinks/ < backlink sources & builds
    - hapax/ < *.hapax source files
    - scripts/ < scripts, like *.js, *.hs, etc.
    - templates/ < templates for outputs
    - text/ < source files
    - trunk/ < assets, like css, images, heads, etc.
    - index.html
    - *.html
    - Makefile
Diffstat (limited to 'scripts/hapax.lua')
-rw-r--r-- | scripts/hapax.lua | 252 |
1 files changed, 252 insertions, 0 deletions
diff --git a/scripts/hapax.lua b/scripts/hapax.lua new file mode 100644 index 0000000..1cabbaa --- /dev/null +++ b/scripts/hapax.lua | |||
@@ -0,0 +1,252 @@ | |||
1 | -- Pandoc Hapax writer | ||
2 | -- it takes out all formatting, leaving only a river of text | ||
3 | -- running down the page: one word per line, stripping all duplicates | ||
4 | -- vim: fdm=marker | ||
5 | -- invoke with: pandoc -t hapax.lua | ||
6 | |||
-- Select the en_US.UTF-8 locale for every category ("all" is the default
-- category, spelled out here).  The result is not checked.
os.setlocale("en_US.UTF-8", "all")
8 | |||
-- Reduce a string to its hapax legomena.
--
-- s: text whose words are separated by whitespace.
-- Returns a string with one word per line, containing only the words that
-- occur exactly once in s, in order of first appearance.  A word that
-- repeats is dropped entirely (every occurrence, not just the later ones).
function hapax(s)
  local words, count = {}, {}
  for word in s:gmatch("%S+") do
    words[#words + 1] = word
    count[word] = (count[word] or 0) + 1
  end
  -- Collect survivors in a single pass instead of deleting from the middle
  -- of the list, which shifts the tail on every removal.
  local once = {}
  for _, word in ipairs(words) do
    if count[word] == 1 then
      once[#once + 1] = word
    end
  end
  return table.concat(once, "\n")
end
37 | |||
-- Put every whitespace-separated word on its own line.
--
-- s: input string.
-- Returns s with each run of whitespace replaced by a single newline.
function flow(s)
  -- The parentheses discard gsub's second result (the substitution count)
  -- so that it is not passed along as a stray extra argument.
  return (s:gsub("%s+", "\n"))
end
41 | |||
-- Strip a string down to bare lowercase words.
--
-- s: input text.
-- Returns the text lowercased, with common English contractions expanded,
-- "&" spelled out, dashes removed, and every character other than ASCII
-- letters, digits, "/", "'" and "-" replaced by a space.  Runs of whitespace
-- are collapsed and the result is wrapped in one leading and one trailing
-- space.
function nude(s)
  -- Fold the typographic apostrophe (U+2019, UTF-8 bytes E2 80 99) into the
  -- ASCII one so the contraction rules below also match curly quotes.
  s = s:gsub("\226\128\153", "'")
  s = s:lower()
  -- Irregular negatives must go first: the generic "n't" rule would turn
  -- them into the non-words "ca", "wo" and "sha".
  s = s:gsub("can't", "can not")
  s = s:gsub("won't", "will not")
  s = s:gsub("shan't", "shall not")
  -- Expand contractions
  s = s:gsub("'ll", " will ")
  s = s:gsub("'ve", " have ")
  s = s:gsub("'re", " are ")
  s = s:gsub("i'm", " i am ")
  s = s:gsub("it's", "it is")
  s = s:gsub("n't", " not ")
  s = s:gsub("'d", " would ") -- can be "had", but still no hapax
  s = s:gsub("&", " and ")
  -- Remove dashes (a hyphen followed by more hyphens or whitespace), but
  -- keep hyphens inside words
  s = s:gsub('%-[%-%s]+', ' ')
  -- Replace everything except letters, digits, "/", "'" and "-" with a space
  s = s:gsub('[^A-Za-z0-9/\'-]', ' ')
  -- Remove extra spaces
  s = s:gsub('%s+', ' ')
  return " "..s.." "
end
61 | |||
-- Called once for the whole document.
--
-- body: the rendered document body (string).
-- metadata: table of metadata; only 'title' and 'subtitle' are read.
-- variables: table, unused here.
-- Returns the hapax list of title, subtitle (when present) and body: each
-- piece is passed through nude(), the pieces are joined with newlines, and
-- the result goes through flow() and then hapax().
function Doc(body, metadata, variables)
  local pieces = {}
  for _, key in ipairs({ 'title', 'subtitle' }) do
    if metadata[key] then
      pieces[#pieces + 1] = nude(metadata[key])
    end
  end
  pieces[#pieces + 1] = nude(body)
  -- Every piece, including the last, is terminated by a newline.
  return hapax(flow(table.concat(pieces, "\n") .. "\n"))
end
81 | |||
82 | -- Remove all formatting {{{ | ||
-- Footnote: keep the note's text in the word stream.
--
-- s: the already-rendered contents of the note.
-- Returns s wrapped in newlines.  Pandoc joins inline results with no
-- separator, so without them the first word of the note would fuse with
-- the word the note is attached to and both would be miscounted.
function Note(s)
  return "\n" .. s .. "\n"
end
86 | |||
-- Separator placed between blocks: a single newline.
function Blocksep() return "\n" end
-- Emphasis: the text is returned unchanged, the formatting is dropped.
function Emph(text) return text end
93 | |||
-- Strong emphasis: the text is returned unchanged.
function Strong(text) return text end
97 | |||
-- Subscript: the text is returned unchanged.
function Subscript(text) return text end
101 | |||
-- Superscript: the text is returned unchanged.
function Superscript(text) return text end
105 | |||
-- Small caps: the text is returned unchanged.
function SmallCaps(text) return text end
109 | |||
-- Strikeout: the struck text is still returned unchanged.
function Strikeout(text) return text end
113 | |||
-- Inline code: the code text is returned unchanged; attr is ignored.
function Code(text, attr) return text end
117 | |||
-- Code block: the code text is returned unchanged; attr is ignored.
function CodeBlock(text, attr) return text end
121 | |||
-- Inline math: the source text is returned unchanged.
function InlineMath(text) return text end
125 | |||
-- Display math: the source text is returned unchanged.
function DisplayMath(text) return text end
129 | |||
-- Span: the contents are returned unchanged; attr is ignored.
function Span(text, attr) return text end
133 | |||
-- Citation: the rendered text is returned unchanged.
function Cite(text) return text end
137 | |||
-- Plain block: the text is returned unchanged.
function Plain(text) return text end
141 | |||
-- Link: only the link text is returned; the second and third arguments
-- (named src and tit in the original) are ignored.
function Link(label, target, title) return label end
146 | |||
-- Image: contributes no words.  All arguments are ignored and a newline
-- is returned so that the neighbouring words stay separated.
function Image(alt, source, title) return "\n" end
152 | |||
-- Raw block: keep the raw text, not the name of its format.
--
-- format: name of the raw content's format (e.g. "html").
-- str: the raw text itself.
-- Pandoc's custom-writer interface calls RawBlock(format, str); returning
-- the first argument would inject the format name into the word stream
-- as if it were a word of the document.  If only one argument is supplied
-- it is returned as-is, so single-argument callers see no change.
function RawBlock(format, str)
  return str or format
end
156 | |||
-- Raw inline: keep the raw text, not the name of its format.
--
-- format: name of the raw content's format (e.g. "html").
-- str: the raw text itself.
-- Pandoc's custom-writer interface calls RawInline(format, str); returning
-- the first argument would inject the format name into the word stream
-- as if it were a word of the document.  If only one argument is supplied
-- it is returned as-is, so single-argument callers see no change.
function RawInline(format, str)
  return str or format
end
160 | |||
-- Captioned image: contributes no words; every argument is ignored and a
-- newline is returned.
function CaptionedImage(a, b, c) return "\n" end
164 | |||
-- Plain string: returned unchanged.
function Str(text) return text end
168 | |||
-- Div: the contents are returned unchanged; attr is ignored.
function Div(text, attr) return text end
172 | |||
-- Inter-word space: rendered as a newline.  The argument is ignored.
function Space(_) return "\n" end
176 | |||
-- Hard line break: rendered as a newline.
function LineBreak() return "\n" end
180 | |||
-- Paragraph: the text is returned unchanged.
function Para(text) return text end
184 | |||
-- Header: only the heading text (second argument) is returned; the level
-- and attr are ignored.
function Header(level, text, attr) return text end
188 | |||
-- Block quote: the quoted text is returned unchanged.
function BlockQuote(text) return text end
192 | |||
-- Horizontal rule: contributes no words, only a newline.
function HorizontalRule() return "\n" end
196 | |||
-- Bullet list: one item per line.
--
-- items: sequence of already-rendered item strings.
-- Returns each item preceded by a space and followed by a newline, with
-- one extra newline after the last item.
function BulletList(items)
  local lines = {}
  -- ipairs walks the sequence in index order; pairs gives no such guarantee.
  for i, item in ipairs(items) do
    lines[i] = " " .. item .. "\n"
  end
  return table.concat(lines) .. "\n"
end
204 | |||
-- Ordered list: one item per line; numbering is not reproduced.
--
-- items: sequence of already-rendered item strings.
-- Returns each item preceded by a space and followed by a newline, with
-- one extra newline after the last item.
function OrderedList(items)
  local lines = {}
  -- ipairs walks the sequence in index order; pairs gives no such guarantee.
  for i, item in ipairs(items) do
    lines[i] = " " .. item .. "\n"
  end
  return table.concat(lines) .. "\n"
end
212 | |||
-- Definition list: each term followed by its definitions, one per line.
--
-- items: sequence of tables mapping a term to its definitions.  Pandoc's
-- custom-writer interface supplies the definitions as a list of strings;
-- a single string is accepted as well.
-- Returns " term\ndefinitions\n" for every term, plus a closing newline.
function DefinitionList(items)
  local lines = {}
  for _, item in ipairs(items) do
    for term, defs in pairs(item) do
      -- A list of definitions cannot be concatenated to a string directly
      -- (Lua raises "attempt to concatenate a table value"); join it first.
      if type(defs) == "table" then
        defs = table.concat(defs, "\n")
      end
      lines[#lines + 1] = " " .. term .. "\n" .. defs .. "\n"
    end
  end
  return table.concat(lines) .. "\n"
end
222 | |||
-- Table: flatten caption, headers and cells into one entry per line.
--
-- caption: caption string; skipped when empty or nil.
-- aligns, widths: ignored.
-- headers: sequence of header cell strings.
-- rows: sequence of rows, each a sequence of cell strings.
-- Returns every entry preceded by a space and followed by a newline, in
-- the order caption, headers, then cells row by row.
function Table(caption, aligns, widths, headers, rows)
  local lines = {}
  local function add(s)
    lines[#lines + 1] = " " .. s .. "\n"
  end
  -- Guard against nil as well as "": concatenating nil would raise.
  if caption and caption ~= "" then
    add(caption)
  end
  -- ipairs keeps headers and cells in index order.
  for _, h in ipairs(headers) do
    add(h)
  end
  for _, row in ipairs(rows) do
    for _, cell in ipairs(row) do
      add(cell)
    end
  end
  return table.concat(lines)
end
241 | -- }}} | ||
242 | |||
-- Fallback for globals that were never defined: looking one up writes a
-- warning naming it to stderr and yields a function that returns "".
-- Useful while developing a writer, to spot functions that are still
-- missing.
setmetatable(_G, {
  __index = function(_, name)
    io.stderr:write(string.format("WARNING: Undefined function '%s'\n", name))
    return function() return "" end
  end,
})