about summary refs log tree commit diff stats
path: root/trunk/hapax.lua
diff options
context:
space:
mode:
Diffstat (limited to 'trunk/hapax.lua')
-rw-r--r--trunk/hapax.lua95
1 files changed, 45 insertions, 50 deletions
diff --git a/trunk/hapax.lua b/trunk/hapax.lua index 7e8410c..af59e59 100644 --- a/trunk/hapax.lua +++ b/trunk/hapax.lua
@@ -1,8 +1,8 @@
1-- Pandoc River writer 1-- Pandoc Hapax writer
2-- it takes out all formatting, leaving only a river of text 2-- it takes out all formatting, leaving only a river of text
3-- running down the page: one word per line 3-- running down the page: one word per line, stripping all duplicates
4-- vim: fdm=marker 4-- vim: fdm=marker
5-- invoke with: pandoc -t river.lua 5-- invoke with: pandoc -t hapax.lua
6 6
7os.setlocale("en_US.UTF-8") 7os.setlocale("en_US.UTF-8")
8 8
@@ -40,36 +40,22 @@ function flow(s)
40end 40end
41 41
42function nude(s) 42function nude(s)
43 s = s:lower()
43 -- Expand contractions 44 -- Expand contractions
44 s = s:gsub("'%a+%s", function (x) 45 s = s:gsub("'ll", " will ")
45 if x == "'ll" then 46 s = s:gsub("'ve", " have ")
46 return " will " 47 s = s:gsub("'re", " are ")
47 elseif x == "'ve" then 48 s = s:gsub("i'm", " i am ")
48 return " have "
49 elseif x == "'re" then
50 return " are "
51 else
52 return x
53 end
54 end)
55 s = s:gsub("it's", "it is") 49 s = s:gsub("it's", "it is")
56 s = s:gsub("n't", " not ") 50 s = s:gsub("n't", " not ")
57 -- Get rid of quotes around words 51 s = s:gsub("&", " and ")
58 s = s:gsub('"', ' ') 52 -- -- Remove dashes (not hyphens)
59 s = s:gsub("%s'", ' ')
60 s = s:gsub("'%s", ' ')
61 -- Remove HTML entities
62 s = s:gsub('&.-;', ' ')
63 s = s:gsub('%b<>', ' ')
64 -- Remove end-of-line backslashes
65 s = s:gsub('\\$', ' ')
66 -- Remove dashes (not hyphens)
67 s = s:gsub('%-[%-%s]+', ' ') 53 s = s:gsub('%-[%-%s]+', ' ')
68 -- Remove everything that is not letters or numbers 54 -- Remove everything that is not letters or numbers
69 s = s:gsub('[%.!%?:;,%[%]%(%)<>]', ' ') 55 s = s:gsub('[^A-Za-z0-9/\'-]', ' ')
70 -- Remove extra spaces 56 -- Remove extra spaces
71 s = s:gsub('%s+', ' ') 57 s = s:gsub('%s+', ' ')
72 return s:lower() 58 return " "..s.." "
73end 59end
74 60
75-- This function is called once for the whole document. Parameters: 61-- This function is called once for the whole document. Parameters:
@@ -89,71 +75,72 @@ function Doc(body, metadata, variables)
89 end 75 end
90 add(body) 76 add(body)
91 return hapax(flow(buffer)) 77 return hapax(flow(buffer))
78 -- return flow(buffer)
92end 79end
93 80
94-- Remove all formatting {{{ 81-- Remove all formatting {{{
95function Note(s) 82function Note(s)
96 return nude(s) 83 return s
97end 84end
98 85
99function Blocksep() 86function Blocksep()
100 return "\n" 87 return "\n"
101end 88end
102function Emph(s) 89function Emph(s)
103 return nude(s) 90 return s
104end 91end
105 92
106function Strong(s) 93function Strong(s)
107 return nude(s) 94 return s
108end 95end
109 96
110function Subscript(s) 97function Subscript(s)
111 return nude(s) 98 return s
112end 99end
113 100
114function Superscript(s) 101function Superscript(s)
115 return nude(s) 102 return s
116end 103end
117 104
118function SmallCaps(s) 105function SmallCaps(s)
119 return nude(s) 106 return s
120end 107end
121 108
122function Strikeout(s) 109function Strikeout(s)
123 return nude(s) 110 return s
124end 111end
125 112
126function Code(s, attr) 113function Code(s, attr)
127 return nude(s) 114 return s
128end 115end
129 116
130function CodeBlock(s, attr) 117function CodeBlock(s, attr)
131 return nude(s) 118 return s
132end 119end
133 120
134function InlineMath(s) 121function InlineMath(s)
135 return nude(s) 122 return s
136end 123end
137 124
138function DisplayMath(s) 125function DisplayMath(s)
139 return nude(s) 126 return s
140end 127end
141 128
142function Span(s, attr) 129function Span(s, attr)
143 return nude(s) 130 return s
144end 131end
145 132
146function Cite(s) 133function Cite(s)
147 return nude(s) 134 return s
148end 135end
149 136
150function Plain(s) 137function Plain(s)
151 return nude(s) 138 return s
152end 139end
153 140
154-- Links only include the link text 141-- Links only include the link text
155function Link(s, src, tit) 142function Link(s, src, tit)
156 return nude(s) 143 return s
157end 144end
158 145
159-- Images have nothing to give us 146-- Images have nothing to give us
@@ -162,16 +149,24 @@ function Image(s, src, tit)
162 return "\n" 149 return "\n"
163end 150end
164 151
152function RawBlock(s)
153 return s
154end
155
156function RawInline(s)
157 return s
158end
159
165function CaptionedImage(s, src, tit) 160function CaptionedImage(s, src, tit)
166 return "\n" 161 return "\n"
167end 162end
168 163
169function Str(s) 164function Str(s)
170 return nude(s) 165 return s
171end 166end
172 167
173function Div(s, attr) 168function Div(s, attr)
174 return nude(s) 169 return s
175end 170end
176 171
177function Space(s) 172function Space(s)
@@ -183,15 +178,15 @@ function LineBreak()
183end 178end
184 179
185function Para(s) 180function Para(s)
186 return nude(s) 181 return s
187end 182end
188 183
189function Header(lev, s, attr) 184function Header(lev, s, attr)
190 return nude(s) 185 return s
191end 186end
192 187
193function BlockQuote(s) 188function BlockQuote(s)
194 return nude(s) 189 return s
195end 190end
196 191
197function HorizontalRule() 192function HorizontalRule()
@@ -201,7 +196,7 @@ end
201function BulletList(items) 196function BulletList(items)
202 local buffer = "" 197 local buffer = ""
203 for _, item in pairs(items) do 198 for _, item in pairs(items) do
204 buffer = buffer .. nude(item) .. "\n" 199 buffer = buffer .. " " .. item .. "\n"
205 end 200 end
206 return buffer .. "\n" 201 return buffer .. "\n"
207end 202end
@@ -209,7 +204,7 @@ end
209function OrderedList(items) 204function OrderedList(items)
210 local buffer = "" 205 local buffer = ""
211 for _, item in pairs(items) do 206 for _, item in pairs(items) do
212 buffer = buffer .. nude(item) .. "\n" 207 buffer = buffer .. " " .. item .. "\n"
213 end 208 end
214 return buffer .. "\n" 209 return buffer .. "\n"
215end 210end
@@ -218,7 +213,7 @@ function DefinitionList(items)
218 local buffer = "" 213 local buffer = ""
219 for _, item in pairs(items) do 214 for _, item in pairs(items) do
220 for k, v in pairs(item) do 215 for k, v in pairs(item) do
221 buffer = buffer .. nude(k) .. "\n" .. nude(v) .. "\n" 216 buffer = buffer .. " " .. k .. "\n" .. v .. "\n"
222 end 217 end
223 end 218 end
224 return buffer .. "\n" 219 return buffer .. "\n"
@@ -227,7 +222,7 @@ end
227function Table(caption, aligns, widths, headers, rows) 222function Table(caption, aligns, widths, headers, rows)
228 local buffer = "" 223 local buffer = ""
229 local function add(s) 224 local function add(s)
230 buffer = buffer .. nude(s) .. "\n" 225 buffer = buffer .. " " .. s .. "\n"
231 end 226 end
232 if caption ~= "" then 227 if caption ~= "" then
233 add(caption) 228 add(caption)