diff options
Diffstat (limited to 'trunk/hapax.lua')
-rw-r--r-- | trunk/hapax.lua | 95 |
1 files changed, 45 insertions, 50 deletions
diff --git a/trunk/hapax.lua b/trunk/hapax.lua index 7e8410c..af59e59 100644 --- a/trunk/hapax.lua +++ b/trunk/hapax.lua | |||
@@ -1,8 +1,8 @@ | |||
1 | -- Pandoc River writer | 1 | -- Pandoc Hapax writer |
2 | -- it takes out all formatting, leaving only a river of text | 2 | -- it takes out all formatting, leaving only a river of text |
3 | -- running down the page: one word per line | 3 | -- running down the page: one word per line, stripping all duplicates |
4 | -- vim: fdm=marker | 4 | -- vim: fdm=marker |
5 | -- invoke with: pandoc -t river.lua | 5 | -- invoke with: pandoc -t hapax.lua |
6 | 6 | ||
7 | os.setlocale("en_US.UTF-8") | 7 | os.setlocale("en_US.UTF-8") |
8 | 8 | ||
@@ -40,36 +40,22 @@ function flow(s) | |||
40 | end | 40 | end |
41 | 41 | ||
42 | function nude(s) | 42 | function nude(s) |
43 | s = s:lower() | ||
43 | -- Expand contractions | 44 | -- Expand contractions |
44 | s = s:gsub("'%a+%s", function (x) | 45 | s = s:gsub("'ll", " will ") |
45 | if x == "'ll" then | 46 | s = s:gsub("'ve", " have ") |
46 | return " will " | 47 | s = s:gsub("'re", " are ") |
47 | elseif x == "'ve" then | 48 | s = s:gsub("i'm", " i am ") |
48 | return " have " | ||
49 | elseif x == "'re" then | ||
50 | return " are " | ||
51 | else | ||
52 | return x | ||
53 | end | ||
54 | end) | ||
55 | s = s:gsub("it's", "it is") | 49 | s = s:gsub("it's", "it is") |
56 | s = s:gsub("n't", " not ") | 50 | s = s:gsub("n't", " not ") |
57 | -- Get rid of quotes around words | 51 | s = s:gsub("&", " and ") |
58 | s = s:gsub('"', ' ') | 52 | -- -- Remove dashes (not hyphens) |
59 | s = s:gsub("%s'", ' ') | ||
60 | s = s:gsub("'%s", ' ') | ||
61 | -- Remove HTML entities | ||
62 | s = s:gsub('&.-;', ' ') | ||
63 | s = s:gsub('%b<>', ' ') | ||
64 | -- Remove end-of-line backslashes | ||
65 | s = s:gsub('\\$', ' ') | ||
66 | -- Remove dashes (not hyphens) | ||
67 | s = s:gsub('%-[%-%s]+', ' ') | 53 | s = s:gsub('%-[%-%s]+', ' ') |
68 | -- Remove everything that is not letters or numbers | 54 | -- Remove everything that is not letters or numbers |
69 | s = s:gsub('[%.!%?:;,%[%]%(%)<>]', ' ') | 55 | s = s:gsub('[^A-Za-z0-9/\'-]', ' ') |
70 | -- Remove extra spaces | 56 | -- Remove extra spaces |
71 | s = s:gsub('%s+', ' ') | 57 | s = s:gsub('%s+', ' ') |
72 | return s:lower() | 58 | return " "..s.." " |
73 | end | 59 | end |
74 | 60 | ||
75 | -- This function is called once for the whole document. Parameters: | 61 | -- This function is called once for the whole document. Parameters: |
@@ -89,71 +75,72 @@ function Doc(body, metadata, variables) | |||
89 | end | 75 | end |
90 | add(body) | 76 | add(body) |
91 | return hapax(flow(buffer)) | 77 | return hapax(flow(buffer)) |
78 | -- return flow(buffer) | ||
92 | end | 79 | end |
93 | 80 | ||
94 | -- Remove all formatting {{{ | 81 | -- Remove all formatting {{{ |
95 | function Note(s) | 82 | function Note(s) |
96 | return nude(s) | 83 | return s |
97 | end | 84 | end |
98 | 85 | ||
99 | function Blocksep() | 86 | function Blocksep() |
100 | return "\n" | 87 | return "\n" |
101 | end | 88 | end |
102 | function Emph(s) | 89 | function Emph(s) |
103 | return nude(s) | 90 | return s |
104 | end | 91 | end |
105 | 92 | ||
106 | function Strong(s) | 93 | function Strong(s) |
107 | return nude(s) | 94 | return s |
108 | end | 95 | end |
109 | 96 | ||
110 | function Subscript(s) | 97 | function Subscript(s) |
111 | return nude(s) | 98 | return s |
112 | end | 99 | end |
113 | 100 | ||
114 | function Superscript(s) | 101 | function Superscript(s) |
115 | return nude(s) | 102 | return s |
116 | end | 103 | end |
117 | 104 | ||
118 | function SmallCaps(s) | 105 | function SmallCaps(s) |
119 | return nude(s) | 106 | return s |
120 | end | 107 | end |
121 | 108 | ||
122 | function Strikeout(s) | 109 | function Strikeout(s) |
123 | return nude(s) | 110 | return s |
124 | end | 111 | end |
125 | 112 | ||
126 | function Code(s, attr) | 113 | function Code(s, attr) |
127 | return nude(s) | 114 | return s |
128 | end | 115 | end |
129 | 116 | ||
130 | function CodeBlock(s, attr) | 117 | function CodeBlock(s, attr) |
131 | return nude(s) | 118 | return s |
132 | end | 119 | end |
133 | 120 | ||
134 | function InlineMath(s) | 121 | function InlineMath(s) |
135 | return nude(s) | 122 | return s |
136 | end | 123 | end |
137 | 124 | ||
138 | function DisplayMath(s) | 125 | function DisplayMath(s) |
139 | return nude(s) | 126 | return s |
140 | end | 127 | end |
141 | 128 | ||
142 | function Span(s, attr) | 129 | function Span(s, attr) |
143 | return nude(s) | 130 | return s |
144 | end | 131 | end |
145 | 132 | ||
146 | function Cite(s) | 133 | function Cite(s) |
147 | return nude(s) | 134 | return s |
148 | end | 135 | end |
149 | 136 | ||
150 | function Plain(s) | 137 | function Plain(s) |
151 | return nude(s) | 138 | return s |
152 | end | 139 | end |
153 | 140 | ||
154 | -- Links only include the link text | 141 | -- Links only include the link text |
155 | function Link(s, src, tit) | 142 | function Link(s, src, tit) |
156 | return nude(s) | 143 | return s |
157 | end | 144 | end |
158 | 145 | ||
159 | -- Images have nothing to give us | 146 | -- Images have nothing to give us |
@@ -162,16 +149,24 @@ function Image(s, src, tit) | |||
162 | return "\n" | 149 | return "\n" |
163 | end | 150 | end |
164 | 151 | ||
152 | function RawBlock(s) | ||
153 | return s | ||
154 | end | ||
155 | |||
156 | function RawInline(s) | ||
157 | return s | ||
158 | end | ||
159 | |||
165 | function CaptionedImage(s, src, tit) | 160 | function CaptionedImage(s, src, tit) |
166 | return "\n" | 161 | return "\n" |
167 | end | 162 | end |
168 | 163 | ||
169 | function Str(s) | 164 | function Str(s) |
170 | return nude(s) | 165 | return s |
171 | end | 166 | end |
172 | 167 | ||
173 | function Div(s, attr) | 168 | function Div(s, attr) |
174 | return nude(s) | 169 | return s |
175 | end | 170 | end |
176 | 171 | ||
177 | function Space(s) | 172 | function Space(s) |
@@ -183,15 +178,15 @@ function LineBreak() | |||
183 | end | 178 | end |
184 | 179 | ||
185 | function Para(s) | 180 | function Para(s) |
186 | return nude(s) | 181 | return s |
187 | end | 182 | end |
188 | 183 | ||
189 | function Header(lev, s, attr) | 184 | function Header(lev, s, attr) |
190 | return nude(s) | 185 | return s |
191 | end | 186 | end |
192 | 187 | ||
193 | function BlockQuote(s) | 188 | function BlockQuote(s) |
194 | return nude(s) | 189 | return s |
195 | end | 190 | end |
196 | 191 | ||
197 | function HorizontalRule() | 192 | function HorizontalRule() |
@@ -201,7 +196,7 @@ end | |||
201 | function BulletList(items) | 196 | function BulletList(items) |
202 | local buffer = "" | 197 | local buffer = "" |
203 | for _, item in pairs(items) do | 198 | for _, item in pairs(items) do |
204 | buffer = buffer .. nude(item) .. "\n" | 199 | buffer = buffer .. " " .. item .. "\n" |
205 | end | 200 | end |
206 | return buffer .. "\n" | 201 | return buffer .. "\n" |
207 | end | 202 | end |
@@ -209,7 +204,7 @@ end | |||
209 | function OrderedList(items) | 204 | function OrderedList(items) |
210 | local buffer = "" | 205 | local buffer = "" |
211 | for _, item in pairs(items) do | 206 | for _, item in pairs(items) do |
212 | buffer = buffer .. nude(item) .. "\n" | 207 | buffer = buffer .. " " .. item .. "\n" |
213 | end | 208 | end |
214 | return buffer .. "\n" | 209 | return buffer .. "\n" |
215 | end | 210 | end |
@@ -218,7 +213,7 @@ function DefinitionList(items) | |||
218 | local buffer = "" | 213 | local buffer = "" |
219 | for _, item in pairs(items) do | 214 | for _, item in pairs(items) do |
220 | for k, v in pairs(item) do | 215 | for k, v in pairs(item) do |
221 | buffer = buffer .. nude(k) .. "\n" .. nude(v) .. "\n" | 216 | buffer = buffer .. " " .. k .. "\n" .. v .. "\n" |
222 | end | 217 | end |
223 | end | 218 | end |
224 | return buffer .. "\n" | 219 | return buffer .. "\n" |
@@ -227,7 +222,7 @@ end | |||
227 | function Table(caption, aligns, widths, headers, rows) | 222 | function Table(caption, aligns, widths, headers, rows) |
228 | local buffer = "" | 223 | local buffer = "" |
229 | local function add(s) | 224 | local function add(s) |
230 | buffer = buffer .. nude(s) .. "\n" | 225 | buffer = buffer .. " " .. s .. "\n" |
231 | end | 226 | end |
232 | if caption ~= "" then | 227 | if caption ~= "" then |
233 | add(caption) | 228 | add(caption) |