From d161be120c22faf222ec15ca618ee367e4d56575 Mon Sep 17 00:00:00 2001 From: Case Duckworth Date: Wed, 1 Apr 2015 23:16:18 -0700 Subject: Refactor makefile; Add hapax preprocessor --- trunk/hapax.lua | 95 +++++++++++++++++++++++++++------------------------------ 1 file changed, 45 insertions(+), 50 deletions(-) (limited to 'trunk/hapax.lua') diff --git a/trunk/hapax.lua b/trunk/hapax.lua index 7e8410c..af59e59 100644 --- a/trunk/hapax.lua +++ b/trunk/hapax.lua @@ -1,8 +1,8 @@ --- Pandoc River writer +-- Pandoc Hapax writer -- it takes out all formatting, leaving only a river of text --- running down the page: one word per line +-- running down the page: one word per line, stripping all duplicates -- vim: fdm=marker --- invoke with: pandoc -t river.lua +-- invoke with: pandoc -t hapax.lua os.setlocale("en_US.UTF-8") @@ -40,36 +40,22 @@ function flow(s) end function nude(s) + s = s:lower() -- Expand contractions - s = s:gsub("'%a+%s", function (x) - if x == "'ll" then - return " will " - elseif x == "'ve" then - return " have " - elseif x == "'re" then - return " are " - else - return x - end - end) + s = s:gsub("'ll", " will ") + s = s:gsub("'ve", " have ") + s = s:gsub("'re", " are ") + s = s:gsub("i'm", " i am ") s = s:gsub("it's", "it is") s = s:gsub("n't", " not ") - -- Get rid of quotes around words - s = s:gsub('"', ' ') - s = s:gsub("%s'", ' ') - s = s:gsub("'%s", ' ') - -- Remove HTML entities - s = s:gsub('&.-;', ' ') - s = s:gsub('%b<>', ' ') - -- Remove end-of-line backslashes - s = s:gsub('\\$', ' ') - -- Remove dashes (not hyphens) + s = s:gsub("&", " and ") + -- -- Remove dashes (not hyphens) s = s:gsub('%-[%-%s]+', ' ') -- Remove everything that is not letters or numbers - s = s:gsub('[%.!%?:;,%[%]%(%)<>]', ' ') + s = s:gsub('[^A-Za-z0-9/\'-]', ' ') -- Remove extra spaces s = s:gsub('%s+', ' ') - return s:lower() + return " "..s.." " end -- This function is called once for the whole document. Parameters: @@ -89,71 +75,72 @@ function Doc(body, metadata, variables) end add(body) return hapax(flow(buffer)) + -- return flow(buffer) end -- Remove all formatting {{{ function Note(s) - return nude(s) + return s end function Blocksep() return "\n" end function Emph(s) - return nude(s) + return s end function Strong(s) - return nude(s) + return s end function Subscript(s) - return nude(s) + return s end function Superscript(s) - return nude(s) + return s end function SmallCaps(s) - return nude(s) + return s end function Strikeout(s) - return nude(s) + return s end function Code(s, attr) - return nude(s) + return s end function CodeBlock(s, attr) - return nude(s) + return s end function InlineMath(s) - return nude(s) + return s end function DisplayMath(s) - return nude(s) + return s end function Span(s, attr) - return nude(s) + return s end function Cite(s) - return nude(s) + return s end function Plain(s) - return nude(s) + return s end -- Links only include the link text function Link(s, src, tit) - return nude(s) + return s end -- Images have nothing to give us @@ -162,16 +149,24 @@ function Image(s, src, tit) return "\n" end +function RawBlock(s) + return s +end + +function RawInline(s) + return s +end + function CaptionedImage(s, src, tit) return "\n" end function Str(s) - return nude(s) + return s end function Div(s, attr) - return nude(s) + return s end function Space(s) @@ -183,15 +178,15 @@ function LineBreak() end function Para(s) - return nude(s) + return s end function Header(lev, s, attr) - return nude(s) + return s end function BlockQuote(s) - return nude(s) + return s end function HorizontalRule() @@ -201,7 +196,7 @@ end function BulletList(items) local buffer = "" for _, item in pairs(items) do - buffer = buffer .. nude(item) .. "\n" + buffer = buffer .. " " .. item .. "\n" end return buffer .. "\n" end @@ -209,7 +204,7 @@ end function OrderedList(items) local buffer = "" for _, item in pairs(items) do - buffer = buffer .. nude(item) .. "\n" + buffer = buffer .. " " .. item .. "\n" end return buffer .. "\n" end @@ -218,7 +213,7 @@ function DefinitionList(items) local buffer = "" for _, item in pairs(items) do for k, v in pairs(item) do - buffer = buffer .. nude(k) .. "\n" .. nude(v) .. "\n" + buffer = buffer .. " " .. k .. "\n" .. v .. "\n" end end return buffer .. "\n" @@ -227,7 +222,7 @@ end function Table(caption, aligns, widths, headers, rows) local buffer = "" local function add(s) - buffer = buffer .. nude(s) .. "\n" + buffer = buffer .. " " .. s .. "\n" end if caption ~= "" then add(caption) -- cgit 1.4.1-21-gabe81