-- Provenance: scripts/hapax.lua as added (new file) by commit
-- 9fce418b46c9f0894f429384ef9e3dabaeffbeb4, Case Duckworth,
-- Tue, 14 Apr 2015 16:36:17 -0700 (recovered from a cgit patch view).
-- Commit subject: "Change file hierarchy and rewrite makefile".
-- File hierarchy described by that commit:
--   /
--     appendix/   < appendix source files
--     backlinks/  < backlink sources & builds
--     hapax/      < *.hapax source files
--     scripts/    < scripts, like *.js, *.hs, etc.
--     templates/  < templates for outputs
--     text/       < source files
--     trunk/      < assets, like css, images, heads, etc.
--     index.html
--     *.html
--     Makefile

-- Pandoc Hapax writer
-- it takes out all formatting, leaving only a river of text
-- running down the page: one word per line, stripping all duplicates
-- vim: fdm=marker
-- invoke with: pandoc -t hapax.lua

os.setlocale("en_US.UTF-8")

--- Keep only the words that occur exactly once in `s`.
-- A "word" is any maximal run of non-whitespace characters. Every word that
-- occurs more than once is dropped entirely (all of its occurrences, not just
-- the repeats); the survivors keep their original relative order.
-- @param s string of whitespace-separated words
-- @return the surviving words joined with newlines ("" if none survive)
function hapax(s)
  local words = {}
  local counts = {}
  for word in s:gmatch("%S+") do
    words[#words + 1] = word
    counts[word] = (counts[word] or 0) + 1
  end

  -- Build a fresh list instead of deleting in place: one linear pass rather
  -- than repeated table.remove calls that each shift the tail of the array.
  local singles = {}
  for i = 1, #words do
    local word = words[i]
    if counts[word] == 1 then
      singles[#singles + 1] = word
    end
  end
  return table.concat(singles, "\n")
end

--- Replace every run of whitespace in `s` with a single newline.
-- @param s input string
-- @return the reflowed string (exactly one value)
function flow(s)
  -- The parentheses discard gsub's second result (the substitution count).
  return (s:gsub("%s+", "\n"))
end

--- Normalise text down to bare lowercase words.
-- Lowercases `s`, expands common English contractions, removes dashes, and
-- turns every character other than ASCII letters, digits, "/", "'" and "-"
-- into a space.
-- @param s input string
-- @return the normalised string wrapped in one leading and one trailing space
function nude(s)
  s = s:lower()
  -- Expand contractions
  s = s:gsub("'ll", " will ")
  s = s:gsub("'ve", " have ")
  s = s:gsub("'re", " are ")
  s = s:gsub("i'm", " i am ")
  s = s:gsub("it's", "it is")
  s = s:gsub("n't", " not ")
  s = s:gsub("'d", " would ") -- can be "had", but still no hapax
  s = s:gsub("&", " and ")
  -- Remove dashes (not hyphens): a "-" followed by more "-" or whitespace
  s = s:gsub('%-[%-%s]+', ' ')
  -- Remove everything that is not letters or numbers
  s = s:gsub('[^A-Za-z0-9/\'-]', ' ')
  -- Remove extra spaces
  s = s:gsub('%s+', ' ')
  return " " .. s .. " "
end

-- This function is called once for the whole document. Parameters:
-- body is a string, metadata is a table, variables is a table.
-- The title and subtitle (when present in metadata) and the body are each
-- normalised with nude(), reflowed to one word per line, and reduced to the
-- words that occur only once.
function Doc(body, metadata, variables)
  local buffer = ""
  local function add(s)
    buffer = buffer .. nude(s) .. "\n"
  end
  if metadata['title'] then
    add(metadata['title'])
  end
  if metadata['subtitle'] then
    add(metadata['subtitle'])
  end
  add(body)
  return hapax(flow(buffer))
  -- return flow(buffer)
end

-- Remove all formatting {{{
function Note(s)
  return s
end

function Blocksep()
  return "\n"
end

function Emph(s)
  return s
end

function Strong(s)
  return s
end

function Subscript(s)
  return s
end

function Superscript(s)
  return s
end

function SmallCaps(s)
  return s
end

function Strikeout(s)
  return s
end

function Code(s, attr)
  return s
end

function CodeBlock(s, attr)
  return s
end

function InlineMath(s)
  return s
end

function DisplayMath(s)
  return s
end

function Span(s, attr)
  return s
end

function Cite(s)
  return s
end

function Plain(s)
  return s
end

-- Links only include the link text
function Link(s, src, tit)
  return s
end

-- Images have nothing to give us
-- (but add a newline as a separator just in case)
function Image(s, src, tit)
  return "\n"
end

function RawBlock(s)
  return s
end

function RawInline(s)
  return s
end

function CaptionedImage(s, src, tit)
  return "\n"
end

function Str(s)
  return s
end

function Div(s, attr)
  return s
end

function Space(s)
  return "\n"
end

function LineBreak()
  return "\n"
end

function Para(s)
  return s
end

function Header(lev, s, attr)
  return s
end

function BlockQuote(s)
  return s
end

function HorizontalRule()
  return "\n"
end

-- Shared by BulletList and OrderedList: emit each item on its own line,
-- prefixed with a space, followed by one extra blank line.
local function join_items(items)
  local parts = {}
  for i, item in ipairs(items) do
    parts[i] = " " .. item .. "\n"
  end
  return table.concat(parts) .. "\n"
end

function BulletList(items)
  return join_items(items)
end

function OrderedList(items)
  return join_items(items)
end

-- Each item is a table mapping a term to its definition(s).
-- NOTE(review): pandoc's sample custom writer treats the value as a list of
-- definition strings; a plain string value is still accepted here as well.
function DefinitionList(items)
  local parts = {}
  for _, item in ipairs(items) do
    for term, defs in pairs(item) do
      if type(defs) == "table" then
        -- Concatenating a table with ".." would raise an error.
        defs = table.concat(defs, "\n")
      end
      parts[#parts + 1] = " " .. term .. "\n" .. defs .. "\n"
    end
  end
  return table.concat(parts) .. "\n"
end

-- Flatten a table to its caption (if non-empty), header cells and body
-- cells, one per line, each prefixed with a space.
function Table(caption, aligns, widths, headers, rows)
  local parts = {}
  local function add(s)
    parts[#parts + 1] = " " .. s .. "\n"
  end
  if caption ~= "" then
    add(caption)
  end
  for _, h in ipairs(headers) do
    add(h)
  end
  for _, row in ipairs(rows) do
    for _, cell in ipairs(row) do
      add(cell)
    end
  end
  return table.concat(parts)
end
-- }}}

-- The following code will produce runtime warnings when you haven't defined
-- all of the functions you need for the custom writer, so it's useful
-- to include when you're working on a writer.
local meta = {}
meta.__index =
  function(_, key)
    io.stderr:write(string.format("WARNING: Undefined function '%s'\n", key))
    return function() return "" end
  end
setmetatable(_G, meta)