--- lam.read local m = {} local t = require "type" local utf8 = require "utf8" local pop = require("util").pop -- TODO: -- - string reading -- - probably more m.eof = setmetatable({}, { __type = "EOF", __tostring = function () return "#" end, }) local function inport_next_token (port) local tok, toktype while true do if #port.line == 0 then if port.file then local ln = port.file:read() if ln == nil then return m.eof end port.line = m.tochars(ln) else return nil end end tok, toktype, port.line = m.scan(port.line)() port.line = port.line or {} if tok ~= nil then return tok, toktype end end end function m.inport (source, kind) -- KIND can be one of "file", "string"; defaults to "file" -- SOURCE is the name of the file or the string to read, or nil; if nil, -- read from standard input. local f, l local k = kind or "file" if source then if k == "file" then f = io.open(source, "r") elseif k == "string" then l = m.tochars(source) end else -- KIND is ignored here f = io.input() end local t = { file = f, filename = source, kind = kind, line = l or {}, next_token = inport_next_token, } if t.file then t.close = function (self) self.file:close() end; end local mt = { __type = "port", __tostring = function (self) return string.format("#", self.file or "(string)") end, } return setmetatable(t, mt) end function m.tochars (s) local chars = {} for _, code in utf8.codes(s) do table.insert(chars, code) end return chars end --- Consumers -- These take a table of characters (cs) and return: -- a token, its type, and the rest of the characters local token_separator = "[^%s#()\"'`,@;]" local function consume_token (cs) local token = {} while #cs > 0 and cs[1]:match(token_separator) do table.insert(token, pop(cs)) end return table.concat(token), "symbol", cs end local function consume_whitespace (cs) while #cs > 0 and cs[1]:match("%s") do pop(cs) end return nil, nil, cs end local function consume_comment (cs) local comment = {} repeat table.insert(comment, pop(cs)) until #cs == 0 or cs[1]:match("\n") return table.concat(comment), "comment", cs end local function idf (x) return function () return x end end local function numf (base) return function (token) local n = tonumber(token:sub(3), base) assert(n, "Can't read number: " .. token) return n end end local literals = { literal = { ["#t"] = idf(true), ["#true"] = idf(true), ["#f"] = idf(false), ["#false"] = idf(false), ["#\\space"] = idf(t.character(" ")), ["#\\tab"] = idf(t.character("\t")), ["#\\newline"] = idf(t.character("\n")), }, match = { ["^#b"] = numf(2), ["^#o"] = numf(8), ["^#d"] = numf(10), ["^#x"] = numf(16), ["^#\\"] = function (tok) return t.character(tok:sub(3)) end, } } local function consume_literal (cs) -- whitespace and parantheses character literals. -- reverse the match test b/c it's already a complement if cs[2] == "\\" and not cs[3]:match(token_separator) then return type.character(cs[3]) end pop(cs) -- discard '#' local token, value, cs = consume_token(cs) -- todo: vectors #(...) token = "#" .. token -- put '#' back if literals.literal[token] then value = literals.literal[token]() else for re, fn in pairs(literals.match) do if token:match(re) then value = fn(token) end end end -- TODO : if `nil' is to be a value in lam i'm going to have to figure -- out some kind of 'lam nil' and 'lua nil' or something.. assert(value~=nil, "Can't read literal: " .. token) return value, "literal", cs end --- Reading from a port m.readtable = { ["("] = function (cs) return pop(cs), "open", cs end, [")"] = function (cs) return pop(cs), "close", cs end, ["'"] = function (cs) return pop(cs), "quote", cs end, ["`"] = function (cs) return pop(cs), "quote", cs end, [","] = -- unquote function (cs) pop(cs) -- remove ',' if cs[1] == "@" then pop(cs) -- remove '@' return ",@", "quote", cs else return ",", "quote", cs end end, [";"] = consume_comment, ["#"] = consume_literal, } --- TODO: Figure out how to read #f and #n properly -- Return an iterator over a character table, so you can do: -- for token, chars in scan(cs) do ... end function m.scan (cs) local cs = cs return function () if not next(cs) then return nil end local token, toktype while true do if m.readtable[cs[1]] then token, toktype, cs = m.readtable[cs[1]](cs) return token, toktype, cs elseif cs[1]:match("%s") then --- should this just continue the loop? -- i.e., remove `return' return consume_whitespace(cs) elseif cs[1]:match("[%d.+-]") then -- numbers, +, -, ., ... local token, _, cs = consume_token(cs) if token:match("[-+]") or token == "..." then return token, "symbol", cs elseif token == "." then return token, "dot", cs else local n = tonumber(token) assert (n ~= nil, "Bad number: "..n) return n, "number", cs end else return consume_token(cs) end end end end function m.readchar (port) if #port.line > 0 then local ch = pop(port.line) return ch else return port.file and port.file.read(1) end end m.readmacros = { quote = function (tok, toktype, port) local qs = { ["'"] = "quote", ["`"] = "quasiquote", [","] = "unquote", [",@"] = "unquote-splicing", } if not qs[tok] then error(string.format("Bad quote: '%s'\n", tok)) end local Q = {qs[tok]} table.insert(Q, m.read(port)) return t.list(Q) end, comment = idf(nil) } function m.read (port) local function read_ahead (tok, toktype) if tok == m.eof then error("Unexpected EOF") end if toktype == "open" then local L = {} while true do local tok, toktype = port:next_token() if toktype == "close" then return t.list(L) elseif toktype == "dot" then local fin = m.read(port) port:next_token() -- throw away ')' return t.list(L, fin) else table.insert(L, read_ahead(tok, toktype)) end end elseif toktype == "close" then error("Unexpected ')'") elseif m.readmacros[toktype] then return m.readmacros[toktype](tok, toktype, port) else return tok end end -- body of read local tok1, toktype1 = port:next_token() if tok1 == m.eof then return m.eof else return read_ahead(tok1, toktype1) end end function m.read_string (str) return m.read(m.inport(str, "string")) end --- return m