--- lam.read local m = {} local t = require "type" local utf8 = require "utf8" local pop = require("util").pop local pp = require("pp").pp -- TODO: -- - string reading -- - # syntax -- - comments -- - probably more function m.inport (source, kind) -- KIND can be one of "file", "string"; defaults to "file" -- SOURCE is the name of the file or the string to read, or nil; if nil, -- read from standard input. local f, l local k = kind or "file" if source then if kind == "file" then f = io.open(source, "r") elseif kind == "string" then l = m.tochars(source) end else -- KIND is ignored here f = io.input() end local t = { file = f, line = l or {}, next_token = function (self) local tok, toktype while true do if #self.line == 0 and self.file then self.line = m.tochars( self.file:read("*l")) end if not self.line or #self.line == 0 then return nil end tok, toktype, self.line = m.scan(self.line)() if tok then return tok, toktype end end end, } if t.file then t.close = function (self) self.file:close() end; end local mt = { __type = "port", __tostring = function (self) return string.format("#", self.file or "(string)") end, } return setmetatable(t, mt) end function m.tochars (s) local chars = {} for _, code in utf8.codes(s) do table.insert(chars, code) end return chars end --- Consumers -- These take a table of characters (cs) and return a token and the rest of the -- chars local function consume_token (cs) local token = {} while #cs > 0 and cs[1]:match("[^%s()\"#'`,@;]") do table.insert(token, pop(cs)) end return table.concat(token), cs end local function consume_whitespace (cs) while #cs > 0 and cs[1]:match("%s") do pop(cs) end return nil, cs end --- Reading from a port m.readtable = { ["("] = function (cs) return pop(cs), "open", cs end, [")"] = function (cs) return pop(cs), "close", cs end, ["'"] = function (cs) return pop(cs), "quote", cs end, ["`"] = function (cs) return pop(cs), "quote", cs end, [","] = function (cs) pop(cs) -- remove ',' if cs[1] == "@" then pop(cs) -- remove '@' return ",@", "quote", cs else return ",", "quote", cs end end, } -- Return an iterator over a character table, so you can do: -- for token, chars in scan(cs) do ... end function m.scan (cs) local cs = cs return function () if not next(cs) then return nil end local token, toktype while true do if m.readtable[cs[1]] then token, toktype, cs = m.readtable[cs[1]](cs) return token, toktype, cs elseif cs[1]:match("%s") then _, cs = consume_whitespace(cs) return nil, nil, cs -- return nil, cs elseif cs[1]:match("[%d.+-]") then -- numbers, +, -, ., ... local token, cs = consume_token(cs) if token:match("[-+]") or token == "..." then return token, "symbol", cs elseif token == "." then return token, "dot", cs else local n = tonumber(token) assert (n ~= nil, "Bad number: "..n) return n, "number", cs end else token, cs = consume_token(cs) return token, "symbol", cs end end end end function m.readchar (port) if #port.line > 0 then local ch = pop(port.line) return ch else return port.file.read(1) end end m.readmacros = { quote = function (tok, toktype, port) local qs = { ["'"] = "quote", ["`"] = "quasiquote", [","] = "unquote", [",@"] = "unquote-splicing", } if not qs[tok] then error(string.format("Bad quote: '%s'\n", tok)) end local Q = {qs[tok]} table.insert(Q, m.read(port)) return t.list(Q) end, } function m.read (port) local function read_ahead (tok, toktype) if not tok then error("Unexpected EOF") end if toktype == "open" then local L = {} while true do local tok, toktype = port:next_token() if toktype == "close" then return t.list(L) elseif toktype == "dot" then local fin = m.read(port) port:next_token() -- throw away ')' return t.list(L, fin) else table.insert(L, read_ahead(tok, toktype)) end end elseif toktype == "close" then error("Unexpected ')'") elseif m.readmacros[toktype] then return m.readmacros[toktype](tok, toktype, port) else return tok end end -- body of read local tok1, toktype1 = port:next_token() if not tok1 then return nil else return read_ahead(tok1, toktype1) end end function m.read_string (str) return m.read(m.inport(str, "string")) end --- return m