--- lam.read local m = {} local t = require "type" local utf8 = require "utf8" local pop = require("util").pop -- TODO: -- - string reading -- - # syntax -- - comments -- - probably more function m.inport (source, kind) -- KIND can be one of "file", "string"; defaults to "file" -- SOURCE is the name of the file or the string to read, or nil; if nil, -- read from standard input. local f, l local k = kind or "file" if source then if kind == "file" then f = io.open(source, "r") elseif kind == "string" then l = m.tochars(source) end else -- KIND is ignored here f = io.input() end local t = { file = f, line = l or {}, next_token = function (self) local tok, toktype while true do if #self.line == 0 and self.file then self.line = m.tochars( self.file:read("*l")) end if not self.line or #self.line == 0 then return nil end tok, toktype, self.line = m.scan(self.line)() if tok ~= nil then print(tok, toktype) return tok, toktype end end end, } if t.file then t.close = function (self) self.file:close() end; end local mt = { __type = "port", __tostring = function (self) return string.format("#", self.file or "(string)") end, } return setmetatable(t, mt) end function m.tochars (s) local chars = {} for _, code in utf8.codes(s) do table.insert(chars, code) end return chars end --- Consumers -- These take a table of characters (cs) and return: -- a token, its type, and the rest of the characters local token_separator = "[^%s#()\"'`,@;]" local function consume_token (cs) local token = {} while #cs > 0 and cs[1]:match(token_separator) do table.insert(token, pop(cs)) end return table.concat(token), "symbol", cs end local function consume_whitespace (cs) while #cs > 0 and cs[1]:match("%s") do pop(cs) end return nil, nil, cs end local function consume_comment (cs) local comment = {} repeat table.insert(comment, pop(cs)) until #cs == 0 or cs[1]:match("\n") return table.concat(comment), "comment", cs end local function consume_literal (cs) -- whitespace and parantheses character literals. -- reverse the match test b/c it's already a complement if cs[2] == "\\" and not cs[3]:match(token_separator) then return type.character(cs[3]) end pop(cs) -- discard '#' local token, value, cs = consume_token(cs) -- todo: vectors #(...) token = "#" .. token -- put '#' back -- tokens! if token == "#t" or token == "#true" then -- booleans value = true elseif token == "#f" or token == "#false" then value = false --[[ To actually *read* nil, I need to change ports from returning `nil' on eof to an `eof' symbol, i think. elseif token == "#n" or token == "#nil" then value = nil --]] elseif token == "#\\space" then -- characters value = type.character(" ") elseif token == "#\\newline" then value = type.character("\n") elseif token:match("^#\\") then value = type.character(token:sub(3)) else error("Bad literal notation: " .. token) end return value, "literal", cs end --- Reading from a port m.readtable = { ["("] = function (cs) return pop(cs), "open", cs end, [")"] = function (cs) return pop(cs), "close", cs end, ["'"] = function (cs) return pop(cs), "quote", cs end, ["`"] = function (cs) return pop(cs), "quote", cs end, [","] = -- unquote function (cs) pop(cs) -- remove ',' if cs[1] == "@" then pop(cs) -- remove '@' return ",@", "quote", cs else return ",", "quote", cs end end, [";"] = consume_comment, ["#"] = consume_literal, } --- TODO: Figure out how to read #f and #n properly -- Return an iterator over a character table, so you can do: -- for token, chars in scan(cs) do ... end function m.scan (cs) local cs = cs return function () if not next(cs) then return nil end local token, toktype while true do if m.readtable[cs[1]] then token, toktype, cs = m.readtable[cs[1]](cs) return token, toktype, cs elseif cs[1]:match("%s") then return consume_whitespace(cs) elseif cs[1]:match("[%d.+-]") then -- numbers, +, -, ., ... local token, _, cs = consume_token(cs) if token:match("[-+]") or token == "..." then return token, "symbol", cs elseif token == "." then return token, "dot", cs else local n = tonumber(token) assert (n ~= nil, "Bad number: "..n) return n, "number", cs end else return consume_token(cs) end end end end function m.readchar (port) if #port.line > 0 then local ch = pop(port.line) return ch else return port.file and port.file.read(1) end end m.readmacros = { quote = function (tok, toktype, port) local qs = { ["'"] = "quote", ["`"] = "quasiquote", [","] = "unquote", [",@"] = "unquote-splicing", } if not qs[tok] then error(string.format("Bad quote: '%s'\n", tok)) end local Q = {qs[tok]} table.insert(Q, m.read(port)) return t.list(Q) end, comment = function () return nil end, } function m.read (port) local function read_ahead (tok, toktype) if tok == nil then error("Unexpected EOF") end if toktype == "open" then local L = {} while true do local tok, toktype = port:next_token() if toktype == "close" then return t.list(L) elseif toktype == "dot" then local fin = m.read(port) port:next_token() -- throw away ')' return t.list(L, fin) else table.insert(L, read_ahead(tok, toktype)) end end elseif toktype == "close" then error("Unexpected ')'") elseif m.readmacros[toktype] then return m.readmacros[toktype](tok, toktype, port) else return tok end end -- body of read local tok1, toktype1 = port:next_token() if tok1 == nil then return nil else return read_ahead(tok1, toktype1) end end function m.read_string (str) return m.read(m.inport(str, "string")) end --- return m