--- lam.read local m = {} local type = require("type") local eof, input_port = type.eof, type.input_port local util = require("util") local error, pop = util.error, util.pop local utf8_char = require("utf8").char local token_separators = "[%s#()\"'`,@;]" local function consume_token (cs) local tok = {} while #cs > 0 and not cs[1]:match(token_separators) do local c = pop(cs) table.insert(tok, c) end return table.concat(tok), cs end ---[[ READ TABLE ]]--- --- helper functions local function consume_unquote (cs) pop(cs) -- remove ',' if cs[1] == "@" then pop(cs) -- remove '@' return ",@", "quote", cs else return ",", "quote", cs end end local function consume_comment (cs) local comment = {} while #cs > 0 and not cs[1]:match("\n") do table.insert(comment, pop(cs)) end return table.concat(comment), "comment", cs end local function consume_literal (cs) local tok -- read '#\ ' and such correctly if cs[2] == "\\" then if not cs[3] then error("bad literal", "#\\") end if cs[3]:match(token_separators) then pop(cs) -- remove '\' pop(cs) -- remove next character return type.character(cs[1]) end end pop(cs) -- discard '#' ... tok, cs = consume_token(cs) tok = "#" .. tok -- ... then put it back print(tok) local val if m.readtable.literals.lit[tok] ~= nil then val = m.readtable.literals.lit[tok] else for re, fn in pairs(m.readtable.literals.regex) do if tok:match(re) then val = fn(tok) end end end if val == nil then error("bad literal", tok) end return val, "literal", cs end local function consume_whitespace (cs) while #cs > 0 and cs[1]:match("%s") do pop(cs) end return false, nil, cs end local function consume_number_etc (cs) -- Since numbers can start with +, -, and ., those symbols and ... are -- handled along with numbers. local tok tok, cs = consume_token(cs) if tok:match("^[-+]$") or tok == "..." then return tok, "symbol", cs elseif tok == "." then return tok, "dot", cs else -- number local n = tonumber(tok) if not n then error("bad number", n) end return n, "number", cs end end -- strings local function consume_string_whitespace (cs) -- \* * : nothing local s = {"\\"} while cs[1]:match("[ \t]") do table.insert(s, pop(cs)) end if cs[1] ~= "\n" then table.insert(s, cs[1]) return table.concat(s), cs end while cs[1]:match("%s") do pop(cs) end return cs[1], cs end local function consume_string_hexvalue (cs) -- \x; : specified character local u8ch = {} repeat local c = pop(cs) table.insert(u8ch, c) until c == ";" table.remove(u8ch) -- discard ';' return utf8_char(tonumber(table.concat(u8ch), 16)), cs end local function consume_string (cs) local str = {} local escapes = { a = "\a", b = "\b", t = "\t", n = "\n", r = "\r", ["\""] = "\"", ["\\"] = "\\", ["|"] = "|", [" "] = consume_string_whitespace, ["\t"] = consume_string_whitespace, ["\n"] = consume_string_whitespace, x = consume_string_hexvalue, } pop(cs) -- discard '"' repeat local c = pop(cs) if c == "\\" then c = cs[1] if escapes[c] then if type.luatype(escapes[c]) == "function" then c, cs = escapes[c](cs) table.insert(str, c) else table.insert(str, escapes[c]) end else table.insert(str, "\\"..c) end pop(cs) elseif c == "\"" then break else table.insert(str, c) end until #cs == 0 return type.string(str), "string", cs end local function consume_char_as (token_type) -- return a function that pops a character and returns it with -- TOKEN_TYPE return function (cs) return pop(cs), token_type, cs end end -- each function should take a list of characters and return the token, its -- type, and the rest of the characters m.readtable = {} m.readtable.chars = { ["("] = consume_char_as("open"), [")"] = consume_char_as("close"), ["'"] = consume_char_as("quote"), ["`"] = consume_char_as("quote"), [","] = consume_unquote, ["\""] = consume_string, [";"] = consume_comment, ["#"] = consume_literal, } m.readtable.regex = { ["%s"] = consume_whitespace, ["[%d.+-]"] = consume_number_etc, } m.readtable.default = -- default action if nothing else matches function (cs) local tok, cs = consume_token(cs) return tok, "symbol", cs end -- convenience function to make writing the regexen rules easier below local function based_num (base) return function (token) local n = tonumber(token:sub(3), base) if not n then error("bad number", token) end return n end end m.readtable.literals = { lit = { ["#t"] = true, ["#true"] = true, ["#f"] = false, ["#false"] = false, }, regex = { ["^#b"] = based_num(2), ["^#o"] = based_num(8), ["^#d"] = based_num(10), ["^#x"] = based_num(16), ["^#\\."] = function (tok) return type.character(tok:sub(3)) end, }, } -- add named characters for char, name in pairs(type.character_names) do m.readtable.literals.lit["#\\"..name] = type.character(char) end ---[[ READER MACROS ]]--- -- Each of these are named after the type of the token read and contain -- function taking (TOKEN, TYPE, PORT) and returning a lisp object m.readmacros = { close = function (token, _, _) error("unexpected", token) end, quote = function (token, _, port) local qs = { ["'"] = "quote", ["`"] = "quasiquote", [","] = "unquote", [",@"] = "unquote-splicing", } if not qs[token] then error("bad quote", token) end local Q = {qs[token]} table.insert(Q, m.read(port)) return type.list(Q) end, comment = function (_,_,_) return nil end, } ---[[ READ ]]--- function m.read (port) local function read_ahead(token, token_type) if token == eof then error("unexpected", token) end if token_type == "open" then -- this must be defined here because it calls read_ahead -- recursively. local L = {} repeat token, token_type = port:next(m.readtable) if token_type == "close" then return type.list(L) elseif token_type == "dot" then local fin = m.read(port) port:next(m.readtable) -- discard ')' return type.list(L, fin) else table.insert(L, read_ahead(token, token_type)) end until nil elseif m.readmacros[token_type] then return m.readmacros[token_type](token, token_type, port) else return token end end --- local token1, type1 = port:next(m.readtable) if token1 == eof then return eof else return read_ahead(token1, type1) end end function m.read_string (str) return m.read(input_port(str, "string")) end -------- return m