--- lam.read
-- Reader for lam: `read.tokenize` splits program text into tokens and
-- `read.parse` turns a token list into nested Lua tables (lists) and atoms.
-- The module table is callable: `read(program)` is `read.read(program)`.

local read = {}

local utf8 = require "utf8"
table.unpack = table.unpack or unpack

--- Remove and return the first element of a sequence.
-- `util` is not required anywhere in this file, so it is looked up as a
-- global at call time and `util.pop` is used when present.  Otherwise fall
-- back to `table.remove(seq, 1)`, which is the behaviour the parser relies on
-- (it inspects `tokens[1]` and then pops it).
-- NOTE(review): presumably this is exactly what `util.pop` does -- confirm.
-- @param seq sequence to pop from (mutated)
-- @return the removed head element, or nil when `seq` is empty
local function pop(seq)
  local global_util = rawget(_G, "util")
  if type(global_util) == "table" and type(global_util.pop) == "function" then
    return global_util.pop(seq)
  end
  return table.remove(seq, 1)
end

--- Split a UTF-8 string into a sequence of one-character strings.
-- `utf8.codes` yields integer code points, so each one is converted back to
-- its string form; the tokenizer compares characters against string literals
-- and calls string methods on them.
-- @param str UTF-8 encoded string
-- @return sequence of single-character strings
local string_to_table = function (str)
  local tbl = {}
  for _, c in utf8.codes(str) do
    if type(c) == "number" then
      c = utf8.char(c)
    end
    tbl[#tbl + 1] = c
  end
  return tbl
end

-- Backslash escapes recognised inside string literals.  A value is either the
-- replacement text or a function `(chars, pos) -> text, next_pos`, where
-- `pos` is the index of the first character after the escape letter.
local bslash = { -- backslash characters
  a = "\a",
  b = "\b",
  t = "\t",
  n = "\n",
  r = "\r",
  ["\""] = "\"",
  ["\\"] = "\\",
  ["|"] = "|",
  -- TODO: whitespace
  -- \* * : nothing
  x = -- \x<hex digits>; : specified character
    function (chars, pos)
      local semi = pos
      while chars[semi] ~= nil and chars[semi] ~= ";" do
        semi = semi + 1
      end
      if chars[semi] == nil then
        -- Without this check a missing ';' would scan forever.
        error("Unterminated \\x escape in string")
      end
      local hex = table.concat(chars, "", pos, semi - 1)
      local code = tonumber(hex, 16)
      if not code or code < 0 then
        error("Bad \\x escape in string: '" .. hex .. "'")
      end
      return utf8.char(code), semi + 1 -- skip the semicolon
    end,
}

--- Read the body of a string literal.
-- Reading stops after the closing double quote.  If the input ends first,
-- whatever was collected so far is returned.
-- @param chars sequence of single-character strings
-- @param pos index of the first character after the opening quote
-- @return the decoded string
-- @return index of the first character after the literal
local consume_string = function (chars, pos)
  local buf = {}
  local n = #chars
  while pos <= n do
    local c = chars[pos]
    pos = pos + 1
    if c == "\"" then
      break
    elseif c == "\\" then
      local esc = chars[pos]
      if esc == nil then
        -- Backslash is the last character of the input: keep it literally.
        buf[#buf + 1] = "\\"
      else
        pos = pos + 1
        local replacement = bslash[esc]
        if type(replacement) == "function" then
          local text
          text, pos = replacement(chars, pos)
          buf[#buf + 1] = text
        elseif replacement then
          buf[#buf + 1] = replacement
        else
          -- Unknown escape: keep the backslash and the character.
          buf[#buf + 1] = "\\" .. esc
        end
      end
    else
      buf[#buf + 1] = c
    end
  end
  return table.concat(buf), pos
end

--- Split program text into a flat list of tokens.
-- Each token is a table `{ type = ..., value = ... }` where `type` is one of
-- "begin_list", "end_list", "string", "number" or "symbol".  A bare token is
-- typed by its first character: a digit makes it a "number", anything else a
-- "symbol".
-- @param program source text
-- @return sequence of tokens, or nil when `program` is nil/false or ""
read.tokenize = function (program)
  if not program or program == "" then return nil end

  local tokens = {}
  local token = ""
  local token_type = nil

  -- Emit the bare token accumulated so far, if any, and reset the state.
  local flush = function ()
    if #token > 0 then
      tokens[#tokens + 1] = { type = token_type, value = token }
    end
    token = ""
    token_type = nil
  end

  -- Emit a complete token.  Any pending bare token is flushed first so that
  -- it is not lost, and the value may be empty (e.g. the literal "").
  local emit = function (kind, value)
    flush()
    tokens[#tokens + 1] = { type = kind, value = value }
  end

  local chars = string_to_table(program)
  local pos, n = 1, #chars
  while pos <= n do
    local c = chars[pos]
    pos = pos + 1
    if c == "(" then
      emit("begin_list", "(")
    elseif c == ")" then
      emit("end_list", ")")
    elseif c:match("%s") then -- whitespace
      flush()
    elseif c == "\"" then -- string
      local str
      str, pos = consume_string(chars, pos)
      emit("string", str)
    elseif c:match("%d") then -- numbers
      token = token .. c
      token_type = token_type or "number"
    else
      token = token .. c
      token_type = token_type or "symbol"
    end
  end
  flush()
  return tokens
end

-- Converters from an atom token to its Lua value, keyed by token type.
read.tokentable = {
  string = function (tok) return tok.value end,
  number = function (tok)
    local n = tonumber(tok.value)
    if n == nil then
      -- Starts with a digit but is not a number (e.g. "1abc"): read it as a
      -- symbol rather than silently producing nil.
      return read.tokentable.symbol(tok)
    end
    return n
  end,
  symbol = function (tok)
    -- TODO need to return a Symbol from types...
    return tok.value
  end,
}

-- Parentheses are recognised by token type so that the string literals "("
-- and ")" stay strings.  Tokens without a `type` field fall back to the
-- value comparison.
local is_list_open = function (tok)
  if tok.type ~= nil then return tok.type == "begin_list" end
  return tok.value == "("
end

local is_list_close = function (tok)
  if tok.type ~= nil then return tok.type == "end_list" end
  return tok.value == ")"
end

--- Parse one expression from the front of a token list.
-- The tokens that make up the expression are removed from `tokens`.
-- @param tokens sequence of tokens as produced by `read.tokenize` (mutated)
-- @return a Lua sequence for a list, or the converted atom
-- Raises "Unexpected EOF" when the tokens run out (including a nil or empty
-- list, or an unclosed "("), "Unexpected ')'" on a stray closing paren, and
-- "Bad token: ..." for a token type with no entry in `read.tokentable`.
read.parse = function (tokens)
  assert(type(tokens) == "table" and next(tokens) ~= nil, "Unexpected EOF")
  local tok = pop(tokens)
  if is_list_open(tok) then
    local L = {}
    while true do
      local upcoming = tokens[1]
      if upcoming == nil then
        error("Unexpected EOF")
      end
      if is_list_close(upcoming) then break end
      table.insert(L, read.parse(tokens))
    end
    pop(tokens) -- remove ")"
    return L
  elseif is_list_close(tok) then
    error("Unexpected ')'")
  elseif read.tokentable[tok.type] then
    return read.tokentable[tok.type](tok)
  else
    error("Bad token: '" .. tostring(tok.value) .. "'")
  end
end

--- Tokenize and parse the first expression in `program`.
-- @param program source text
-- @return the parsed expression
read.read = function (program)
  return read.parse(read.tokenize(program))
end

---
return setmetatable(read, {
  __call = function (_, program)
    return read.read(program)
  end,
})