--- lam.read
-- Reader: turns program text into a flat token list (`read.tokenize`) and a
-- token list into typed values (`read.parse`).  The module table is also
-- callable: `read(program)` is the same as `read.read(program)`.

local read = {}

local utf8 = require "utf8"
local types = require "types"

-- NOTE(review): `util` is never required in this file, so every `util.pop`
-- below resolves through a global.  Confirm the host program defines it.
-- The code here relies on `util.pop(t)` removing and returning `t[1]`.

-- Compatibility shim for Lua 5.1 / LuaJIT, where `unpack` is a global.
table.unpack = table.unpack or unpack

--- Split a string into a sequence of UTF-8 characters.
-- @param str the string to split
-- @return a table holding one character (as a string) per entry
local string_to_table = function(str)
   local tbl = {}
   for _, c in utf8.codes(str) do
      -- The standard `utf8.codes` yields integer code points; convert them
      -- back to strings so callers can compare against literals and use
      -- string methods.  Values that are already strings pass through.
      local ch = c
      if type(ch) == "number" then
         ch = utf8.char(ch)
      end
      tbl[#tbl + 1] = ch
   end
   return tbl
end

--- Handle a backslash followed by whitespace inside a string literal.
-- On entry `chars[1]` is the whitespace character after the backslash.
-- If the run of spaces/tabs is followed by a newline, the newline and all
-- whitespace after it are skipped and the first non-whitespace character is
-- returned.  Otherwise the backslash, the blanks and the next character are
-- returned verbatim.  In both cases the last returned character is left at
-- the front of `chars` for the caller to pop.
-- @param chars the remaining character sequence
-- @return the text to append (may be nil at end of input), and `chars`
local consume_whitespace = function (chars)
   local s = {"\\"} -- accumulator used when no newline follows
   while chars[1] and chars[1]:match("[ \t]") do
      table.insert(s, util.pop(chars))
   end
   if chars[1] ~= "\n" then
      s[#s + 1] = chars[1] -- no-op when the input is exhausted
      return table.concat(s), chars
   end
   while chars[1] and chars[1]:match("%s") do
      util.pop(chars)
   end
   return chars[1], chars
end

--- Handle a `\x<hex>;` escape inside a string literal.
-- On entry `chars[1]` is the "x".  The terminating ";" is left at the front
-- of `chars` so that the caller's trailing pop removes it.
-- @param chars the remaining character sequence
-- @return the decoded character, and `chars`
local consume_hexvalue = function (chars)
   util.pop(chars) -- drop the leading "x"; it is not part of the number
   local digits = {}
   while chars[1] and chars[1] ~= ";" do
      digits[#digits + 1] = util.pop(chars)
   end
   if chars[1] ~= ";" then
      error("Unterminated \\x escape: missing ';'")
   end
   local text = table.concat(digits)
   local code = tonumber(text, 16)
   if not code then
      error("Bad \\x escape: '" .. text .. "'")
   end
   return utf8.char(code), chars
end

-- Backslash escapes.  A string value is inserted as-is; a function value is
-- called with the remaining characters and returns the text to insert.
local string_bslash = {
   a = "\a",
   b = "\b",
   t = "\t",
   n = "\n",
   r = "\r",
   ["\""] = "\"",
   ["\\"] = "\\",
   ["|"] = "|",
   -- backslash + whitespace: line continuation (see consume_whitespace)
   [" "] = consume_whitespace,
   ["\t"] = consume_whitespace,
   ["\n"] = consume_whitespace,
   -- \x<hex>; : character given by its code point
   x = consume_hexvalue,
}

--- Read the body of a string literal.
-- On entry the opening quote has already been consumed.  Reading stops at
-- the closing quote or at end of input, whichever comes first.
-- @param chars the remaining character sequence
-- @return the string contents, and `chars`
local consume_string = function(chars)
   local str = {}
   while #chars > 0 do
      local c = util.pop(chars)
      if c == "\\" then
         local esc = chars[1]
         if esc == nil then
            -- Lone backslash at end of input: keep it literally.
            str[#str + 1] = "\\"
            break
         end
         local handler = string_bslash[esc]
         if type(handler) == "function" then
            local piece
            piece, chars = handler(chars)
            str[#str + 1] = piece -- no-op when the handler returns nil
         elseif handler then
            str[#str + 1] = handler
         else
            -- Unknown escape: keep the backslash and the character.
            str[#str + 1] = "\\" .. esc
         end
         -- Every branch leaves its last consumed character at the front.
         util.pop(chars)
      elseif c == "\"" then
         break
      else
         str[#str + 1] = c
      end
   end
   return table.concat(str), chars
end

--- Split program text into tokens.
-- Each token is a table `{ type = ..., value = ... }` where type is one of
-- "begin_list", "end_list", "string", "number" or "symbol".
-- @param program the source text
-- @return a sequence of tokens, or nil when `program` is nil or empty
read.tokenize = function (program)
   if not program or program == "" then
      return nil
   end
   local tokens = {}
   local token = ""
   local token_type = nil

   -- With no arguments, flush the pending token (if any).  With an explicit
   -- value, emit that token unconditionally so that an empty string literal
   -- still produces a token.
   local push_token = function (kind, tok)
      local explicit = tok ~= nil
      kind = kind or token_type
      token = tok or token
      if explicit or token:len() > 0 then
         table.insert(tokens, {
            type = kind,
            value = token,
         })
         token = ""
         token_type = nil
      end
   end

   local chars = string_to_table(program)
   while #chars > 0 do
      local c = util.pop(chars)
      if c == "(" then
         push_token()
         push_token("begin_list", "(")
      elseif c == ")" then
         push_token()
         push_token("end_list", ")")
      elseif c:match("%s") then -- whitespace
         push_token()
      elseif c == "\"" then -- string
         push_token() -- do not lose a token that directly precedes the quote
         local str
         str, chars = consume_string(chars)
         push_token("string", str)
      elseif c:match("%d") then -- numbers
         token = token .. c
         token_type = token_type or "number"
      else
         token = token .. c
         token_type = token_type or "symbol"
      end
   end
   push_token()
   return tokens
end

-- Constructors for atom tokens, keyed by token type.
read.tokentable = {
   string = function (tok) return types.String(tok.value) end,
   number = function (tok) return types.Number(tok.value) end,
   symbol = function (tok) return types.Symbol(tok.value) end,
}

-- True when `tok` is the list delimiter `ch`.  String tokens are excluded so
-- that the literals "(" and ")" are not mistaken for delimiters.
local is_delimiter = function (tok, ch)
   return tok.value == ch and tok.type ~= "string"
end

--- Parse one expression from the front of a token list.
-- Consumed tokens are removed from `tokens`.
-- @param tokens a token sequence as produced by `read.tokenize`
-- @return the value built by `types.List` or by a `read.tokentable` entry
-- Raises an error on premature end of input, a stray ")" or an unknown
-- token type.
read.parse = function (tokens)
   assert(tokens and next(tokens), "Unexpected EOF")
   local tok = util.pop(tokens)
   if is_delimiter(tok, "(") then
      local L = {}
      while true do
         local upcoming = tokens[1]
         if upcoming == nil then
            error("Unexpected EOF") -- list was never closed
         end
         if is_delimiter(upcoming, ")") then
            break
         end
         table.insert(L, read.parse(tokens))
      end
      util.pop(tokens) -- remove ")"
      return types.List(table.unpack(L))
   elseif is_delimiter(tok, ")") then
      error("Unexpected ')'")
   elseif read.tokentable[tok.type] then
      return read.tokentable[tok.type](tok)
   else
      error("Bad token: '" .. tok.value .. "'")
   end
end

--- Tokenize and parse the first expression of `program`.
-- @param program the source text
-- @return the parsed value
read.read = function (program)
   return read.parse(read.tokenize(program))
end

---
return setmetatable(read, {
   __call = function(_, program)
      return read.read(program)
   end,
})