--- lam.read local read = {} local type = require "type" local utf8 = require "utf8" local util = require "util" local unpack = table.unpack or unpack function program_characters (program) local chars = {} for pos, code in utf8.codes(program) do table.insert(chars, code) end return chars end local function consume_string_whitespace (chars) -- \* * : nothing local s = {"\\"} while chars[1]:match("[ \t]") do table.insert(s, util.pop(chars)) end if chars[1] ~= "\n" then table.insert(s, chars[1]) return table.concat(s), chars end while chars[1]:match("%s") do util.pop(chars) end return chars[1], chars end local function consume_string_hexvalue (chars) -- \x; : specified character local u8ch = {} repeat local c = util.pop(chars) table.insert(u8ch, c) until c == ";" table.remove(u8ch) -- remove semicolon return utf8.char(tonumber(table.concat(u8ch), 16)), chars end local function consume_string (chars) local str = {} local backslash = { a = "\a", b = "\b", t = "\t", n = "\n", r = "\r", ["\""] = "\"", ["\\"] = "\\", ["|"] = "|", [" "] = consume_string_whitespace, ["\t"] = consume_string_whitespace, ["\n"] = consume_string_whitespace, x = consume_string_hexvalue, } util.pop(chars) -- throw initial " away repeat local c = util.pop(chars) if c == [[\]] then c = chars[1] if backlash[c] then if type(backslash[c]) == "function" then c, chars = backslash[c](chars) table.insert(str, c) else table.insert(str, backlash[c]) end else table.insert(str, "\\"..c) end util.pop(chars) elseif c == [["]] then break else table.insert(str, c) end until #chars == 0 return table.concat(str), "string", chars end local function consume_token (chars) local tok = {} while chars[1]:match("[^%s()\"#'`,@;]") do table.insert(tok, util.pop(chars)) end return table.concat(tok), chars end local consume_symbol = consume_token local function consume_number (chars) local digits, chars = consume_token(chars) local num = tonumber(digits) if num == nil then error("Bad number: " .. num) end return num, chars end local function consume_whitespace (chars) while chars[1]:match("%s") do util.pop(chars) end return chars end local function consume_comment (chars) local comment = {} repeat table.insert(comment, util.pop(chars)) until #chars == 0 or chars[1]:match("\n") return table.concat(comment), "comment", chars end --- API read.readtable = { ["("] = function(chars) return util.pop(chars), "begin_list", chars end, [")"] = function(chars) return util.pop(chars), "end_list", chars end, ["\""] = consume_string, [";"] = consume_comment, -- ["#"] = -- ["'"] = -- ["`"] = -- [","] = } function read.scan (chars) local chars = chars return function() if #chars == 0 then return nil end local token, toktype = "", nil while true do if read.readtable[chars[1]] then token, toktype, chars = read.readtable[chars[1]](chars) return token, toktype elseif chars[1]:match("%s") then chars = consume_whitespace(chars) elseif chars[1]:match("%d") then token, chars = consume_number(chars) return token, "number" else token, chars = consume_symbol(chars) return token, "symbol" end end end end function read.tokenize (program) if not program or #program == 0 then return nil end local tokens = {} for token, toktype in read.scan(program_characters(program)) do table.insert(tokens, {type = toktype, value = token}) end return tokens end function read.parse (tokens) if not next(tokens) then return nil end local token = util.pop(tokens) if token.value == "(" then local L = {} while tokens[1].value ~= ")" do table.insert(L, read.parse(tokens)) end util.pop(tokens) -- remove the final ")" return type.List(L) elseif token.value == ")" then error("Unexpected ')'") else return token.value end end function read.read (program) return read.parse(read.tokenize(program)) end --- return read