-- read.lua
--
-- Post-patch contents of read.lua as introduced by commit
-- a72ff678da253fce46e8e4648f6e4cf5ce1ea9b4 ("uh new start", Case Duckworth,
-- 2024-03-10), reformatted as plain Lua and with review fixes applied.

--- lam.read
-- Tokenizer and parser: turns program text into a stream of
-- (value, type) tokens and then into nested lists.

local read = {}

-- The builtin type() is shadowed by the project's "type" module below, so
-- keep a handle on it for runtime type checks.
local builtin_type = type

local type = require "type"
local utf8 = require "utf8"
local util = require "util"
local unpack = table.unpack or unpack

-- NOTE(review): util.pop is assumed throughout to remove and return the
-- FIRST element of a table (every call site reads chars[1] and then pops).
-- Confirm against util.lua.

--- Split a program string into a table of characters.
-- NOTE(review): this function is a global, exactly as in the patch; it
-- probably wants to be `local`, but other files may already call it.
-- @param program string of program text
-- @return table with one string per character
function program_characters (program)
	local chars = {}
	for _, code in utf8.codes(program) do
		-- The standard utf8.codes yields integer code points; everything
		-- downstream calls string methods on the elements, so convert.
		-- Values that are already strings are kept as they are.
		if builtin_type(code) == "number" then
			code = utf8.char(code)
		end
		chars[#chars + 1] = code
	end
	return chars
end

--- Handle a backslash followed by whitespace inside a string literal.
-- \<whitespace>*<newline><whitespace>* : nothing
-- Called with chars[1] being the first whitespace character after the
-- backslash.  Consumes everything it handles and nothing more.
-- @param chars table of remaining characters
-- @return text to append to the string, and chars
local function consume_string_whitespace (chars)
	local skipped = {}
	while chars[1] and chars[1]:match("[ \t]") do
		skipped[#skipped + 1] = util.pop(chars)
	end
	if chars[1] ~= "\n" then
		-- Not a line continuation: keep the backslash and the whitespace
		-- literally.  The following character is left for the caller so a
		-- closing quote or another escape is still recognised.
		return "\\" .. table.concat(skipped), chars
	end
	-- Line continuation: drop the newline and any whitespace after it.
	while chars[1] and chars[1]:match("%s") do
		util.pop(chars)
	end
	return "", chars
end

--- Handle a hexadecimal character escape inside a string literal.
-- \x<hex digits>; : specified character
-- Called with chars[1] being the "x".  Consumes through the semicolon.
-- @param chars table of remaining characters
-- @return the character named by the escape, and chars
local function consume_string_hexvalue (chars)
	util.pop(chars) -- discard the "x" so it is not parsed as a digit
	local digits = {}
	while true do
		if #chars == 0 then
			error("Unterminated \\x escape in string")
		end
		local c = util.pop(chars)
		if c == ";" then break end
		digits[#digits + 1] = c
	end
	local hex = table.concat(digits)
	local code = tonumber(hex, 16)
	if code == nil then
		error("Bad hex escape: \\x" .. hex .. ";")
	end
	return utf8.char(code), chars
end

-- Escape sequences recognised after a backslash in a string literal.
-- String values are substituted directly; function values consume their
-- own input (including the dispatching character) and return the text to
-- append.
local string_escapes = {
	a = "\a",
	b = "\b",
	t = "\t",
	n = "\n",
	r = "\r",
	["\""] = "\"",
	["\\"] = "\\",
	["|"] = "|",
	[" "] = consume_string_whitespace,
	["\t"] = consume_string_whitespace,
	["\n"] = consume_string_whitespace,
	x = consume_string_hexvalue,
}

--- Read a string literal.
-- Called with chars[1] being the opening double quote.  Reading stops at
-- the closing quote, or at the end of input if the string is unterminated.
-- @param chars table of remaining characters
-- @return the string contents, the token type "string", and chars
local function consume_string (chars)
	local str = {}
	util.pop(chars) -- throw initial " away
	while #chars > 0 do
		local c = util.pop(chars)
		if c == "\\" then
			local nxt = chars[1]
			local escape = nxt and string_escapes[nxt]
			if nxt == nil then
				-- Dangling backslash at end of input: keep it literally.
				str[#str + 1] = "\\"
			elseif builtin_type(escape) == "function" then
				local text
				text, chars = escape(chars)
				str[#str + 1] = text
			elseif escape ~= nil then
				str[#str + 1] = escape
				util.pop(chars)
			else
				-- Unknown escape: keep backslash and character as written.
				str[#str + 1] = "\\" .. nxt
				util.pop(chars)
			end
		elseif c == "\"" then
			break
		else
			str[#str + 1] = c
		end
	end
	return table.concat(str), "string", chars
end

--- Read characters up to the next delimiter or the end of input.
-- @param chars table of remaining characters
-- @return the token text (possibly empty), and chars
local function consume_token (chars)
	local tok = {}
	while chars[1] and chars[1]:match("[^%s()\"#'`,@;]") do
		tok[#tok + 1] = util.pop(chars)
	end
	return table.concat(tok), chars
end

local consume_symbol = consume_token

--- Read a numeric token.
-- Raises an error if the token text is not accepted by tonumber.
-- @param chars table of remaining characters
-- @return the number, and chars
local function consume_number (chars)
	local digits
	digits, chars = consume_token(chars)
	local num = tonumber(digits)
	-- Report the offending text; `num` itself is nil here.
	if num == nil then error("Bad number: " .. digits) end
	return num, chars
end

--- Discard leading whitespace.
-- @param chars table of remaining characters
-- @return chars
local function consume_whitespace (chars)
	while chars[1] and chars[1]:match("%s") do
		util.pop(chars)
	end
	return chars
end

--- Read a comment up to, but not including, the next newline.
-- Called with chars[1] being the ";" that starts the comment.
-- @param chars table of remaining characters
-- @return the comment text, the token type "comment", and chars
local function consume_comment (chars)
	local comment = {}
	repeat
		comment[#comment + 1] = util.pop(chars)
	until #chars == 0 or chars[1]:match("\n")
	return table.concat(comment), "comment", chars
end

--- API

-- Maps a leading character to its reader.  Each reader takes the character
-- table and returns: token value, token type, character table.
read.readtable = {
	["("] = function(chars) return util.pop(chars), "begin_list", chars end,
	[")"] = function(chars) return util.pop(chars), "end_list", chars end,
	["\""] = consume_string,
	[";"] = consume_comment,
	-- ["#"] =
	-- ["'"] =
	-- ["`"] =
	-- [","] =
}

--- Make a token iterator over a table of characters.
-- Each call of the returned function yields the next token value and its
-- type, or nil once the input is exhausted.
-- @param chars table of characters, consumed as tokens are read
-- @return iterator function
function read.scan (chars)
	return function()
		while #chars > 0 do
			local c = chars[1]
			local reader = read.readtable[c]
			if reader then
				local token, toktype
				token, toktype, chars = reader(chars)
				return token, toktype
			elseif c:match("%s") then
				chars = consume_whitespace(chars)
			elseif c:match("%d") then
				local token
				token, chars = consume_number(chars)
				return token, "number"
			else
				local token
				token, chars = consume_symbol(chars)
				if token == "" then
					-- A delimiter with no readtable entry consumes
					-- nothing; fail instead of yielding empty symbols
					-- forever.
					error("Unexpected character: '" .. c .. "'")
				end
				return token, "symbol"
			end
		end
		return nil
	end
end

--- Tokenize a program string.
-- @param program string of program text
-- @return table of {type = ..., value = ...} tokens, or nil when program
--   is nil or empty
function read.tokenize (program)
	if not program or #program == 0 then return nil end
	local tokens = {}
	for token, toktype in read.scan(program_characters(program)) do
		tokens[#tokens + 1] = {type = toktype, value = token}
	end
	return tokens
end

-- True when `token` is the given delimiter.  The token type is checked
-- when present, so that a string literal whose contents are "(" or ")" is
-- not mistaken for a delimiter; tokens without a type fall back to
-- comparing the value.
local function token_is (token, toktype, value)
	if token.type ~= nil then
		return token.type == toktype
	end
	return token.value == value
end

--- Parse one expression from the front of a token table.
-- The tokens that make up the expression are removed from `tokens`.
-- Raises an error on an unexpected ")" or on an unclosed "(".
-- @param tokens table of tokens as produced by read.tokenize
-- @return the token's value, or the result of type.List for a
--   parenthesised form; nil when tokens is nil or empty
function read.parse (tokens)
	if not tokens or not next(tokens) then return nil end
	local token = util.pop(tokens)
	if token_is(token, "begin_list", "(") then
		local L = {}
		while true do
			local nxt = tokens[1]
			if nxt == nil then error("Unexpected EOF") end
			if token_is(nxt, "end_list", ")") then break end
			L[#L + 1] = read.parse(tokens)
		end
		util.pop(tokens) -- remove the final ")"
		return type.List(L)
	elseif token_is(token, "end_list", ")") then
		error("Unexpected ')'")
	else
		return token.value
	end
end

--- Tokenize and parse the first expression of a program string.
-- @param program string of program text
-- @return the parsed expression, or nil for nil or empty input
function read.read (program)
	return read.parse(read.tokenize(program))
end

---

return read