about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author: Case Duckworth, 2024-03-01 22:50:34 -0600
committer: Case Duckworth, 2024-03-01 22:50:34 -0600
commit: f7d58cf08bbfe319f7df45156d0d3d2e07624edc (patch)
tree: f25653bb96be0c5976ce563c21d1e9257459e7a8
parent: Change types to lowercase; add todos (diff)
download: lam-f7d58cf08bbfe319f7df45156d0d3d2e07624edc.tar.gz
lam-f7d58cf08bbfe319f7df45156d0d3d2e07624edc.zip
Rewrite read
Now works with strings and numbers, and handles \x<hex scalar value>; escapes.
Does not yet do symbol conversion or backslash-newline (line continuation) escapes.
-rw-r--r-- read.lua 181
1 file changed, 133 insertions, 48 deletions
diff --git a/read.lua b/read.lua
index 43b272a..c89261c 100644
--- a/read.lua
+++ b/read.lua
@@ -1,73 +1,158 @@
1--- lam.read 1--- lam.read
2 2
3local read = {} 3local read = {}
4local utf8 = require "utf8"
5table.unpack = table.unpack or unpack
4 6
5local util = require "util" 7local string_to_table =
6 8 function(str)
7function read.tokenize (str) 9 local tbl = {}
8 --[[ Convert a string of characters into a list of tokens ]] 10 for p, c in utf8.codes(str) do
9 assert(str, "No program given") 11 table.insert(tbl, c)
10 local tbl = {}
11 local word = ""
12 local push_word =
13 function ()
14 if word:len() > 0 then
15 table.insert(tbl, word)
16 word = ""
17 end
18 end 12 end
13 return tbl
14 end
19 15
20 for c = 1, #str do 16local bslash = { -- backslash characters
21 char = string.char(str:byte(c)) 17 a = "\a",
22 if char == " " or char == "\t" or char == "\n" then 18 b = "\b",
23 push_word() 19 t = "\t",
24 elseif char == "(" then 20 n = "\n",
25 push_word() 21 r = "\r",
26 table.insert(tbl, "(") 22 ["\""] = "\"",
27 elseif char == ")" then 23 ["\\"] = "\\",
28 push_word() 24 ["|"] = "|",
29 table.insert(tbl, ")") 25
30 else 26 -- TODO: whitespace
31 word = word .. char 27 -- \<intraline whitespace>*<line ending> <intraline whitespace>* :
32 end 28 -- nothing
29
30 x = -- \x<hex scalar value>; : specified character
31 function (chars)
32 local u8ch = {}
33 repeat
34 local c = util.pop(chars)
35 table.insert(u8ch,c)
36 until c == ";"
37 table.remove(u8ch) -- remove semicolon
38 return
39 utf8.char(tonumber(table.concat(u8ch), 16)),
40 chars
41 end,
42}
43
44local consume_string =
45 function(chars)
46 local str = {}
47 repeat
48 local c = util.pop(chars)
49 if c == "\\" then
50 c = util.pop(chars)
51 if bslash[c] then
52 if type(bslash[c]) == "function" then
53 c, chars = bslash[c](chars)
54 table.insert(str, c)
55 else
56 table.insert(str, bslash[c])
57 end
58 else
59 table.insert(str, "\\"..c)
60 end
61 elseif c == "\"" then
62 break
63 else
64 table.insert(str, c)
65 end
66 until #chars == 0
67 return table.concat(str), chars
33 end 68 end
34 push_word()
35 return tbl
36end
37 69
38function read.read (str) 70read.tokenize =
39 -- [[ Read a scheme expression from a string ]] 71 function (program)
72 if not program or program == "" then return nil end
73 local tokens = {}
74 local token = ""
75 local token_type = nil
76
77 local push_token =
78 function (type, tok)
79 type = type or token_type
80 token = tok or token
81 if token:len() > 0 then
82 table.insert(tokens, {
83 type = type,
84 value = token, })
85 token = ""
86 token_type = nil
87 end
88 end
40 89
41 local function Atom (token) 90 local chars = string_to_table(program)
42 local n = tonumber(token) 91 while #chars > 0 do
43 if n then return n 92 local c = util.pop(chars)
44 else return tostring(token) 93 if c == "(" then
94 push_token()
95 push_token("begin_list", "(")
96 elseif c == ")" then
97 push_token()
98 push_token("end_list", ")")
99 elseif c:match("%s") then -- whitespace
100 push_token()
101 elseif c == "\"" then -- string
102 str, chars = consume_string(chars)
103 push_token("string", str)
104 elseif c:match("%d") then -- numbers
105 token = token .. c
106 token_type = token_type or "number"
107 else
108 token = token .. c
109 token_type = token_type or "symbol"
110 end
45 end 111 end
112 push_token()
113 return tokens
46 end 114 end
47 115
48 local function read_tokens (tokens) 116read.tokentable = {
49 --[[ Read a list of tokens from `tokenize' ]] 117 string =
118 function (tok)
119 return tok.value
120 end,
121 number =
122 function (tok)
123 return tonumber(tok.value)
124 end,
125 symbol =
126 function (tok) -- TODO need to return a Symbol from types...
127 return tok.value
128 end,
129}
130
131read.parse =
132 function (tokens)
50 assert(next(tokens), "Unexpected EOF") 133 assert(next(tokens), "Unexpected EOF")
51 token = util.pop(tokens) 134 tok = util.pop(tokens)
52 if token == "(" then 135 if tok.value == "(" then
53 local L = {} 136 local L = {}
54 while tokens[1] ~= ")" do 137 while tokens[1].value ~= ")" do
55 table.insert(L, read_tokens(tokens)) 138 table.insert(L, read.parse(tokens))
56 end 139 end
57 util.pop(tokens) -- remove ")" 140 util.pop(tokens) -- remove ")"
58 return L 141 return L
59 elseif token == ")" then 142 elseif tok.value == ")" then
60 error("Unexpected ')'") 143 error("Unexpected ')'")
144 elseif read.tokentable[tok.type] then
145 return read.tokentable[tok.type](tok)
61 else 146 else
62 return Atom(token) 147 error("Bad token: '" .. tok.value .. "'")
63 end 148 end
64 end 149 end
65 150
66 return read_tokens(read.tokenize(str)) 151read.read = function (program) return read.parse(read.tokenize(program)) end
67end
68 152
153---
69return setmetatable(read, { __call = 154return setmetatable(read, { __call =
70 function(_, str) 155 function(_, program)
71 return read.read(str) 156 return read.read(program)
72 end, 157 end,
73}) 158})