diff options
author | Case Duckworth | 2024-03-01 22:50:34 -0600 |
---|---|---|
committer | Case Duckworth | 2024-03-01 22:50:34 -0600 |
commit | f7d58cf08bbfe319f7df45156d0d3d2e07624edc (patch) | |
tree | f25653bb96be0c5976ce563c21d1e9257459e7a8 | |
parent | Change types to lowercase; add todos (diff) | |
download | lam-f7d58cf08bbfe319f7df45156d0d3d2e07624edc.tar.gz lam-f7d58cf08bbfe319f7df45156d0d3d2e07624edc.zip |
Rewrite read
Now works with strings and numbers and does \x...; escapes. Does not yet do symbol conversion or newline escapes.
-rw-r--r-- | read.lua | 181 |
1 files changed, 133 insertions, 48 deletions
diff --git a/read.lua b/read.lua index 43b272a..c89261c 100644 --- a/read.lua +++ b/read.lua | |||
@@ -1,73 +1,158 @@ | |||
1 | --- lam.read | 1 | --- lam.read |
2 | 2 | ||
3 | local read = {} | 3 | local read = {} |
4 | local utf8 = require "utf8" | ||
5 | table.unpack = table.unpack or unpack | ||
4 | 6 | ||
5 | local util = require "util" | 7 | local string_to_table = |
6 | 8 | function(str) | |
7 | function read.tokenize (str) | 9 | local tbl = {} |
8 | --[[ Convert a string of characters into a list of tokens ]] | 10 | for p, c in utf8.codes(str) do |
9 | assert(str, "No program given") | 11 | table.insert(tbl, c) |
10 | local tbl = {} | ||
11 | local word = "" | ||
12 | local push_word = | ||
13 | function () | ||
14 | if word:len() > 0 then | ||
15 | table.insert(tbl, word) | ||
16 | word = "" | ||
17 | end | ||
18 | end | 12 | end |
13 | return tbl | ||
14 | end | ||
19 | 15 | ||
20 | for c = 1, #str do | 16 | local bslash = { -- backslash characters |
21 | char = string.char(str:byte(c)) | 17 | a = "\a", |
22 | if char == " " or char == "\t" or char == "\n" then | 18 | b = "\b", |
23 | push_word() | 19 | t = "\t", |
24 | elseif char == "(" then | 20 | n = "\n", |
25 | push_word() | 21 | r = "\r", |
26 | table.insert(tbl, "(") | 22 | ["\""] = "\"", |
27 | elseif char == ")" then | 23 | ["\\"] = "\\", |
28 | push_word() | 24 | ["|"] = "|", |
29 | table.insert(tbl, ")") | 25 | |
30 | else | 26 | -- TODO: whitespace |
31 | word = word .. char | 27 | -- \<intraline whitespace>*<line ending> <intraline whitespace>* : |
32 | end | 28 | -- nothing |
29 | |||
30 | x = -- \x<hex scalar value>; : specified character | ||
31 | function (chars) | ||
32 | local u8ch = {} | ||
33 | repeat | ||
34 | local c = util.pop(chars) | ||
35 | table.insert(u8ch,c) | ||
36 | until c == ";" | ||
37 | table.remove(u8ch) -- remove semicolon | ||
38 | return | ||
39 | utf8.char(tonumber(table.concat(u8ch), 16)), | ||
40 | chars | ||
41 | end, | ||
42 | } | ||
43 | |||
44 | local consume_string = | ||
45 | function(chars) | ||
46 | local str = {} | ||
47 | repeat | ||
48 | local c = util.pop(chars) | ||
49 | if c == "\\" then | ||
50 | c = util.pop(chars) | ||
51 | if bslash[c] then | ||
52 | if type(bslash[c]) == "function" then | ||
53 | c, chars = bslash[c](chars) | ||
54 | table.insert(str, c) | ||
55 | else | ||
56 | table.insert(str, bslash[c]) | ||
57 | end | ||
58 | else | ||
59 | table.insert(str, "\\"..c) | ||
60 | end | ||
61 | elseif c == "\"" then | ||
62 | break | ||
63 | else | ||
64 | table.insert(str, c) | ||
65 | end | ||
66 | until #chars == 0 | ||
67 | return table.concat(str), chars | ||
33 | end | 68 | end |
34 | push_word() | ||
35 | return tbl | ||
36 | end | ||
37 | 69 | ||
38 | function read.read (str) | 70 | read.tokenize = |
39 | -- [[ Read a scheme expression from a string ]] | 71 | function (program) |
72 | if not program or program == "" then return nil end | ||
73 | local tokens = {} | ||
74 | local token = "" | ||
75 | local token_type = nil | ||
76 | |||
77 | local push_token = | ||
78 | function (type, tok) | ||
79 | type = type or token_type | ||
80 | token = tok or token | ||
81 | if token:len() > 0 then | ||
82 | table.insert(tokens, { | ||
83 | type = type, | ||
84 | value = token, }) | ||
85 | token = "" | ||
86 | token_type = nil | ||
87 | end | ||
88 | end | ||
40 | 89 | ||
41 | local function Atom (token) | 90 | local chars = string_to_table(program) |
42 | local n = tonumber(token) | 91 | while #chars > 0 do |
43 | if n then return n | 92 | local c = util.pop(chars) |
44 | else return tostring(token) | 93 | if c == "(" then |
94 | push_token() | ||
95 | push_token("begin_list", "(") | ||
96 | elseif c == ")" then | ||
97 | push_token() | ||
98 | push_token("end_list", ")") | ||
99 | elseif c:match("%s") then -- whitespace | ||
100 | push_token() | ||
101 | elseif c == "\"" then -- string | ||
102 | str, chars = consume_string(chars) | ||
103 | push_token("string", str) | ||
104 | elseif c:match("%d") then -- numbers | ||
105 | token = token .. c | ||
106 | token_type = token_type or "number" | ||
107 | else | ||
108 | token = token .. c | ||
109 | token_type = token_type or "symbol" | ||
110 | end | ||
45 | end | 111 | end |
112 | push_token() | ||
113 | return tokens | ||
46 | end | 114 | end |
47 | 115 | ||
48 | local function read_tokens (tokens) | 116 | read.tokentable = { |
49 | --[[ Read a list of tokens from `tokenize' ]] | 117 | string = |
118 | function (tok) | ||
119 | return tok.value | ||
120 | end, | ||
121 | number = | ||
122 | function (tok) | ||
123 | return tonumber(tok.value) | ||
124 | end, | ||
125 | symbol = | ||
126 | function (tok) -- TODO need to return a Symbol from types... | ||
127 | return tok.value | ||
128 | end, | ||
129 | } | ||
130 | |||
131 | read.parse = | ||
132 | function (tokens) | ||
50 | assert(next(tokens), "Unexpected EOF") | 133 | assert(next(tokens), "Unexpected EOF") |
51 | token = util.pop(tokens) | 134 | tok = util.pop(tokens) |
52 | if token == "(" then | 135 | if tok.value == "(" then |
53 | local L = {} | 136 | local L = {} |
54 | while tokens[1] ~= ")" do | 137 | while tokens[1].value ~= ")" do |
55 | table.insert(L, read_tokens(tokens)) | 138 | table.insert(L, read.parse(tokens)) |
56 | end | 139 | end |
57 | util.pop(tokens) -- remove ")" | 140 | util.pop(tokens) -- remove ")" |
58 | return L | 141 | return L |
59 | elseif token == ")" then | 142 | elseif tok.value == ")" then |
60 | error("Unexpected ')'") | 143 | error("Unexpected ')'") |
144 | elseif read.tokentable[tok.type] then | ||
145 | return read.tokentable[tok.type](tok) | ||
61 | else | 146 | else |
62 | return Atom(token) | 147 | error("Bad token: '" .. tok.value .. "'") |
63 | end | 148 | end |
64 | end | 149 | end |
65 | 150 | ||
66 | return read_tokens(read.tokenize(str)) | 151 | read.read = function (program) return read.parse(read.tokenize(program)) end |
67 | end | ||
68 | 152 | ||
153 | --- | ||
69 | return setmetatable(read, { __call = | 154 | return setmetatable(read, { __call = |
70 | function(_, str) | 155 | function(_, program) |
71 | return read.read(str) | 156 | return read.read(program) |
72 | end, | 157 | end, |
73 | }) | 158 | }) |