diff options
author | Case Duckworth | 2024-04-09 21:04:17 -0500 |
---|---|---|
committer | Case Duckworth | 2024-04-09 21:04:29 -0500 |
commit | 8ce2915e3c54598c2fda4fec0980ebfc2a3adf6e (patch) | |
tree | 124ef31663ed570bed358dffd9c861d10fabce7b /read.lua | |
parent | Uh (diff) | |
download | lam-8ce2915e3c54598c2fda4fec0980ebfc2a3adf6e.tar.gz lam-8ce2915e3c54598c2fda4fec0980ebfc2a3adf6e.zip |
Reorganization
Diffstat (limited to 'read.lua')
-rw-r--r-- | read.lua | 380 |
1 files changed, 159 insertions, 221 deletions
diff --git a/read.lua b/read.lua index 332c919..6d55e23 100644 --- a/read.lua +++ b/read.lua | |||
@@ -1,166 +1,29 @@ | |||
1 | --- lam.read | 1 | --- lam.read |
2 | 2 | ||
3 | local m = {} | 3 | local m = {} |
4 | local t = require "type" | 4 | local type = require("type") |
5 | local utf8 = require "utf8" | 5 | local port = require("port") |
6 | local pop = require("util").pop | 6 | local eof, input_port = port.eof, port.input_port |
7 | local util = require("util") | ||
8 | local constantly, error, pop = util.constantly, util.error, util.pop | ||
7 | 9 | ||
8 | -- TODO: | 10 | local token_separators = "[%s#()\"'`,@;]" |
9 | -- - string reading | ||
10 | -- - probably more | ||
11 | |||
12 | m.eof = setmetatable({}, { | ||
13 | __type = "EOF", | ||
14 | __tostring = function () return "#<eof>" end, | ||
15 | }) | ||
16 | |||
17 | local function inport_next_token (port) | ||
18 | local tok, toktype | ||
19 | while true do | ||
20 | if #port.line == 0 then | ||
21 | if port.file then | ||
22 | local ln = port.file:read() | ||
23 | if ln == nil then return m.eof end | ||
24 | port.line = m.tochars(ln) | ||
25 | else | ||
26 | return nil | ||
27 | end | ||
28 | end | ||
29 | tok, toktype, port.line = m.scan(port.line)() | ||
30 | port.line = port.line or {} | ||
31 | if tok ~= nil then return tok, toktype end | ||
32 | end | ||
33 | end | ||
34 | |||
35 | function m.inport (source, kind) | ||
36 | -- KIND can be one of "file", "string"; defaults to "file" | ||
37 | -- SOURCE is the name of the file or the string to read, or nil; if nil, | ||
38 | -- read from standard input. | ||
39 | local f, l | ||
40 | local k = kind or "file" | ||
41 | if source then | ||
42 | if k == "file" then | ||
43 | f = io.open(source, "r") | ||
44 | elseif k == "string" then | ||
45 | l = m.tochars(source) | ||
46 | end | ||
47 | else | ||
48 | -- KIND is ignored here | ||
49 | f = io.input() | ||
50 | end | ||
51 | local t = { | ||
52 | file = f, | ||
53 | filename = source, | ||
54 | kind = kind, | ||
55 | line = l or {}, | ||
56 | next_token = inport_next_token, | ||
57 | } | ||
58 | if t.file then t.close = function (self) self.file:close() end; end | ||
59 | local mt = { | ||
60 | __type = "port", | ||
61 | __tostring = | ||
62 | function (self) | ||
63 | return string.format("#<port %s>", | ||
64 | self.file or "(string)") | ||
65 | end, | ||
66 | } | ||
67 | return setmetatable(t, mt) | ||
68 | end | ||
69 | |||
70 | function m.tochars (s) | ||
71 | local chars = {} | ||
72 | for _, code in utf8.codes(s) do | ||
73 | table.insert(chars, code) | ||
74 | end | ||
75 | return chars | ||
76 | end | ||
77 | |||
78 | --- Consumers | ||
79 | -- These take a table of characters (cs) and return: | ||
80 | -- a token, its type, and the rest of the characters | ||
81 | |||
82 | local token_separator = "[^%s#()\"'`,@;]" | ||
83 | 11 | ||
84 | local function consume_token (cs) | 12 | local function consume_token (cs) |
85 | local token = {} | 13 | local tok = {} |
86 | while #cs > 0 and cs[1]:match(token_separator) do | 14 | while #cs > 0 and not cs[1]:match(token_separators) do |
87 | table.insert(token, pop(cs)) | 15 | local c = pop(cs) |
16 | table.insert(tok, c) | ||
88 | end | 17 | end |
89 | return table.concat(token), "symbol", cs | 18 | return table.concat(tok), cs |
90 | end | 19 | end |
91 | 20 | ||
92 | local function consume_whitespace (cs) | 21 | ---[[ READ TABLE ]]--- |
93 | while #cs > 0 and cs[1]:match("%s") do pop(cs) end | ||
94 | return nil, nil, cs | ||
95 | end | ||
96 | |||
97 | local function consume_comment (cs) | ||
98 | local comment = {} | ||
99 | repeat table.insert(comment, pop(cs)) | ||
100 | until #cs == 0 or cs[1]:match("\n") | ||
101 | return table.concat(comment), "comment", cs | ||
102 | end | ||
103 | |||
104 | local function idf (x) | ||
105 | return function () return x end | ||
106 | end | ||
107 | |||
108 | local function numf (base) | ||
109 | return function (token) | ||
110 | local n = tonumber(token:sub(3), base) | ||
111 | assert(n, "Can't read number: " .. token) | ||
112 | return n | ||
113 | end | ||
114 | end | ||
115 | |||
116 | local literals = { | ||
117 | literal = { | ||
118 | ["#t"] = idf(true), | ||
119 | ["#true"] = idf(true), | ||
120 | ["#f"] = idf(false), | ||
121 | ["#false"] = idf(false), | ||
122 | ["#\\space"] = idf(t.character(" ")), | ||
123 | ["#\\tab"] = idf(t.character("\t")), | ||
124 | ["#\\newline"] = idf(t.character("\n")), | ||
125 | }, | ||
126 | match = { | ||
127 | ["^#b"] = numf(2), | ||
128 | ["^#o"] = numf(8), | ||
129 | ["^#d"] = numf(10), | ||
130 | ["^#x"] = numf(16), | ||
131 | ["^#\\"] = function (tok) return t.character(tok:sub(3)) end, | ||
132 | } | ||
133 | } | ||
134 | |||
135 | local function consume_literal (cs) | ||
136 | -- whitespace and parentheses character literals. | ||
137 | -- reverse the match test b/c it's already a complement | ||
138 | if cs[2] == "\\" and not cs[3]:match(token_separator) then | ||
139 | return type.character(cs[3]) | ||
140 | end | ||
141 | pop(cs) -- discard '#' | ||
142 | local token, value, cs = consume_token(cs) -- todo: vectors #(...) | ||
143 | token = "#" .. token -- put '#' back | ||
144 | |||
145 | if literals.literal[token] then | ||
146 | value = literals.literal[token]() | ||
147 | else | ||
148 | for re, fn in pairs(literals.match) do | ||
149 | if token:match(re) then | ||
150 | value = fn(token) | ||
151 | end | ||
152 | end | ||
153 | end | ||
154 | -- TODO : if `nil' is to be a value in lam i'm going to have to figure | ||
155 | -- out some kind of 'lam nil' and 'lua nil' or something.. | ||
156 | assert(value~=nil, "Can't read literal: " .. token) | ||
157 | |||
158 | return value, "literal", cs | ||
159 | end | ||
160 | 22 | ||
161 | --- Reading from a port | 23 | -- each function should take a list of characters and return the token, its |
162 | 24 | -- type, and the rest of the characters | |
163 | m.readtable = { | 25 | m.readtable = {} |
26 | m.readtable.chars = { | ||
164 | ["("] = function (cs) return pop(cs), "open", cs end, | 27 | ["("] = function (cs) return pop(cs), "open", cs end, |
165 | [")"] = function (cs) return pop(cs), "close", cs end, | 28 | [")"] = function (cs) return pop(cs), "close", cs end, |
166 | ["'"] = function (cs) return pop(cs), "quote", cs end, | 29 | ["'"] = function (cs) return pop(cs), "quote", cs end, |
@@ -175,109 +38,184 @@ m.readtable = { | |||
175 | return ",", "quote", cs | 38 | return ",", "quote", cs |
176 | end | 39 | end |
177 | end, | 40 | end, |
178 | [";"] = consume_comment, | 41 | [";"] = -- comment |
179 | ["#"] = consume_literal, | 42 | function (cs) |
180 | } | 43 | local comment = {} |
44 | while #cs > 0 and not cs[1]:match("\n") do | ||
45 | table.insert(comment, pop(cs)) | ||
46 | end | ||
47 | return table.concat(comment), "comment", cs | ||
48 | end, | ||
49 | ["#"] = -- literal | ||
50 | function (cs) | ||
51 | local tok | ||
52 | -- bail on just '#\' | ||
53 | if not (cs[2] and cs[3]) then | ||
54 | cs = {} | ||
55 | error("bad literal", "#\\") | ||
56 | end | ||
181 | 57 | ||
182 | --- TODO: Figure out how to read #f and #n properly | 58 | -- read '#\ ' and such correctly |
59 | if cs[2] == "\\" and cs[3]:match(token_separators) then | ||
60 | pop(cs) -- remove '\' | ||
61 | pop(cs) -- remove next character | ||
62 | return type.character(cs[1]) | ||
63 | end | ||
183 | 64 | ||
184 | -- Return an iterator over a character table, so you can do: | 65 | pop(cs) -- discard '#' ... |
185 | -- for token, chars in scan(cs) do ... end | 66 | tok, cs = consume_token(cs) |
186 | function m.scan (cs) | 67 | tok = "#" .. tok -- ... then put it back |
187 | local cs = cs | 68 | |
188 | return function () | 69 | local val |
189 | if not next(cs) then return nil end | 70 | if m.readtable.literals.lit[tok] then |
190 | local token, toktype | 71 | val = m.readtable.literals.lit[tok] |
191 | while true do | ||
192 | if m.readtable[cs[1]] then | ||
193 | token, toktype, cs = m.readtable[cs[1]](cs) | ||
194 | return token, toktype, cs | ||
195 | elseif cs[1]:match("%s") then | ||
196 | --- should this just continue the loop? | ||
197 | -- i.e., remove `return' | ||
198 | return consume_whitespace(cs) | ||
199 | elseif cs[1]:match("[%d.+-]") then | ||
200 | -- numbers, +, -, ., ... | ||
201 | local token, _, cs = consume_token(cs) | ||
202 | if token:match("[-+]") or token == "..." then | ||
203 | return token, "symbol", cs | ||
204 | elseif token == "." then | ||
205 | return token, "dot", cs | ||
206 | else | ||
207 | local n = tonumber(token) | ||
208 | assert (n ~= nil, "Bad number: "..n) | ||
209 | return n, "number", cs | ||
210 | end | ||
211 | else | 72 | else |
212 | return consume_token(cs) | 73 | for re, fn in pairs(m.readtable.literals.regex) |
74 | do | ||
75 | if tok:match(re) then | ||
76 | val = fn(tok) | ||
77 | end | ||
78 | end | ||
79 | end | ||
80 | |||
81 | if val == nil then | ||
82 | error("bad literal", tok) | ||
83 | end | ||
84 | return val, "literal", cs | ||
85 | end, | ||
86 | } | ||
87 | m.readtable.regex = { | ||
88 | ["%s"] = -- whitespace | ||
89 | function (cs) | ||
90 | while #cs > 0 and cs[1]:match("%s") do | ||
91 | pop(cs) | ||
92 | end | ||
93 | return false, nil, cs | ||
94 | end, | ||
95 | ["[%d.+-]"] = -- numbers and symbols +, -, ., and ... | ||
96 | function (cs) | ||
97 | local tok | ||
98 | tok, cs = consume_token(cs) | ||
99 | if tok:match("^[-+]$") or tok == "..." then | ||
100 | return tok, "symbol", cs | ||
101 | elseif tok == "." then | ||
102 | return tok, "dot", cs | ||
103 | else -- number | ||
104 | local n = tonumber(tok) | ||
105 | if not n then | ||
106 | error("bad number", n) | ||
107 | end | ||
108 | return n, "number", cs | ||
213 | end | 109 | end |
110 | end, | ||
111 | } | ||
112 | m.readtable.default = -- default action if nothing else matches | ||
113 | function (cs) | ||
114 | local tok, cs = consume_token(cs) | ||
115 | return tok, "symbol", cs | ||
116 | end | ||
117 | |||
118 | -- convenience function to make writing the regexen rules easier below | ||
119 | local function based_num (base) | ||
120 | return function (token) | ||
121 | local n = tonumber(token:sub(3), base) | ||
122 | if not n then | ||
123 | error("bad number", token) | ||
214 | end | 124 | end |
125 | return n | ||
215 | end | 126 | end |
216 | end | 127 | end |
217 | 128 | ||
218 | function m.readchar (port) | 129 | m.readtable.literals = { |
219 | if #port.line > 0 then | 130 | lit = { |
220 | local ch = pop(port.line) | 131 | ["#t"] = true, |
221 | return ch | 132 | ["#true"] = true, |
222 | else | 133 | ["#f"] = false, |
223 | return port.file and port.file.read(1) | 134 | ["#false"] = false, |
224 | end | 135 | }, |
136 | regex = { | ||
137 | ["^#b"] = based_num(2), | ||
138 | ["^#o"] = based_num(8), | ||
139 | ["^#d"] = based_num(10), | ||
140 | ["^#x"] = based_num(16), | ||
141 | ["^#\\."] = | ||
142 | function (tok) | ||
143 | return type.character(tok:sub(3)) | ||
144 | end, | ||
145 | }, | ||
146 | } | ||
147 | -- add named characters | ||
148 | for char, name in pairs(type.character_names) do | ||
149 | m.readtable.literals.lit["#\\"..name] = type.character(char) | ||
225 | end | 150 | end |
226 | 151 | ||
152 | ---[[ READER MACROS ]]--- | ||
153 | -- Each of these is named after the type of the token read and contains a | ||
154 | -- function taking (TOKEN, TYPE, PORT) and returning a lisp object | ||
155 | |||
227 | m.readmacros = { | 156 | m.readmacros = { |
157 | close = | ||
158 | function (token, _, _) | ||
159 | error("unexpected", token) | ||
160 | end, | ||
228 | quote = | 161 | quote = |
229 | function (tok, toktype, port) | 162 | function (token, _, port) |
230 | local qs = { | 163 | local qs = { |
231 | ["'"] = "quote", | 164 | ["'"] = "quote", |
232 | ["`"] = "quasiquote", | 165 | ["`"] = "quasiquote", |
233 | [","] = "unquote", | 166 | [","] = "unquote", |
234 | [",@"] = "unquote-splicing", | 167 | [",@"] = "unquote-splicing", |
235 | } | 168 | } |
236 | if not qs[tok] then | 169 | if not qs[token] then |
237 | error(string.format("Bad quote: '%s'\n", tok)) | 170 | error("bad quote", token) |
238 | end | 171 | end |
239 | local Q = {qs[tok]} | 172 | local Q = {qs[token]} |
240 | table.insert(Q, m.read(port)) | 173 | table.insert(Q, m.read(port)) |
241 | return t.list(Q) | 174 | return type.list(Q) |
242 | end, | 175 | end, |
243 | comment = idf(nil) | 176 | comment = constantly(nil), -- throw comments away |
244 | } | 177 | } |
245 | 178 | ||
179 | ---[[ READ ]]--- | ||
180 | |||
246 | function m.read (port) | 181 | function m.read (port) |
247 | local function read_ahead (tok, toktype) | 182 | local function read_ahead(token, token_type) |
248 | if tok == m.eof then error("Unexpected EOF") end | 183 | if token == eof then error("unexpected", token) end |
249 | if toktype == "open" then | 184 | if token_type == "open" then |
185 | -- this must be defined here because it calls read_ahead | ||
186 | -- recursively. | ||
250 | local L = {} | 187 | local L = {} |
251 | while true do | 188 | repeat |
252 | local tok, toktype = port:next_token() | 189 | token, token_type = port:next(m.readtable) |
253 | if toktype == "close" then | 190 | if token_type == "close" then |
254 | return t.list(L) | 191 | return type.list(L) |
255 | elseif toktype == "dot" then | 192 | elseif token_type == "dot" then |
256 | local fin = m.read(port) | 193 | local fin = m.read(port) |
257 | port:next_token() -- throw away ')' | 194 | port:next(m.readtable) -- discard ')' |
258 | return t.list(L, fin) | 195 | return type.list(L, fin) |
259 | else | 196 | else |
260 | table.insert(L, | 197 | table.insert(L, |
261 | read_ahead(tok, toktype)) | 198 | read_ahead(token, token_type)) |
262 | end | 199 | end |
263 | end | 200 | until nil |
264 | elseif toktype == "close" then | 201 | elseif m.readmacros[token_type] then |
265 | error("Unexpected ')'") | 202 | return m.readmacros[token_type](token, token_type, port) |
266 | elseif m.readmacros[toktype] then | 203 | else |
267 | return m.readmacros[toktype](tok, toktype, port) | 204 | return token |
268 | else return tok | ||
269 | end | 205 | end |
270 | end | 206 | end |
271 | -- body of read | 207 | --- |
272 | local tok1, toktype1 = port:next_token() | 208 | local token1, type1 = port:next(m.readtable) |
273 | if tok1 == m.eof then return m.eof | 209 | if token1 == eof then |
274 | else return read_ahead(tok1, toktype1) | 210 | return eof |
211 | else | ||
212 | return read_ahead(token1, type1) | ||
275 | end | 213 | end |
276 | end | 214 | end |
277 | 215 | ||
278 | function m.read_string (str) | 216 | function m.read_string (str) |
279 | return m.read(m.inport(str, "string")) | 217 | return m.read(input_port(str, "string")) |
280 | end | 218 | end |
281 | 219 | ||
282 | --- | 220 | -------- |
283 | return m | 221 | return m |