about summary refs log tree commit diff stats
path: root/read.lua
diff options
context:
space:
mode:
author Case Duckworth 2024-04-09 21:04:17 -0500
committer Case Duckworth 2024-04-09 21:04:29 -0500
commit 8ce2915e3c54598c2fda4fec0980ebfc2a3adf6e (patch)
tree 124ef31663ed570bed358dffd9c861d10fabce7b /read.lua
parent Uh (diff)
download lam-8ce2915e3c54598c2fda4fec0980ebfc2a3adf6e.tar.gz
lam-8ce2915e3c54598c2fda4fec0980ebfc2a3adf6e.zip
Reorganization
Diffstat (limited to 'read.lua')
-rw-r--r--read.lua380
1 files changed, 159 insertions, 221 deletions
diff --git a/read.lua b/read.lua index 332c919..6d55e23 100644 --- a/read.lua +++ b/read.lua
@@ -1,166 +1,29 @@
1--- lam.read 1--- lam.read
2 2
3local m = {} 3local m = {}
4local t = require "type" 4local type = require("type")
5local utf8 = require "utf8" 5local port = require("port")
6local pop = require("util").pop 6local eof, input_port = port.eof, port.input_port
7local util = require("util")
8local constantly, error, pop = util.constantly, util.error, util.pop
7 9
8-- TODO: 10local token_separators = "[%s#()\"'`,@;]"
9-- - string reading
10-- - probably more
11
12m.eof = setmetatable({}, {
13 __type = "EOF",
14 __tostring = function () return "#<eof>" end,
15})
16
17local function inport_next_token (port)
18 local tok, toktype
19 while true do
20 if #port.line == 0 then
21 if port.file then
22 local ln = port.file:read()
23 if ln == nil then return m.eof end
24 port.line = m.tochars(ln)
25 else
26 return nil
27 end
28 end
29 tok, toktype, port.line = m.scan(port.line)()
30 port.line = port.line or {}
31 if tok ~= nil then return tok, toktype end
32 end
33end
34
35function m.inport (source, kind)
36 -- KIND can be one of "file", "string"; defaults to "file"
37 -- SOURCE is the name of the file or the string to read, or nil; if nil,
38 -- read from standard input.
39 local f, l
40 local k = kind or "file"
41 if source then
42 if k == "file" then
43 f = io.open(source, "r")
44 elseif k == "string" then
45 l = m.tochars(source)
46 end
47 else
48 -- KIND is ignored here
49 f = io.input()
50 end
51 local t = {
52 file = f,
53 filename = source,
54 kind = kind,
55 line = l or {},
56 next_token = inport_next_token,
57 }
58 if t.file then t.close = function (self) self.file:close() end; end
59 local mt = {
60 __type = "port",
61 __tostring =
62 function (self)
63 return string.format("#<port %s>",
64 self.file or "(string)")
65 end,
66 }
67 return setmetatable(t, mt)
68end
69
70function m.tochars (s)
71 local chars = {}
72 for _, code in utf8.codes(s) do
73 table.insert(chars, code)
74 end
75 return chars
76end
77
78--- Consumers
79-- These take a table of characters (cs) and return:
80-- a token, its type, and the rest of the characters
81
82local token_separator = "[^%s#()\"'`,@;]"
83 11
84local function consume_token (cs) 12local function consume_token (cs)
85 local token = {} 13 local tok = {}
86 while #cs > 0 and cs[1]:match(token_separator) do 14 while #cs > 0 and not cs[1]:match(token_separators) do
87 table.insert(token, pop(cs)) 15 local c = pop(cs)
16 table.insert(tok, c)
88 end 17 end
89 return table.concat(token), "symbol", cs 18 return table.concat(tok), cs
90end 19end
91 20
92local function consume_whitespace (cs) 21---[[ READ TABLE ]]---
93 while #cs > 0 and cs[1]:match("%s") do pop(cs) end
94 return nil, nil, cs
95end
96
97local function consume_comment (cs)
98 local comment = {}
99 repeat table.insert(comment, pop(cs))
100 until #cs == 0 or cs[1]:match("\n")
101 return table.concat(comment), "comment", cs
102end
103
104local function idf (x)
105 return function () return x end
106end
107
108local function numf (base)
109 return function (token)
110 local n = tonumber(token:sub(3), base)
111 assert(n, "Can't read number: " .. token)
112 return n
113 end
114end
115
116local literals = {
117 literal = {
118 ["#t"] = idf(true),
119 ["#true"] = idf(true),
120 ["#f"] = idf(false),
121 ["#false"] = idf(false),
122 ["#\\space"] = idf(t.character(" ")),
123 ["#\\tab"] = idf(t.character("\t")),
124 ["#\\newline"] = idf(t.character("\n")),
125 },
126 match = {
127 ["^#b"] = numf(2),
128 ["^#o"] = numf(8),
129 ["^#d"] = numf(10),
130 ["^#x"] = numf(16),
131 ["^#\\"] = function (tok) return t.character(tok:sub(3)) end,
132 }
133}
134
135local function consume_literal (cs)
136 -- whitespace and parentheses character literals.
137 -- reverse the match test b/c it's already a complement
138 if cs[2] == "\\" and not cs[3]:match(token_separator) then
139 return type.character(cs[3])
140 end
141 pop(cs) -- discard '#'
142 local token, value, cs = consume_token(cs) -- todo: vectors #(...)
143 token = "#" .. token -- put '#' back
144
145 if literals.literal[token] then
146 value = literals.literal[token]()
147 else
148 for re, fn in pairs(literals.match) do
149 if token:match(re) then
150 value = fn(token)
151 end
152 end
153 end
154 -- TODO : if `nil' is to be a value in lam i'm going to have to figure
155 -- out some kind of 'lam nil' and 'lua nil' or something..
156 assert(value~=nil, "Can't read literal: " .. token)
157
158 return value, "literal", cs
159end
160 22
161--- Reading from a port 23-- each function should take a list of characters and return the token, its
162 24-- type, and the rest of the characters
163m.readtable = { 25m.readtable = {}
26m.readtable.chars = {
164 ["("] = function (cs) return pop(cs), "open", cs end, 27 ["("] = function (cs) return pop(cs), "open", cs end,
165 [")"] = function (cs) return pop(cs), "close", cs end, 28 [")"] = function (cs) return pop(cs), "close", cs end,
166 ["'"] = function (cs) return pop(cs), "quote", cs end, 29 ["'"] = function (cs) return pop(cs), "quote", cs end,
@@ -175,109 +38,184 @@ m.readtable = {
175 return ",", "quote", cs 38 return ",", "quote", cs
176 end 39 end
177 end, 40 end,
178 [";"] = consume_comment, 41 [";"] = -- comment
179 ["#"] = consume_literal, 42 function (cs)
180} 43 local comment = {}
44 while #cs > 0 and not cs[1]:match("\n") do
45 table.insert(comment, pop(cs))
46 end
47 return table.concat(comment), "comment", cs
48 end,
49 ["#"] = -- literal
50 function (cs)
51 local tok
52 -- bail on just '#\'
53 if not (cs[2] and cs[3]) then
54 cs = {}
55 error("bad literal", "#\\")
56 end
181 57
182--- TODO: Figure out how to read #f and #n properly 58 -- read '#\ ' and such correctly
59 if cs[2] == "\\" and cs[3]:match(token_separators) then
60 pop(cs) -- remove '\'
61 pop(cs) -- remove next character
62 return type.character(cs[1])
63 end
183 64
184-- Return an iterator over a character table, so you can do: 65 pop(cs) -- discard '#' ...
185-- for token, chars in scan(cs) do ... end 66 tok, cs = consume_token(cs)
186function m.scan (cs) 67 tok = "#" .. tok -- ... then put it back
187 local cs = cs 68
188 return function () 69 local val
189 if not next(cs) then return nil end 70 if m.readtable.literals.lit[tok] then
190 local token, toktype 71 val = m.readtable.literals.lit[tok]
191 while true do
192 if m.readtable[cs[1]] then
193 token, toktype, cs = m.readtable[cs[1]](cs)
194 return token, toktype, cs
195 elseif cs[1]:match("%s") then
196 --- should this just continue the loop?
197 -- i.e., remove `return'
198 return consume_whitespace(cs)
199 elseif cs[1]:match("[%d.+-]") then
200 -- numbers, +, -, ., ...
201 local token, _, cs = consume_token(cs)
202 if token:match("[-+]") or token == "..." then
203 return token, "symbol", cs
204 elseif token == "." then
205 return token, "dot", cs
206 else
207 local n = tonumber(token)
208 assert (n ~= nil, "Bad number: "..n)
209 return n, "number", cs
210 end
211 else 72 else
212 return consume_token(cs) 73 for re, fn in pairs(m.readtable.literals.regex)
74 do
75 if tok:match(re) then
76 val = fn(tok)
77 end
78 end
79 end
80
81 if val == nil then
82 error("bad literal", tok)
83 end
84 return val, "literal", cs
85 end,
86}
87m.readtable.regex = {
88 ["%s"] = -- whitespace
89 function (cs)
90 while #cs > 0 and cs[1]:match("%s") do
91 pop(cs)
92 end
93 return false, nil, cs
94 end,
95 ["[%d.+-]"] = -- numbers and symbols +, -, ., and ...
96 function (cs)
97 local tok
98 tok, cs = consume_token(cs)
99 if tok:match("^[-+]$") or tok == "..." then
100 return tok, "symbol", cs
101 elseif tok == "." then
102 return tok, "dot", cs
103 else -- number
104 local n = tonumber(tok)
105 if not n then
106 error("bad number", n)
107 end
108 return n, "number", cs
213 end 109 end
110 end,
111}
112m.readtable.default = -- default action if nothing else matches
113 function (cs)
114 local tok, cs = consume_token(cs)
115 return tok, "symbol", cs
116 end
117
118-- convenience function to make writing the regexen rules easier below
119local function based_num (base)
120 return function (token)
121 local n = tonumber(token:sub(3), base)
122 if not n then
123 error("bad number", token)
214 end 124 end
125 return n
215 end 126 end
216end 127end
217 128
218function m.readchar (port) 129m.readtable.literals = {
219 if #port.line > 0 then 130 lit = {
220 local ch = pop(port.line) 131 ["#t"] = true,
221 return ch 132 ["#true"] = true,
222 else 133 ["#f"] = false,
223 return port.file and port.file.read(1) 134 ["#false"] = false,
224 end 135 },
136 regex = {
137 ["^#b"] = based_num(2),
138 ["^#o"] = based_num(8),
139 ["^#d"] = based_num(10),
140 ["^#x"] = based_num(16),
141 ["^#\\."] =
142 function (tok)
143 return type.character(tok:sub(3))
144 end,
145 },
146}
147-- add named characters
148for char, name in pairs(type.character_names) do
149 m.readtable.literals.lit["#\\"..name] = type.character(char)
225end 150end
226 151
152---[[ READER MACROS ]]---
153-- Each of these is named after the type of the token read and contains a
154-- function taking (TOKEN, TYPE, PORT) and returning a lisp object
155
227m.readmacros = { 156m.readmacros = {
157 close =
158 function (token, _, _)
159 error("unexpected", token)
160 end,
228 quote = 161 quote =
229 function (tok, toktype, port) 162 function (token, _, port)
230 local qs = { 163 local qs = {
231 ["'"] = "quote", 164 ["'"] = "quote",
232 ["`"] = "quasiquote", 165 ["`"] = "quasiquote",
233 [","] = "unquote", 166 [","] = "unquote",
234 [",@"] = "unquote-splicing", 167 [",@"] = "unquote-splicing",
235 } 168 }
236 if not qs[tok] then 169 if not qs[token] then
237 error(string.format("Bad quote: '%s'\n", tok)) 170 error("bad quote", token)
238 end 171 end
239 local Q = {qs[tok]} 172 local Q = {qs[token]}
240 table.insert(Q, m.read(port)) 173 table.insert(Q, m.read(port))
241 return t.list(Q) 174 return type.list(Q)
242 end, 175 end,
243 comment = idf(nil) 176 comment = constantly(nil), -- throw comments away
244} 177}
245 178
179---[[ READ ]]---
180
246function m.read (port) 181function m.read (port)
247 local function read_ahead (tok, toktype) 182 local function read_ahead(token, token_type)
248 if tok == m.eof then error("Unexpected EOF") end 183 if token == eof then error("unexpected", token) end
249 if toktype == "open" then 184 if token_type == "open" then
185 -- this must be defined here because it calls read_ahead
186 -- recursively.
250 local L = {} 187 local L = {}
251 while true do 188 repeat
252 local tok, toktype = port:next_token() 189 token, token_type = port:next(m.readtable)
253 if toktype == "close" then 190 if token_type == "close" then
254 return t.list(L) 191 return type.list(L)
255 elseif toktype == "dot" then 192 elseif token_type == "dot" then
256 local fin = m.read(port) 193 local fin = m.read(port)
257 port:next_token() -- throw away ')' 194 port:next(m.readtable) -- discard ')'
258 return t.list(L, fin) 195 return type.list(L, fin)
259 else 196 else
260 table.insert(L, 197 table.insert(L,
261 read_ahead(tok, toktype)) 198 read_ahead(token, token_type))
262 end 199 end
263 end 200 until nil
264 elseif toktype == "close" then 201 elseif m.readmacros[token_type] then
265 error("Unexpected ')'") 202 return m.readmacros[token_type](token, token_type, port)
266 elseif m.readmacros[toktype] then 203 else
267 return m.readmacros[toktype](tok, toktype, port) 204 return token
268 else return tok
269 end 205 end
270 end 206 end
271 -- body of read 207 ---
272 local tok1, toktype1 = port:next_token() 208 local token1, type1 = port:next(m.readtable)
273 if tok1 == m.eof then return m.eof 209 if token1 == eof then
274 else return read_ahead(tok1, toktype1) 210 return eof
211 else
212 return read_ahead(token1, type1)
275 end 213 end
276end 214end
277 215
278function m.read_string (str) 216function m.read_string (str)
279 return m.read(m.inport(str, "string")) 217 return m.read(input_port(str, "string"))
280end 218end
281 219
282--- 220--------
283return m 221return m