about summary refs log tree commit diff stats
path: root/read.lua
diff options
context:
space:
mode:
authorCase Duckworth2024-03-30 22:20:36 -0500
committerCase Duckworth2024-03-30 22:20:36 -0500
commitab8a02fd30451207578927c7e69aa397ad596459 (patch)
tree24803910776ed692f1610f44e35d0f23b9712ca3 /read.lua
parentSpecial-case '.', '...', '+', '-' (diff)
downloadlam-ab8a02fd30451207578927c7e69aa397ad596459.tar.gz
lam-ab8a02fd30451207578927c7e69aa397ad596459.zip
Read from ports now
Diffstat (limited to 'read.lua')
-rw-r--r--read.lua318
1 files changed, 129 insertions, 189 deletions
diff --git a/read.lua b/read.lua index f23c5cc..226af51 100644 --- a/read.lua +++ b/read.lua
@@ -1,230 +1,170 @@
1--- lam.read 1--- lam.read
2 2
3local read = {} 3local m = {}
4local type = require "type" 4local t = require "type"
5local utf8 = require "utf8" 5local utf8 = require "utf8"
6local util = require "util" 6local pop = require("util").pop
7local pop = util.pop 7
8local unpack = table.unpack or unpack 8local pp = require("pp").pp
9 9
10local function program_characters (program) 10function m.inport (source, kind)
11 local chars = {} 11 -- KIND can be one of "file", "string"; defaults to "file"
12 for pos, code in utf8.codes(program) do 12 -- SOURCE is the name of the file or the string to read, or nil; if nil,
13 table.insert(chars, code) 13 -- read from standard input.
14 end 14 local f, l
15 return chars 15 local k = kind or "file"
16end 16 if source then
17 17 if kind == "file" then
18local function consume_string_whitespace (chars) 18 f = io.open(source, "r")
19 -- \<intraline ws>*<line ending> <intraline ws>* : nothing 19 elseif kind == "string" then
20 local s = {"\\"} 20 l = m.tochars(source)
21 while chars[1]:match("[ \t]") do 21 end
22 table.insert(s, pop(chars)) 22 else
23 end 23 -- KIND is ignored here
24 if chars[1] ~= "\n" then 24 f = io.input()
25 table.insert(s, chars[1])
26 return table.concat(s), chars
27 end
28 while chars[1]:match("%s") do
29 pop(chars)
30 end 25 end
31 return chars[1], chars 26 local t = {
32end 27 file = f,
33 28 line = l or {},
34local function consume_string_hexvalue (chars) 29 next_token =
35 -- \x<hex scalar value>; : specified character 30 function (self)
36 local u8ch = {} 31 local tok, toktype
37 repeat 32 while true do
38 local c = pop(chars) 33 if #self.line == 0 and self.file then
39 table.insert(u8ch, c) 34 self.line = m.tochars(
40 until c == ";" 35 self.file:read("*l"))
41 table.remove(u8ch) -- remove semicolon 36 end
42 return utf8.char(tonumber(table.concat(u8ch), 16)), chars 37 if not self.line or #self.line == 0 then
43end 38 return nil
44 39 end
45local function consume_string (chars) 40 tok, toktype, self.line =
46 local str = {} 41 m.scan(self.line)()
47 local backslash = { 42 if tok then return tok, toktype end
48 a = "\a",
49 b = "\b",
50 t = "\t",
51 n = "\n",
52 r = "\r",
53 ["\""] = "\"",
54 ["\\"] = "\\",
55 ["|"] = "|",
56 [" "] = consume_string_whitespace,
57 ["\t"] = consume_string_whitespace,
58 ["\n"] = consume_string_whitespace,
59 x = consume_string_hexvalue,
60 }
61 pop(chars) -- throw initial " away
62 repeat
63 local c = pop(chars)
64 if c == [[\]] then
65 c = chars[1]
66 if backlash[c] then
67 if type(backslash[c]) == "function" then
68 c, chars = backslash[c](chars)
69 table.insert(str, c)
70 else
71 table.insert(str, backlash[c])
72 end 43 end
73 else 44 end,
74 table.insert(str, "\\"..c) 45 }
75 end 46 if t.file then t.close = function (self) self.file:close() end; end
76 pop(chars) 47 local mt = {
77 elseif c == [["]] then 48 __type = "port",
78 break 49 __tostring =
79 else 50 function (self)
80 table.insert(str, c) 51 return string.format("#<port %s>",
81 end 52 self.file or "(string)")
82 until #chars == 0 53 end,
83 return table.concat(str), "string", chars 54 }
55 return setmetatable(t, mt)
84end 56end
85 57
86local function consume_token (chars) 58function m.tochars (s)
87 local tok = {} 59 local chars = {}
88 while #chars>0 and chars[1]:match("[^%s()\"#'`,@;]") do 60 for _, code in utf8.codes(s) do
89 table.insert(tok, pop(chars)) 61 table.insert(chars, code)
90 end 62 end
91 return table.concat(tok), chars 63 return chars
92end 64end
93 65
94local consume_symbol = consume_token 66--- Consumers
67-- These take a table of characters (cs) and return a token and the rest of the
68-- chars
95 69
96local function consume_number (chars) 70local function consume_token (cs)
97 local digits, chars = consume_token(chars) 71 local token = {}
98 -- The signs by themselves are symbols, as well as '...' 72 while #cs > 0 and cs[1]:match("[^%s()\"#'`,@;]") do
99 if digits:match("[-+.]") or digits == "..." then 73 table.insert(token, pop(cs))
100 return digits, chars
101 end 74 end
102 -- Otherwise try converting the digits to a number 75 return table.concat(token), cs
103 local num = tonumber(digits)
104 if num == nil then error("Bad number: " .. num) end
105 return num, chars
106end 76end
107 77
108local function consume_whitespace (chars) 78local function consume_whitespace (cs)
109 while #chars>0 and chars[1]:match("%s") do pop(chars) end 79 while #cs > 0 and cs[1]:match("%s") do pop(cs) end
110 return chars 80 return nil, cs
111end 81end
112 82
113local function consume_comment (chars) 83--- Reading from a port
114 local comment = {}
115 repeat
116 table.insert(comment, pop(chars))
117 until #chars == 0 or chars[1]:match("\n")
118 return table.concat(comment), "comment", chars
119end
120 84
121--- API 85m.readtable = {
122 86 ["("] = function (cs) return pop(cs), "open", cs end,
123read.readtable = { 87 [")"] = function (cs) return pop(cs), "close", cs end,
124 ["("] = function(chars) return pop(chars), "open", chars end,
125 [")"] = function(chars) return pop(chars), "close", chars end,
126 ["'"] = function(chars) return pop(chars), "quote", chars end,
127 ["`"] = function(chars) return pop(chars), "quote", chars end,
128 [","] = function(chars) return pop(chars), "quote", chars end,
129 ["\""] = consume_string,
130 [";"] = consume_comment,
131 -- ["#"] = ...,
132} 88}
133 89
134function read.scan (chars) 90-- Return an iterator over a character table, so you can do:
135 local chars = chars 91-- for token, chars in scan(cs) do ... end
92function m.scan (cs)
93 local cs = cs
136 return function () 94 return function ()
137 if not next(chars) then return nil end 95 if not next(cs) then return nil end
138 local token, toktype = "", nil 96 local token, toktype
139 while true do 97 while true do
140 if read.readtable[chars[1]] then 98 if m.readtable[cs[1]] then
141 token, toktype, chars = 99 token, toktype, cs = m.readtable[cs[1]](cs)
142 read.readtable[chars[1]](chars) 100 -- return { v = token, u = toktype }, cs
143 return token, toktype 101 return token, toktype, cs
144 elseif chars[1]:match("%s") then 102 elseif cs[1]:match("%s") then
145 chars = consume_whitespace(chars) 103 _, cs = consume_whitespace(cs)
146 elseif chars[1]:match("%d") then 104 return nil, nil, cs
147 token, chars = consume_number(chars) 105 -- return nil, cs
148 return token, "number" 106 elseif cs[1]:match("[%d.+-]") then
149 elseif chars[1]:match("[.+-]") then 107 -- numbers, +, -, ., ...
150 -- special casing for ., ..., +, - 108 local token, cs = consume_token(cs)
151 token, chars = consume_number(chars) 109 if token:match("[-+]") or token == "..." then
152 if token == "." then 110 return token, "symbol", cs
153 return token, "dot" 111 -- return { v = token, u = "symbol" }, cs
154 elseif token == "..." then 112 elseif token == "." then
155 return token, "symbol" 113 return token, "dot", cs
114 -- return { v = token, u = "dot" }, cs
156 else 115 else
157 return token, "number" 116 local n = tonumber(token)
117 assert (n ~= nil, "Bad number: "..n)
118 return n, "number", cs
119 -- return { v = n, u = "number" }, cs
158 end 120 end
159 else 121 else
160 token, chars = consume_symbol(chars) 122 token, cs = consume_token(cs)
161 return token, "symbol" 123 return token, "symbol", cs
124 -- return { v = token, u = "symbol" }, cs
162 end 125 end
163 if #chars == 0 then return nil end
164 end 126 end
165 end 127 end
166end 128end
167 129
168function read.tokenize (program) 130function m.readchar (port)
169 if not program or #program == 0 then return nil end 131 if #port.line > 0 then
170 local tokens = {} 132 local ch = pop(port.line)
171 for token, toktype in read.scan(program_characters(program)) do 133 return ch
172 table.insert(tokens, {type = toktype, value = token}) 134 else
135 return port.file.read(1)
173 end 136 end
174 return tokens
175end 137end
176 138
177read.readmacros = { 139function m.read (port)
178 open = 140 local function read_ahead (tok, toktype)
179 function (token, tokens) 141 if not tok then error("Unexpected EOF") end
180 local L, lt = {}, nil 142 if toktype == "open" then
181 while tokens[1].type ~= "close" do 143 local L = {}
182 local nt = read.parse(tokens) 144 while true do
183 -- this isn't .. my /favorite/ implementation, 145 local tok, toktype = port:next_token()
184 -- but it works 146 if toktype == "close" then
185 if nt == "." then 147 return t.list(L)
186 lt = read.parse(tokens)
187 break
188 else 148 else
189 table.insert(L, nt) 149 table.insert(L,
150 read_ahead(tok, toktype))
190 end 151 end
191 assert(tokens[1], "Unexpected EOF")
192 end 152 end
193 pop(tokens) -- remove final ")" 153 elseif toktype == "close" then
194 return type.List(L, lt) 154 error("Unexpected ')'")
195 end, 155 else return tok
196 close = 156 end
197 function (token, tokens) 157 end
198 error ("Unexpected '" .. token.value .. "'") 158 -- body of read
199 end, 159 local tok1, toktype1 = port:next_token()
200 quote = 160 if not tok1 then return nil
201 function (token, tokens) 161 else return read_ahead(tok1, toktype1)
202 local Q
203 if token.value == "'" then
204 Q = {"quote"}
205 elseif token.value == "`" then
206 Q = {"quasiquote"}
207 elseif token.value == "," then
208 Q = {"unquote"}
209 end
210 table.insert(Q, read.parse(tokens))
211 return type.List(Q)
212 end,
213}
214
215function read.parse (tokens)
216 if not next(tokens) then return nil end
217 local token = pop(tokens)
218 if read.readmacros[token.type] then
219 return read.readmacros[token.type](token, tokens)
220 else
221 return token.value
222 end 162 end
223end 163end
224 164
225function read.read (program) 165function m.read_string (str)
226 return read.parse(read.tokenize(program)) 166 return m.read(m.inport(str, "string"))
227end 167end
228 168
229--- 169---
230return read 170return m