diff options
author | Case Duckworth | 2024-03-30 22:20:36 -0500 |
---|---|---|
committer | Case Duckworth | 2024-03-30 22:20:36 -0500 |
commit | ab8a02fd30451207578927c7e69aa397ad596459 (patch) | |
tree | 24803910776ed692f1610f44e35d0f23b9712ca3 /read.lua | |
parent | Special-case '.', '...', '+', '-' (diff) | |
download | lam-ab8a02fd30451207578927c7e69aa397ad596459.tar.gz lam-ab8a02fd30451207578927c7e69aa397ad596459.zip |
Read from ports now
Diffstat (limited to 'read.lua')
-rw-r--r-- | read.lua | 318 |
1 files changed, 129 insertions, 189 deletions
diff --git a/read.lua b/read.lua index f23c5cc..226af51 100644 --- a/read.lua +++ b/read.lua | |||
@@ -1,230 +1,170 @@ | |||
1 | --- lam.read | 1 | --- lam.read |
2 | 2 | ||
3 | local read = {} | 3 | local m = {} |
4 | local type = require "type" | 4 | local t = require "type" |
5 | local utf8 = require "utf8" | 5 | local utf8 = require "utf8" |
6 | local util = require "util" | 6 | local pop = require("util").pop |
7 | local pop = util.pop | 7 | |
8 | local unpack = table.unpack or unpack | 8 | local pp = require("pp").pp |
9 | 9 | ||
10 | local function program_characters (program) | 10 | function m.inport (source, kind) |
11 | local chars = {} | 11 | -- KIND can be one of "file", "string"; defaults to "file" |
12 | for pos, code in utf8.codes(program) do | 12 | -- SOURCE is the name of the file or the string to read, or nil; if nil, |
13 | table.insert(chars, code) | 13 | -- read from standard input. |
14 | end | 14 | local f, l |
15 | return chars | 15 | local k = kind or "file" |
16 | end | 16 | if source then |
17 | 17 | if kind == "file" then | |
18 | local function consume_string_whitespace (chars) | 18 | f = io.open(source, "r") |
19 | -- \<intraline ws>*<line ending> <intraline ws>* : nothing | 19 | elseif kind == "string" then |
20 | local s = {"\\"} | 20 | l = m.tochars(source) |
21 | while chars[1]:match("[ \t]") do | 21 | end |
22 | table.insert(s, pop(chars)) | 22 | else |
23 | end | 23 | -- KIND is ignored here |
24 | if chars[1] ~= "\n" then | 24 | f = io.input() |
25 | table.insert(s, chars[1]) | ||
26 | return table.concat(s), chars | ||
27 | end | ||
28 | while chars[1]:match("%s") do | ||
29 | pop(chars) | ||
30 | end | 25 | end |
31 | return chars[1], chars | 26 | local t = { |
32 | end | 27 | file = f, |
33 | 28 | line = l or {}, | |
34 | local function consume_string_hexvalue (chars) | 29 | next_token = |
35 | -- \x<hex scalar value>; : specified character | 30 | function (self) |
36 | local u8ch = {} | 31 | local tok, toktype |
37 | repeat | 32 | while true do |
38 | local c = pop(chars) | 33 | if #self.line == 0 and self.file then |
39 | table.insert(u8ch, c) | 34 | self.line = m.tochars( |
40 | until c == ";" | 35 | self.file:read("*l")) |
41 | table.remove(u8ch) -- remove semicolon | 36 | end |
42 | return utf8.char(tonumber(table.concat(u8ch), 16)), chars | 37 | if not self.line or #self.line == 0 then |
43 | end | 38 | return nil |
44 | 39 | end | |
45 | local function consume_string (chars) | 40 | tok, toktype, self.line = |
46 | local str = {} | 41 | m.scan(self.line)() |
47 | local backslash = { | 42 | if tok then return tok, toktype end |
48 | a = "\a", | ||
49 | b = "\b", | ||
50 | t = "\t", | ||
51 | n = "\n", | ||
52 | r = "\r", | ||
53 | ["\""] = "\"", | ||
54 | ["\\"] = "\\", | ||
55 | ["|"] = "|", | ||
56 | [" "] = consume_string_whitespace, | ||
57 | ["\t"] = consume_string_whitespace, | ||
58 | ["\n"] = consume_string_whitespace, | ||
59 | x = consume_string_hexvalue, | ||
60 | } | ||
61 | pop(chars) -- throw initial " away | ||
62 | repeat | ||
63 | local c = pop(chars) | ||
64 | if c == [[\]] then | ||
65 | c = chars[1] | ||
66 | if backlash[c] then | ||
67 | if type(backslash[c]) == "function" then | ||
68 | c, chars = backslash[c](chars) | ||
69 | table.insert(str, c) | ||
70 | else | ||
71 | table.insert(str, backlash[c]) | ||
72 | end | 43 | end |
73 | else | 44 | end, |
74 | table.insert(str, "\\"..c) | 45 | } |
75 | end | 46 | if t.file then t.close = function (self) self.file:close() end; end |
76 | pop(chars) | 47 | local mt = { |
77 | elseif c == [["]] then | 48 | __type = "port", |
78 | break | 49 | __tostring = |
79 | else | 50 | function (self) |
80 | table.insert(str, c) | 51 | return string.format("#<port %s>", |
81 | end | 52 | self.file or "(string)") |
82 | until #chars == 0 | 53 | end, |
83 | return table.concat(str), "string", chars | 54 | } |
55 | return setmetatable(t, mt) | ||
84 | end | 56 | end |
85 | 57 | ||
86 | local function consume_token (chars) | 58 | function m.tochars (s) |
87 | local tok = {} | 59 | local chars = {} |
88 | while #chars>0 and chars[1]:match("[^%s()\"#'`,@;]") do | 60 | for _, code in utf8.codes(s) do |
89 | table.insert(tok, pop(chars)) | 61 | table.insert(chars, code) |
90 | end | 62 | end |
91 | return table.concat(tok), chars | 63 | return chars |
92 | end | 64 | end |
93 | 65 | ||
94 | local consume_symbol = consume_token | 66 | --- Consumers |
67 | -- These take a table of characters (cs) and return a token and the rest of the | ||
68 | -- chars | ||
95 | 69 | ||
96 | local function consume_number (chars) | 70 | local function consume_token (cs) |
97 | local digits, chars = consume_token(chars) | 71 | local token = {} |
98 | -- The signs by themselves are symbols, as well as '...' | 72 | while #cs > 0 and cs[1]:match("[^%s()\"#'`,@;]") do |
99 | if digits:match("[-+.]") or digits == "..." then | 73 | table.insert(token, pop(cs)) |
100 | return digits, chars | ||
101 | end | 74 | end |
102 | -- Otherwise try converting the digits to a number | 75 | return table.concat(token), cs |
103 | local num = tonumber(digits) | ||
104 | if num == nil then error("Bad number: " .. num) end | ||
105 | return num, chars | ||
106 | end | 76 | end |
107 | 77 | ||
108 | local function consume_whitespace (chars) | 78 | local function consume_whitespace (cs) |
109 | while #chars>0 and chars[1]:match("%s") do pop(chars) end | 79 | while #cs > 0 and cs[1]:match("%s") do pop(cs) end |
110 | return chars | 80 | return nil, cs |
111 | end | 81 | end |
112 | 82 | ||
113 | local function consume_comment (chars) | 83 | --- Reading from a port |
114 | local comment = {} | ||
115 | repeat | ||
116 | table.insert(comment, pop(chars)) | ||
117 | until #chars == 0 or chars[1]:match("\n") | ||
118 | return table.concat(comment), "comment", chars | ||
119 | end | ||
120 | 84 | ||
121 | --- API | 85 | m.readtable = { |
122 | 86 | ["("] = function (cs) return pop(cs), "open", cs end, | |
123 | read.readtable = { | 87 | [")"] = function (cs) return pop(cs), "close", cs end, |
124 | ["("] = function(chars) return pop(chars), "open", chars end, | ||
125 | [")"] = function(chars) return pop(chars), "close", chars end, | ||
126 | ["'"] = function(chars) return pop(chars), "quote", chars end, | ||
127 | ["`"] = function(chars) return pop(chars), "quote", chars end, | ||
128 | [","] = function(chars) return pop(chars), "quote", chars end, | ||
129 | ["\""] = consume_string, | ||
130 | [";"] = consume_comment, | ||
131 | -- ["#"] = ..., | ||
132 | } | 88 | } |
133 | 89 | ||
134 | function read.scan (chars) | 90 | -- Return an iterator over a character table, so you can do: |
135 | local chars = chars | 91 | -- for token, chars in scan(cs) do ... end |
92 | function m.scan (cs) | ||
93 | local cs = cs | ||
136 | return function () | 94 | return function () |
137 | if not next(chars) then return nil end | 95 | if not next(cs) then return nil end |
138 | local token, toktype = "", nil | 96 | local token, toktype |
139 | while true do | 97 | while true do |
140 | if read.readtable[chars[1]] then | 98 | if m.readtable[cs[1]] then |
141 | token, toktype, chars = | 99 | token, toktype, cs = m.readtable[cs[1]](cs) |
142 | read.readtable[chars[1]](chars) | 100 | -- return { v = token, u = toktype }, cs |
143 | return token, toktype | 101 | return token, toktype, cs |
144 | elseif chars[1]:match("%s") then | 102 | elseif cs[1]:match("%s") then |
145 | chars = consume_whitespace(chars) | 103 | _, cs = consume_whitespace(cs) |
146 | elseif chars[1]:match("%d") then | 104 | return nil, nil, cs |
147 | token, chars = consume_number(chars) | 105 | -- return nil, cs |
148 | return token, "number" | 106 | elseif cs[1]:match("[%d.+-]") then |
149 | elseif chars[1]:match("[.+-]") then | 107 | -- numbers, +, -, ., ... |
150 | -- special casing for ., ..., +, - | 108 | local token, cs = consume_token(cs) |
151 | token, chars = consume_number(chars) | 109 | if token:match("[-+]") or token == "..." then |
152 | if token == "." then | 110 | return token, "symbol", cs |
153 | return token, "dot" | 111 | -- return { v = token, u = "symbol" }, cs |
154 | elseif token == "..." then | 112 | elseif token == "." then |
155 | return token, "symbol" | 113 | return token, "dot", cs |
114 | -- return { v = token, u = "dot" }, cs | ||
156 | else | 115 | else |
157 | return token, "number" | 116 | local n = tonumber(token) |
117 | assert (n ~= nil, "Bad number: "..n) | ||
118 | return n, "number", cs | ||
119 | -- return { v = n, u = "number" }, cs | ||
158 | end | 120 | end |
159 | else | 121 | else |
160 | token, chars = consume_symbol(chars) | 122 | token, cs = consume_token(cs) |
161 | return token, "symbol" | 123 | return token, "symbol", cs |
124 | -- return { v = token, u = "symbol" }, cs | ||
162 | end | 125 | end |
163 | if #chars == 0 then return nil end | ||
164 | end | 126 | end |
165 | end | 127 | end |
166 | end | 128 | end |
167 | 129 | ||
168 | function read.tokenize (program) | 130 | function m.readchar (port) |
169 | if not program or #program == 0 then return nil end | 131 | if #port.line > 0 then |
170 | local tokens = {} | 132 | local ch = pop(port.line) |
171 | for token, toktype in read.scan(program_characters(program)) do | 133 | return ch |
172 | table.insert(tokens, {type = toktype, value = token}) | 134 | else |
135 | return port.file.read(1) | ||
173 | end | 136 | end |
174 | return tokens | ||
175 | end | 137 | end |
176 | 138 | ||
177 | read.readmacros = { | 139 | function m.read (port) |
178 | open = | 140 | local function read_ahead (tok, toktype) |
179 | function (token, tokens) | 141 | if not tok then error("Unexpected EOF") end |
180 | local L, lt = {}, nil | 142 | if toktype == "open" then |
181 | while tokens[1].type ~= "close" do | 143 | local L = {} |
182 | local nt = read.parse(tokens) | 144 | while true do |
183 | -- this isn't .. my /favorite/ implementation, | 145 | local tok, toktype = port:next_token() |
184 | -- but it works | 146 | if toktype == "close" then |
185 | if nt == "." then | 147 | return t.list(L) |
186 | lt = read.parse(tokens) | ||
187 | break | ||
188 | else | 148 | else |
189 | table.insert(L, nt) | 149 | table.insert(L, |
150 | read_ahead(tok, toktype)) | ||
190 | end | 151 | end |
191 | assert(tokens[1], "Unexpected EOF") | ||
192 | end | 152 | end |
193 | pop(tokens) -- remove final ")" | 153 | elseif toktype == "close" then |
194 | return type.List(L, lt) | 154 | error("Unexpected ')'") |
195 | end, | 155 | else return tok |
196 | close = | 156 | end |
197 | function (token, tokens) | 157 | end |
198 | error ("Unexpected '" .. token.value .. "'") | 158 | -- body of read |
199 | end, | 159 | local tok1, toktype1 = port:next_token() |
200 | quote = | 160 | if not tok1 then return nil |
201 | function (token, tokens) | 161 | else return read_ahead(tok1, toktype1) |
202 | local Q | ||
203 | if token.value == "'" then | ||
204 | Q = {"quote"} | ||
205 | elseif token.value == "`" then | ||
206 | Q = {"quasiquote"} | ||
207 | elseif token.value == "," then | ||
208 | Q = {"unquote"} | ||
209 | end | ||
210 | table.insert(Q, read.parse(tokens)) | ||
211 | return type.List(Q) | ||
212 | end, | ||
213 | } | ||
214 | |||
215 | function read.parse (tokens) | ||
216 | if not next(tokens) then return nil end | ||
217 | local token = pop(tokens) | ||
218 | if read.readmacros[token.type] then | ||
219 | return read.readmacros[token.type](token, tokens) | ||
220 | else | ||
221 | return token.value | ||
222 | end | 162 | end |
223 | end | 163 | end |
224 | 164 | ||
225 | function read.read (program) | 165 | function m.read_string (str) |
226 | return read.parse(read.tokenize(program)) | 166 | return m.read(m.inport(str, "string")) |
227 | end | 167 | end |
228 | 168 | ||
229 | --- | 169 | --- |
230 | return read | 170 | return m |