about summary refs log tree commit diff stats
path: root/read.lua
diff options
context:
space:
mode:
authorCase Duckworth2024-03-10 21:39:53 -0500
committerCase Duckworth2024-03-10 21:39:53 -0500
commita72ff678da253fce46e8e4648f6e4cf5ce1ea9b4 (patch)
treea82501fca97f4adc272d05145548d10cabe3ea2a /read.lua
parentUgghhhh totally not working (diff)
downloadlam-a72ff678da253fce46e8e4648f6e4cf5ce1ea9b4.tar.gz
lam-a72ff678da253fce46e8e4648f6e4cf5ce1ea9b4.zip
uh new start
Diffstat (limited to 'read.lua')
-rw-r--r--read.lua297
1 files changed, 153 insertions, 144 deletions
diff --git a/read.lua b/read.lua index 00a2d2a..bba4ffa 100644 --- a/read.lua +++ b/read.lua
@@ -1,173 +1,182 @@
1--- lam.read 1--- lam.read
2 2
3local read = {} 3local read = {}
4local type = require "type"
4local utf8 = require "utf8" 5local utf8 = require "utf8"
5local types = require "types" 6local util = require "util"
6table.unpack = table.unpack or unpack 7local unpack = table.unpack or unpack
7
8local string_to_table =
9 function(str)
10 local tbl = {}
11 for p, c in utf8.codes(str) do
12 table.insert(tbl, c)
13 end
14 return tbl
15 end
16 8
17local consume_whitespace = 9function program_characters (program)
18 function (chars) 10 local chars = {}
19 local s = {"\\"} -- accumulator for if there's no \n 11 for pos, code in utf8.codes(program) do
20 while chars[1]:match("[ \t]") do 12 table.insert(chars, code)
21 table.insert(s, util.pop(chars))
22 end
23 if chars[1] ~= "\n" then
24 table.insert(s, chars[1])
25 return table.concat(s), chars
26 end
27 while chars[1]:match("%s") do
28 util.pop(chars)
29 end
30 return chars[1], chars
31 end 13 end
14 return chars
15end
32 16
33local consume_hexvalue = 17local function consume_string_whitespace (chars)
34 function (chars) 18 -- \<intraline ws>*<line ending> <intraline ws>* : nothing
35 local u8ch = {} 19 local s = {"\\"}
36 repeat 20 while chars[1]:match("[ \t]") do
37 local c = util.pop(chars) 21 table.insert(s, util.pop(chars))
38 table.insert(u8ch,c)
39 until c == ";"
40 table.remove(u8ch) -- remove semicolon
41 return
42 utf8.char(tonumber(table.concat(u8ch), 16)),
43 chars
44 end 22 end
23 if chars[1] ~= "\n" then
24 table.insert(s, chars[1])
25 return table.concat(s), chars
26 end
27 while chars[1]:match("%s") do
28 util.pop(chars)
29 end
30 return chars[1], chars
31end
45 32
46local string_bslash = { -- backslash characters 33local function consume_string_hexvalue (chars)
47 a = "\a",
48 b = "\b",
49 t = "\t",
50 n = "\n",
51 r = "\r",
52 ["\""] = "\"",
53 ["\\"] = "\\",
54 ["|"] = "|",
55 -- \<intraline ws>*<line ending> <intraline ws>* : nothing
56 [" "] = consume_whitespace,
57 ["\t"] = consuem_whitespace,
58 ["\n"] = consume_whitespace,
59 -- \x<hex scalar value>; : specified character 34 -- \x<hex scalar value>; : specified character
60 x = consume_hexvalue, 35 local u8ch = {}
61} 36 repeat
37 local c = util.pop(chars)
38 table.insert(u8ch, c)
39 until c == ";"
40 table.remove(u8ch) -- remove semicolon
41 return utf8.char(tonumber(table.concat(u8ch), 16)), chars
42end
62 43
63local consume_string = 44local function consume_string (chars)
64 function(chars) 45 local str = {}
65 local str = {} 46 local backslash = {
66 repeat 47 a = "\a",
67 local c = util.pop(chars) 48 b = "\b",
68 if c == "\\" then 49 t = "\t",
69 c = chars[1] 50 n = "\n",
70 if string_bslash[c] then 51 r = "\r",
71 if type(string_bslash[c]) == "function" 52 ["\""] = "\"",
72 then 53 ["\\"] = "\\",
73 c, chars = 54 ["|"] = "|",
74 string_bslash[c](chars) 55 [" "] = consume_string_whitespace,
75 table.insert(str, c) 56 ["\t"] = consume_string_whitespace,
76 else 57 ["\n"] = consume_string_whitespace,
77 table.insert( 58 x = consume_string_hexvalue,
78 str, 59 }
79 string_bslash[c]) 60 util.pop(chars) -- throw initial " away
80 end 61 repeat
62 local c = util.pop(chars)
63 if c == [[\]] then
64 c = chars[1]
65 if backlash[c] then
66 if type(backslash[c]) == "function" then
67 c, chars = backslash[c](chars)
68 table.insert(str, c)
81 else 69 else
82 table.insert(str, "\\"..c) 70 table.insert(str, backlash[c])
83 end 71 end
84 util.pop(chars)
85 elseif c == "\"" then
86 break
87 else 72 else
88 table.insert(str, c) 73 table.insert(str, "\\"..c)
89 end 74 end
90 until #chars == 0 75 util.pop(chars)
91 return table.concat(str), chars 76 elseif c == [["]] then
77 break
78 else
79 table.insert(str, c)
80 end
81 until #chars == 0
82 return table.concat(str), "string", chars
83end
84
85local function consume_token (chars)
86 local tok = {}
87 while chars[1]:match("[^%s()\"#'`,@;]") do
88 table.insert(tok, util.pop(chars))
92 end 89 end
90 return table.concat(tok), chars
91end
93 92
94read.tokenize = 93local consume_symbol = consume_token
95 function (program) 94
96 if not program or program == "" then return nil end 95local function consume_number (chars)
97 local tokens = {} 96 local digits, chars = consume_token(chars)
98 local token = "" 97 local num = tonumber(digits)
99 local token_type = nil 98 if num == nil then error("Bad number: " .. num) end
100 99 return num, chars
101 local push_token = 100end
102 function (type, tok) 101
103 type = type or token_type 102local function consume_whitespace (chars)
104 token = tok or token 103 while chars[1]:match("%s") do util.pop(chars) end
105 if token:len() > 0 then 104 return chars
106 table.insert(tokens, { 105end
107 type = type, 106
108 value = token, }) 107local function consume_comment (chars)
109 token = "" 108 local comment = {}
110 token_type = nil 109 repeat
111 end 110 table.insert(comment, util.pop(chars))
112 end 111 until #chars == 0 or chars[1]:match("\n")
112 return table.concat(comment), "comment", chars
113end
114
115--- API
113 116
114 local chars = string_to_table(program) 117read.readtable = {
115 while #chars > 0 do 118 ["("] = function(chars) return util.pop(chars), "begin_list", chars end,
116 local c = util.pop(chars) 119 [")"] = function(chars) return util.pop(chars), "end_list", chars end,
117 if c == "(" then 120 ["\""] = consume_string,
118 push_token() 121 [";"] = consume_comment,
119 push_token("begin_list", "(") 122 -- ["#"] =
120 elseif c == ")" then 123 -- ["'"] =
121 push_token() 124 -- ["`"] =
122 push_token("end_list", ")") 125 -- [","] =
123 elseif c:match("%s") then -- whitespace 126}
124 push_token() 127
125 elseif c == "\"" then -- string 128function read.scan (chars)
126 str, chars = consume_string(chars) 129 local chars = chars
127 push_token("string", str) 130 return function()
128 elseif c:match("%d") then -- numbers 131 if #chars == 0 then return nil end
129 token = token .. c 132 local token, toktype = "", nil
130 token_type = token_type or "number" 133 while true do
134 if read.readtable[chars[1]] then
135 token, toktype, chars =
136 read.readtable[chars[1]](chars)
137 return token, toktype
138 elseif chars[1]:match("%s") then
139 chars = consume_whitespace(chars)
140 elseif chars[1]:match("%d") then
141 token, chars = consume_number(chars)
142 return token, "number"
131 else 143 else
132 token = token .. c 144 token, chars = consume_symbol(chars)
133 token_type = token_type or "symbol" 145 return token, "symbol"
134 end 146 end
135 end 147 end
136 push_token()
137 return tokens
138 end 148 end
149end
139 150
140read.tokentable = { 151function read.tokenize (program)
141 string = function (tok) return types.String(tok.value) end, 152 if not program or #program == 0 then return nil end
142 number = function (tok) return types.Number(tok.value) end, 153 local tokens = {}
143 symbol = function (tok) return types.Symbol(tok.value) end, 154 for token, toktype in read.scan(program_characters(program)) do
144} 155 table.insert(tokens, {type = toktype, value = token})
156 end
157 return tokens
158end
145 159
146read.parse = 160function read.parse (tokens)
147 function (tokens) 161 if not next(tokens) then return nil end
148 assert(next(tokens), "Unexpected EOF") 162 local token = util.pop(tokens)
149 tok = util.pop(tokens) 163 if token.value == "(" then
150 if tok.value == "(" then 164 local L = {}
151 local L = {} 165 while tokens[1].value ~= ")" do
152 while tokens[1].value ~= ")" do 166 table.insert(L, read.parse(tokens))
153 table.insert(L, read.parse(tokens))
154 end
155 util.pop(tokens) -- remove ")"
156 return types.List(table.unpack(L))
157 elseif tok.value == ")" then
158 error("Unexpected ')'")
159 elseif read.tokentable[tok.type] then
160 return read.tokentable[tok.type](tok)
161 else
162 error("Bad token: '" .. tok.value .. "'")
163 end 167 end
168 util.pop(tokens) -- remove the final ")"
169 return type.List(L)
170 elseif token.value == ")" then
171 error("Unexpected ')'")
172 else
173 return token.value
164 end 174 end
175end
165 176
166read.read = function (program) return read.parse(read.tokenize(program)) end 177function read.read (program)
178 return read.parse(read.tokenize(program))
179end
167 180
168--- 181---
169return setmetatable(read, { __call = 182return read
170 function(_, program)
171 return read.read(program)
172 end,
173})