about summary refs log tree commit diff stats
diff options
context:
space:
mode:
authorCase Duckworth2024-04-09 22:48:12 -0500
committerCase Duckworth2024-04-09 22:48:12 -0500
commitff222d305ecc5625a68e5b61d6f03f6201676dd4 (patch)
tree2d80436f7008dc6b76aa7a31471d2218065f9f52
parentFix buffering (diff)
downloadlam-ff222d305ecc5625a68e5b61d6f03f6201676dd4.tar.gz
lam-ff222d305ecc5625a68e5b61d6f03f6201676dd4.zip
Implement strings
-rw-r--r--read.lua244
-rw-r--r--type.lua11
2 files changed, 174 insertions, 81 deletions
diff --git a/read.lua b/read.lua index 6d55e23..069df57 100644 --- a/read.lua +++ b/read.lua
@@ -20,94 +20,180 @@ end
20 20
21---[[ READ TABLE ]]--- 21---[[ READ TABLE ]]---
22 22
23-- each function should take a list of characters and return the token, its 23--- helper functions
24-- type, and the rest of the characters 24
25m.readtable = {} 25local function consume_unquote (cs)
26m.readtable.chars = { 26 pop(cs) -- remove ','
27 ["("] = function (cs) return pop(cs), "open", cs end, 27 if cs[1] == "@" then
28 [")"] = function (cs) return pop(cs), "close", cs end, 28 pop(cs) -- remove '@'
29 ["'"] = function (cs) return pop(cs), "quote", cs end, 29 return ",@", "quote", cs
30 ["`"] = function (cs) return pop(cs), "quote", cs end, 30 else
31 [","] = -- unquote 31 return ",", "quote", cs
32 function (cs) 32 end
33 pop(cs) -- remove ',' 33end
34 if cs[1] == "@" then 34
35 pop(cs) -- remove '@' 35local function consume_comment (cs)
36 return ",@", "quote", cs 36 local comment = {}
37 else 37 while #cs > 0 and not cs[1]:match("\n") do
38 return ",", "quote", cs 38 table.insert(comment, pop(cs))
39 end 39 end
40 end, 40 return table.concat(comment), "comment", cs
41 [";"] = -- comment 41end
42 function (cs)
43 local comment = {}
44 while #cs > 0 and not cs[1]:match("\n") do
45 table.insert(comment, pop(cs))
46 end
47 return table.concat(comment), "comment", cs
48 end,
49 ["#"] = -- literal
50 function (cs)
51 local tok
52 -- bail on just '#\'
53 if not (cs[2] and cs[3]) then
54 cs = {}
55 error("bad literal", "#\\")
56 end
57 42
58 -- read '#\ ' and such correctly 43local function consume_literal (cs)
59 if cs[2] == "\\" and cs[3]:match(token_separators) then 44 local tok
60 pop(cs) -- remove '\' 45 -- bail on just '#\'
61 pop(cs) -- remove next character 46 if not (cs[2] and cs[3]) then
62 return type.character(cs[1]) 47 cs = {}
48 error("bad literal", "#\\")
49 end
50
51 -- read '#\ ' and such correctly
52 if cs[2] == "\\" and cs[3]:match(token_separators) then
53 pop(cs) -- remove '\'
54 pop(cs) -- remove next character
55 return type.character(cs[1])
56 end
57
58 pop(cs) -- discard '#' ...
59 tok, cs = consume_token(cs)
60 tok = "#" .. tok -- ... then put it back
61
62 local val
63 if m.readtable.literals.lit[tok] then
64 val = m.readtable.literals.lit[tok]
65 else
66 for re, fn in pairs(m.readtable.literals.regex)
67 do
68 if tok:match(re) then
69 val = fn(tok)
63 end 70 end
71 end
72 end
64 73
65 pop(cs) -- discard '#' ... 74 if val == nil then
66 tok, cs = consume_token(cs) 75 error("bad literal", tok)
67 tok = "#" .. tok -- ... then put it back 76 end
77 return val, "literal", cs
78end
68 79
69 local val 80local function consume_whitespace (cs)
70 if m.readtable.literals.lit[tok] then 81 while #cs > 0 and cs[1]:match("%s") do
71 val = m.readtable.literals.lit[tok] 82 pop(cs)
72 else 83 end
73 for re, fn in pairs(m.readtable.literals.regex) 84 return false, nil, cs
74 do 85end
75 if tok:match(re) then 86
76 val = fn(tok) 87local function consume_numbers_etc (cs)
77 end 88 -- Since numbers can start with +, -, and ., those symbols and ... are
89 -- handled along with numbers.
90 local tok
91 tok, cs = consume_token(cs)
92 if tok:match("^[-+]$") or tok == "..." then
93 return tok, "symbol", cs
94 elseif tok == "." then
95 return tok, "dot", cs
96 else -- number
97 local n = tonumber(tok)
98 if not n then
99 error("bad number", n)
100 end
101 return n, "number", cs
102 end
103end
104
105-- strings
106
107local function consume_string_whitespace (cs)
108 -- \<intraline ws>*<line ending> <intraline ws>* : nothing
109 local s = {"\\"}
110 while cs[1]:match("[ \t]") do
111 table.insert(s, pop(cs))
112 end
113 if cs[1] ~= "\n" then
114 table.insert(s, cs[1])
115 return table.concat(s), cs
116 end
117 while cs[1]:match("%s") do
118 pop(cs)
119 end
120 return cs[1], cs
121end
122
123local function consume_string_hexvalue (cs)
124 -- \x<hex scalar value>; : specified character
125 local u8ch = {}
126 repeat
127 local c = pop(cs)
128 table.insert(u8ch, c)
129 until c == ";"
130 table.remove(u8ch) -- discard ';'
131 return utf8.char(tonumber(table.concat(u8ch), 16)), cs
132end
133
134local function consume_string (cs)
135 local str = {}
136 local escapes = {
137 a = "\a",
138 b = "\b",
139 t = "\t",
140 n = "\n",
141 r = "\r",
142 ["\""] = "\"",
143 ["\\"] = "\\",
144 ["|"] = "|",
145 [" "] = consume_string_whitespace,
146 ["\t"] = consume_string_whitespace,
147 ["\n"] = consume_string_whitespace,
148 x = consume_string_hexvalue,
149 }
150 pop(cs) -- discard '"'
151 repeat
152 local c = pop(cs)
153 if c == "\\" then
154 c = cs[1]
155 if escapes[c] then
156 if type.luatype(escapes[c]) == "function" then
157 c, cs = escapes[c](cs)
158 table.insert(str, c)
159 else
160 table.insert(str, escapes[c])
78 end 161 end
162 else
163 table.insert(str, "\\"..c)
79 end 164 end
165 pop(cs)
166 elseif c == "\"" then
167 break
168 else
169 table.insert(str, c)
170 end
171 until #cs == 0
172 return type.string(str), "string", cs
173end
80 174
81 if val == nil then 175local function consume_char_as (token_type)
82 error("bad literal", tok) 176 -- return a function that pops a character and returns it with
83 end 177 -- TOKEN_TYPE
84 return val, "literal", cs 178 return function (cs) return pop(cs), token_type, cs end
85 end, 179end
180
181-- each function should take a list of characters and return the token, its
182-- type, and the rest of the characters
183m.readtable = {}
184m.readtable.chars = {
185 ["("] = consume_char_as("open"),
186 [")"] = consume_char_as("close"),
187 ["'"] = consume_char_as("quote"),
188 ["`"] = consume_char_as("quote"),
189 [","] = consume_unquote,
190 ["\""] = consume_string,
191 [";"] = consume_comment,
192 ["#"] = consume_literal,
86} 193}
87m.readtable.regex = { 194m.readtable.regex = {
88 ["%s"] = -- whitespace 195 ["%s"] = consume_whitespace,
89 function (cs) 196 ["[%d.+-]"] = consume_number_etc,
90 while #cs > 0 and cs[1]:match("%s") do
91 pop(cs)
92 end
93 return false, nil, cs
94 end,
95 ["[%d.+-]"] = -- numbers and symbols +, -, ., and ...
96 function (cs)
97 local tok
98 tok, cs = consume_token(cs)
99 if tok:match("^[-+]$") or tok == "..." then
100 return tok, "symbol", cs
101 elseif tok == "." then
102 return tok, "dot", cs
103 else -- number
104 local n = tonumber(tok)
105 if not n then
106 error("bad number", n)
107 end
108 return n, "number", cs
109 end
110 end,
111} 197}
112m.readtable.default = -- default action if nothing else matches 198m.readtable.default = -- default action if nothing else matches
113 function (cs) 199 function (cs)
diff --git a/type.lua b/type.lua index c205468..0000bfb 100644 --- a/type.lua +++ b/type.lua
@@ -207,9 +207,16 @@ function m.list (items, final)
207 return tolist(final or m.null, items) 207 return tolist(final or m.null, items)
208end 208end
209 209
210-- strings are vectors of chars 210-- strings are vectors of chars. not lam characters, but one-character strings.
211-- this is for utf8 ease-of-use... TODO i still need to write functions to pluck
212-- out a single lam character from a string, etc.
211function m.string (x) 213function m.string (x)
212 local t = tochars(tostring(x)) 214 local t
215 if m.luatype(x) == "table" then
216 t = x
217 else
218 t = tochars(tostring(x))
219 end
213 local mt = { 220 local mt = {
214 __type = "string", 221 __type = "string",
215 __tostring = 222 __tostring =