diff options
Diffstat (limited to 'read.lua')
-rw-r--r-- | read.lua | 244 |
1 files changed, 165 insertions, 79 deletions
diff --git a/read.lua b/read.lua index 6d55e23..069df57 100644 --- a/read.lua +++ b/read.lua | |||
@@ -20,94 +20,180 @@ end | |||
20 | 20 | ||
21 | ---[[ READ TABLE ]]--- | 21 | ---[[ READ TABLE ]]--- |
22 | 22 | ||
23 | -- each function should take a list of characters and return the token, its | 23 | --- helper functions |
24 | -- type, and the rest of the characters | 24 | |
25 | m.readtable = {} | 25 | local function consume_unquote (cs) |
26 | m.readtable.chars = { | 26 | pop(cs) -- remove ',' |
27 | ["("] = function (cs) return pop(cs), "open", cs end, | 27 | if cs[1] == "@" then |
28 | [")"] = function (cs) return pop(cs), "close", cs end, | 28 | pop(cs) -- remove '@' |
29 | ["'"] = function (cs) return pop(cs), "quote", cs end, | 29 | return ",@", "quote", cs |
30 | ["`"] = function (cs) return pop(cs), "quote", cs end, | 30 | else |
31 | [","] = -- unquote | 31 | return ",", "quote", cs |
32 | function (cs) | 32 | end |
33 | pop(cs) -- remove ',' | 33 | end |
34 | if cs[1] == "@" then | 34 | |
35 | pop(cs) -- remove '@' | 35 | local function consume_comment (cs) |
36 | return ",@", "quote", cs | 36 | local comment = {} |
37 | else | 37 | while #cs > 0 and not cs[1]:match("\n") do |
38 | return ",", "quote", cs | 38 | table.insert(comment, pop(cs)) |
39 | end | 39 | end |
40 | end, | 40 | return table.concat(comment), "comment", cs |
41 | [";"] = -- comment | 41 | end |
42 | function (cs) | ||
43 | local comment = {} | ||
44 | while #cs > 0 and not cs[1]:match("\n") do | ||
45 | table.insert(comment, pop(cs)) | ||
46 | end | ||
47 | return table.concat(comment), "comment", cs | ||
48 | end, | ||
49 | ["#"] = -- literal | ||
50 | function (cs) | ||
51 | local tok | ||
52 | -- bail on just '#\' | ||
53 | if not (cs[2] and cs[3]) then | ||
54 | cs = {} | ||
55 | error("bad literal", "#\\") | ||
56 | end | ||
57 | 42 | ||
58 | -- read '#\ ' and such correctly | 43 | local function consume_literal (cs) |
59 | if cs[2] == "\\" and cs[3]:match(token_separators) then | 44 | local tok |
60 | pop(cs) -- remove '\' | 45 | -- bail on just '#\' |
61 | pop(cs) -- remove next character | 46 | if not (cs[2] and cs[3]) then |
62 | return type.character(cs[1]) | 47 | cs = {} |
48 | error("bad literal", "#\\") | ||
49 | end | ||
50 | |||
51 | -- read '#\ ' and such correctly | ||
52 | if cs[2] == "\\" and cs[3]:match(token_separators) then | ||
53 | pop(cs) -- remove '\' | ||
54 | pop(cs) -- remove next character | ||
55 | return type.character(cs[1]) | ||
56 | end | ||
57 | |||
58 | pop(cs) -- discard '#' ... | ||
59 | tok, cs = consume_token(cs) | ||
60 | tok = "#" .. tok -- ... then put it back | ||
61 | |||
62 | local val | ||
63 | if m.readtable.literals.lit[tok] then | ||
64 | val = m.readtable.literals.lit[tok] | ||
65 | else | ||
66 | for re, fn in pairs(m.readtable.literals.regex) | ||
67 | do | ||
68 | if tok:match(re) then | ||
69 | val = fn(tok) | ||
63 | end | 70 | end |
71 | end | ||
72 | end | ||
64 | 73 | ||
65 | pop(cs) -- discard '#' ... | 74 | if val == nil then |
66 | tok, cs = consume_token(cs) | 75 | error("bad literal", tok) |
67 | tok = "#" .. tok -- ... then put it back | 76 | end |
77 | return val, "literal", cs | ||
78 | end | ||
68 | 79 | ||
69 | local val | 80 | local function consume_whitespace (cs) |
70 | if m.readtable.literals.lit[tok] then | 81 | while #cs > 0 and cs[1]:match("%s") do |
71 | val = m.readtable.literals.lit[tok] | 82 | pop(cs) |
72 | else | 83 | end |
73 | for re, fn in pairs(m.readtable.literals.regex) | 84 | return false, nil, cs |
74 | do | 85 | end |
75 | if tok:match(re) then | 86 | |
76 | val = fn(tok) | 87 | local function consume_numbers_etc (cs) |
77 | end | 88 | -- Since numbers can start with +, -, and ., those symbols and ... are |
89 | -- handled along with numbers. | ||
90 | local tok | ||
91 | tok, cs = consume_token(cs) | ||
92 | if tok:match("^[-+]$") or tok == "..." then | ||
93 | return tok, "symbol", cs | ||
94 | elseif tok == "." then | ||
95 | return tok, "dot", cs | ||
96 | else -- number | ||
97 | local n = tonumber(tok) | ||
98 | if not n then | ||
99 | error("bad number", n) | ||
100 | end | ||
101 | return n, "number", cs | ||
102 | end | ||
103 | end | ||
104 | |||
105 | -- strings | ||
106 | |||
107 | local function consume_string_whitespace (cs) | ||
108 | -- \<intraline ws>*<line ending> <intraline ws>* : nothing | ||
109 | local s = {"\\"} | ||
110 | while cs[1]:match("[ \t]") do | ||
111 | table.insert(s, pop(cs)) | ||
112 | end | ||
113 | if cs[1] ~= "\n" then | ||
114 | table.insert(s, cs[1]) | ||
115 | return table.concat(s), cs | ||
116 | end | ||
117 | while cs[1]:match("%s") do | ||
118 | pop(cs) | ||
119 | end | ||
120 | return cs[1], cs | ||
121 | end | ||
122 | |||
123 | local function consume_string_hexvalue (cs) | ||
124 | -- \x<hex scalar value>; : specified character | ||
125 | local u8ch = {} | ||
126 | repeat | ||
127 | local c = pop(cs) | ||
128 | table.insert(u8ch, c) | ||
129 | until c == ";" | ||
130 | table.remove(u8ch) -- discard ';' | ||
131 | return utf8.char(tonumber(table.concat(u8ch), 16)), cs | ||
132 | end | ||
133 | |||
134 | local function consume_string (cs) | ||
135 | local str = {} | ||
136 | local escapes = { | ||
137 | a = "\a", | ||
138 | b = "\b", | ||
139 | t = "\t", | ||
140 | n = "\n", | ||
141 | r = "\r", | ||
142 | ["\""] = "\"", | ||
143 | ["\\"] = "\\", | ||
144 | ["|"] = "|", | ||
145 | [" "] = consume_string_whitespace, | ||
146 | ["\t"] = consume_string_whitespace, | ||
147 | ["\n"] = consume_string_whitespace, | ||
148 | x = consume_string_hexvalue, | ||
149 | } | ||
150 | pop(cs) -- discard '"' | ||
151 | repeat | ||
152 | local c = pop(cs) | ||
153 | if c == "\\" then | ||
154 | c = cs[1] | ||
155 | if escapes[c] then | ||
156 | if type.luatype(escapes[c]) == "function" then | ||
157 | c, cs = escapes[c](cs) | ||
158 | table.insert(str, c) | ||
159 | else | ||
160 | table.insert(str, escapes[c]) | ||
78 | end | 161 | end |
162 | else | ||
163 | table.insert(str, "\\"..c) | ||
79 | end | 164 | end |
165 | pop(cs) | ||
166 | elseif c == "\"" then | ||
167 | break | ||
168 | else | ||
169 | table.insert(str, c) | ||
170 | end | ||
171 | until #cs == 0 | ||
172 | return type.string(str), "string", cs | ||
173 | end | ||
80 | 174 | ||
81 | if val == nil then | 175 | local function consume_char_as (token_type) |
82 | error("bad literal", tok) | 176 | -- return a function that pops a character and returns it with |
83 | end | 177 | -- TOKEN_TYPE |
84 | return val, "literal", cs | 178 | return function (cs) return pop(cs), token_type, cs end |
85 | end, | 179 | end |
180 | |||
181 | -- each function should take a list of characters and return the token, its | ||
182 | -- type, and the rest of the characters | ||
183 | m.readtable = {} | ||
184 | m.readtable.chars = { | ||
185 | ["("] = consume_char_as("open"), | ||
186 | [")"] = consume_char_as("close"), | ||
187 | ["'"] = consume_char_as("quote"), | ||
188 | ["`"] = consume_char_as("quote"), | ||
189 | [","] = consume_unquote, | ||
190 | ["\""] = consume_string, | ||
191 | [";"] = consume_comment, | ||
192 | ["#"] = consume_literal, | ||
86 | } | 193 | } |
87 | m.readtable.regex = { | 194 | m.readtable.regex = { |
88 | ["%s"] = -- whitespace | 195 | ["%s"] = consume_whitespace, |
89 | function (cs) | 196 | ["[%d.+-]"] = consume_number_etc, |
90 | while #cs > 0 and cs[1]:match("%s") do | ||
91 | pop(cs) | ||
92 | end | ||
93 | return false, nil, cs | ||
94 | end, | ||
95 | ["[%d.+-]"] = -- numbers and symbols +, -, ., and ... | ||
96 | function (cs) | ||
97 | local tok | ||
98 | tok, cs = consume_token(cs) | ||
99 | if tok:match("^[-+]$") or tok == "..." then | ||
100 | return tok, "symbol", cs | ||
101 | elseif tok == "." then | ||
102 | return tok, "dot", cs | ||
103 | else -- number | ||
104 | local n = tonumber(tok) | ||
105 | if not n then | ||
106 | error("bad number", n) | ||
107 | end | ||
108 | return n, "number", cs | ||
109 | end | ||
110 | end, | ||
111 | } | 197 | } |
112 | m.readtable.default = -- default action if nothing else matches | 198 | m.readtable.default = -- default action if nothing else matches |
113 | function (cs) | 199 | function (cs) |