diff options
author | Case Duckworth | 2024-03-04 21:01:27 -0600 |
---|---|---|
committer | Case Duckworth | 2024-03-04 21:01:27 -0600 |
commit | bbaff0e0c204c2fab216f6501dc8c11b4425b4bc (patch) | |
tree | b2a06edc7b1c8e6f86839bff8c16e06297080674 /read.lua | |
parent | Add copying (diff) | |
download | lam-bbaff0e0c204c2fab216f6501dc8c11b4425b4bc.tar.gz lam-bbaff0e0c204c2fab216f6501dc8c11b4425b4bc.zip |
Ugghhhh totally not working first-try
Diffstat (limited to 'read.lua')
-rw-r--r-- | read.lua | 93 |
1 files changed, 54 insertions, 39 deletions
diff --git a/read.lua b/read.lua index c89261c..00a2d2a 100644 --- a/read.lua +++ b/read.lua | |||
@@ -2,6 +2,7 @@ | |||
2 | 2 | ||
3 | local read = {} | 3 | local read = {} |
4 | local utf8 = require "utf8" | 4 | local utf8 = require "utf8" |
5 | local types = require "types" | ||
5 | table.unpack = table.unpack or unpack | 6 | table.unpack = table.unpack or unpack |
6 | 7 | ||
7 | local string_to_table = | 8 | local string_to_table = |
@@ -13,7 +14,36 @@ local string_to_table = | |||
13 | return tbl | 14 | return tbl |
14 | end | 15 | end |
15 | 16 | ||
16 | local bslash = { -- backslash characters | 17 | local consume_whitespace = |
18 | function (chars) | ||
19 | local s = {"\\"} -- accumulator for if there's no \n | ||
20 | while chars[1]:match("[ \t]") do | ||
21 | table.insert(s, util.pop(chars)) | ||
22 | end | ||
23 | if chars[1] ~= "\n" then | ||
24 | table.insert(s, chars[1]) | ||
25 | return table.concat(s), chars | ||
26 | end | ||
27 | while chars[1]:match("%s") do | ||
28 | util.pop(chars) | ||
29 | end | ||
30 | return chars[1], chars | ||
31 | end | ||
32 | |||
33 | local consume_hexvalue = | ||
34 | function (chars) | ||
35 | local u8ch = {} | ||
36 | repeat | ||
37 | local c = util.pop(chars) | ||
38 | table.insert(u8ch,c) | ||
39 | until c == ";" | ||
40 | table.remove(u8ch) -- remove semicolon | ||
41 | return | ||
42 | utf8.char(tonumber(table.concat(u8ch), 16)), | ||
43 | chars | ||
44 | end | ||
45 | |||
46 | local string_bslash = { -- backslash characters | ||
17 | a = "\a", | 47 | a = "\a", |
18 | b = "\b", | 48 | b = "\b", |
19 | t = "\t", | 49 | t = "\t", |
@@ -22,23 +52,12 @@ local bslash = { -- backslash characters | |||
22 | ["\""] = "\"", | 52 | ["\""] = "\"", |
23 | ["\\"] = "\\", | 53 | ["\\"] = "\\", |
24 | ["|"] = "|", | 54 | ["|"] = "|", |
25 | 55 | -- \<intraline ws>*<line ending> <intraline ws>* : nothing | |
26 | -- TODO: whitespace | 56 | [" "] = consume_whitespace, |
27 | -- \<intraline whitespace>*<line ending> <intraline whitespace>* : | 57 | ["\t"] = consuem_whitespace, |
28 | -- nothing | 58 | ["\n"] = consume_whitespace, |
29 | 59 | -- \x<hex scalar value>; : specified character | |
30 | x = -- \x<hex scalar value>; : specified character | 60 | x = consume_hexvalue, |
31 | function (chars) | ||
32 | local u8ch = {} | ||
33 | repeat | ||
34 | local c = util.pop(chars) | ||
35 | table.insert(u8ch,c) | ||
36 | until c == ";" | ||
37 | table.remove(u8ch) -- remove semicolon | ||
38 | return | ||
39 | utf8.char(tonumber(table.concat(u8ch), 16)), | ||
40 | chars | ||
41 | end, | ||
42 | } | 61 | } |
43 | 62 | ||
44 | local consume_string = | 63 | local consume_string = |
@@ -47,17 +66,22 @@ local consume_string = | |||
47 | repeat | 66 | repeat |
48 | local c = util.pop(chars) | 67 | local c = util.pop(chars) |
49 | if c == "\\" then | 68 | if c == "\\" then |
50 | c = util.pop(chars) | 69 | c = chars[1] |
51 | if bslash[c] then | 70 | if string_bslash[c] then |
52 | if type(bslash[c]) == "function" then | 71 | if type(string_bslash[c]) == "function" |
53 | c, chars = bslash[c](chars) | 72 | then |
73 | c, chars = | ||
74 | string_bslash[c](chars) | ||
54 | table.insert(str, c) | 75 | table.insert(str, c) |
55 | else | 76 | else |
56 | table.insert(str, bslash[c]) | 77 | table.insert( |
78 | str, | ||
79 | string_bslash[c]) | ||
57 | end | 80 | end |
58 | else | 81 | else |
59 | table.insert(str, "\\"..c) | 82 | table.insert(str, "\\"..c) |
60 | end | 83 | end |
84 | util.pop(chars) | ||
61 | elseif c == "\"" then | 85 | elseif c == "\"" then |
62 | break | 86 | break |
63 | else | 87 | else |
@@ -114,18 +138,9 @@ read.tokenize = | |||
114 | end | 138 | end |
115 | 139 | ||
116 | read.tokentable = { | 140 | read.tokentable = { |
117 | string = | 141 | string = function (tok) return types.String(tok.value) end, |
118 | function (tok) | 142 | number = function (tok) return types.Number(tok.value) end, |
119 | return tok.value | 143 | symbol = function (tok) return types.Symbol(tok.value) end, |
120 | end, | ||
121 | number = | ||
122 | function (tok) | ||
123 | return tonumber(tok.value) | ||
124 | end, | ||
125 | symbol = | ||
126 | function (tok) -- TODO need to return a Symbol from types... | ||
127 | return tok.value | ||
128 | end, | ||
129 | } | 144 | } |
130 | 145 | ||
131 | read.parse = | 146 | read.parse = |
@@ -138,7 +153,7 @@ read.parse = | |||
138 | table.insert(L, read.parse(tokens)) | 153 | table.insert(L, read.parse(tokens)) |
139 | end | 154 | end |
140 | util.pop(tokens) -- remove ")" | 155 | util.pop(tokens) -- remove ")" |
141 | return L | 156 | return types.List(table.unpack(L)) |
142 | elseif tok.value == ")" then | 157 | elseif tok.value == ")" then |
143 | error("Unexpected ')'") | 158 | error("Unexpected ')'") |
144 | elseif read.tokentable[tok.type] then | 159 | elseif read.tokentable[tok.type] then |
@@ -152,7 +167,7 @@ read.read = function (program) return read.parse(read.tokenize(program)) end | |||
152 | 167 | ||
153 | --- | 168 | --- |
154 | return setmetatable(read, { __call = | 169 | return setmetatable(read, { __call = |
155 | function(_, program) | 170 | function(_, program) |
156 | return read.read(program) | 171 | return read.read(program) |
157 | end, | 172 | end, |
158 | }) | 173 | }) |