diff options
Diffstat (limited to 'read.lua')
-rw-r--r-- | read.lua | 297 |
1 files changed, 153 insertions, 144 deletions
diff --git a/read.lua b/read.lua index 00a2d2a..bba4ffa 100644 --- a/read.lua +++ b/read.lua | |||
@@ -1,173 +1,182 @@ | |||
1 | --- lam.read | 1 | --- lam.read |
2 | 2 | ||
3 | local read = {} | 3 | local read = {} |
4 | local type = require "type" | ||
4 | local utf8 = require "utf8" | 5 | local utf8 = require "utf8" |
5 | local types = require "types" | 6 | local util = require "util" |
6 | table.unpack = table.unpack or unpack | 7 | local unpack = table.unpack or unpack |
7 | |||
8 | local string_to_table = | ||
9 | function(str) | ||
10 | local tbl = {} | ||
11 | for p, c in utf8.codes(str) do | ||
12 | table.insert(tbl, c) | ||
13 | end | ||
14 | return tbl | ||
15 | end | ||
16 | 8 | ||
17 | local consume_whitespace = | 9 | function program_characters (program) |
18 | function (chars) | 10 | local chars = {} |
19 | local s = {"\\"} -- accumulator for if there's no \n | 11 | for pos, code in utf8.codes(program) do |
20 | while chars[1]:match("[ \t]") do | 12 | table.insert(chars, code) |
21 | table.insert(s, util.pop(chars)) | ||
22 | end | ||
23 | if chars[1] ~= "\n" then | ||
24 | table.insert(s, chars[1]) | ||
25 | return table.concat(s), chars | ||
26 | end | ||
27 | while chars[1]:match("%s") do | ||
28 | util.pop(chars) | ||
29 | end | ||
30 | return chars[1], chars | ||
31 | end | 13 | end |
14 | return chars | ||
15 | end | ||
32 | 16 | ||
33 | local consume_hexvalue = | 17 | local function consume_string_whitespace (chars) |
34 | function (chars) | 18 | -- \<intraline ws>*<line ending> <intraline ws>* : nothing |
35 | local u8ch = {} | 19 | local s = {"\\"} |
36 | repeat | 20 | while chars[1]:match("[ \t]") do |
37 | local c = util.pop(chars) | 21 | table.insert(s, util.pop(chars)) |
38 | table.insert(u8ch,c) | ||
39 | until c == ";" | ||
40 | table.remove(u8ch) -- remove semicolon | ||
41 | return | ||
42 | utf8.char(tonumber(table.concat(u8ch), 16)), | ||
43 | chars | ||
44 | end | 22 | end |
23 | if chars[1] ~= "\n" then | ||
24 | table.insert(s, chars[1]) | ||
25 | return table.concat(s), chars | ||
26 | end | ||
27 | while chars[1]:match("%s") do | ||
28 | util.pop(chars) | ||
29 | end | ||
30 | return chars[1], chars | ||
31 | end | ||
45 | 32 | ||
46 | local string_bslash = { -- backslash characters | 33 | local function consume_string_hexvalue (chars) |
47 | a = "\a", | ||
48 | b = "\b", | ||
49 | t = "\t", | ||
50 | n = "\n", | ||
51 | r = "\r", | ||
52 | ["\""] = "\"", | ||
53 | ["\\"] = "\\", | ||
54 | ["|"] = "|", | ||
55 | -- \<intraline ws>*<line ending> <intraline ws>* : nothing | ||
56 | [" "] = consume_whitespace, | ||
57 | ["\t"] = consuem_whitespace, | ||
58 | ["\n"] = consume_whitespace, | ||
59 | -- \x<hex scalar value>; : specified character | 34 | -- \x<hex scalar value>; : specified character |
60 | x = consume_hexvalue, | 35 | local u8ch = {} |
61 | } | 36 | repeat |
37 | local c = util.pop(chars) | ||
38 | table.insert(u8ch, c) | ||
39 | until c == ";" | ||
40 | table.remove(u8ch) -- remove semicolon | ||
41 | return utf8.char(tonumber(table.concat(u8ch), 16)), chars | ||
42 | end | ||
62 | 43 | ||
63 | local consume_string = | 44 | local function consume_string (chars) |
64 | function(chars) | 45 | local str = {} |
65 | local str = {} | 46 | local backslash = { |
66 | repeat | 47 | a = "\a", |
67 | local c = util.pop(chars) | 48 | b = "\b", |
68 | if c == "\\" then | 49 | t = "\t", |
69 | c = chars[1] | 50 | n = "\n", |
70 | if string_bslash[c] then | 51 | r = "\r", |
71 | if type(string_bslash[c]) == "function" | 52 | ["\""] = "\"", |
72 | then | 53 | ["\\"] = "\\", |
73 | c, chars = | 54 | ["|"] = "|", |
74 | string_bslash[c](chars) | 55 | [" "] = consume_string_whitespace, |
75 | table.insert(str, c) | 56 | ["\t"] = consume_string_whitespace, |
76 | else | 57 | ["\n"] = consume_string_whitespace, |
77 | table.insert( | 58 | x = consume_string_hexvalue, |
78 | str, | 59 | } |
79 | string_bslash[c]) | 60 | util.pop(chars) -- throw initial " away |
80 | end | 61 | repeat |
62 | local c = util.pop(chars) | ||
63 | if c == [[\]] then | ||
64 | c = chars[1] | ||
65 | if backlash[c] then | ||
66 | if type(backslash[c]) == "function" then | ||
67 | c, chars = backslash[c](chars) | ||
68 | table.insert(str, c) | ||
81 | else | 69 | else |
82 | table.insert(str, "\\"..c) | 70 | table.insert(str, backlash[c]) |
83 | end | 71 | end |
84 | util.pop(chars) | ||
85 | elseif c == "\"" then | ||
86 | break | ||
87 | else | 72 | else |
88 | table.insert(str, c) | 73 | table.insert(str, "\\"..c) |
89 | end | 74 | end |
90 | until #chars == 0 | 75 | util.pop(chars) |
91 | return table.concat(str), chars | 76 | elseif c == [["]] then |
77 | break | ||
78 | else | ||
79 | table.insert(str, c) | ||
80 | end | ||
81 | until #chars == 0 | ||
82 | return table.concat(str), "string", chars | ||
83 | end | ||
84 | |||
85 | local function consume_token (chars) | ||
86 | local tok = {} | ||
87 | while chars[1]:match("[^%s()\"#'`,@;]") do | ||
88 | table.insert(tok, util.pop(chars)) | ||
92 | end | 89 | end |
90 | return table.concat(tok), chars | ||
91 | end | ||
93 | 92 | ||
94 | read.tokenize = | 93 | local consume_symbol = consume_token |
95 | function (program) | 94 | |
96 | if not program or program == "" then return nil end | 95 | local function consume_number (chars) |
97 | local tokens = {} | 96 | local digits, chars = consume_token(chars) |
98 | local token = "" | 97 | local num = tonumber(digits) |
99 | local token_type = nil | 98 | if num == nil then error("Bad number: " .. num) end |
100 | 99 | return num, chars | |
101 | local push_token = | 100 | end |
102 | function (type, tok) | 101 | |
103 | type = type or token_type | 102 | local function consume_whitespace (chars) |
104 | token = tok or token | 103 | while chars[1]:match("%s") do util.pop(chars) end |
105 | if token:len() > 0 then | 104 | return chars |
106 | table.insert(tokens, { | 105 | end |
107 | type = type, | 106 | |
108 | value = token, }) | 107 | local function consume_comment (chars) |
109 | token = "" | 108 | local comment = {} |
110 | token_type = nil | 109 | repeat |
111 | end | 110 | table.insert(comment, util.pop(chars)) |
112 | end | 111 | until #chars == 0 or chars[1]:match("\n") |
112 | return table.concat(comment), "comment", chars | ||
113 | end | ||
114 | |||
115 | --- API | ||
113 | 116 | ||
114 | local chars = string_to_table(program) | 117 | read.readtable = { |
115 | while #chars > 0 do | 118 | ["("] = function(chars) return util.pop(chars), "begin_list", chars end, |
116 | local c = util.pop(chars) | 119 | [")"] = function(chars) return util.pop(chars), "end_list", chars end, |
117 | if c == "(" then | 120 | ["\""] = consume_string, |
118 | push_token() | 121 | [";"] = consume_comment, |
119 | push_token("begin_list", "(") | 122 | -- ["#"] = |
120 | elseif c == ")" then | 123 | -- ["'"] = |
121 | push_token() | 124 | -- ["`"] = |
122 | push_token("end_list", ")") | 125 | -- [","] = |
123 | elseif c:match("%s") then -- whitespace | 126 | } |
124 | push_token() | 127 | |
125 | elseif c == "\"" then -- string | 128 | function read.scan (chars) |
126 | str, chars = consume_string(chars) | 129 | local chars = chars |
127 | push_token("string", str) | 130 | return function() |
128 | elseif c:match("%d") then -- numbers | 131 | if #chars == 0 then return nil end |
129 | token = token .. c | 132 | local token, toktype = "", nil |
130 | token_type = token_type or "number" | 133 | while true do |
134 | if read.readtable[chars[1]] then | ||
135 | token, toktype, chars = | ||
136 | read.readtable[chars[1]](chars) | ||
137 | return token, toktype | ||
138 | elseif chars[1]:match("%s") then | ||
139 | chars = consume_whitespace(chars) | ||
140 | elseif chars[1]:match("%d") then | ||
141 | token, chars = consume_number(chars) | ||
142 | return token, "number" | ||
131 | else | 143 | else |
132 | token = token .. c | 144 | token, chars = consume_symbol(chars) |
133 | token_type = token_type or "symbol" | 145 | return token, "symbol" |
134 | end | 146 | end |
135 | end | 147 | end |
136 | push_token() | ||
137 | return tokens | ||
138 | end | 148 | end |
149 | end | ||
139 | 150 | ||
140 | read.tokentable = { | 151 | function read.tokenize (program) |
141 | string = function (tok) return types.String(tok.value) end, | 152 | if not program or #program == 0 then return nil end |
142 | number = function (tok) return types.Number(tok.value) end, | 153 | local tokens = {} |
143 | symbol = function (tok) return types.Symbol(tok.value) end, | 154 | for token, toktype in read.scan(program_characters(program)) do |
144 | } | 155 | table.insert(tokens, {type = toktype, value = token}) |
156 | end | ||
157 | return tokens | ||
158 | end | ||
145 | 159 | ||
146 | read.parse = | 160 | function read.parse (tokens) |
147 | function (tokens) | 161 | if not next(tokens) then return nil end |
148 | assert(next(tokens), "Unexpected EOF") | 162 | local token = util.pop(tokens) |
149 | tok = util.pop(tokens) | 163 | if token.value == "(" then |
150 | if tok.value == "(" then | 164 | local L = {} |
151 | local L = {} | 165 | while tokens[1].value ~= ")" do |
152 | while tokens[1].value ~= ")" do | 166 | table.insert(L, read.parse(tokens)) |
153 | table.insert(L, read.parse(tokens)) | ||
154 | end | ||
155 | util.pop(tokens) -- remove ")" | ||
156 | return types.List(table.unpack(L)) | ||
157 | elseif tok.value == ")" then | ||
158 | error("Unexpected ')'") | ||
159 | elseif read.tokentable[tok.type] then | ||
160 | return read.tokentable[tok.type](tok) | ||
161 | else | ||
162 | error("Bad token: '" .. tok.value .. "'") | ||
163 | end | 167 | end |
168 | util.pop(tokens) -- remove the final ")" | ||
169 | return type.List(L) | ||
170 | elseif token.value == ")" then | ||
171 | error("Unexpected ')'") | ||
172 | else | ||
173 | return token.value | ||
164 | end | 174 | end |
175 | end | ||
165 | 176 | ||
166 | read.read = function (program) return read.parse(read.tokenize(program)) end | 177 | function read.read (program) |
178 | return read.parse(read.tokenize(program)) | ||
179 | end | ||
167 | 180 | ||
168 | --- | 181 | --- |
169 | return setmetatable(read, { __call = | 182 | return read |
170 | function(_, program) | ||
171 | return read.read(program) | ||
172 | end, | ||
173 | }) | ||