about summary refs log tree commit diff stats
path: root/read.lua
blob: 00a2d2a62bb86b61b59fc10af01d7b34d10f7b0a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
--- lam.read

-- Module table; the reader's public functions are attached to it below.
local read = {}
local utf8 = require "utf8"
local types = require "types"
-- Lua 5.1 / LuaJIT only provide the global `unpack`; alias it so the rest of
-- the file can call table.unpack unconditionally.
table.unpack = table.unpack or unpack
-- NOTE(review): this file calls util.pop in several places but never requires
-- a `util` module; presumably `util` is a global set up elsewhere -- confirm,
-- or add the missing require.

--- Split a UTF-8 string into a list of one-character strings.
-- The stock Lua 5.3+ utf8.codes iterator yields integer code points, but the
-- rest of this file compares elements against string literals and calls
-- :match on them, so numeric code points are converted back to their UTF-8
-- encoding here.  Values that are already strings are passed through
-- untouched.
-- @param str UTF-8 encoded text
-- @return list with one entry per character of str, in order
local string_to_table =
	function(str)
		local tbl = {}
		for _, c in utf8.codes(str) do
			if type(c) == "number" then
				c = utf8.char(c)
			end
			tbl[#tbl + 1] = c
		end
		return tbl
	end

--- Handle a backslash followed by whitespace inside a string literal.
-- consume_string calls this with chars[1] being the whitespace character
-- that follows the backslash.  After the call it appends the returned text
-- to the string and then discards exactly one more character from chars, so
-- this handler always leaves the last whitespace character it recognised at
-- chars[1] instead of handing back (and thereby swallowing) whatever comes
-- after the whitespace -- which could be the closing quote.
--
--   [ \t]* "\n" %s*   line continuation, contributes nothing
--   [ \t]+            no line ending: kept literally as backslash + whitespace
--
-- @param chars list of one-character strings, consumed from the front
-- @return text to append to the string being built
-- @return chars (same table)
local consume_whitespace =
	function (chars)
		-- Measure the run of intraline whitespace without consuming it, so
		-- running out of input is simply a nil lookup.
		local i = 1
		while chars[i] and chars[i]:match("[ \t]") do
			i = i + 1
		end
		if chars[i] ~= "\n" then
			-- Not a line continuation: keep "\" and the whitespace as-is.
			local s = {"\\"}
			for _ = 1, i - 2 do
				s[#s + 1] = util.pop(chars)
			end
			s[#s + 1] = chars[1] -- left in place for the caller to discard
			return table.concat(s), chars
		end
		-- Line continuation: skip the line ending and any whitespace after it.
		i = i + 1
		while chars[i] and chars[i]:match("%s") do
			i = i + 1
		end
		-- chars[i] is the first character to keep; drop everything before it
		-- except one character, which the caller discards.
		for _ = 1, i - 2 do
			util.pop(chars)
		end
		return "", chars
	end

--- Handle a "\x<hex scalar value>;" escape inside a string literal.
-- consume_string calls this with chars[1] being the "x" that selected the
-- handler, and discards exactly one more character after the call.  The
-- handler therefore drops the "x", consumes the hex digits, and leaves the
-- terminating ";" at chars[1] for the caller to discard.
-- Raises an error when the ";" is missing or the digits are not valid hex.
-- @param chars list of one-character strings, consumed from the front
-- @return the character denoted by the escape
-- @return chars (same table)
local consume_hexvalue =
	function (chars)
		if chars[1] == "x" then
			util.pop(chars)
		end
		local digits = {}
		while chars[1] and chars[1] ~= ";" do
			digits[#digits + 1] = util.pop(chars)
		end
		if chars[1] ~= ";" then
			error("Unterminated \\x escape")
		end
		local hex = table.concat(digits)
		if not hex:match("^%x+$") then
			error("Bad hex escape: '\\x" .. hex .. ";'")
		end
		return
			utf8.char(tonumber(hex, 16)),
			chars
	end

-- Escape sequences recognised after a backslash in a string literal, keyed
-- by the character that follows the backslash.  A string value is the
-- replacement text; a function value is a handler that consume_string calls
-- as handler(chars) and that returns the replacement text plus chars.
local string_bslash = { -- backslash characters
	a = "\a",
	b = "\b",
	t = "\t",
	n = "\n",
	r = "\r",
	["\""] = "\"",
	["\\"] = "\\",
	["|"] = "|",
	-- \<intraline ws>*<line ending> <intraline ws>* : nothing
	[" "] = consume_whitespace,
	["\t"] = consume_whitespace,
	["\n"] = consume_whitespace,
	-- \x<hex scalar value>; : specified character
	x = consume_hexvalue,
}

--- Read the body of a string literal.
-- Expects the opening quote to have been consumed already.  Characters are
-- taken from the front of chars up to and including the closing quote; if
-- the input ends first, whatever was collected is returned.
-- After a backslash the next character is looked up in string_bslash:
--   * string entry   -> that replacement text is appended
--   * function entry -> called as handler(chars); its first result is appended
--   * no entry       -> the backslash and the character are kept literally
-- In every case one further character is then discarded from chars.
-- A backslash that is the very last character of the input is kept literally
-- rather than crashing on a nil concatenation.
-- @param chars list of one-character strings, consumed from the front
-- @return the decoded string contents
-- @return chars
local consume_string =
	function(chars)
		local str = {}
		while #chars > 0 do
			local c = util.pop(chars)
			if c == "\"" then
				break
			elseif c ~= "\\" then
				str[#str + 1] = c
			elseif #chars == 0 then
				-- dangling backslash at end of input
				str[#str + 1] = "\\"
			else
				local esc = chars[1] -- peek only; discarded below
				local entry = string_bslash[esc]
				if type(entry) == "function" then
					local text
					text, chars = entry(chars)
					str[#str + 1] = text
				elseif entry then
					str[#str + 1] = entry
				else
					str[#str + 1] = "\\" .. esc
				end
				util.pop(chars)
			end
		end
		return table.concat(str), chars
	end

--- Split program text into a flat list of tokens.
-- Every token is a table { type = ..., value = ... }.  Types produced:
--   "begin_list" / "end_list"  for "(" and ")"
--   "string"                   for a double-quoted literal (value is the
--                              decoded contents, which may be empty)
--   "number"                   for an atom whose first character is a digit
--   "symbol"                   for any other atom
-- Atoms are delimited by whitespace, parentheses and double quotes.
-- @param program source text
-- @return list of tokens, or nil when program is nil or ""
read.tokenize =
	function (program)
		if not program or program == "" then return nil end
		local tokens = {}
		local token = ""
		local token_type = nil

		-- Emit the atom accumulated so far, if any, and reset the
		-- accumulator.
		local flush_atom =
			function ()
				if token:len() > 0 then
					table.insert(tokens, {
							type = token_type,
							value = token, })
				end
				token = ""
				token_type = nil
			end

		-- Emit a complete token.  Any atom in progress is closed first so
		-- it is neither lost nor merged into the new token, and the value
		-- is emitted even when it is the empty string.
		local push_token =
			function (kind, value)
				flush_atom()
				table.insert(tokens, {
						type = kind,
						value = value, })
			end

		local chars = string_to_table(program)
		while #chars > 0 do
			local c = util.pop(chars)
			if c == "(" then
				push_token("begin_list", "(")
			elseif c == ")" then
				push_token("end_list", ")")
			elseif c:match("%s") then -- whitespace
				flush_atom()
			elseif c == "\"" then		  -- string
				local str
				str, chars = consume_string(chars)
				push_token("string", str)
			elseif c:match("%d") then -- numbers
				token = token .. c
				token_type = token_type or "number"
			else
				token = token .. c
				token_type = token_type or "symbol"
			end
		end
		flush_atom()
		return tokens
	end

-- Maps an atom token's type to the function that builds its value.  Each
-- builder receives the token and passes its `value` field to the matching
-- constructor in `types`.
read.tokentable = {}

function read.tokentable.string(tok)
	return types.String(tok.value)
end

function read.tokentable.number(tok)
	return types.Number(tok.value)
end

function read.tokentable.symbol(tok)
	return types.Symbol(tok.value)
end

--- Parse one expression from the front of a token list.
-- Tokens are removed from `tokens` as they are consumed, so the list is
-- left holding whatever follows the expression.
-- A "(" token starts a list: expressions are parsed recursively until the
-- matching ")" and the results are passed to types.List.  Any other token is
-- dispatched on its type through read.tokentable.
-- String tokens are never treated as list delimiters, so the literals "("
-- and ")" parse as strings.
-- Raises "Unexpected EOF" when the tokens run out (including a nil or empty
-- list and an unclosed "("), "Unexpected ')'" for a stray closing
-- parenthesis, and "Bad token: ..." for a token type without a tokentable
-- entry.
-- @param tokens list of { type = ..., value = ... } tables
-- @return the parsed value
read.parse =
	function (tokens)
		assert(tokens and next(tokens), "Unexpected EOF")
		-- true when tok is the given parenthesis and not a string literal
		local is_paren =
			function (tok, paren)
				return tok.type ~= "string" and tok.value == paren
			end
		local tok = util.pop(tokens)
		if is_paren(tok, "(") then
			local L = {}
			while true do
				local nxt = tokens[1]
				assert(nxt, "Unexpected EOF")
				if is_paren(nxt, ")") then break end
				table.insert(L, read.parse(tokens))
			end
			util.pop(tokens) -- remove ")"
			return types.List(table.unpack(L))
		elseif is_paren(tok, ")") then
			error("Unexpected ')'")
		elseif read.tokentable[tok.type] then
			return read.tokentable[tok.type](tok)
		else
			error("Bad token: '" .. tok.value .. "'")
		end
	end

--- Tokenize program text and parse the resulting token list.
-- @param program source text
-- @return whatever read.parse returns for the tokens of program
function read.read(program)
	local tokens = read.tokenize(program)
	return read.parse(tokens)
end

---
-- Export the module table and make it callable: read(program) forwards to
-- read.read(program).
local callable = {}

function callable.__call(_, program)
	return read.read(program)
end

return setmetatable(read, callable)