about summary refs log tree commit diff stats
path: root/read.lua
blob: c89261ce0c1902da39b4efaeee3c2cd260578ccc (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
--- lam.read

-- Module table: the functions below are attached to it and it is returned,
-- wrapped in a metatable, at the end of the file.
local read = {}
-- UTF-8 helpers, used to split program text into characters and to encode
-- the result of `\x...;` string escapes.
local utf8 = require "utf8"
-- Provide `table.unpack` on Lua versions that only have the global `unpack`.
table.unpack = table.unpack or unpack
-- NOTE(review): `util.pop` is called throughout this file, but `util` is not
-- required here -- presumably a global provided by the host program; confirm.

--- Split a UTF-8 string into a sequence of single-character strings.
-- `utf8.codes` yields numeric code points, so each one is converted back to
-- its UTF-8 encoded character with `utf8.char`.  The tokenizer compares the
-- elements against string literals such as "(" and calls string methods on
-- them, which only works when the elements are strings.
-- @tparam string str  UTF-8 encoded text
-- @treturn table      sequence of one-character strings, in input order
local string_to_table =
	function(str)
		local tbl = {}
		for _, code in utf8.codes(str) do
			tbl[#tbl + 1] = utf8.char(code)
		end
		return tbl
	end

--- Escape sequences recognised after a backslash inside a string literal.
-- Keys are the character that follows the backslash.  A string value is the
-- replacement text.  A function value is called with the table of remaining
-- characters and returns the replacement text followed by that table.
local bslash = { -- backslash characters
	a = "\a",
	b = "\b",
	t = "\t",
	n = "\n",
	r = "\r",
	["\""] = "\"",
	["\\"] = "\\",
	["|"] = "|",

	-- TODO: whitespace
	-- \<intraline whitespace>*<line ending> <intraline whitespace>* :
	-- nothing

	x = -- \x<hex scalar value>; : specified character
		function (chars)
			local digits = {}
			while true do
				-- Stop with a clear error if the input ends before ';' is
				-- found, instead of continuing to pop from an empty table.
				if #chars == 0 then
					error("Unterminated \\x escape: missing ';'")
				end
				local c = util.pop(chars)
				if c == ";" then break end -- terminator is consumed, not kept
				digits[#digits + 1] = c
			end
			local hex = table.concat(digits)
			local scalar = tonumber(hex, 16)
			-- tonumber returns nil for empty or non-hex text; report that
			-- here rather than letting utf8.char fail on a nil argument.
			if not scalar then
				error("Bad hex scalar value in \\x escape: '" .. hex .. "'")
			end
			return
				utf8.char(scalar),
				chars
		end,
}

--- Read the body of a string literal up to its closing double quote.
-- `chars` is expected to start just after the opening quote.  Characters are
-- taken with `util.pop` until an unescaped `"` is popped or the table is
-- empty; an unterminated literal is accepted as-is.  A backslash followed by
-- a key of `bslash` is replaced by that entry (calling it when it is a
-- function); an unknown escape is kept verbatim, backslash included.
-- @tparam table chars  remaining input characters
-- @treturn string      decoded contents of the literal
-- @treturn table       the remaining characters after the closing quote
local consume_string =
	function(chars)
		local str = {}
		-- Checking before each pop means an empty table is never popped.
		while #chars > 0 do
			local c = util.pop(chars)
			if c == "\"" then
				break
			elseif c ~= "\\" then
				str[#str + 1] = c
			elseif #chars == 0 then
				-- Backslash was the last character of the input: there is
				-- nothing to escape, so keep it literally instead of
				-- concatenating with a missing character.
				str[#str + 1] = "\\"
			else
				local esc = util.pop(chars)
				local replacement = bslash[esc]
				if type(replacement) == "function" then
					-- Handler consumes further characters itself.
					local decoded
					decoded, chars = replacement(chars)
					str[#str + 1] = decoded
				elseif replacement then
					str[#str + 1] = replacement
				else
					str[#str + 1] = "\\" .. esc
				end
			end
		end
		return table.concat(str), chars
	end

--- Split program text into a flat sequence of tokens.
-- Each token is a table `{ type = ..., value = ... }`.  The type is
-- "begin_list" for "(", "end_list" for ")", "string" for a double-quoted
-- literal, and for any other run of non-whitespace characters "number" when
-- its first character is a digit, otherwise "symbol".
-- @tparam string program  source text
-- @treturn table|nil      token sequence, or nil when `program` is nil,
--                         false or the empty string
read.tokenize =
	function (program)
		if not program or program == "" then return nil end
		local tokens = {}
		local token = ""        -- text of the atom being accumulated
		local token_type = nil  -- fixed by the atom's first character

		-- Append a finished token to the output.
		local emit =
			function (kind, value)
				tokens[#tokens + 1] = {
					type = kind,
					value = value, }
			end

		-- Emit the pending atom, if there is one, and reset the accumulator.
		local flush =
			function ()
				if token ~= "" then
					emit(token_type, token)
				end
				token = ""
				token_type = nil
			end

		local chars = string_to_table(program)
		while #chars > 0 do
			local c = util.pop(chars)
			if c == "(" then
				flush()
				emit("begin_list", "(")
			elseif c == ")" then
				flush()
				emit("end_list", ")")
			elseif c:match("%s") then -- whitespace
				flush()
			elseif c == "\"" then		  -- string
				-- Finish an atom that directly precedes the quote so it is
				-- not overwritten by the string token.
				flush()
				-- Kept local: a bare assignment would create a global.
				local str
				str, chars = consume_string(chars)
				-- Emitted unconditionally so that "" survives as a token.
				emit("string", str)
			elseif c:match("%d") then -- numbers
				token = token .. c
				token_type = token_type or "number"
			else
				token = token .. c
				token_type = token_type or "symbol"
			end
		end
		flush()
		return tokens
	end

--- Converters from atom tokens to Lua values, keyed by token type.
-- Each entry receives the whole token table and returns the value it
-- stands for.
do
	-- String literal: the token value is used unchanged.
	local function from_string(token)
		return token.value
	end

	-- Numeric atom: converted with tonumber.
	local function from_number(token)
		return tonumber(token.value)
	end

	-- Symbol: TODO need to return a Symbol from types...
	local function from_symbol(token)
		return token.value
	end

	read.tokentable = {
		string = from_string,
		number = from_number,
		symbol = from_symbol,
	}
end

--- Build one datum from the front of a token sequence.
-- Tokens are taken from `tokens` with `util.pop` as they are used.  A
-- parenthesised group becomes a Lua table holding its parsed elements; any
-- other token is converted by the `read.tokentable` entry for its type.
-- @tparam table tokens  token sequence such as `read.tokenize` returns
-- @return the parsed datum
-- Raises "Unexpected EOF" when the tokens run out (including a nil or empty
-- sequence and an unclosed list), "Unexpected ')'" for a stray closing
-- parenthesis, and "Bad token: ..." for a token type with no converter.
read.parse =
	function (tokens)
		-- `tokens` may be nil: read.tokenize returns nil for empty input.
		assert(tokens ~= nil and next(tokens) ~= nil, "Unexpected EOF")

		-- Delimiters are recognised by token type, so a string literal whose
		-- text is "(" or ")" is not mistaken for one.  A token that carries
		-- no type at all is still matched on its value.
		local is_delimiter =
			function (t, kind, text)
				if t.type ~= nil then
					return t.type == kind
				end
				return t.value == text
			end

		-- Local on purpose: a bare assignment would create a global.
		local tok = util.pop(tokens)
		if is_delimiter(tok, "begin_list", "(") then
			local L = {}
			while true do
				-- An unclosed list runs out of tokens before ")" appears.
				assert(tokens[1] ~= nil, "Unexpected EOF")
				if is_delimiter(tokens[1], "end_list", ")") then
					break
				end
				table.insert(L, read.parse(tokens))
			end
			util.pop(tokens) -- remove ")"
			return L
		elseif is_delimiter(tok, "end_list", ")") then
			error("Unexpected ')'")
		elseif read.tokentable[tok.type] then
			return read.tokentable[tok.type](tok)
		else
			error("Bad token: '" .. tok.value .. "'")
		end
	end

--- Tokenize `program` and parse the resulting tokens.
-- @tparam string program  source text
-- @return whatever `read.parse` returns for the token sequence
read.read =
	function (program)
		local tokens = read.tokenize(program)
		return read.parse(tokens)
	end

---
-- Calling the module table itself forwards to `read.read`.
local module_meta = {}
module_meta.__call =
	function (_, program)
		return read.read(program)
	end
return setmetatable(read, module_meta)