diff options
-rw-r--r-- | read.lua | 78 |
1 files changed, 62 insertions, 16 deletions
diff --git a/read.lua b/read.lua index 8a154a1..6b223f8 100644 --- a/read.lua +++ b/read.lua | |||
@@ -43,7 +43,10 @@ function m.inport (source, kind) | |||
43 | end | 43 | end |
44 | tok, toktype, self.line = | 44 | tok, toktype, self.line = |
45 | m.scan(self.line)() | 45 | m.scan(self.line)() |
46 | if tok then return tok, toktype end | 46 | if tok ~= nil then |
47 | print(tok, toktype) | ||
48 | return tok, toktype | ||
49 | end | ||
47 | end | 50 | end |
48 | end, | 51 | end, |
49 | } | 52 | } |
@@ -68,20 +71,61 @@ function m.tochars (s) | |||
68 | end | 71 | end |
69 | 72 | ||
70 | --- Consumers | 73 | --- Consumers |
71 | -- These take a table of characters (cs) and return a token and the rest of the | 74 | -- These take a table of characters (cs) and return: |
72 | -- chars | 75 | -- a token, its type, and the rest of the characters |
76 | |||
77 | local token_separator = "[^%s#()\"'`,@;]" | ||
73 | 78 | ||
74 | local function consume_token (cs) | 79 | local function consume_token (cs) |
75 | local token = {} | 80 | local token = {} |
76 | while #cs > 0 and cs[1]:match("[^%s()\"#'`,@;]") do | 81 | while #cs > 0 and cs[1]:match(token_separator) do |
77 | table.insert(token, pop(cs)) | 82 | table.insert(token, pop(cs)) |
78 | end | 83 | end |
79 | return table.concat(token), cs | 84 | return table.concat(token), "symbol", cs |
80 | end | 85 | end |
81 | 86 | ||
82 | local function consume_whitespace (cs) | 87 | local function consume_whitespace (cs) |
83 | while #cs > 0 and cs[1]:match("%s") do pop(cs) end | 88 | while #cs > 0 and cs[1]:match("%s") do pop(cs) end |
84 | return nil, cs | 89 | return nil, nil, cs |
90 | end | ||
91 | |||
92 | local function consume_comment (cs) | ||
93 | local comment = {} | ||
94 | repeat table.insert(comment, pop(cs)) | ||
95 | until #cs == 0 or cs[1]:match("\n") | ||
96 | return table.concat(comment), "comment", cs | ||
97 | end | ||
98 | |||
99 | local function consume_literal (cs) | ||
100 | -- whitespace and parentheses character literals. | ||
101 | -- the match test is negated because token_separator is already a complemented class (it matches non-separator characters) | ||
102 | if cs[2] == "\\" and not cs[3]:match(token_separator) then | ||
103 | return type.character(cs[3]) | ||
104 | end | ||
105 | pop(cs) -- discard '#' | ||
106 | local token, value, cs = consume_token(cs) -- todo: vectors #(...) | ||
107 | token = "#" .. token -- put '#' back | ||
108 | -- tokens! | ||
109 | if token == "#t" or token == "#true" then -- booleans | ||
110 | value = true | ||
111 | elseif token == "#f" or token == "#false" then | ||
112 | value = false | ||
113 | --[[ To actually *read* nil, I need to change ports from | ||
114 | returning `nil' on eof to an `eof' symbol, I think. | ||
115 | |||
116 | elseif token == "#n" or token == "#nil" then | ||
117 | value = nil | ||
118 | --]] | ||
119 | elseif token == "#\\space" then -- characters | ||
120 | value = type.character(" ") | ||
121 | elseif token == "#\\newline" then | ||
122 | value = type.character("\n") | ||
123 | elseif token:match("^#\\") then | ||
124 | value = type.character(token:sub(3)) | ||
125 | else | ||
126 | error("Bad literal notation: " .. token) | ||
127 | end | ||
128 | return value, "literal", cs | ||
85 | end | 129 | end |
86 | 130 | ||
87 | --- Reading from a port | 131 | --- Reading from a port |
@@ -91,7 +135,7 @@ m.readtable = { | |||
91 | [")"] = function (cs) return pop(cs), "close", cs end, | 135 | [")"] = function (cs) return pop(cs), "close", cs end, |
92 | ["'"] = function (cs) return pop(cs), "quote", cs end, | 136 | ["'"] = function (cs) return pop(cs), "quote", cs end, |
93 | ["`"] = function (cs) return pop(cs), "quote", cs end, | 137 | ["`"] = function (cs) return pop(cs), "quote", cs end, |
94 | [","] = | 138 | [","] = -- unquote |
95 | function (cs) | 139 | function (cs) |
96 | pop(cs) -- remove ',' | 140 | pop(cs) -- remove ',' |
97 | if cs[1] == "@" then | 141 | if cs[1] == "@" then |
@@ -101,8 +145,12 @@ m.readtable = { | |||
101 | return ",", "quote", cs | 145 | return ",", "quote", cs |
102 | end | 146 | end |
103 | end, | 147 | end, |
148 | [";"] = consume_comment, | ||
149 | ["#"] = consume_literal, | ||
104 | } | 150 | } |
105 | 151 | ||
152 | --- TODO: Figure out how to read #f and #n properly | ||
153 | |||
106 | -- Return an iterator over a character table, so you can do: | 154 | -- Return an iterator over a character table, so you can do: |
107 | -- for token, chars in scan(cs) do ... end | 155 | -- for token, chars in scan(cs) do ... end |
108 | function m.scan (cs) | 156 | function m.scan (cs) |
@@ -115,12 +163,10 @@ function m.scan (cs) | |||
115 | token, toktype, cs = m.readtable[cs[1]](cs) | 163 | token, toktype, cs = m.readtable[cs[1]](cs) |
116 | return token, toktype, cs | 164 | return token, toktype, cs |
117 | elseif cs[1]:match("%s") then | 165 | elseif cs[1]:match("%s") then |
118 | _, cs = consume_whitespace(cs) | 166 | return consume_whitespace(cs) |
119 | return nil, nil, cs | ||
120 | -- return nil, cs | ||
121 | elseif cs[1]:match("[%d.+-]") then | 167 | elseif cs[1]:match("[%d.+-]") then |
122 | -- numbers, +, -, ., ... | 168 | -- numbers, +, -, ., ... |
123 | local token, cs = consume_token(cs) | 169 | local token, _, cs = consume_token(cs) |
124 | if token:match("[-+]") or token == "..." then | 170 | if token:match("[-+]") or token == "..." then |
125 | return token, "symbol", cs | 171 | return token, "symbol", cs |
126 | elseif token == "." then | 172 | elseif token == "." then |
@@ -131,8 +177,7 @@ function m.scan (cs) | |||
131 | return n, "number", cs | 177 | return n, "number", cs |
132 | end | 178 | end |
133 | else | 179 | else |
134 | token, cs = consume_token(cs) | 180 | return consume_token(cs) |
135 | return token, "symbol", cs | ||
136 | end | 181 | end |
137 | end | 182 | end |
138 | end | 183 | end |
@@ -143,7 +188,7 @@ function m.readchar (port) | |||
143 | local ch = pop(port.line) | 188 | local ch = pop(port.line) |
144 | return ch | 189 | return ch |
145 | else | 190 | else |
146 | return port.file.read(1) | 191 | return port.file and port.file.read(1) |
147 | end | 192 | end |
148 | end | 193 | end |
149 | 194 | ||
@@ -163,11 +208,12 @@ m.readmacros = { | |||
163 | table.insert(Q, m.read(port)) | 208 | table.insert(Q, m.read(port)) |
164 | return t.list(Q) | 209 | return t.list(Q) |
165 | end, | 210 | end, |
211 | comment = function () return nil end, | ||
166 | } | 212 | } |
167 | 213 | ||
168 | function m.read (port) | 214 | function m.read (port) |
169 | local function read_ahead (tok, toktype) | 215 | local function read_ahead (tok, toktype) |
170 | if not tok then error("Unexpected EOF") end | 216 | if tok == nil then error("Unexpected EOF") end |
171 | if toktype == "open" then | 217 | if toktype == "open" then |
172 | local L = {} | 218 | local L = {} |
173 | while true do | 219 | while true do |
@@ -192,7 +238,7 @@ function m.read (port) | |||
192 | end | 238 | end |
193 | -- body of read | 239 | -- body of read |
194 | local tok1, toktype1 = port:next_token() | 240 | local tok1, toktype1 = port:next_token() |
195 | if not tok1 then return nil | 241 | if tok1 == nil then return nil |
196 | else return read_ahead(tok1, toktype1) | 242 | else return read_ahead(tok1, toktype1) |
197 | end | 243 | end |
198 | end | 244 | end |