Implement strings

author: Case Duckworth 2024-04-09 22:48:12 -0500
committer: Case Duckworth 2024-04-09 22:48:12 -0500
commit: ff222d305ecc5625a68e5b61d6f03f6201676dd4 (patch)
tree: 2d80436f7008dc6b76aa7a31471d2218065f9f52
parent: Fix buffering (diff)
download: lam-ff222d305ecc5625a68e5b61d6f03f6201676dd4.tar.gz
lam-ff222d305ecc5625a68e5b61d6f03f6201676dd4.zip
2 files changed, 174 insertions, 81 deletions
diff --git a/read.lua b/read.lua
index 6d55e23..069df57 100644
--- a/read.lua
+++ b/read.lua

@@ -20,94 +20,180 @@ end
 ---[[ READ TABLE ]]---
-- each function should take a list of characters and return the token, its
+--- helper functions
-- type, and the rest of the characters
-m.readtable = {}
+local function consume_unquote (cs)
-m.readtable.chars = {
+        pop(cs) -- remove ','
-        ["("] = function (cs) return pop(cs), "open", cs end,
+        if cs[1] == "@" then
-        [")"] = function (cs) return pop(cs), "close", cs end,
+                pop(cs) -- remove '@'
-        ["'"] = function (cs) return pop(cs), "quote", cs end,
+                return ",@", "quote", cs
-        ["`"] = function (cs) return pop(cs), "quote", cs end,
+        else
-        [","] = -- unquote
+                return ",", "quote", cs
-                function (cs)
+        end
-                        pop(cs) -- remove ','
+end
-                        if cs[1] == "@" then
-                                pop(cs) -- remove '@'
+local function consume_comment (cs)
-                                return ",@", "quote", cs
+        local comment = {}
-                        else
+        while #cs > 0 and not cs[1]:match("\n") do
-                                return ",", "quote", cs
+                table.insert(comment, pop(cs))
-                        end
+        end
-                end,
+        return table.concat(comment), "comment", cs
-        [";"] = -- comment
+end
-                function (cs)
-                        local comment = {}
-                        while #cs > 0 and not cs[1]:match("\n") do
-                                table.insert(comment, pop(cs))
-                        end
-                        return table.concat(comment), "comment", cs
-                end,
-        ["#"] = -- literal
-                function (cs)
-                        local tok
-                        -- bail on just '#\'
-                        if not (cs[2] and cs[3]) then
-                                cs = {}
-                                error("bad literal", "#\\")
-                        end
-                        -- read '#\ ' and such correctly
+local function consume_literal (cs)
-                        if cs[2] == "\\" and cs[3]:match(token_separators) then
+        local tok
-                                pop(cs) -- remove '\'
+        -- bail on just '#\'
-                                pop(cs) -- remove next character
+        if not (cs[2] and cs[3]) then
-                                return type.character(cs[1])
+                cs = {}
+                error("bad literal", "#\\")
+        end
+        -- read '#\ ' and such correctly
+        if cs[2] == "\\" and cs[3]:match(token_separators) then
+                pop(cs) -- remove '\'
+                pop(cs) -- remove next character
+                return type.character(cs[1])
+        end
+        pop(cs) -- discard '#' ...
+        tok, cs = consume_token(cs)
+        tok = "#" .. tok -- ... then put it back
+        local val
+        if m.readtable.literals.lit[tok] then
+                val = m.readtable.literals.lit[tok]
+        else
+                for re, fn in pairs(m.readtable.literals.regex)
+                do
+                        if tok:match(re) then
+                                val = fn(tok)
                        end
+                end
+        end
-                        pop(cs) -- discard '#' ...
+        if val == nil then
-                        tok, cs = consume_token(cs)
+                error("bad literal", tok)
-                        tok = "#" .. tok -- ... then put it back
+        end
+        return val, "literal", cs
+end
-                        local val
+local function consume_whitespace (cs)
-                        if m.readtable.literals.lit[tok] then
+        while #cs > 0 and cs[1]:match("%s") do
-                                val = m.readtable.literals.lit[tok]
+                pop(cs)
-                        else
+        end
-                                for re, fn in pairs(m.readtable.literals.regex)
+        return false, nil, cs
-                                do
+end
-                                        if tok:match(re) then
-                                                val = fn(tok)
+local function consume_numbers_etc (cs)
-                                        end
+        -- Read one token that starts with a digit, '+', '-', or '.'.
+        -- Since numbers can start with +, -, and ., those symbols and ... are
+        -- handled along with numbers.
+        -- Returns the token value, its type ("symbol", "dot", or "number"),
+        -- and the remaining characters.
+        local tok
+        tok, cs = consume_token(cs)
+        if tok:match("^[-+]$") or tok == "..." then
+                return tok, "symbol", cs
+        elseif tok == "." then
+                return tok, "dot", cs
+        else -- number
+                local n = tonumber(tok)
+                if not n then
+                        -- report the offending token; n is always nil here
+                        error("bad number", tok)
+                end
+                return n, "number", cs
+        end
+end
+-- strings
+local function consume_string_whitespace (cs)
+        -- \<intraline ws>*<line ending> <intraline ws>* : nothing
+        --
+        -- Called with cs[1] on the first whitespace character after the '\'.
+        -- Consumes everything it accounts for and returns the text to add to
+        -- the string (nil when the escape is a line continuation and so
+        -- contributes nothing), plus the remaining characters.
+        local s = {"\\"}
+        while cs[1] and cs[1]:match("[ \t]") do
+                table.insert(s, pop(cs))
+        end
+        if cs[1] ~= "\n" then
+                -- not a line continuation: keep the '\' and the whitespace
+                -- verbatim and let consume_string read what follows, so a
+                -- closing '"' or another '\' is still recognised
+                return table.concat(s), cs
+        end
+        pop(cs) -- discard the line ending
+        while cs[1] and cs[1]:match("[ \t]") do
+                pop(cs) -- discard the next line's leading whitespace
+        end
+        return nil, cs
+end
+local function consume_string_hexvalue (cs)
+        -- \x<hex scalar value>; : specified character
+        --
+        -- Called with cs[1] on the 'x'.  Consumes through the closing ';' and
+        -- returns the character encoded as UTF-8 plus the remaining characters.
+        pop(cs) -- discard 'x'; it is not part of the hex value
+        local digits = {}
+        while cs[1] and cs[1] ~= ";" do
+                table.insert(digits, pop(cs))
+        end
+        local hex = table.concat(digits)
+        if cs[1] ~= ";" or not hex:match("^%x+$") then
+                -- no ';' before the input ran out, or not a hex number
+                error("bad string escape", "\\x" .. hex)
+        end
+        pop(cs) -- discard ';'
+        return utf8.char(tonumber(hex, 16)), cs
+end
+local function consume_string (cs)
+        -- Read a string literal; cs[1] is the opening '"'.
+        -- Returns the result of type.string on the collected pieces, the token
+        -- type "string", and the remaining characters.
+        -- NOTE(review): input that runs out before the closing '"' still
+        -- yields a string rather than an error -- confirm this is intended.
+        local str = {}
+        local escapes = {
+                a = "\a",
+                b = "\b",
+                t = "\t",
+                n = "\n",
+                r = "\r",
+                ["\""] = "\"",
+                ["\\"] = "\\",
+                ["|"] = "|",
+                [" "] = consume_string_whitespace,
+                ["\t"] = consume_string_whitespace,
+                ["\n"] = consume_string_whitespace,
+                x = consume_string_hexvalue,
+        }
+        pop(cs) -- discard '"'
+        repeat
+                local c = pop(cs)
+                if c == "\\" then
+                        c = cs[1] -- peek at the escape character
+                        if escapes[c] then
+                                if type.luatype(escapes[c]) == "function" then
+                                        -- handlers consume their own input
+                                        -- and may contribute nothing (nil)
+                                        c, cs = escapes[c](cs)
+                                        if c ~= nil then
+                                                table.insert(str, c)
+                                        end
+                                else
+                                        table.insert(str, escapes[c])
+                                        pop(cs) -- discard the escape character
                                 end
+                        else
+                                -- unknown escape, or a lone '\' at the end of
+                                -- the input: keep it verbatim
+                                table.insert(str, "\\" .. (c or ""))
+                                if c ~= nil then
+                                        pop(cs)
+                                end
                         end
+                elseif c == "\"" then
+                        break
+                else
+                        table.insert(str, c)
+                end
+        until #cs == 0
+        return type.string(str), "string", cs
+end
-                        if val == nil then
+local function consume_char_as (token_type)
-                                error("bad literal", tok)
+        -- return a function that pops a character and returns it with
-                        end
+        -- TOKEN_TYPE
-                        return val, "literal", cs
+        return function (cs) return pop(cs), token_type, cs end
-                end,
+end
+-- each function should take a list of characters and return the token, its
+-- type, and the rest of the characters
+m.readtable = {}
+m.readtable.chars = {
+        ["("] = consume_char_as("open"), -- single character, type "open"
+        [")"] = consume_char_as("close"), -- single character, type "close"
+        ["'"] = consume_char_as("quote"), -- single character, type "quote"
+        ["`"] = consume_char_as("quote"), -- single character, type "quote"
+        [","] = consume_unquote, -- "," or ",@", type "quote"
+        ["\""] = consume_string, -- string literal, type "string"
+        [";"] = consume_comment, -- up to the next newline, type "comment"
+        ["#"] = consume_literal, -- '#' literals, type "literal"
 }
 m.readtable.regex = {
-        ["%s"] = -- whitespace
+        ["%s"] = consume_whitespace, -- whitespace
-                function (cs)
+        ["[%d.+-]"] = consume_numbers_etc, -- numbers and symbols +, -, ., and ...
-                        while #cs > 0 and cs[1]:match("%s") do
-                                pop(cs)
-                        end
-                        return false, nil, cs
-                end,
-        ["[%d.+-]"] = -- numbers and symbols +, -, ., and ...
-                function (cs)
-                        local tok
-                        tok, cs = consume_token(cs)
-                        if tok:match("^[-+]$") or tok == "..." then
-                                return tok, "symbol", cs
-                        elseif tok == "." then
-                                return tok, "dot", cs
-                        else -- number
-                                local n = tonumber(tok)
-                                if not n then
-                                        error("bad number", n)
-                                end
-                                return n, "number", cs
-                        end
-                end,
 }
 m.readtable.default = -- default action if nothing else matches
        function (cs)
diff --git a/type.lua b/type.lua
index c205468..0000bfb 100644
--- a/type.lua
+++ b/type.lua

@@ -207,9 +207,16 @@ function m.list (items, final)
        return tolist(final or m.null, items)
 end
-- strings are vectors of chars
+-- strings are vectors of chars.  not lam characters, but one-character strings.
+-- this is for utf8 ease-of-use... TODO i still need to write functions to pluck
+-- out a single lam character from a string, etc.
 function m.string (x)
-        local t = tochars(tostring(x))
+        local t
+        if m.luatype(x) == "table" then
+                t = x
+        else
+                t = tochars(tostring(x))
+        end
        local mt = {
                __type = "string",
                __tostring =
author	Case Duckworth	2024-04-09 22:48:12 -0500
committer	Case Duckworth	2024-04-09 22:48:12 -0500
commit	ff222d305ecc5625a68e5b61d6f03f6201676dd4 (patch)
tree	2d80436f7008dc6b76aa7a31471d2218065f9f52
parent	Fix buffering (diff)
download	lam-ff222d305ecc5625a68e5b61d6f03f6201676dd4.tar.gz lam-ff222d305ecc5625a68e5b61d6f03f6201676dd4.zip

diff --git a/read.lua b/read.lua index 6d55e23..069df57 100644 --- a/read.lua +++ b/read.lua
@@ -20,94 +20,180 @@ end
20		20
21	---[[ READ TABLE ]]---	21	---[[ READ TABLE ]]---
22		22
23	-- each function should take a list of characters and return the token, its	23	--- helper functions
24	-- type, and the rest of the characters	24
25	m.readtable = {}	25	local function consume_unquote (cs)
26	m.readtable.chars = {	26	pop(cs) -- remove ','
27	["("] = function (cs) return pop(cs), "open", cs end,	27	if cs[1] == "@" then
28	[")"] = function (cs) return pop(cs), "close", cs end,	28	pop(cs) -- remove '@'
29	["'"] = function (cs) return pop(cs), "quote", cs end,	29	return ",@", "quote", cs
30	["`"] = function (cs) return pop(cs), "quote", cs end,	30	else
31	[","] = -- unquote	31	return ",", "quote", cs
32	function (cs)	32	end
33	pop(cs) -- remove ','	33	end
34	if cs[1] == "@" then	34
35	pop(cs) -- remove '@'	35	local function consume_comment (cs)
36	return ",@", "quote", cs	36	local comment = {}
37	else	37	while #cs > 0 and not cs[1]:match("\n") do
38	return ",", "quote", cs	38	table.insert(comment, pop(cs))
39	end	39	end
40	end,	40	return table.concat(comment), "comment", cs
41	[";"] = -- comment	41	end
42	function (cs)
43	local comment = {}
44	while #cs > 0 and not cs[1]:match("\n") do
45	table.insert(comment, pop(cs))
46	end
47	return table.concat(comment), "comment", cs
48	end,
49	["#"] = -- literal
50	function (cs)
51	local tok
52	-- bail on just '#\'
53	if not (cs[2] and cs[3]) then
54	cs = {}
55	error("bad literal", "#\\")
56	end
57		42
58	-- read '#\ ' and such correctly	43	local function consume_literal (cs)
59	if cs[2] == "\\" and cs[3]:match(token_separators) then	44	local tok
60	pop(cs) -- remove '\'	45	-- bail on just '#\'
61	pop(cs) -- remove next character	46	if not (cs[2] and cs[3]) then
62	return type.character(cs[1])	47	cs = {}
		48	error("bad literal", "#\\")
		49	end
		50
		51	-- read '#\ ' and such correctly
		52	if cs[2] == "\\" and cs[3]:match(token_separators) then
		53	pop(cs) -- remove '\'
		54	pop(cs) -- remove next character
		55	return type.character(cs[1])
		56	end
		57
		58	pop(cs) -- discard '#' ...
		59	tok, cs = consume_token(cs)
		60	tok = "#" .. tok -- ... then put it back
		61
		62	local val
		63	if m.readtable.literals.lit[tok] then
		64	val = m.readtable.literals.lit[tok]
		65	else
		66	for re, fn in pairs(m.readtable.literals.regex)
		67	do
		68	if tok:match(re) then
		69	val = fn(tok)
63	end	70	end
		71	end
		72	end
64		73
65	pop(cs) -- discard '#' ...	74	if val == nil then
66	tok, cs = consume_token(cs)	75	error("bad literal", tok)
67	tok = "#" .. tok -- ... then put it back	76	end
		77	return val, "literal", cs
		78	end
68		79
69	local val	80	local function consume_whitespace (cs)
70	if m.readtable.literals.lit[tok] then	81	while #cs > 0 and cs[1]:match("%s") do
71	val = m.readtable.literals.lit[tok]	82	pop(cs)
72	else	83	end
73	for re, fn in pairs(m.readtable.literals.regex)	84	return false, nil, cs
74	do	85	end
75	if tok:match(re) then	86
76	val = fn(tok)	87	local function consume_numbers_etc (cs)
77	end	88	-- Since numbers can start with +, -, and ., those symbols and ... are
		89	-- handled along with numbers.
		90	local tok
		91	tok, cs = consume_token(cs)
		92	if tok:match("^[-+]$") or tok == "..." then
		93	return tok, "symbol", cs
		94	elseif tok == "." then
		95	return tok, "dot", cs
		96	else -- number
		97	local n = tonumber(tok)
		98	if not n then
		99	error("bad number", n)
		100	end
		101	return n, "number", cs
		102	end
		103	end
		104
		105	-- strings
		106
		107	local function consume_string_whitespace (cs)
		108	-- \<intraline ws><line ending> <intraline ws> : nothing
		109	local s = {"\\"}
		110	while cs[1]:match("[ \t]") do
		111	table.insert(s, pop(cs))
		112	end
		113	if cs[1] ~= "\n" then
		114	table.insert(s, cs[1])
		115	return table.concat(s), cs
		116	end
		117	while cs[1]:match("%s") do
		118	pop(cs)
		119	end
		120	return cs[1], cs
		121	end
		122
		123	local function consume_string_hexvalue (cs)
		124	-- \x<hex scalar value>; : specified character
		125	local u8ch = {}
		126	repeat
		127	local c = pop(cs)
		128	table.insert(u8ch, c)
		129	until c == ";"
		130	table.remove(u8ch) -- discard ';'
		131	return utf8.char(tonumber(table.concat(u8ch), 16)), cs
		132	end
		133
		134	local function consume_string (cs)
		135	local str = {}
		136	local escapes = {
		137	a = "\a",
		138	b = "\b",
		139	t = "\t",
		140	n = "\n",
		141	r = "\r",
		142	["\""] = "\"",
		143	["\\"] = "\\",
		144	["\|"] = "\|",
		145	[" "] = consume_string_whitespace,
		146	["\t"] = consume_string_whitespace,
		147	["\n"] = consume_string_whitespace,
		148	x = consume_string_hexvalue,
		149	}
		150	pop(cs) -- discard '"'
		151	repeat
		152	local c = pop(cs)
		153	if c == "\\" then
		154	c = cs[1]
		155	if escapes[c] then
		156	if type.luatype(escapes[c]) == "function" then
		157	c, cs = escapes[c](cs)
		158	table.insert(str, c)
		159	else
		160	table.insert(str, escapes[c])
78	end	161	end
		162	else
		163	table.insert(str, "\\"..c)
79	end	164	end
		165	pop(cs)
		166	elseif c == "\"" then
		167	break
		168	else
		169	table.insert(str, c)
		170	end
		171	until #cs == 0
		172	return type.string(str), "string", cs
		173	end
80		174
81	if val == nil then	175	local function consume_char_as (token_type)
82	error("bad literal", tok)	176	-- return a function that pops a character and returns it with
83	end	177	-- TOKEN_TYPE
84	return val, "literal", cs	178	return function (cs) return pop(cs), token_type, cs end
85	end,	179	end
		180
		181	-- each function should take a list of characters and return the token, its
		182	-- type, and the rest of the characters
		183	m.readtable = {}
		184	m.readtable.chars = {
		185	["("] = consume_char_as("open"),
		186	[")"] = consume_char_as("close"),
		187	["'"] = consume_char_as("quote"),
		188	["`"] = consume_char_as("quote"),
		189	[","] = consume_unquote,
		190	["\""] = consume_string,
		191	[";"] = consume_comment,
		192	["#"] = consume_literal,
86	}	193	}
87	m.readtable.regex = {	194	m.readtable.regex = {
88	["%s"] = -- whitespace	195	["%s"] = consume_whitespace,
89	function (cs)	196	["[%d.+-]"] = consume_number_etc,
90	while #cs > 0 and cs[1]:match("%s") do
91	pop(cs)
92	end
93	return false, nil, cs
94	end,
95	["[%d.+-]"] = -- numbers and symbols +, -, ., and ...
96	function (cs)
97	local tok
98	tok, cs = consume_token(cs)
99	if tok:match("^[-+]$") or tok == "..." then
100	return tok, "symbol", cs
101	elseif tok == "." then
102	return tok, "dot", cs
103	else -- number
104	local n = tonumber(tok)
105	if not n then
106	error("bad number", n)
107	end
108	return n, "number", cs
109	end
110	end,
111	}	197	}
112	m.readtable.default = -- default action if nothing else matches	198	m.readtable.default = -- default action if nothing else matches
113	function (cs)	199	function (cs)


diff --git a/type.lua b/type.lua index c205468..0000bfb 100644 --- a/type.lua +++ b/type.lua
@@ -207,9 +207,16 @@ function m.list (items, final)
207	return tolist(final or m.null, items)	207	return tolist(final or m.null, items)
208	end	208	end
209		209
210	-- strings are vectors of chars	210	-- strings are vectors of chars. not lam characters, but one-character strings.
		211	-- this is for utf8 ease-of-use... TODO i still need to write functions to pluck
		212	-- out a single lam character from a string, etc.
211	function m.string (x)	213	function m.string (x)
212	local t = tochars(tostring(x))	214	local t
		215	if m.luatype(x) == "table" then
		216	t = x
		217	else
		218	t = tochars(tostring(x))
		219	end
213	local mt = {	220	local mt = {
214	__type = "string",	221	__type = "string",
215	__tostring =	222	__tostring =