From 5f6d17d3af8052da54afeb170c6986c8cec13d88 Mon Sep 17 00:00:00 2001
From: Case Duckworth
Date: Fri, 8 Jul 2022 16:38:49 -0500
Subject: Tokenize nesting?

---
Review notes (text in this region is not part of the commit message):
 * read() and eval() now walk their input by ascending index; the order
   of `for (k in array)' is unspecified in awk, so tokens could be
   stored and printed out of order.
 * A backslash now really consumes the following character; changing the
   loop variable of a for-in loop did not skip it.
 * A word at the very end of the input is no longer dropped, and a list
   left open at end of input is reported.
 * Working variables are declared as function-local (extra parameters).
 * The patch was re-wrapped from a whitespace-collapsed copy: tab
   indentation and the blank context line closing the last hunk are
   assumptions -- TODO confirm against the pre-image.  The blob ids on
   the "index" line are those of the original patch.

 squ.awk | 75 +++++++++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 72 insertions(+), 3 deletions(-)

(limited to 'squ.awk')

diff --git a/squ.awk b/squ.awk
index d4753a0..2539786 100755
--- a/squ.awk
+++ b/squ.awk
@@ -10,6 +10,9 @@
 BEGIN {
 	nested = 0
 	buffer = ""
+	OFS = "\t"
+	STDERR = "/dev/stderr"
+	PRGN = "squawk"
 }
 
 {
@@ -17,15 +20,81 @@ BEGIN {
 }
 
 END {
-	ast = read(buffer)
+	read(buffer, ast)
 	eval(ast)
 }
 
-function eval(buf)
+function die(message, errcode)
 {
+	# Report MESSAGE (if any) on STDERR, prefixed with the program
+	# name, then exit with status ERRCODE (1 when omitted or zero).
+	eprint(PRGN " ERROR" (message ? ": " message : ""))
+	exit (errcode ? errcode : 1)
 }
 
-function read(buf)
+function eprint(message)
 {
+	# Print MESSAGE to STDERR.
+	print(message) > STDERR
+}
+
+function eval(ast,    i)
+{
+	# Evaluate multi-dimensional array AST.
+	# For now this only prints each token on its own line.
+	# read() stores tokens under consecutive indices from 1, so count
+	# upward: the order of `for (i in ast)' is unspecified in awk.
+	for (i = 1; (i in ast); i++) {
+		print ast[i]
+	}
+}
+
+function read(buf, ast,    ch, i, len, n, word)
+{
+	# Read string BUF into multi-dimensional array AST.
+	# Tokens are stored in AST[1], AST[2], ... in input order and
+	# their count is returned.  A paren is stored as "(" or ")"
+	# followed by its nesting depth, tracked in the global NESTED.
+	split("", ast)	# start from an empty array
+	len = length(buf)
+	n = 0
+	word = ""
+	# Tokenize
+	for (i = 1; i <= len; i++) {
+		ch = substr(buf, i, 1)
+		if (ch == "\\") {
+			# Keep the backslash and take the next character
+			# verbatim, so that it cannot end the word.
+			word = word ch substr(buf, ++i, 1)
+			continue
+		}
+		if (ch !~ /[() \t\r\n\f\v]/) {
+			word = word ch
+			continue
+		}
+		# CH is a delimiter, which ends any word in progress.
+		if (word != "") {
+			ast[++n] = word
+			word = ""
+		}
+		if (ch == "(") {
+			ast[++n] = "(" nested
+			nested++
+		} else if (ch == ")") {
+			nested--
+			if (nested < 0) {
+				die("Unmatched paren at " i)
+			}
+			ast[++n] = ")" nested
+		}
+	}
+	# The input may end inside a word or with a list still open.
+	if (word != "") {
+		ast[++n] = word
+	}
+	if (nested > 0) {
+		die("Unclosed paren at end of input")
+	}
+	return n
 }
 
-- 
cgit 1.4.1-21-gabe81