From cf12497051da39b4c3e0b233de89b05ca1ad9627 Mon Sep 17 00:00:00 2001
From: Case Duckworth
Date: Sat, 23 May 2020 19:46:58 -0500
Subject: Add URL normalizing functions for use later

---
Changes after review (the post-image blob id on the index line below is
the one from the original patch):
- normalize_path: use "exit" (awk rejects "return" outside a function),
  walk the segments in index order, make consecutive ".." segments pop
  correctly, reset state per input line.
- split_url: test arr["scheme"] instead of the undefined variable
  "scheme", replace the non-ERE "+?" quantifier, accept an empty query,
  print parts in a fixed order, escape values for the shell.

 bollux | 81 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 80 insertions(+), 1 deletion(-)

diff --git a/bollux b/bollux
index b9d1443..50b4a81 100755
--- a/bollux
+++ b/bollux
@@ -105,7 +105,86 @@ NOT_IMPLEMENTED() { die 200 "NOT IMPLEMENTED!!!"; }
 NOT_FULLY_IMPLEMENTED() { log 1 "NOT FULLY IMPLEMENTED!!!"; }
 
 ### gemini ###
-# normalize a gemini address
+# url functions
+# normalize a path: collapse empty ("//") and "." segments, resolve ".."
+# Reads PATH on stdin and prints the result; the leading "/" is not kept.
+# Exits non-zero if PATH is empty or starts with "//" plus a non-slash.
+normalize_path() { # normalize_path <<< PATH
+	gawk '{
+		# "return" is only valid inside an awk function; exit instead
+		if ($0 == "" || $0 ~ /^\/\/[^\/]/) {
+			exit 1
+		}
+		ret = ""
+		# walk segments by index: for-in order is unspecified in awk
+		n = split($0, path, /\//)
+		for (i = 1; i <= n; i++) {
+			if (path[i] == "" || path[i] == ".") {
+				continue
+			}
+			if (path[i] == "..") {
+				# drop the last segment, even when an earlier ".."
+				# already left a trailing slash behind
+				sub(/[^\/]+\/?$/, "", ret)
+				continue
+			}
+			if (ret == "" || ret ~ /\/$/) {
+				slash = ""
+			} else {
+				slash = "/"
+			}
+			ret = ret slash path[i]
+		}
+		print ret
+	}'
+}
+
+# split a url into the URL array
+# Reads a URL on stdin and prints one URL["part"]="value" line per part
+# found (scheme, authority, path, query, fragment), delimiters included.
+# Characters special inside double quotes are backslash-escaped.
+split_url() { # split_url <<< URL
+	gawk '{
+		delete arr	# no leftovers from a previous input line
+		if (match($0, /^[A-Za-z]+:/)) {
+			arr["scheme"] = substr($0, RSTART, RLENGTH)
+			$0 = substr($0, RLENGTH + 1)
+		}
+		# ERE has no lazy "+?"; it parses as "(...+)?", so spell out "*".
+		# Without "//", an authority is only assumed after a scheme.
+		if (match($0, /^\/\/[^\/?#]*/) ||
+		    (("scheme" in arr) && match($0, /^[^\/?#]+/))) {
+			arr["authority"] = substr($0, RSTART, RLENGTH)
+			$0 = substr($0, RLENGTH + 1)
+		}
+		if (match($0, /^\/?[^?#]+/)) {
+			arr["path"] = substr($0, RSTART, RLENGTH)
+			$0 = substr($0, RLENGTH + 1)
+		}
+		# "*" so that an empty query ("?#frag") does not hide the fragment
+		if (match($0, /^\?[^#]*/)) {
+			arr["query"] = substr($0, RSTART, RLENGTH)
+			$0 = substr($0, RLENGTH + 1)
+		}
+		if (match($0, /^#.*/)) {
+			arr["fragment"] = substr($0, RSTART, RLENGTH)
+			$0 = substr($0, RLENGTH + 1)
+		}
+		# fixed order: for-in order is unspecified in awk
+		n = split("scheme authority path query fragment", parts, " ")
+		for (i = 1; i <= n; i++) {
+			if (!(parts[i] in arr)) {
+				continue
+			}
+			val = arr[parts[i]]
+			# values come from untrusted URLs and the output is shell
+			# syntax, so escape \ " $ and backquote
+			gsub(/[\\"$`]/, "\\\\&", val)
+			printf "URL[\"%s\"]=\"%s\"\n", parts[i], val
+		}
+	}'
+}
+
 # example.com => gemini://example.com/
 _address() { # _address URL
 	addr="$1"
-- 
cgit 1.4.1-21-gabe81