From 2e6b42e5c1e00d946691a0b40f64be1091338519 Mon Sep 17 00:00:00 2001
From: Case Duckworth
Date: Thu, 28 May 2020 08:37:40 -0500
Subject: Start testing transform_uri

---
 transform_uri.sh | 157 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 157 insertions(+)
 create mode 100644 transform_uri.sh

diff --git a/transform_uri.sh b/transform_uri.sh
new file mode 100644
index 0000000..e9c9fc9
--- /dev/null
+++ b/transform_uri.sh
@@ -0,0 +1,157 @@
+#!/usr/bin/env bash
+# transform_uri -- resolve a URI reference against a base URI (RFC 3986)
+# cf. https://tools.ietf.org/html/rfc3986#section-5 and
+# cf. https://tools.ietf.org/html/rfc3986#section-5.1
+# cf. also https://tools.ietf.org/html/rfc3986#appendix-B -- regex
+
+# TEST WITH https://tools.ietf.org/html/rfc3986#section-5.4
+
+transform_resource() { # 5.2.2
+	declare -A R B T           # reference, base url, target
+	eval "$(parse_url R "$2")" # XXX CHANGE
+	eval "$(parse_url B "$1")"
+	# Basically going to follow the pseudocode in the spec.
+	# the '+x' bit after the fields of the arrays tests if they're set
+	if [[ "${R['scheme']+x}" ]]; then
+		T['scheme']="${R['scheme']}"
+		T['authority']="${R['authority']}"
+		T['path']="$(remove_dot_segments "${R['path']}")"
+		T['query']="${R['query']}"
+	else
+		if [[ "${R['authority']+x}" ]]; then
+			T['authority']="${R['authority']}"
+			T['path']="$(remove_dot_segments "${R['path']}")"
+			T['query']="${R['query']}"
+		else
+			if [[ "${R['path']-x}" == "" ]]; then
+				T['path']="${B['path']}"
+				if [[ "${R['query']-x}" ]]; then
+					T['query']="${R['query']}"
+				else
+					T['query']="${B['query']}"
+				fi
+			else
+				if [[ "${R['path']}" == /* ]]; then
+					T['path']="$(remove_dot_segments "${R['path']}")"
+				else
+					T['path']="$(merge "${B['authority']-?}" \
+						"${B['path']}" "${R['path']}")"
+					T['path']="$(remove_dot_segments "${T['path']}")"
+				fi
+				T['query']="${R['query']}"
+			fi
+			T['authority']="${B['authority']}"
+		fi
+		T['scheme']="${B['scheme']}"
+	fi
+	T['fragment']="${R['fragment']}"
+	# 5.3 -- recomposition
+	local r=""
+	[[ "${T['scheme']-x}" ]] &&
+		r="$r${T['scheme']}:"
+	[[ "${T['authority']-x}" ]] &&
+		r="$r//${T['authority']}"
+	r="$r${T['path']}"
+	[[ "${T['query']-x}" ]] &&
+		r="$r?${T['query']}"
+	[[ "${T['fragment']-x}" ]] &&
+		r="$r#${T['fragment']}"
+	printf '%s\n' "$r"
+}
+
+merge() { # 5.2.3 -- usage: merge B_AUTHORITY B_PATH R_PATH
+	#>If the base URI has a defined authority component and an empty
+	#>path, then return a string consisting of "/" concatenated with the
+	#>reference's path; otherwise,
+	#>return a string consisting of the reference's path component
+	#>appended to all but the last segment of the base URI's path (i.e.,
+	#>excluding any characters after the right-most "/" in the base URI
+	#>path, or excluding the entire base URI path if it does not contain
+	#>any "/" characters).
+	#
+	# $1  base authority, or the sentinel "?" when the base has none
+	#     ("?" can never occur inside a real authority)
+	# $2  base path (may be empty)
+	# $3  reference path; when empty the base path is printed unchanged
+	#     (not part of 5.2.3 -- a convenience for empty references)
+	# Prints the merged path on stdout, verbatim: the result is a URI
+	# path, not shell input, so it is written with %s rather than %q.
+	local B_authority="$1" B_path="$2" R_path="$3"
+	if [[ -z "$R_path" ]]; then
+		printf '%s\n' "$B_path"
+	elif [[ "$B_authority" != "?" && -z "$B_path" ]]; then
+		printf '/%s\n' "$R_path"
+	elif [[ "$B_path" == */* ]]; then
+		# ${B_path%/*} drops the last segment; re-add the "/" it ended on
+		printf '%s/%s\n' "${B_path%/*}" "$R_path"
+	else
+		printf '%s\n' "$R_path" # base path has no "/": discard all of it
+	fi
+}
+
+# I can probably just use normalize_path already in bollux here
+remove_dot_segments() { # 5.2.4 -- usage: remove_dot_segments PATH
+	# Print PATH on stdout with its "." and ".." segments resolved, using
+	# the input-buffer/output-buffer loop of the spec (steps A to E).
+	# Segments that merely start with a dot (".g", "..g") are kept.
+	local input="$1" output=""
+	while [[ -n "$input" ]]; do
+		case "$input" in
+		../*) input="${input:3}" ;; # A: drop a leading "../"
+		./*) input="${input:2}" ;;  # A: drop a leading "./"
+		/./* | /.) input="/${input:3}" ;; # B: "/./" or final "/." => "/"
+		/../* | /..)
+			# C: "/../" or a final "/.." becomes "/", and the last segment
+			# (with its preceding "/") is removed from the output
+			input="/${input:4}"
+			[[ "$output" == */* ]] || output="/" # no "/" left: empty it
+			output="${output%/*}"
+			;;
+		. | ..) input="" ;; # D: only a dot segment is left
+		*)
+			# E: move the first segment -- its leading "/" (if any) and
+			# everything up to, not including, the next "/" -- to output
+			[[ $input =~ ^(/?[^/]*)(.*)$ ]]
+			output+="${BASH_REMATCH[1]}"
+			input="${BASH_REMATCH[2]}"
+			;;
+		esac
+	done
+	# NOTE: squeezing "//" to "/" is not part of RFC 3986; it is kept so
+	# a doubled slash produced when joining paths never reaches the result.
+	printf '%s\n' "${output//\/\///}"
+}
+
+# *FINDING* URLS ... IN PURE BASH !!!
+parse_url() { # eval "$(parse_url NAME STRING)" => NAME[...]
+	# Split STRING with the RFC 3986 appendix-B regex and print one
+	# "NAME[component]=value" line per defined component, for eval into
+	# the associative array NAME.  path is always defined; the others
+	# only when their delimiter matched, so "?" alone gives an empty but
+	# defined query.  Values are %q-quoted because the output is eval'd:
+	# URL text such as `cmd` or $(cmd) must never reach the shell raw.
+	local name="$1" string="$2" spec c outer inner
+	local re='^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?'
+	[[ $string =~ $re ]] || return
+	# component:group-with-delimiter:group-with-value
+	for spec in scheme:1:2 authority:3:4 path:5:5 query:6:7 fragment:8:9; do
+		IFS=: read -r c outer inner <<<"$spec"
+		[[ "$c" == path || -n "${BASH_REMATCH[outer]}" ]] &&
+			printf '%s[%s]=%q\n' "$name" "$c" "${BASH_REMATCH[inner]}"
+	done
+	return 0 # an undefined last component must not read as failure
+}
+
+# ease-of-life functions
+isdefined() { # isdefined NAME => status 0 when variable NAME is set, even if empty
+	[[ -n "${!1+x}" ]] # +x yields "x" for any set variable, nothing if unset
+}
+isempty() { # isempty NAME => status 0 only when variable NAME is set AND empty
+	[[ -z "${!1-x}" ]] # -x yields "x" for an unset variable, so unset is not empty
+}
+
+set -x # trace every command to stderr while this script is being tested
+transform_resource "$@" # arguments: $1 = base URI, $2 = URI reference
+
+# NEXT ....
+# NORMALIZATION !!!
-- 
cgit 1.4.1-21-gabe81