about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author: Case Duckworth, 2020-05-28 08:37:40 -0500
committer: Case Duckworth, 2020-05-28 08:37:40 -0500
commit: 2e6b42e5c1e00d946691a0b40f64be1091338519 (patch)
tree: f837d1a91228815d31f98e10c3f0c4fdd4a9ee5c
parent: Change typesetting (diff)
download: bollux-2e6b42e5c1e00d946691a0b40f64be1091338519.tar.gz
bollux-2e6b42e5c1e00d946691a0b40f64be1091338519.zip
Start testing transform_uri
-rw-r--r-- transform_uri.sh 157
1 files changed, 157 insertions, 0 deletions
diff --git a/transform_uri.sh b/transform_uri.sh new file mode 100644 index 0000000..e9c9fc9 --- /dev/null +++ b/transform_uri.sh
@@ -0,0 +1,157 @@
1#!/usr/bin/env bash
2# transform-url
3# cf. https://tools.ietf.org/html/rfc3986#section-5 and
4# cf. https://tools.ietf.org/html/rfc3986#section-5.1
5# cf. also https://tools.ietf.org/html/rfc3986#appendix-B -- regex
6
7# TEST WITH https://tools.ietf.org/html/rfc3986#section-5.4
8
#######################################
# Resolve a URI reference against a base URI (RFC 3986 section 5.2.2) and
# print the recomposed target URI (section 5.3).
# Arguments:
#   $1 - base URI
#   $2 - URI reference to resolve
# Outputs:
#   the target URI, followed by a newline, on stdout
# Notes:
#   parse_url only reports non-empty components, so "undefined" and
#   "defined but empty" cannot be told apart here; both are treated as absent.
#######################################
transform_resource() { # 5.2.2
  declare -A R B T # reference, base url, target (local to this function)
  # NOTE(review): the text printed by parse_url is eval'ed. That is only as
  # safe as parse_url's quoting of URL-derived values -- keep it in mind when
  # passing untrusted URLs.
  eval "$(parse_url R "$2")" # XXX CHANGE
  eval "$(parse_url B "$1")"

  # Basically follows the pseudocode in the spec.
  # "${X[key]+x}" expands to "x" only when the key is set.
  if [[ -n "${R[scheme]+x}" ]]; then
    T[scheme]="${R[scheme]}"
    T[authority]="${R[authority]-}"
    T[path]="$(remove_dot_segments "${R[path]-}")"
    T[query]="${R[query]-}"
  else
    if [[ -n "${R[authority]+x}" ]]; then
      T[authority]="${R[authority]}"
      T[path]="$(remove_dot_segments "${R[path]-}")"
      T[query]="${R[query]-}"
    else
      if [[ -z "${R[path]-}" ]]; then
        # Empty reference path: keep the base path verbatim, and keep the
        # base query unless the reference supplies its own.
        T[path]="${B[path]-}"
        if [[ -n "${R[query]+x}" ]]; then
          T[query]="${R[query]}"
        else
          T[query]="${B[query]-}"
        fi
      else
        if [[ "${R[path]}" == /* ]]; then
          T[path]="$(remove_dot_segments "${R[path]}")"
        else
          # "?" tells merge that the base has no authority component.
          T[path]="$(merge "${B[authority]-?}" \
            "${B[path]-}" "${R[path]}")"
          T[path]="$(remove_dot_segments "${T[path]}")"
        fi
        T[query]="${R[query]-}"
      fi
      T[authority]="${B[authority]-}"
    fi
    T[scheme]="${B[scheme]-}"
  fi
  T[fragment]="${R[fragment]-}"

  # 5.3 -- recomposition: only emit a delimiter when its component is present
  local r=""
  if [[ -n "${T[scheme]}" ]]; then
    r+="${T[scheme]}:"
  fi
  if [[ -n "${T[authority]}" ]]; then
    r+="//${T[authority]}"
  fi
  r+="${T[path]}"
  if [[ -n "${T[query]}" ]]; then
    r+="?${T[query]}"
  fi
  if [[ -n "${T[fragment]}" ]]; then
    r+="#${T[fragment]}"
  fi
  printf '%s\n' "$r"
}
61
#######################################
# Merge a relative-path reference with the base URI path (RFC 3986 5.2.3).
#
# If the base URI has a defined authority component and an empty path, the
# result is "/" followed by the reference's path; otherwise it is the
# reference's path appended to all but the last segment of the base path
# (everything after the right-most "/" is dropped, or the whole base path if
# it contains no "/").
#
# Arguments:
#   $1 - base authority; the literal "?" means "undefined" (see caller)
#   $2 - base path
#   $3 - reference path
# Outputs:
#   the merged path, followed by a newline, on stdout
#######################################
merge() { # 5.2.3
  local base_authority="$1"
  local base_path="$2"
  local ref_path="$3"

  # An empty reference path is not really a merge; hand back the base path
  # untouched so a caller that gets here still ends up with the base path.
  if [[ -z "$ref_path" ]]; then
    printf '%s\n' "$base_path"
    return
  fi

  if [[ "$base_authority" != "?" && -z "$base_path" ]]; then
    printf '/%s\n' "$ref_path"
    return
  fi

  # Keep everything up to and including the right-most "/" of the base path.
  local base_dir=""
  if [[ "$base_path" == */* ]]; then
    base_dir="${base_path%/*}/"
  fi
  # %s, not %q: the result is a URI path, not shell source.
  printf '%s%s\n' "$base_dir" "$ref_path"
}
91
92# I can probably just use normalize_path already in bollux here
#######################################
# Remove "." and ".." segments from a path (RFC 3986 section 5.2.4).
# Arguments:
#   $1 - path to normalise
# Outputs:
#   the normalised path, followed by a newline, on stdout
# Notes:
#   As in the previous implementation, each non-overlapping "//" in the result
#   is collapsed to "/" before printing. That step is not part of RFC 3986.
#######################################
remove_dot_segments() { # 5.2.4
  local input="$1"
  local output=""
  local segment rest

  while [[ -n "$input" ]]; do
    if [[ "$input" == ../* || "$input" == ./* ]]; then
      # A. drop a leading "../" or "./"
      input="${input#*/}"
    elif [[ "$input" == /./* ]]; then
      # B. a leading "/./" becomes "/"
      input="/${input#/./}"
    elif [[ "$input" == "/." ]]; then
      # B. a lone "/." becomes "/"
      input="/"
    elif [[ "$input" == /../* || "$input" == "/.." ]]; then
      # C. a leading "/../" (or a lone "/..") becomes "/", and the last
      #    output segment is removed together with its preceding "/" (if any)
      if [[ "$input" == "/.." ]]; then
        input="/"
      else
        input="/${input#/../}"
      fi
      if [[ "$output" == */* ]]; then
        output="${output%/*}"
      else
        output=""
      fi
    elif [[ "$input" == "." || "$input" == ".." ]]; then
      # D. nothing but a dot segment is left
      input=""
    else
      # E. move the first path segment in the input buffer to the end of
      #    the output buffer, including the initial "/" character (if
      #    any) and any subsequent characters up to, but not including,
      #    the next "/" character or the end of the input buffer.
      if [[ "$input" == /* ]]; then
        rest="${input#/}"
        segment="/${rest%%/*}"
      else
        segment="${input%%/*}"
      fi
      output+="$segment"
      input="${input:${#segment}}"
    fi
  done

  local double_slash='//'
  output=${output//"$double_slash"//} # XXX is this okay....?
  printf '%s\n' "$output"
}
124
125# *FINDING* URLS ... IN PURE BASH !!!
#######################################
# Split a URI into its components using the regular expression from
# RFC 3986 appendix B, and print shell assignments for an associative array.
#
# Usage: declare -A NAME; eval "$(parse_url NAME STRING)" => NAME[...]
#
# Arguments:
#   $1 - name of the (already declared) associative array to fill
#   $2 - URI string to parse
# Outputs:
#   one "NAME[component]=value" line per non-empty component, among scheme,
#   authority, path, query and fragment. Values are quoted with printf %q so
#   the output can be eval'ed without word splitting or expansion of the URL
#   text. Empty components are not emitted.
# Returns:
#   0 on success; the status of the regex match if it fails
#######################################
parse_url() { # eval "$(parse_url NAME STRING)" => NAME[...]
  local name="$1"
  local string="$2"
  local re='^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?'
  [[ $string =~ $re ]] || return $?

  local scheme="${BASH_REMATCH[2]}"
  local authority="${BASH_REMATCH[4]}"
  local path="${BASH_REMATCH[5]}"
  local query="${BASH_REMATCH[7]}"
  local fragment="${BASH_REMATCH[9]}"

  local c
  for c in scheme authority path query fragment; do
    if [[ -n "${!c}" ]]; then
      # %q escapes every shell-special character, not just a fixed subset
      printf '%s[%s]=%q\n' "$name" "$c" "${!c}"
    fi
  done
  return 0
}
144
145# ease-of-life functions
# isdefined NAME
# Succeeds when the variable called NAME is set, whether or not it is empty.
# "${!1+x}" looks the name up indirectly and yields "x" only if it is set.
isdefined() {
  [[ -n "${!1+x}" ]]
}
# isempty NAME
# Succeeds only when the variable called NAME is set AND holds the empty
# string: "${!1-x}" yields "x" for an unset name, so unset names fail.
isempty() {
  [[ -z "${!1-x}" ]]
}
152
# Manual test driver: trace every command, then resolve the reference given
# as the second argument against the base URI given as the first.
set -o xtrace
transform_resource "$@"
155
156# NEXT ....
157# NORMALIZATION !!!