about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author: Case Duckworth, 2020-05-30 13:38:18 -0500
committer: Case Duckworth, 2020-05-30 13:38:18 -0500
commit: c62e428c168c151ac852df6f578e3349d4c831e3 (patch)
tree: d7e2caad96d48ac1d878ed2b4437306250dd0804
parent: Backup (diff)
download: bollux-c62e428c168c151ac852df6f578e3349d4c831e3.tar.gz
download: bollux-c62e428c168c151ac852df6f578e3349d4c831e3.zip
Bring URL transforming up to spec
URL transformation (formerly called "munging") is now, as far as I can
tell, fully compliant with reference resolution as specified in RFC 3986,
section 5. The transformation is also implemented in pure bash and is
available as a library at https://git.sr.ht/~acdw/shurlie.
-rwxr-xr-x bollux 198
1 files changed, 118 insertions, 80 deletions
diff --git a/bollux b/bollux
index a74db7d..0686c21 100755
--- a/bollux
+++ b/bollux
@@ -122,7 +122,7 @@ blastoff() { # load a url
122 URL="$1" 122 URL="$1"
123 123
124 if $well_formed && [[ "$1" != "$BOLLUX_URL" ]]; then 124 if $well_formed && [[ "$1" != "$BOLLUX_URL" ]]; then
125 URL="$(run munge_url "$1" "$BOLLUX_URL")" 125 URL="$(run transform_resource "$BOLLUX_URL" "$1")"
126 fi 126 fi
127 [[ "$URL" != *://* ]] && URL="$BOLLUX_PROTO://$URL" 127 [[ "$URL" != *://* ]] && URL="$BOLLUX_PROTO://$URL"
128 URL="$(trim <<<"$URL")" 128 URL="$(trim <<<"$URL")"
@@ -134,95 +134,133 @@ blastoff() { # load a url
134 run handle_response "$URL" 134 run handle_response "$URL"
135} 135}
136 136
137munge_url() { 137transform_resource() { # transform_resource BASE_URL REFERENCE_URL
138 local -A new old u 138 declare -A R B T # reference, base url, target
139 eval "$(split_url new <<<"$1")" 139 eval "$(parse_url B "$1")"
140 for k in "${!new[@]}"; do log d "new[$k]=${new[$k]}"; done 140 eval "$(parse_url R "$2")"
141 eval "$(split_url old <<<"$2")" 141 # A non-strict parser may ignore a scheme in the reference
142 for k in "${!old[@]}"; do log d "old[$k]=${old[$k]}"; done 142 # if it is identical to the base URI's scheme.
143 143 if ! "${STRICT:-true}" && [[ "${R[scheme]}" == "${B[scheme]}" ]]; then
144 u['scheme']="${new['scheme']:-${old['scheme']:-}}" 144 unset "${R[scheme]}"
145 u['authority']="${new['authority']:-${old['authority']:-}}" 145 fi
146 # XXX this whole path thing is wack 146
147 if [[ "${new['path']+isset}" ]]; then 147 # basically pseudo-code from spec ported to bash
148 log d 'new path set' 148 if isdefined "R[scheme]"; then
149 if [[ "${new['path']}" == /* ]]; then 149 T[scheme]="${R[scheme]}"
150 log d 'new path == /*' 150 isdefined "R[authority]" && T[authority]="${R[authority]}"
151 u['path']="${new['path']}" 151 isdefined R[path] &&
152 elif [[ "${new['authority']}" == "${old['authority']}" || ! "${new['authority']+isset}" ]]; then 152 T[path]="$(remove_dot_segments "${R[path]}")"
153 p="${old['path']:-}/${new['path']}" 153 isdefined "R[query]" && T[query]="${R[query]}"
154 log d "$p ( $(normalize_path <<<"$p") )" 154 else
155 u['path']="$(normalize_path <<<"$p")" 155 if isdefined "R[authority]"; then
156 T[authority]="${R[authority]}"
157 isdefined "R[authority]" &&
158 T[path]="$(remove_dot_segments "${R[path]}")"
159 isdefined R[query] && T[query]="${R[query]}"
156 else 160 else
157 log d 'u path = new path' 161 if isempty "R[path]"; then
158 u['path']="${new['path']}" 162 T[path]="${B[path]}"
163 if isdefined R[query]; then
164 T[query]="${R[query]}"
165 else
166 T[query]="${B[query]}"
167 fi
168 else
169 if [[ "${R[path]}" == /* ]]; then
170 T[path]="$(remove_dot_segments "${R[path]}")"
171 else
172 T[path]="$(merge_paths "B[authority]" "${B[path]}" "${R[path]}")"
173 T[path]="$(remove_dot_segments "${T[path]}")"
174 fi
175 isdefined R[query] && T[query]="${R[query]}"
176 fi
177 T[authority]="${B[authority]}"
159 fi 178 fi
160 elif [[ "${new['query']+isset}" || "${new['fragment']+isset}" ]]; then 179 T[scheme]="${B[scheme]}"
161 log d 'u path = old path' 180 fi
162 u['path']="${old['path']}" 181 isdefined R[fragment] && T[fragment]="${R[fragment]}"
163 else 182 # cf. 5.3 -- recomposition
164 u['path']="/" 183 local r=""
184 isdefined "T[scheme]" && r="$r${T[scheme]}:"
185 isdefined "T[authority]" && r="$r//${T[authority]}"
186 r="$r${T[path]}"
187 isdefined T[query] && r="$r?${T[query]}"
188 isdefined T[fragment] && r="$r#${T[fragment]}"
189 printf '%s\n' "$r"
190}
191
192merge_paths() { # 5.2.3
193 # shellcheck disable=2034
194 B_authority="$1"
195 B_path="$2"
196 R_path="$3"
197 # if R_path is empty, get rid of // in B_path
198 if [[ -z "$R_path" ]]; then
199 printf '%s\n' "${B_path//\/\//\//}"
200 return
165 fi 201 fi
166 u['query']="${new['query']:-}"
167 u['fragment']="${new['fragment']:-}"
168 for k in "${!u[@]}"; do log d "u[$k]=${u[$k]}"; done
169 202
170 run printf '%s%s%s%s%s\n' \ 203 if isdefined "B_authority" && isempty "B_path"; then
171 "${u['scheme']}" "${u['authority']}" "${u['path']}" \ 204 printf '/%s\n' "${R_path//\/\//\//}"
172 "${u['query']}" "${u['fragment']}" 205 else
206 if [[ "$B_path" == */* ]]; then
207 B_path="${B_path%/*}/"
208 else
209 B_path=""
210 fi
211 printf '%s/%s\n' "${B_path%/}" "${R_path#/}"
212 fi
173} 213}
174 214
175normalize_path() { 215remove_dot_segments() { # 5.2.4
176 gawk '{ 216 local input="$1"
177 split($0, path, /\//) 217 local output=
178 for (c in path) { 218 # ^/\.(/|$) - BASH_REMATCH[0]
179 if (path[c] == "" || path[c] == ".") { 219 while [[ "$input" ]]; do
180 continue 220 if [[ "$input" =~ ^\.\.?/ ]]; then
181 } 221 input="${input#${BASH_REMATCH[0]}}"
182 if (path[c] == "..") { 222 elif [[ "$input" =~ ^/\.(/|$) ]]; then
183 sub(/[^\/]+$/, "", ret) 223 input="/${input#${BASH_REMATCH[0]}}"
184 continue 224 elif [[ "$input" =~ ^/\.\.(/|$) ]]; then
185 } 225 input="/${input#${BASH_REMATCH[0]}}"
186 if (! ret || match(ret, /\/$/)) { 226 [[ "$output" =~ /?[^/]+$ ]]
187 slash = "" 227 output="${output%${BASH_REMATCH[0]}}"
188 } else { 228 elif [[ "$input" == . || "$input" == .. ]]; then
189 slash = "/" 229 input=
190 } 230 else
191 ret = ret slash path[c] 231 [[ $input =~ ^(/?[^/]*)(/?.*)$ ]] || echo NOMATCH >&2
192 } 232 output="$output${BASH_REMATCH[1]}"
193 print (ret ~ /^\// ? "" : "/") ret 233 input="${BASH_REMATCH[2]}"
194 }' 234 fi
235 done
236 printf '%s\n' "${output//\/\//\//}"
195} 237}
196 238
197split_url() { 239parse_url() { # eval "$(split_url NAME STRING)" => NAME[...]
198 gawk -vvar="$1" '{ 240 local name="$1"
199 if (match($0, /^[A-Za-z]+:/)) { 241 local string="$2"
200 arr["scheme"] = substr($0, RSTART, RLENGTH) 242 local re='^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?'
201 $0 = substr($0, RLENGTH + 1) 243 [[ $string =~ $re ]] || return $?
202 } 244
203 if (match($0, /^\/\/[^\/?#]+?/) || (match($0, /^[^\/?#]+?/) && scheme)) { 245 local scheme="${BASH_REMATCH[2]}"
204 arr["authority"] = substr($0, RSTART, RLENGTH) 246 local authority="${BASH_REMATCH[4]}"
205 $0 = substr($0, RLENGTH + 1) 247 local path="${BASH_REMATCH[5]}"
206 } 248 local query="${BASH_REMATCH[7]}"
207 if (match($0, /^\/?[^?#]+/)) { 249 local fragment="${BASH_REMATCH[9]}"
208 arr["path"] = substr($0, RSTART, RLENGTH) 250
209 $0 = substr($0, RLENGTH + 1) 251 for c in scheme authority query fragment; do
210 } 252 [[ "${!c}" ]] &&
211 if (match($0, /^\?[^#]+/)) { 253 printf '%s[%s]=%q\n' "$name" "$c" "${!c}"
212 arr["query"] = substr($0, RSTART, RLENGTH) 254 done
213 $0 = substr($0, RLENGTH + 1) 255 # unclear if the path is always set even if empty but it looks that way
214 } 256 printf '%s[path]=%q\n' "$name" "$path"
215 if (match($0, /^#.*/)) {
216 arr["fragment"] = substr($0, RSTART, RLENGTH)
217 $0 = substr($0, RLENGTH + 1)
218 }
219 for (part in arr) {
220 sub(/[[:space:]]+$/, "", arr[part])
221 printf var "[\"%s\"]=\"%s\"\n", part, arr[part]
222 }
223 }'
224} 257}
225 258
259# is a NAME defined ('set' in bash)?
260isdefined() { [[ "${!1+x}" ]]; } # isdefined NAME
261# is a NAME defined AND empty?
262isempty() { [[ ! "${!1-x}" ]]; } # isempty NAME
263
226request_url() { 264request_url() {
227 local server="$1" 265 local server="$1"
228 local port="$2" 266 local port="$2"