diff options
author | Case Duckworth | 2020-06-18 08:24:01 -0500 |
---|---|---|
committer | Case Duckworth | 2020-06-18 08:24:01 -0500 |
commit | 1e06e8f5af834b02fdd7fc48413d3380fa93c974 (patch) | |
tree | 4a39a99e07185113e8ecd14280816f23c7031e3f | |
parent | Handle unknown protocols better (by not handling them) (diff) | |
download | bollux-1e06e8f5af834b02fdd7fc48413d3380fa93c974.tar.gz bollux-1e06e8f5af834b02fdd7fc48413d3380fa93c974.zip |
Change implementation of URL array
-rwxr-xr-x | bollux | 316 |
1 files changed, 182 insertions, 134 deletions
diff --git a/bollux b/bollux index 8f6b596..db47e9d 100755 --- a/bollux +++ b/bollux | |||
@@ -80,7 +80,7 @@ bollux() { | |||
80 | 80 | ||
81 | log d "BOLLUX_URL='$BOLLUX_URL'" | 81 | log d "BOLLUX_URL='$BOLLUX_URL'" |
82 | 82 | ||
83 | run blastoff "$BOLLUX_URL" | 83 | run blastoff -u "$BOLLUX_URL" |
84 | } | 84 | } |
85 | 85 | ||
86 | # process command-line arguments | 86 | # process command-line arguments |
@@ -142,6 +142,8 @@ bollux_config() { | |||
142 | : "${C_LIST:=0}" # list formatting | 142 | : "${C_LIST:=0}" # list formatting |
143 | : "${C_QUOTE:=3}" # quote formatting | 143 | : "${C_QUOTE:=3}" # quote formatting |
144 | : "${C_PRE:=0}" # preformatted text formatting | 144 | : "${C_PRE:=0}" # preformatted text formatting |
145 | ## state | ||
146 | UC_BLANK=':?:' | ||
145 | } | 147 | } |
146 | 148 | ||
147 | # quit happily | 149 | # quit happily |
@@ -170,123 +172,167 @@ prompt() { # prompt [-u] PROMPT [READ_ARGS...] | |||
170 | 172 | ||
171 | # load a URL | 173 | # load a URL |
172 | blastoff() { # blastoff [-u] URL | 174 | blastoff() { # blastoff [-u] URL |
173 | local well_formed=true | 175 | local u |
174 | local proto url | 176 | |
175 | if [[ "$1" == "-u" ]]; then | 177 | if [[ "$1" == "-u" ]]; then |
176 | well_formed=false | 178 | u="$(run uwellform "$2")" |
177 | shift | 179 | else |
180 | u="$1" | ||
178 | fi | 181 | fi |
179 | url="$1" | ||
180 | 182 | ||
181 | if $well_formed && [[ "$1" != "$BOLLUX_URL" ]]; then | 183 | local -a url |
182 | url="$(run transform_resource "$BOLLUX_URL" "$1")" | 184 | run utransform url "$BOLLUX_URL" "$u" |
185 | if ! ucdef url[1]; then | ||
186 | run ucset url[1] "$BOLLUX_PROTO" | ||
183 | fi | 187 | fi |
184 | [[ "$url" != *://* ]] && url="$BOLLUX_PROTO://$url" | ||
185 | url="$(trim_string "$url")" | ||
186 | proto="${url%://*}" | ||
187 | |||
188 | log d "PROTO='$proto' URL='$url'" | ||
189 | 188 | ||
190 | { | 189 | { |
191 | if declare -Fp "${proto}_request" &>/dev/null; then | 190 | if declare -Fp "${url[1]}_request" >/dev/null 2>&1; then |
192 | run "${proto}_request" "$url" | 191 | run "${url[1]}_request" "$url" |
193 | else | 192 | else |
194 | die 99 "No request handler for '$proto'!" | 193 | die 99 "No request handler for '${url[1]}'" |
195 | fi | 194 | fi |
196 | } | run normalize | | 195 | } | run normalize | { |
197 | { | 196 | if declare -Fp "${url[1]}_response" >/dev/null 2>&1; then |
198 | if declare -Fp "${proto}_response" &>/dev/null; then | 197 | run "${url[1]}_response" "$url" |
199 | run "${proto}_response" "$url" | 198 | else |
200 | else | 199 | log d "No response handler for '${url[1]}', passing thru" |
201 | log x "No response handler for '$proto', passing through" | 200 | passthru |
202 | passthru | 201 | fi |
203 | fi | 202 | } |
204 | } | 203 | } |
204 | |||
205 | # URLS | ||
206 | ## https://tools.ietf.org/html/rfc3986 | ||
207 | uwellform() { | ||
208 | local u="$1" | ||
209 | |||
210 | if [[ "$u" != *://* ]]; then | ||
211 | u="$BOLLUX_PROTO://$u" | ||
212 | fi | ||
213 | |||
214 | u="$(trim_string "$u")" | ||
215 | |||
216 | printf '%s\n' "$u" | ||
217 | } | ||
218 | |||
219 | usplit() { # usplit NAME:ARRAY URL:STRING | ||
220 | local re='^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?' | ||
221 | [[ $2 =~ $re ]] || return $? | ||
222 | |||
223 | local scheme="${BASH_REMATCH[2]}" | ||
224 | local authority="${BASH_REMATCH[4]}" | ||
225 | local path="${BASH_REMATCH[5]}" | ||
226 | local query="${BASH_REMATCH[7]}" | ||
227 | local fragment="${BASH_REMATCH[9]}" | ||
228 | |||
229 | # 0=url 1=scheme 2=authority 3=path 4=query 5=fragment | ||
230 | local i=1 c | ||
231 | for c in scheme authority path query fragment; do | ||
232 | if [[ "${!c}" || "$c" == path ]]; then | ||
233 | printf -v "$1[$i]" '%s' "${!c}" | ||
234 | else | ||
235 | printf -v "$1[$i]" "$UC_BLANK" | ||
236 | fi | ||
237 | ((i+=1)) | ||
238 | done | ||
239 | printf -v "$1[0]" "$(ujoin "$1")" # inefficient I'm sure | ||
205 | } | 240 | } |
206 | 241 | ||
207 | # transform a URI according to RFC 3986 sec 5.2.2 | 242 | ujoin() { # ujoin NAME:ARRAY |
208 | transform_resource() { # transform_resource BASE_URL REFERENCE_URL | 243 | local -n U="$1" |
209 | local -A R B T # reference, base url, target | 244 | |
210 | eval "$(run parse_url B "$1")" | 245 | if ucdef U[1]; then |
211 | eval "$(run parse_url R "$2")" | 246 | printf -v U[0] "%s:" "${U[1]}" |
212 | # A non-strict parser may ignore a scheme in the reference | ||
213 | # if it is identical to the base URI's scheme. | ||
214 | if ! "${STRICT:-true}" && [[ "${R[scheme]}" == "${B[scheme]}" ]]; then | ||
215 | unset "${R[scheme]}" | ||
216 | fi | 247 | fi |
217 | 248 | ||
218 | # basically pseudo-code from spec ported to bash | 249 | if ucdef U[2]; then |
219 | if isdefined "R[scheme]"; then | 250 | printf -v U[0] "${U[0]}//%s" "${U[2]}" |
220 | T[scheme]="${R[scheme]}" | 251 | fi |
221 | isdefined "R[authority]" && T[authority]="${R[authority]}" | 252 | |
222 | isdefined R[path] && | 253 | printf -v U[0] "${U[0]}%s" "${U[3]}" |
223 | T[path]="$(run remove_dot_segments "${R[path]}")" | 254 | |
224 | isdefined "R[query]" && T[query]="${R[query]}" | 255 | if ucdef U[4]; then |
256 | printf -v U[0] "${U[0]}?%s" "${U[4]}" | ||
257 | fi | ||
258 | |||
259 | if ucdef U[5]; then | ||
260 | printf -v U[0] "${U[0]}#%s" "${U[5]}" | ||
261 | fi | ||
262 | |||
263 | log d "${U[0]}" | ||
264 | } | ||
265 | |||
266 | ucdef() { [[ "${!1}" != "$UC_BLANK" ]]; } # ucdef NAME | ||
267 | ucblank() { [[ -z "${!1}" ]]; } # ucblank NAME | ||
268 | ucset() { # ucset NAME VALUE | ||
269 | run eval "${1}='$2'" | ||
270 | run ujoin "${1/\[*\]}" | ||
271 | } | ||
272 | |||
273 | utransform() { # utransform TARGET:ARRAY BASE:STRING REFERENCE:STRING | ||
274 | local -a B R # base, reference | ||
275 | local -n T="$1" # target | ||
276 | usplit B "$2" | ||
277 | usplit R "$3" | ||
278 | |||
279 | # initialize T | ||
280 | for ((i=1;i<=5;i++)); do | ||
281 | T[$i]="$UC_BLANK" | ||
282 | done | ||
283 | |||
284 | # 0=url 1=scheme 2=authority 3=path 4=query 5=fragment | ||
285 | if ucdef R[1]; then | ||
286 | T[1]="${R[1]}" | ||
287 | if ucdef R[2]; then | ||
288 | T[2]="${R[2]}" | ||
289 | fi | ||
290 | if ucdef R[3]; then | ||
291 | T[3]="$(pundot "${R[3]}")" | ||
292 | fi | ||
293 | if ucdef R[4]; then | ||
294 | T[4]="${R[4]}" | ||
295 | fi | ||
225 | else | 296 | else |
226 | if isdefined "R[authority]"; then | 297 | if ucdef R[2]; then |
227 | T[authority]="${R[authority]}" | 298 | T[2]="${R[2]}" |
228 | isdefined "R[authority]" && | 299 | if ucdef R[2]; then |
229 | T[path]="$(remove_dot_segments "${R[path]}")" | 300 | T[3]="$(pundot "${R[3]}")" |
230 | isdefined R[query] && T[query]="${R[query]}" | 301 | fi |
302 | if ucdef R[4]; then | ||
303 | T[4]="${R[4]}" | ||
304 | fi | ||
231 | else | 305 | else |
232 | if isempty "R[path]"; then | 306 | if ucblank R[3]; then |
233 | T[path]="${B[path]}" | 307 | T[3]="${B[3]}" |
234 | if isdefined R[query]; then | 308 | if ucdef R[4]; then |
235 | T[query]="${R[query]}" | 309 | T[4]="${R[4]}" |
236 | else | 310 | else |
237 | T[query]="${B[query]}" | 311 | T[4]="${B[4]}" |
238 | fi | 312 | fi |
239 | else | 313 | else |
240 | if [[ "${R[path]}" == /* ]]; then | 314 | if [[ "${R[3]}" == /* ]]; then |
241 | T[path]="$(remove_dot_segments "${R[path]}")" | 315 | T[3]="$(pundot "${R[3]}")" |
242 | else | 316 | else |
243 | T[path]="$(merge_paths "B[authority]" "${B[path]}" "${R[path]}")" | 317 | T[3]="$(pmerge B R)" |
244 | T[path]="$(remove_dot_segments "${T[path]}")" | 318 | T[3]="$(pundot "${T[3]}")" |
319 | fi | ||
320 | if ucdef R[4]; then | ||
321 | T[4]="${R[4]}" | ||
245 | fi | 322 | fi |
246 | isdefined R[query] && T[query]="${R[query]}" | ||
247 | fi | 323 | fi |
248 | T[authority]="${B[authority]}" | 324 | T[2]="${B[2]}" |
249 | fi | 325 | fi |
250 | T[scheme]="${B[scheme]}" | 326 | T[1]="${B[1]}" |
251 | fi | 327 | fi |
252 | isdefined R[fragment] && T[fragment]="${R[fragment]}" | 328 | if ucdef R[5]; then |
253 | # cf. 5.3 -- recomposition | 329 | T[5]="${R[5]}" |
254 | local r | ||
255 | isdefined "T[scheme]" && r="$r${T[scheme]}:" | ||
256 | # remove the port from the authority | ||
257 | isdefined "T[authority]" && r="$r//${T[authority]%:*}" | ||
258 | r="$r${T[path]}" | ||
259 | isdefined T[query] && r="$r?${T[query]}" | ||
260 | isdefined T[fragment] && r="$r#${T[fragment]}" | ||
261 | printf '%s\n' "$r" | ||
262 | } | ||
263 | |||
264 | # merge URL paths according to RFC 3986 sec 5.2.3 | ||
265 | merge_paths() { # merge_paths BASE_AUTHORITY BASE_PATH REFERENCE_PATH | ||
266 | # shellcheck disable=2034 | ||
267 | local B_authority="$1" | ||
268 | local B_path="$2" | ||
269 | local R_path="$3" | ||
270 | # if R_path is empty, get rid of // in B_path | ||
271 | if [[ -z "$R_path" ]]; then | ||
272 | printf '%s\n' "${B_path//\/\//\//}" | ||
273 | return | ||
274 | fi | 330 | fi |
275 | 331 | ||
276 | if isdefined "B_authority" && isempty "B_path"; then | 332 | ujoin T |
277 | printf '/%s\n' "${R_path//\/\//\//}" | ||
278 | else | ||
279 | if [[ "$B_path" == */* ]]; then | ||
280 | B_path="${B_path%/*}/" | ||
281 | else | ||
282 | B_path="" | ||
283 | fi | ||
284 | printf '%s/%s\n' "${B_path%/}" "${R_path#/}" | ||
285 | fi | ||
286 | } | 333 | } |
287 | 334 | ||
288 | # remove dot segments in paths according to RFC 3986 sec 5.2.4 | 335 | pundot() { # pundot PATH:STRING |
289 | remove_dot_segments() { # remove_dot_segments PATH | ||
290 | local input="$1" | 336 | local input="$1" |
291 | local output | 337 | local output |
292 | while [[ "$input" ]]; do | 338 | while [[ "$input" ]]; do |
@@ -301,7 +347,7 @@ remove_dot_segments() { # remove_dot_segments PATH | |||
301 | elif [[ "$input" == . || "$input" == .. ]]; then | 347 | elif [[ "$input" == . || "$input" == .. ]]; then |
302 | input= | 348 | input= |
303 | else | 349 | else |
304 | [[ $input =~ ^(/?[^/]*)(/?.*)$ ]] || log debug NOMATCH | 350 | [[ $input =~ ^(/?[^/]*)(/?.*)$ ]] || return 1 |
305 | output="$output${BASH_REMATCH[1]}" | 351 | output="$output${BASH_REMATCH[1]}" |
306 | input="${BASH_REMATCH[2]}" | 352 | input="${BASH_REMATCH[2]}" |
307 | fi | 353 | fi |
@@ -309,36 +355,28 @@ remove_dot_segments() { # remove_dot_segments PATH | |||
309 | printf '%s\n' "${output//\/\//\//}" | 355 | printf '%s\n' "${output//\/\//\//}" |
310 | } | 356 | } |
311 | 357 | ||
312 | # parse a url using the reference regex in RFC 3986 appendix B | 358 | pmerge() { |
313 | parse_url() { # eval "$(split_url NAME STRING)" => NAME[...] | 359 | local -n b="$1" |
314 | local name="$1" | 360 | local -n r="$2" |
315 | local string="$2" | ||
316 | local re='^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?' | ||
317 | [[ $string =~ $re ]] || return $? | ||
318 | 361 | ||
319 | local scheme="${BASH_REMATCH[2]}" | 362 | if ucblank r[3]; then |
320 | local authority="${BASH_REMATCH[4]}" | 363 | printf '%s\n' "${b[3]//\/\//\//}" |
321 | local path="${BASH_REMATCH[5]}" | 364 | return |
322 | local query="${BASH_REMATCH[7]}" | 365 | fi |
323 | local fragment="${BASH_REMATCH[9]}" | ||
324 | 366 | ||
325 | for c in scheme authority query fragment; do | 367 | if ucdef b[2] && ucblank b[3]; then |
326 | [[ "${!c}" ]] && | 368 | printf '/%s\n' "${r[3]//\/\//\//}" |
327 | run printf '%s[%s]=%q\n' "$name" "$c" "${!c}" | 369 | else |
328 | done | 370 | local bp="" |
329 | # unclear if the path is always set even if empty but it looks that way | 371 | if [[ "${b[3]}" == */* ]]; then |
330 | run printf '%s[path]=%q\n' "$name" "$path" | 372 | bp="${b[3]%/*}" |
373 | fi | ||
374 | printf '%s/%s\n' "${bp%/}" "${r[3]#/}" | ||
375 | fi | ||
331 | } | 376 | } |
332 | 377 | ||
333 | # is a NAME defined ('set' in bash)? | ||
334 | isdefined() { [[ "${!1+x}" ]]; } # isdefined NAME | ||
335 | |||
336 | # is a NAME defined AND empty? | ||
337 | isempty() { [[ ! "${!1-x}" ]]; } # isempty NAME | ||
338 | |||
339 | # work with URLs | ||
340 | # https://github.com/dylanaraps/pure-bash-bible/ | 378 | # https://github.com/dylanaraps/pure-bash-bible/ |
341 | urlencode() { # urlencode STRING | 379 | uencode() { # uencode URL:STRING |
342 | local LC_ALL=C | 380 | local LC_ALL=C |
343 | for ((i = 0; i < ${#1}; i++)); do | 381 | for ((i = 0; i < ${#1}; i++)); do |
344 | : "${1:i:1}" | 382 | : "${1:i:1}" |
@@ -355,7 +393,7 @@ urlencode() { # urlencode STRING | |||
355 | } | 393 | } |
356 | 394 | ||
357 | # https://github.com/dylanaraps/pure-bash-bible/ | 395 | # https://github.com/dylanaraps/pure-bash-bible/ |
358 | urldecode() { # urldecode STRING | 396 | udecode() { # udecode URL:STRING |
359 | : "${1//+/ }" | 397 | : "${1//+/ }" |
360 | printf '%b\n' "${_//%/\\x}" | 398 | printf '%b\n' "${_//%/\\x}" |
361 | } | 399 | } |
@@ -363,19 +401,28 @@ urldecode() { # urldecode STRING | |||
363 | # GEMINI | 401 | # GEMINI |
364 | # https://gemini.circumlunar.space/docs/specification.html | 402 | # https://gemini.circumlunar.space/docs/specification.html |
365 | gemini_request() { # gemini_request URL | 403 | gemini_request() { # gemini_request URL |
366 | local url port server | 404 | local -a url |
367 | local ssl_cmd | 405 | usplit url "$1" |
368 | url="$1" | 406 | |
369 | port=1965 | 407 | # get rid of userinfo |
370 | server="${url#*://}" | 408 | ucset url[2] "${url[2]#*@}" |
371 | server="${server%%/*}" | 409 | |
410 | local port | ||
411 | if [[ "${url[2]}" == *:* ]]; then | ||
412 | port="${url[2]#*:}" | ||
413 | ucset url[2] "${url[2]%:*}" | ||
414 | else | ||
415 | port=1965 # TODO variablize | ||
416 | fi | ||
372 | 417 | ||
373 | ssl_cmd=(openssl s_client -crlf -quiet -connect "$server:$port") | 418 | local ssl_cmd=( |
374 | ssl_cmd+=(-servername "$server") # SNI | 419 | openssl s_client |
375 | # disable old TLS/SSL versions | 420 | -crlf -quiet -connect "${url[2]}:$port" |
376 | ssl_cmd+=(-no_ssl3 -no_tls1 -no_tls1_1) | 421 | -servername "${url[2]}" # SNI |
422 | -no_ssl3 -no_tls1 -no_tls1_1 # disable old TLS/SSL versions | ||
423 | ) | ||
377 | 424 | ||
378 | run "${ssl_cmd[@]}" <<<"$url" 2>/dev/null | 425 | run "${ssl_cmd[@]}" <<<"$url" |
379 | } | 426 | } |
380 | 427 | ||
381 | gemini_response() { # gemini_response URL | 428 | gemini_response() { # gemini_response URL |
@@ -399,7 +446,7 @@ gemini_response() { # gemini_response URL | |||
399 | 10) run prompt "$meta" ;; | 446 | 10) run prompt "$meta" ;; |
400 | 11) run prompt "$meta" -s ;; # password input | 447 | 11) run prompt "$meta" -s ;; # password input |
401 | esac | 448 | esac |
402 | run blastoff "?$(urlencode "$REPLY")" | 449 | run blastoff "?$(uencode "$REPLY")" |
403 | ;; | 450 | ;; |
404 | 2*) # OK | 451 | 2*) # OK |
405 | REDIRECTS=0 | 452 | REDIRECTS=0 |
@@ -480,7 +527,7 @@ gopher_response() { # gopher_response URL | |||
480 | cur_server="${BASH_REMATCH[1]}" | 527 | cur_server="${BASH_REMATCH[1]}" |
481 | type="${BASH_REMATCH[6]:-1}" | 528 | type="${BASH_REMATCH[6]:-1}" |
482 | 529 | ||
483 | run history_append "$url" "" # TODO: get the title ?? | 530 | run history_append "$url" "" # gopher doesn't really have titles, huh |
484 | 531 | ||
485 | log d "TYPE='$type'" | 532 | log d "TYPE='$type'" |
486 | 533 | ||
@@ -618,9 +665,10 @@ display() { # display METADATA [TITLE] | |||
618 | set_title "$title${title:+ - }bollux" | 665 | set_title "$title${title:+ - }bollux" |
619 | less_cmd=(less -R) # render ANSI color escapes | 666 | less_cmd=(less -R) # render ANSI color escapes |
620 | mklesskey "$BOLLUX_LESSKEY" && less_cmd+=(-k "$BOLLUX_LESSKEY") | 667 | mklesskey "$BOLLUX_LESSKEY" && less_cmd+=(-k "$BOLLUX_LESSKEY") |
668 | local helpline="o:open, g/G:goto, [:back, ]:forward, r:refresh" | ||
621 | less_cmd+=( | 669 | less_cmd+=( |
622 | -Pm"$(less_prompt_escape "$BOLLUX_URL") - bollux$" # 'status'line | 670 | -Pm"$(less_prompt_escape "$BOLLUX_URL") - bollux$" # 'status'line |
623 | -P='o\:open, g\:goto, [\:back, ]\:forward, r\:refresh$' # helpline | 671 | -P="$(less_prompt_escape "$helpline")$" # helpline |
624 | -m # start with statusline | 672 | -m # start with statusline |
625 | +k # float content to the top | 673 | +k # float content to the top |
626 | ) | 674 | ) |
@@ -910,7 +958,7 @@ handle_keypress() { # handle_keypress CODE | |||
910 | run blastoff "$BOLLUX_URL" | 958 | run blastoff "$BOLLUX_URL" |
911 | ;; | 959 | ;; |
912 | 53) # G - goto a url (pre-filled with current) | 960 | 53) # G - goto a url (pre-filled with current) |
913 | prompt -u GO | 961 | run prompt -u GO |
914 | run blastoff -u "$REPLY" | 962 | run blastoff -u "$REPLY" |
915 | ;; | 963 | ;; |
916 | *) # 54-57 -- still available for binding | 964 | *) # 54-57 -- still available for binding |