about summary refs log tree commit diff stats
diff options
context:
space:
mode:
authorCase Duckworth2020-06-18 08:24:01 -0500
committerCase Duckworth2020-06-18 08:24:01 -0500
commit1e06e8f5af834b02fdd7fc48413d3380fa93c974 (patch)
tree4a39a99e07185113e8ecd14280816f23c7031e3f
parentHandle unknown protocols better (by not handling them) (diff)
downloadbollux-1e06e8f5af834b02fdd7fc48413d3380fa93c974.tar.gz
bollux-1e06e8f5af834b02fdd7fc48413d3380fa93c974.zip
Change implementation of URL array
-rwxr-xr-xbollux316
1 files changed, 182 insertions, 134 deletions
diff --git a/bollux b/bollux index 8f6b596..db47e9d 100755 --- a/bollux +++ b/bollux
@@ -80,7 +80,7 @@ bollux() {
80 80
81 log d "BOLLUX_URL='$BOLLUX_URL'" 81 log d "BOLLUX_URL='$BOLLUX_URL'"
82 82
83 run blastoff "$BOLLUX_URL" 83 run blastoff -u "$BOLLUX_URL"
84} 84}
85 85
86# process command-line arguments 86# process command-line arguments
@@ -142,6 +142,8 @@ bollux_config() {
142 : "${C_LIST:=0}" # list formatting 142 : "${C_LIST:=0}" # list formatting
143 : "${C_QUOTE:=3}" # quote formatting 143 : "${C_QUOTE:=3}" # quote formatting
144 : "${C_PRE:=0}" # preformatted text formatting 144 : "${C_PRE:=0}" # preformatted text formatting
145 ## state
146 UC_BLANK=':?:'
145} 147}
146 148
147# quit happily 149# quit happily
@@ -170,123 +172,167 @@ prompt() { # prompt [-u] PROMPT [READ_ARGS...]
170 172
171# load a URL 173# load a URL
172blastoff() { # blastoff [-u] URL 174blastoff() { # blastoff [-u] URL
173 local well_formed=true 175 local u
174 local proto url 176
175 if [[ "$1" == "-u" ]]; then 177 if [[ "$1" == "-u" ]]; then
176 well_formed=false 178 u="$(run uwellform "$2")"
177 shift 179 else
180 u="$1"
178 fi 181 fi
179 url="$1"
180 182
181 if $well_formed && [[ "$1" != "$BOLLUX_URL" ]]; then 183 local -a url
182 url="$(run transform_resource "$BOLLUX_URL" "$1")" 184 run utransform url "$BOLLUX_URL" "$u"
185 if ! ucdef url[1]; then
186 run ucset url[1] "$BOLLUX_PROTO"
183 fi 187 fi
184 [[ "$url" != *://* ]] && url="$BOLLUX_PROTO://$url"
185 url="$(trim_string "$url")"
186 proto="${url%://*}"
187
188 log d "PROTO='$proto' URL='$url'"
189 188
190 { 189 {
191 if declare -Fp "${proto}_request" &>/dev/null; then 190 if declare -Fp "${url[1]}_request" >/dev/null 2>&1; then
192 run "${proto}_request" "$url" 191 run "${url[1]}_request" "$url"
193 else 192 else
194 die 99 "No request handler for '$proto'!" 193 die 99 "No request handler for '${url[1]}'"
195 fi 194 fi
196 } | run normalize | 195 } | run normalize | {
197 { 196 if declare -Fp "${url[1]}_response" >/dev/null 2>&1; then
198 if declare -Fp "${proto}_response" &>/dev/null; then 197 run "${url[1]}_response" "$url"
199 run "${proto}_response" "$url" 198 else
200 else 199 log d "No response handler for '${url[1]}', passing thru"
201 log x "No response handler for '$proto', passing through" 200 passthru
202 passthru 201 fi
203 fi 202 }
204 } 203}
204
205# URLS
206## https://tools.ietf.org/html/rfc3986
207uwellform() {
208 local u="$1"
209
210 if [[ "$u" != *://* ]]; then
211 u="$BOLLUX_PROTO://$u"
212 fi
213
214 u="$(trim_string "$u")"
215
216 printf '%s\n' "$u"
217}
218
219usplit() { # usplit NAME:ARRAY URL:STRING
220 local re='^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?'
221 [[ $2 =~ $re ]] || return $?
222
223 local scheme="${BASH_REMATCH[2]}"
224 local authority="${BASH_REMATCH[4]}"
225 local path="${BASH_REMATCH[5]}"
226 local query="${BASH_REMATCH[7]}"
227 local fragment="${BASH_REMATCH[9]}"
228
229 # 0=url 1=scheme 2=authority 3=path 4=query 5=fragment
230 local i=1 c
231 for c in scheme authority path query fragment; do
232 if [[ "${!c}" || "$c" == path ]]; then
233 printf -v "$1[$i]" '%s' "${!c}"
234 else
235 printf -v "$1[$i]" "$UC_BLANK"
236 fi
237 ((i+=1))
238 done
239 printf -v "$1[0]" "$(ujoin "$1")" # inefficient I'm sure
205} 240}
206 241
207# transform a URI according to RFC 3986 sec 5.2.2 242ujoin() { # ujoin NAME:ARRAY
208transform_resource() { # transform_resource BASE_URL REFERENCE_URL 243 local -n U="$1"
209 local -A R B T # reference, base url, target 244
210 eval "$(run parse_url B "$1")" 245 if ucdef U[1]; then
211 eval "$(run parse_url R "$2")" 246 printf -v U[0] "%s:" "${U[1]}"
212 # A non-strict parser may ignore a scheme in the reference
213 # if it is identical to the base URI's scheme.
214 if ! "${STRICT:-true}" && [[ "${R[scheme]}" == "${B[scheme]}" ]]; then
215 unset "${R[scheme]}"
216 fi 247 fi
217 248
218 # basically pseudo-code from spec ported to bash 249 if ucdef U[2]; then
219 if isdefined "R[scheme]"; then 250 printf -v U[0] "${U[0]}//%s" "${U[2]}"
220 T[scheme]="${R[scheme]}" 251 fi
221 isdefined "R[authority]" && T[authority]="${R[authority]}" 252
222 isdefined R[path] && 253 printf -v U[0] "${U[0]}%s" "${U[3]}"
223 T[path]="$(run remove_dot_segments "${R[path]}")" 254
224 isdefined "R[query]" && T[query]="${R[query]}" 255 if ucdef U[4]; then
256 printf -v U[0] "${U[0]}?%s" "${U[4]}"
257 fi
258
259 if ucdef U[5]; then
260 printf -v U[0] "${U[0]}#%s" "${U[5]}"
261 fi
262
263 log d "${U[0]}"
264}
265
266ucdef() { [[ "${!1}" != "$UC_BLANK" ]]; } # ucdef NAME
267ucblank() { [[ -z "${!1}" ]]; } # ucblank NAME
268ucset() { # ucset NAME VALUE
269 run eval "${1}='$2'"
270 run ujoin "${1/\[*\]}"
271}
272
273utransform() { # utransform TARGET:ARRAY BASE:STRING REFERENCE:STRING
274 local -a B R # base, reference
275 local -n T="$1" # target
276 usplit B "$2"
277 usplit R "$3"
278
279 # initialize T
280 for ((i=1;i<=5;i++)); do
281 T[$i]="$UC_BLANK"
282 done
283
284 # 0=url 1=scheme 2=authority 3=path 4=query 5=fragment
285 if ucdef R[1]; then
286 T[1]="${R[1]}"
287 if ucdef R[2]; then
288 T[2]="${R[2]}"
289 fi
290 if ucdef R[3]; then
291 T[3]="$(pundot "${R[3]}")"
292 fi
293 if ucdef R[4]; then
294 T[4]="${R[4]}"
295 fi
225 else 296 else
226 if isdefined "R[authority]"; then 297 if ucdef R[2]; then
227 T[authority]="${R[authority]}" 298 T[2]="${R[2]}"
228 isdefined "R[authority]" && 299 if ucdef R[2]; then
229 T[path]="$(remove_dot_segments "${R[path]}")" 300 T[3]="$(pundot "${R[3]}")"
230 isdefined R[query] && T[query]="${R[query]}" 301 fi
302 if ucdef R[4]; then
303 T[4]="${R[4]}"
304 fi
231 else 305 else
232 if isempty "R[path]"; then 306 if ucblank R[3]; then
233 T[path]="${B[path]}" 307 T[3]="${B[3]}"
234 if isdefined R[query]; then 308 if ucdef R[4]; then
235 T[query]="${R[query]}" 309 T[4]="${R[4]}"
236 else 310 else
237 T[query]="${B[query]}" 311 T[4]="${B[4]}"
238 fi 312 fi
239 else 313 else
240 if [[ "${R[path]}" == /* ]]; then 314 if [[ "${R[3]}" == /* ]]; then
241 T[path]="$(remove_dot_segments "${R[path]}")" 315 T[3]="$(pundot "${R[3]}")"
242 else 316 else
243 T[path]="$(merge_paths "B[authority]" "${B[path]}" "${R[path]}")" 317 T[3]="$(pmerge B R)"
244 T[path]="$(remove_dot_segments "${T[path]}")" 318 T[3]="$(pundot "${T[3]}")"
319 fi
320 if ucdef R[4]; then
321 T[4]="${R[4]}"
245 fi 322 fi
246 isdefined R[query] && T[query]="${R[query]}"
247 fi 323 fi
248 T[authority]="${B[authority]}" 324 T[2]="${B[2]}"
249 fi 325 fi
250 T[scheme]="${B[scheme]}" 326 T[1]="${B[1]}"
251 fi 327 fi
252 isdefined R[fragment] && T[fragment]="${R[fragment]}" 328 if ucdef R[5]; then
253 # cf. 5.3 -- recomposition 329 T[5]="${R[5]}"
254 local r
255 isdefined "T[scheme]" && r="$r${T[scheme]}:"
256 # remove the port from the authority
257 isdefined "T[authority]" && r="$r//${T[authority]%:*}"
258 r="$r${T[path]}"
259 isdefined T[query] && r="$r?${T[query]}"
260 isdefined T[fragment] && r="$r#${T[fragment]}"
261 printf '%s\n' "$r"
262}
263
264# merge URL paths according to RFC 3986 sec 5.2.3
265merge_paths() { # merge_paths BASE_AUTHORITY BASE_PATH REFERENCE_PATH
266 # shellcheck disable=2034
267 local B_authority="$1"
268 local B_path="$2"
269 local R_path="$3"
270 # if R_path is empty, get rid of // in B_path
271 if [[ -z "$R_path" ]]; then
272 printf '%s\n' "${B_path//\/\//\//}"
273 return
274 fi 330 fi
275 331
276 if isdefined "B_authority" && isempty "B_path"; then 332 ujoin T
277 printf '/%s\n' "${R_path//\/\//\//}"
278 else
279 if [[ "$B_path" == */* ]]; then
280 B_path="${B_path%/*}/"
281 else
282 B_path=""
283 fi
284 printf '%s/%s\n' "${B_path%/}" "${R_path#/}"
285 fi
286} 333}
287 334
288# remove dot segments in paths according to RFC 3986 sec 5.2.4 335pundot() { # pundot PATH:STRING
289remove_dot_segments() { # remove_dot_segments PATH
290 local input="$1" 336 local input="$1"
291 local output 337 local output
292 while [[ "$input" ]]; do 338 while [[ "$input" ]]; do
@@ -301,7 +347,7 @@ remove_dot_segments() { # remove_dot_segments PATH
301 elif [[ "$input" == . || "$input" == .. ]]; then 347 elif [[ "$input" == . || "$input" == .. ]]; then
302 input= 348 input=
303 else 349 else
304 [[ $input =~ ^(/?[^/]*)(/?.*)$ ]] || log debug NOMATCH 350 [[ $input =~ ^(/?[^/]*)(/?.*)$ ]] || return 1
305 output="$output${BASH_REMATCH[1]}" 351 output="$output${BASH_REMATCH[1]}"
306 input="${BASH_REMATCH[2]}" 352 input="${BASH_REMATCH[2]}"
307 fi 353 fi
@@ -309,36 +355,28 @@ remove_dot_segments() { # remove_dot_segments PATH
309 printf '%s\n' "${output//\/\//\//}" 355 printf '%s\n' "${output//\/\//\//}"
310} 356}
311 357
312# parse a url using the reference regex in RFC 3986 appendix B 358pmerge() {
313parse_url() { # eval "$(split_url NAME STRING)" => NAME[...] 359 local -n b="$1"
314 local name="$1" 360 local -n r="$2"
315 local string="$2"
316 local re='^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?'
317 [[ $string =~ $re ]] || return $?
318 361
319 local scheme="${BASH_REMATCH[2]}" 362 if ucblank r[3]; then
320 local authority="${BASH_REMATCH[4]}" 363 printf '%s\n' "${b[3]//\/\//\//}"
321 local path="${BASH_REMATCH[5]}" 364 return
322 local query="${BASH_REMATCH[7]}" 365 fi
323 local fragment="${BASH_REMATCH[9]}"
324 366
325 for c in scheme authority query fragment; do 367 if ucdef b[2] && ucblank b[3]; then
326 [[ "${!c}" ]] && 368 printf '/%s\n' "${r[3]//\/\//\//}"
327 run printf '%s[%s]=%q\n' "$name" "$c" "${!c}" 369 else
328 done 370 local bp=""
329 # unclear if the path is always set even if empty but it looks that way 371 if [[ "${b[3]}" == */* ]]; then
330 run printf '%s[path]=%q\n' "$name" "$path" 372 bp="${b[3]%/*}"
373 fi
374 printf '%s/%s\n' "${bp%/}" "${r[3]#/}"
375 fi
331} 376}
332 377
333# is a NAME defined ('set' in bash)?
334isdefined() { [[ "${!1+x}" ]]; } # isdefined NAME
335
336# is a NAME defined AND empty?
337isempty() { [[ ! "${!1-x}" ]]; } # isempty NAME
338
339# work with URLs
340# https://github.com/dylanaraps/pure-bash-bible/ 378# https://github.com/dylanaraps/pure-bash-bible/
341urlencode() { # urlencode STRING 379uencode() { # uencode URL:STRING
342 local LC_ALL=C 380 local LC_ALL=C
343 for ((i = 0; i < ${#1}; i++)); do 381 for ((i = 0; i < ${#1}; i++)); do
344 : "${1:i:1}" 382 : "${1:i:1}"
@@ -355,7 +393,7 @@ urlencode() { # urlencode STRING
355} 393}
356 394
357# https://github.com/dylanaraps/pure-bash-bible/ 395# https://github.com/dylanaraps/pure-bash-bible/
358urldecode() { # urldecode STRING 396udecode() { # udecode URL:STRING
359 : "${1//+/ }" 397 : "${1//+/ }"
360 printf '%b\n' "${_//%/\\x}" 398 printf '%b\n' "${_//%/\\x}"
361} 399}
@@ -363,19 +401,28 @@ urldecode() { # urldecode STRING
363# GEMINI 401# GEMINI
364# https://gemini.circumlunar.space/docs/specification.html 402# https://gemini.circumlunar.space/docs/specification.html
365gemini_request() { # gemini_request URL 403gemini_request() { # gemini_request URL
366 local url port server 404 local -a url
367 local ssl_cmd 405 usplit url "$1"
368 url="$1" 406
369 port=1965 407 # get rid of userinfo
370 server="${url#*://}" 408 ucset url[2] "${url[2]#*@}"
371 server="${server%%/*}" 409
410 local port
411 if [[ "${url[2]}" == *:* ]]; then
412 port="${url[2]#*:}"
413 ucset url[2] "${url[2]%:*}"
414 else
415 port=1965 # TODO variablize
416 fi
372 417
373 ssl_cmd=(openssl s_client -crlf -quiet -connect "$server:$port") 418 local ssl_cmd=(
374 ssl_cmd+=(-servername "$server") # SNI 419 openssl s_client
375 # disable old TLS/SSL versions 420 -crlf -quiet -connect "${url[2]}:$port"
376 ssl_cmd+=(-no_ssl3 -no_tls1 -no_tls1_1) 421 -servername "${url[2]}" # SNI
422 -no_ssl3 -no_tls1 -no_tls1_1 # disable old TLS/SSL versions
423 )
377 424
378 run "${ssl_cmd[@]}" <<<"$url" 2>/dev/null 425 run "${ssl_cmd[@]}" <<<"$url"
379} 426}
380 427
381gemini_response() { # gemini_response URL 428gemini_response() { # gemini_response URL
@@ -399,7 +446,7 @@ gemini_response() { # gemini_response URL
399 10) run prompt "$meta" ;; 446 10) run prompt "$meta" ;;
400 11) run prompt "$meta" -s ;; # password input 447 11) run prompt "$meta" -s ;; # password input
401 esac 448 esac
402 run blastoff "?$(urlencode "$REPLY")" 449 run blastoff "?$(uencode "$REPLY")"
403 ;; 450 ;;
404 2*) # OK 451 2*) # OK
405 REDIRECTS=0 452 REDIRECTS=0
@@ -480,7 +527,7 @@ gopher_response() { # gopher_response URL
480 cur_server="${BASH_REMATCH[1]}" 527 cur_server="${BASH_REMATCH[1]}"
481 type="${BASH_REMATCH[6]:-1}" 528 type="${BASH_REMATCH[6]:-1}"
482 529
483 run history_append "$url" "" # TODO: get the title ?? 530 run history_append "$url" "" # gopher doesn't really have titles, huh
484 531
485 log d "TYPE='$type'" 532 log d "TYPE='$type'"
486 533
@@ -618,9 +665,10 @@ display() { # display METADATA [TITLE]
618 set_title "$title${title:+ - }bollux" 665 set_title "$title${title:+ - }bollux"
619 less_cmd=(less -R) # render ANSI color escapes 666 less_cmd=(less -R) # render ANSI color escapes
620 mklesskey "$BOLLUX_LESSKEY" && less_cmd+=(-k "$BOLLUX_LESSKEY") 667 mklesskey "$BOLLUX_LESSKEY" && less_cmd+=(-k "$BOLLUX_LESSKEY")
668 local helpline="o:open, g/G:goto, [:back, ]:forward, r:refresh"
621 less_cmd+=( 669 less_cmd+=(
622 -Pm"$(less_prompt_escape "$BOLLUX_URL") - bollux$" # 'status'line 670 -Pm"$(less_prompt_escape "$BOLLUX_URL") - bollux$" # 'status'line
623 -P='o\:open, g\:goto, [\:back, ]\:forward, r\:refresh$' # helpline 671 -P="$(less_prompt_escape "$helpline")$" # helpline
624 -m # start with statusline 672 -m # start with statusline
625 +k # float content to the top 673 +k # float content to the top
626 ) 674 )
@@ -910,7 +958,7 @@ handle_keypress() { # handle_keypress CODE
910 run blastoff "$BOLLUX_URL" 958 run blastoff "$BOLLUX_URL"
911 ;; 959 ;;
912 53) # G - goto a url (pre-filled with current) 960 53) # G - goto a url (pre-filled with current)
913 prompt -u GO 961 run prompt -u GO
914 run blastoff -u "$REPLY" 962 run blastoff -u "$REPLY"
915 ;; 963 ;;
916 *) # 54-57 -- still available for binding 964 *) # 54-57 -- still available for binding