From 1e06e8f5af834b02fdd7fc48413d3380fa93c974 Mon Sep 17 00:00:00 2001 From: Case Duckworth Date: Thu, 18 Jun 2020 08:24:01 -0500 Subject: Change implementation of URL array --- bollux | 316 +++++++++++++++++++++++++++++++++++++---------------------------- 1 file changed, 182 insertions(+), 134 deletions(-) diff --git a/bollux b/bollux index 8f6b596..db47e9d 100755 --- a/bollux +++ b/bollux @@ -80,7 +80,7 @@ bollux() { log d "BOLLUX_URL='$BOLLUX_URL'" - run blastoff "$BOLLUX_URL" + run blastoff -u "$BOLLUX_URL" } # process command-line arguments @@ -142,6 +142,8 @@ bollux_config() { : "${C_LIST:=0}" # list formatting : "${C_QUOTE:=3}" # quote formatting : "${C_PRE:=0}" # preformatted text formatting + ## state + UC_BLANK=':?:' } # quit happily @@ -170,123 +172,167 @@ prompt() { # prompt [-u] PROMPT [READ_ARGS...] # load a URL blastoff() { # blastoff [-u] URL - local well_formed=true - local proto url + local u + if [[ "$1" == "-u" ]]; then - well_formed=false - shift + u="$(run uwellform "$2")" + else + u="$1" fi - url="$1" - if $well_formed && [[ "$1" != "$BOLLUX_URL" ]]; then - url="$(run transform_resource "$BOLLUX_URL" "$1")" + local -a url + run utransform url "$BOLLUX_URL" "$u" + if ! ucdef url[1]; then + run ucset url[1] "$BOLLUX_PROTO" fi - [[ "$url" != *://* ]] && url="$BOLLUX_PROTO://$url" - url="$(trim_string "$url")" - proto="${url%://*}" - - log d "PROTO='$proto' URL='$url'" { - if declare -Fp "${proto}_request" &>/dev/null; then - run "${proto}_request" "$url" + if declare -Fp "${url[1]}_request" >/dev/null 2>&1; then + run "${url[1]}_request" "$url" else - die 99 "No request handler for '$proto'!" + die 99 "No request handler for '${url[1]}'" fi - } | run normalize | - { - if declare -Fp "${proto}_response" &>/dev/null; then - run "${proto}_response" "$url" - else - log x "No response handler for '$proto', passing through" - passthru - fi - } + } | run normalize | { + if declare -Fp "${url[1]}_response" >/dev/null 2>&1; then + run "${url[1]}_response" "$url" + else + log d "No response handler for '${url[1]}', passing thru" + passthru + fi + } +} + +# URLS +## https://tools.ietf.org/html/rfc3986 +uwellform() { + local u="$1" + + if [[ "$u" != *://* ]]; then + u="$BOLLUX_PROTO://$u" + fi + + u="$(trim_string "$u")" + + printf '%s\n' "$u" +} + +usplit() { # usplit NAME:ARRAY URL:STRING + local re='^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?' + [[ $2 =~ $re ]] || return $? + + local scheme="${BASH_REMATCH[2]}" + local authority="${BASH_REMATCH[4]}" + local path="${BASH_REMATCH[5]}" + local query="${BASH_REMATCH[7]}" + local fragment="${BASH_REMATCH[9]}" + + # 0=url 1=scheme 2=authority 3=path 4=query 5=fragment + local i=1 c + for c in scheme authority path query fragment; do + if [[ "${!c}" || "$c" == path ]]; then + printf -v "$1[$i]" '%s' "${!c}" + else + printf -v "$1[$i]" "$UC_BLANK" + fi + ((i+=1)) + done + printf -v "$1[0]" "$(ujoin "$1")" # inefficient I'm sure } -# transform a URI according to RFC 3986 sec 5.2.2 -transform_resource() { # transform_resource BASE_URL REFERENCE_URL - local -A R B T # reference, base url, target - eval "$(run parse_url B "$1")" - eval "$(run parse_url R "$2")" - # A non-strict parser may ignore a scheme in the reference - # if it is identical to the base URI's scheme. - if ! "${STRICT:-true}" && [[ "${R[scheme]}" == "${B[scheme]}" ]]; then - unset "${R[scheme]}" +ujoin() { # ujoin NAME:ARRAY + local -n U="$1" + + if ucdef U[1]; then + printf -v U[0] "%s:" "${U[1]}" fi - # basically pseudo-code from spec ported to bash - if isdefined "R[scheme]"; then - T[scheme]="${R[scheme]}" - isdefined "R[authority]" && T[authority]="${R[authority]}" - isdefined R[path] && - T[path]="$(run remove_dot_segments "${R[path]}")" - isdefined "R[query]" && T[query]="${R[query]}" + if ucdef U[2]; then + printf -v U[0] "${U[0]}//%s" "${U[2]}" + fi + + printf -v U[0] "${U[0]}%s" "${U[3]}" + + if ucdef U[4]; then + printf -v U[0] "${U[0]}?%s" "${U[4]}" + fi + + if ucdef U[5]; then + printf -v U[0] "${U[0]}#%s" "${U[5]}" + fi + + log d "${U[0]}" +} + +ucdef() { [[ "${!1}" != "$UC_BLANK" ]]; } # ucdef NAME +ucblank() { [[ -z "${!1}" ]]; } # ucblank NAME +ucset() { # ucset NAME VALUE + run eval "${1}='$2'" + run ujoin "${1/\[*\]}" +} + +utransform() { # utransform TARGET:ARRAY BASE:STRING REFERENCE:STRING + local -a B R # base, reference + local -n T="$1" # target + usplit B "$2" + usplit R "$3" + + # initialize T + for ((i=1;i<=5;i++)); do + T[$i]="$UC_BLANK" + done + + # 0=url 1=scheme 2=authority 3=path 4=query 5=fragment + if ucdef R[1]; then + T[1]="${R[1]}" + if ucdef R[2]; then + T[2]="${R[2]}" + fi + if ucdef R[3]; then + T[3]="$(pundot "${R[3]}")" + fi + if ucdef R[4]; then + T[4]="${R[4]}" + fi else - if isdefined "R[authority]"; then - T[authority]="${R[authority]}" - isdefined "R[authority]" && - T[path]="$(remove_dot_segments "${R[path]}")" - isdefined R[query] && T[query]="${R[query]}" + if ucdef R[2]; then + T[2]="${R[2]}" + if ucdef R[2]; then + T[3]="$(pundot "${R[3]}")" + fi + if ucdef R[4]; then + T[4]="${R[4]}" + fi else - if isempty "R[path]"; then - T[path]="${B[path]}" - if isdefined R[query]; then - T[query]="${R[query]}" + if ucblank R[3]; then + T[3]="${B[3]}" + if ucdef R[4]; then + T[4]="${R[4]}" else - T[query]="${B[query]}" + T[4]="${B[4]}" fi else - if [[ "${R[path]}" == /* ]]; then - T[path]="$(remove_dot_segments "${R[path]}")" + if [[ "${R[3]}" == /* ]]; then + T[3]="$(pundot "${R[3]}")" else - T[path]="$(merge_paths "B[authority]" "${B[path]}" "${R[path]}")" - T[path]="$(remove_dot_segments "${T[path]}")" + T[3]="$(pmerge B R)" + T[3]="$(pundot "${T[3]}")" + fi + if ucdef R[4]; then + T[4]="${R[4]}" fi - isdefined R[query] && T[query]="${R[query]}" fi - T[authority]="${B[authority]}" + T[2]="${B[2]}" fi - T[scheme]="${B[scheme]}" + T[1]="${B[1]}" fi - isdefined R[fragment] && T[fragment]="${R[fragment]}" - # cf. 5.3 -- recomposition - local r - isdefined "T[scheme]" && r="$r${T[scheme]}:" - # remove the port from the authority - isdefined "T[authority]" && r="$r//${T[authority]%:*}" - r="$r${T[path]}" - isdefined T[query] && r="$r?${T[query]}" - isdefined T[fragment] && r="$r#${T[fragment]}" - printf '%s\n' "$r" -} - -# merge URL paths according to RFC 3986 sec 5.2.3 -merge_paths() { # merge_paths BASE_AUTHORITY BASE_PATH REFERENCE_PATH - # shellcheck disable=2034 - local B_authority="$1" - local B_path="$2" - local R_path="$3" - # if R_path is empty, get rid of // in B_path - if [[ -z "$R_path" ]]; then - printf '%s\n' "${B_path//\/\//\//}" - return + if ucdef R[5]; then + T[5]="${R[5]}" fi - if isdefined "B_authority" && isempty "B_path"; then - printf '/%s\n' "${R_path//\/\//\//}" - else - if [[ "$B_path" == */* ]]; then - B_path="${B_path%/*}/" - else - B_path="" - fi - printf '%s/%s\n' "${B_path%/}" "${R_path#/}" - fi + ujoin T } -# remove dot segments in paths according to RFC 3986 sec 5.2.4 -remove_dot_segments() { # remove_dot_segments PATH +pundot() { # pundot PATH:STRING local input="$1" local output while [[ "$input" ]]; do @@ -301,7 +347,7 @@ remove_dot_segments() { # remove_dot_segments PATH elif [[ "$input" == . || "$input" == .. ]]; then input= else - [[ $input =~ ^(/?[^/]*)(/?.*)$ ]] || log debug NOMATCH + [[ $input =~ ^(/?[^/]*)(/?.*)$ ]] || return 1 output="$output${BASH_REMATCH[1]}" input="${BASH_REMATCH[2]}" fi @@ -309,36 +355,28 @@ remove_dot_segments() { # remove_dot_segments PATH printf '%s\n' "${output//\/\//\//}" } -# parse a url using the reference regex in RFC 3986 appendix B -parse_url() { # eval "$(split_url NAME STRING)" => NAME[...] - local name="$1" - local string="$2" - local re='^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?' - [[ $string =~ $re ]] || return $? +pmerge() { + local -n b="$1" + local -n r="$2" - local scheme="${BASH_REMATCH[2]}" - local authority="${BASH_REMATCH[4]}" - local path="${BASH_REMATCH[5]}" - local query="${BASH_REMATCH[7]}" - local fragment="${BASH_REMATCH[9]}" + if ucblank r[3]; then + printf '%s\n' "${b[3]//\/\//\//}" + return + fi - for c in scheme authority query fragment; do - [[ "${!c}" ]] && - run printf '%s[%s]=%q\n' "$name" "$c" "${!c}" - done - # unclear if the path is always set even if empty but it looks that way - run printf '%s[path]=%q\n' "$name" "$path" + if ucdef b[2] && ucblank b[3]; then + printf '/%s\n' "${r[3]//\/\//\//}" + else + local bp="" + if [[ "${b[3]}" == */* ]]; then + bp="${b[3]%/*}" + fi + printf '%s/%s\n' "${bp%/}" "${r[3]#/}" + fi } -# is a NAME defined ('set' in bash)? -isdefined() { [[ "${!1+x}" ]]; } # isdefined NAME - -# is a NAME defined AND empty? -isempty() { [[ ! "${!1-x}" ]]; } # isempty NAME - -# work with URLs # https://github.com/dylanaraps/pure-bash-bible/ -urlencode() { # urlencode STRING +uencode() { # uencode URL:STRING local LC_ALL=C for ((i = 0; i < ${#1}; i++)); do : "${1:i:1}" @@ -355,7 +393,7 @@ urlencode() { # urlencode STRING } # https://github.com/dylanaraps/pure-bash-bible/ -urldecode() { # urldecode STRING +udecode() { # udecode URL:STRING : "${1//+/ }" printf '%b\n' "${_//%/\\x}" } @@ -363,19 +401,28 @@ urldecode() { # urldecode STRING # GEMINI # https://gemini.circumlunar.space/docs/specification.html gemini_request() { # gemini_request URL - local url port server - local ssl_cmd - url="$1" - port=1965 - server="${url#*://}" - server="${server%%/*}" + local -a url + usplit url "$1" + + # get rid of userinfo + ucset url[2] "${url[2]#*@}" + + local port + if [[ "${url[2]}" == *:* ]]; then + port="${url[2]#*:}" + ucset url[2] "${url[2]%:*}" + else + port=1965 # TODO variablize + fi - ssl_cmd=(openssl s_client -crlf -quiet -connect "$server:$port") - ssl_cmd+=(-servername "$server") # SNI - # disable old TLS/SSL versions - ssl_cmd+=(-no_ssl3 -no_tls1 -no_tls1_1) + local ssl_cmd=( + openssl s_client + -crlf -quiet -connect "${url[2]}:$port" + -servername "${url[2]}" # SNI + -no_ssl3 -no_tls1 -no_tls1_1 # disable old TLS/SSL versions + ) - run "${ssl_cmd[@]}" <<<"$url" 2>/dev/null + run "${ssl_cmd[@]}" <<<"$url" } gemini_response() { # gemini_response URL @@ -399,7 +446,7 @@ gemini_response() { # gemini_response URL 10) run prompt "$meta" ;; 11) run prompt "$meta" -s ;; # password input esac - run blastoff "?$(urlencode "$REPLY")" + run blastoff "?$(uencode "$REPLY")" ;; 2*) # OK REDIRECTS=0 @@ -480,7 +527,7 @@ gopher_response() { # gopher_response URL cur_server="${BASH_REMATCH[1]}" type="${BASH_REMATCH[6]:-1}" - run history_append "$url" "" # TODO: get the title ?? + run history_append "$url" "" # gopher doesn't really have titles, huh log d "TYPE='$type'" @@ -618,9 +665,10 @@ display() { # display METADATA [TITLE] set_title "$title${title:+ - }bollux" less_cmd=(less -R) # render ANSI color escapes mklesskey "$BOLLUX_LESSKEY" && less_cmd+=(-k "$BOLLUX_LESSKEY") + local helpline="o:open, g/G:goto, [:back, ]:forward, r:refresh" less_cmd+=( -Pm"$(less_prompt_escape "$BOLLUX_URL") - bollux$" # 'status'line - -P='o\:open, g\:goto, [\:back, ]\:forward, r\:refresh$' # helpline + -P="$(less_prompt_escape "$helpline")$" # helpline -m # start with statusline +k # float content to the top ) @@ -910,7 +958,7 @@ handle_keypress() { # handle_keypress CODE run blastoff "$BOLLUX_URL" ;; 53) # G - goto a url (pre-filled with current) - prompt -u GO + run prompt -u GO run blastoff -u "$REPLY" ;; *) # 54-57 -- still available for binding -- cgit 1.4.1-21-gabe81