From 183617a85d7e0fee7ff0d3489a2bd86cf7916420 Mon Sep 17 00:00:00 2001 From: Case Duckworth Date: Thu, 4 Mar 2021 17:49:18 -0600 Subject: Fix a bug with name collision --- bollux | 759 ++++++++++++++++++++++++++++++++++++++--------------------------- 1 file changed, 447 insertions(+), 312 deletions(-) diff --git a/bollux b/bollux index 2de37ab..ebdb22f 100755 --- a/bollux +++ b/bollux @@ -1,8 +1,9 @@ #!/usr/bin/env bash -# bollux: a bash gemini client +################################################################################ +# BOLLUX: a bash gemini client # Author: Case Duckworth # License: MIT -# Version: 0.4.0 +# Version: 0.4.1 # # Commentary: # @@ -46,6 +47,7 @@ # [9]: OpenSSL `s_client' online manual # https://www.openssl.org/docs/manmaster/man1/openssl-s_client.html # +################################################################################ # Code: # Program information @@ -62,139 +64,13 @@ usage: flags: -h show this help and exit -q be quiet: log no messages - -v verbose: log more messages + -v be verbose: log more messages parameters: URL the URL to start in If not provided, the user will be prompted. END } -# UTILITY FUNCTIONS ############################################################ - -# Run a command, but log it first. -# -# See `log' for the available levels. -run() { # run COMMAND... - # I have to add a `trap' here for SIGINT to work properly. - trap bollux_quit SIGINT - log debug "$*" - "$@" -} - -# Exit with an error and a message describing it. -die() { # die EXIT_CODE MESSAGE - local ec="$1" - shift - log error "$*" - exit "$ec" -} - -# Exit with success, printing a fun message. -# -# The default message is from the wonderful show "Cowboy Bebop." -bollux_quit() { - printf '\e[1m%s\e[0m:\t\e[3m%s\e[0m\n' "$PRGN" "$BOLLUX_BYEMSG" - exit -} -# SIGINT is C-c, and I want to make sure bollux quits when it's typed. -trap bollux_quit SIGINT - -# Trim leading and trailing whitespace from a string. -# -# [1]: #trim-leading-and-trailing-white-space-from-string -trim_string() { # trim_string STRING - : "${1#"${1%%[![:space:]]*}"}" - : "${_%"${_##*[![:space:]]}"}" - printf '%s\n' "$_" -} - -# Cycle a variable. -# -# e.g. 'cycle_list one,two,three' => 'two,three,one' -cycle_list() { # cycle_list LIST DELIM - local list="${!1}" delim="$2" - local first="${list%%${delim}*}" - local rest="${list#*${delim}}" - printf -v "$1" '%s%s%s' "${rest}" "${delim}" "${first}" -} - -# Determine the first element of a delimited list. -# -# e.g. 'first one,two,three' => 'one' -first() { # first LIST DELIM - local list="${!1}" delim="$2" - printf '%s\n' "${list%%${delim}*}" -} - -# Log a message to stderr (&2). -# -# TODO: document -log() { # log LEVEL MESSAGE - [[ "$BOLLUX_LOGLEVEL" == QUIET ]] && return - local fmt - - case "$1" in - ([dD]*) # debug - [[ "$BOLLUX_LOGLEVEL" == DEBUG ]] || return - fmt=34 - ;; - ([eE]*) # error - fmt=31 - ;; - (*) fmt=1 ;; - esac - shift - - printf >&2 '\e[%sm%s:%s:\e[0m\t%s\n' "$fmt" "$PRGN" "${FUNCNAME[1]}" "$*" -} - -# Set the terminal title. -set_title() { # set_title STRING - printf '\e]2;%s\007' "$*" -} - -# Prompt the user for input. -# -# This is a thin wrapper around `read', a bash built-in. Because of the -# way bollux messes around with stein and stdout, I need to read directly from -# the TTY with this function. -prompt() { # prompt [-u] PROMPT [READ_ARGS...] - local read_cmd=(read -e -r) - if [[ "$1" == "-u" ]]; then - read_cmd+=(-i "$BOLLUX_URL") - shift - fi - local prompt="$1" - shift - read_cmd+=(-p "$prompt> ") - "${read_cmd[@]}" <(:) || : -} - -# MAIN BOLLUX DISPATCH FUNCTIONS ############################################### - # Main entry point into `bollux'. # # See the `if' block at the bottom of this script. @@ -251,10 +127,15 @@ bollux_config() { if [ -f "$BOLLUX_CONFIG" ]; then log debug "Loading config file '$BOLLUX_CONFIG'" + # Shellcheck gets mad when we try to source a file behind a + # variable -- it doesn't know where it is. This line ignores + # that warning, since the user can put $BOLLUX_CONFIG wherever. # shellcheck disable=1090 . "$BOLLUX_CONFIG" else - log debug "Can't load config file '$BOLLUX_CONFIG'." + # It's an error if bollux can't find the config file, but I + # don't want to kill the program over it. + log error "Can't load config file '$BOLLUX_CONFIG'." fi ## behavior @@ -301,67 +182,185 @@ bollux_config() { UC_BLANK=':?:' # internal use only, should be non-URL chars } -# Load a URL. +# Initialize bollux state +bollux_init() { + # Trap `bollux_cleanup' on quit and exit + trap bollux_cleanup INT QUIT EXIT + # Trap `bollux_quit' on interrupt (C-c) + trap bollux_quit SIGINT + + # Disable pathname expansion. + # + # It's very unlikely the user will want to navigate to a file when + # answering the GO prompt. + set -f + + # Initialize state + # + # Other than $REDIRECTS, bollux's mutable state includes + # $BOLLUX_URL, but that's initialized elsewhere (possibly even by + # the user) + REDIRECTS=0 + + # History + # + # See also `history_append', `history_back', `history_forward' + declare -a HISTORY # history is kept in an array + HN=0 # position of history in the array + run mkdir -p "${BOLLUX_HISTFILE%/*}" + + # Remove $BOLLUX_LESSKEY and re-generate keybindings (to catch rebinds) + run rm -f "$BOLLUX_LESSKEY" + mklesskey +} + +# Cleanup on exit +bollux_cleanup() { + # Stubbed in case of need in future + : +} + +# Exit with success, printing a fun message. # -# I was feeling fancy when I named this function -- a more descriptive name -# would be 'bollux_goto' or something. -blastoff() { # blastoff [-u] URL - local u +# The default message is from the wonderful show "Cowboy Bebop." +bollux_quit() { + printf '\e[1m%s\e[0m:\t\e[3m%s\e[0m\n' "$PRGN" "$BOLLUX_BYEMSG" + exit +} - # `blastoff' assumes a "well-formed" URL by default -- i.e., a URL with - # a protocol string and no extraneous whitespace. Since bollux can't - # trust the user to input a proper URL at a prompt, nor capsule authors - # to fully-form their URLs, so the -u flag is necessary for those - # use-cases. Otherwise, bollux knows the URL is well-formed -- or - # should be, due to the Gemini specification. +# UTILITY FUNCTIONS ############################################################ + +# Run a command, but log it first. +# +# See `log' for the available levels. +run() { # run COMMAND... + # I have to add a `trap' here for SIGINT to work properly. + trap bollux_quit SIGINT + LOG_FUNC=2 log debug "> $*" + "$@" +} + +# Log a message to stderr (&2). +# +# `log' in this script can take 3 different parameters: `d', `e', and `x', where +# `x' is any other string (though I usually use `x'), followed by the message to +# log. Most messages are either `d' (debug) level or `x' (diagnostic) level, +# meaning I want to show them all the time or only when bollux is called with +# `-v' (verbose). The levels are somewhat arbitrary, like I suspect all logging +# levels are, but you can read the rest of bollux to see what I've chosen to +# classify as what. +log() { # log LEVEL MESSAGE... + # 'QUIET' means don't log anything. + [[ "$BOLLUX_LOGLEVEL" == QUIET ]] && return + local fmt # ANSI escape code + + case "$1" in + ([dD]*) # Debug level -- only print if bollux -v. + [[ "$BOLLUX_LOGLEVEL" == DEBUG ]] || return + fmt=34 # Blue + ;; + ([eE]*) # Error level -- always print. + fmt=31 # Red + ;; + (*) # Diagnostic level -- print unless QUIET. + fmt=1 # Bold + ;; + esac + shift + + printf >&2 '\e[%sm%s:%-16s:\e[0m %s\n' \ + "$fmt" "$PRGN" "${FUNCNAME[${LOG_FUNC:-1}]}" "$*" +} + +# Exit with an error and a message describing it. +die() { # die EXIT_CODE MESSAGE + local exit_code="$1" + shift + log error "$*" + exit "$exit_code" +} + +# Trim leading and trailing whitespace from a string. +# +# [1]: #trim-leading-and-trailing-white-space-from-string +trim_string() { # trim_string STRING + : "${1#"${1%%[![:space:]]*}"}" + : "${_%"${_##*[![:space:]]}"}" + printf '%s\n' "$_" +} + +# Cycle a variable in a list given a delimiter. +# +# e.g. 'list_cycle one,two,three ,' => 'two,three,one' +list_cycle() { # list_cycle LIST DELIM + # I could've set up `list_cycle' to use an array instead of a delimited + # string, but the one variable this function is used for is + # T_PRE_DISPLAY, which is user-configurable. I wanted it to be as easy + # to configure for users who might not immediately know the bash array + # syntax, but can figure out 'variable=value' without much thought. + local list="${!1}" # Pass the list by name, not value + local delim="$2" # The delimiter of the string + local first="${list%%${delim}*}" # The first element + local rest="${list#*${delim}}" # The rest of the elements + # -v prints to the variable specified. + printf -v "$1" '%s%s%s' "${rest}" "${delim}" "${first}" +} + +# Set the terminal title. +set_title() { # set_title TITLE... + printf '\e]2;%s\007' "$*" +} + +# Prompt the user for input. +# +# This is a thin wrapper around `read', a bash built-in. Because of the +# way bollux messes around with stdin and stdout, I need to read directly from +# the TTY with this function. +prompt() { # prompt [-u] PROMPT [READ_ARGS...] + # `-e' gets the line "interactively", so it can see history and stuff + # `-r' reads a "raw" string, i.e., without backslash escaping + local read_cmd=(read -e -r) if [[ "$1" == "-u" ]]; then - u="$(run uwellform "$2")" - else - u="$1" + # `-i TEXT' uses TEXT as the initial text for `read' + read_cmd+=(-i "$BOLLUX_URL") + shift fi + local prompt="$1" # How to prompt the user + shift + read_cmd+=(-p "$prompt> ") + "${read_cmd[@]}" /dev/null 2>&1; then - run "${url[1]}_request" "$url" - else - die 99 "No request handler for '${url[1]}'" - fi - } | run normalize | { - if declare -F "${url[1]}_response" >/dev/null 2>&1; then - run "${url[1]}_response" "$url" - else - log d \ - "No response handler for '${url[1]}';" \ - " passing thru" - passthru - fi - } +# Bash built-in replacement for `sleep' +# +# The commentary for `passthru' applies here as well, though I didn't write this +# function -- Dylan Araps did. +# +# [1]: #use-read-as-an-alternative-to-the-sleep-command +sleep() { # sleep SECONDS + read -rt "$1" <> <(:) || : +} + +# Normalize files. +normalize() { + shopt -s extglob # for the printf call below + while IFS= read -r; do + # Normalize line endings to Unix-style (LF) + printf '%s\n' "${REPLY//$'\r'?($'\n')/}" + done + shopt -u extglob # reset 'extglob' } # URLS ######################################################################### @@ -382,16 +381,16 @@ blastoff() { # blastoff [-u] URL # trim whitespace. # # Useful for URLs that were probably input by humans. -uwellform() { - local u="$1" +uwellform() { # uwellform URL + local url="$1" - if [[ "$u" != *://* ]]; then - u="$BOLLUX_PROTO://$u" + if [[ "$url" != *://* ]]; then + url="$BOLLUX_PROTO://$url" fi - u="$(trim_string "$u")" + url="$(trim_string "$url")" - printf '%s\n' "$u" + printf '%s\n' "$url" } # Split a URL into its constituent parts, placing them all in the given array. @@ -406,58 +405,94 @@ uwellform() { # takes the matched URL, splits it using the regex, then assigns each part to an # element of the url array NAME by using `printf -v', which prints to a # variable. -usplit() { # usplit NAME:ARRAY URL:STRING +usplit() { # usplit URL_ARRAY URL + # Note: URL_ARRAY isn't assigned in `usplit', because it should + # already exist. Pass /only/ the name of URL_ARRAY to this + # function, not its contents. local re='^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?' - [[ $2 =~ $re ]] || return $? + local u="$2" + [[ "$u" =~ $re ]] || { + exit_code=$? + log error "usplit: '$2' doesn't match '$re'" + return $? + } # ShellCheck doesn't see that I'm using these variables in the `for' # loop below, because I'm not technically using them /as/ variables, but # as names to the variables. The ${!c} formation in the `printf' call # below performs a reverse lookup on the name to get the actual data. # shellcheck disable=2034 - local url="${BASH_REMATCH[0]}" \ - scheme="${BASH_REMATCH[2]}" \ - authority="${BASH_REMATCH[4]}" \ - path="${BASH_REMATCH[5]}" \ - query="${BASH_REMATCH[7]}" \ - fragment="${BASH_REMATCH[9]}" - + local entire_url="${BASH_REMATCH[0]}" \ + scheme="${BASH_REMATCH[2]}" \ + authority="${BASH_REMATCH[4]}" \ + path="${BASH_REMATCH[5]}" \ + query="${BASH_REMATCH[7]}" \ + fragment="${BASH_REMATCH[9]}" + + # Iterate through the 5 components of a URL and assign them to elements + # of URL_ARRAY, as follows: # 0=url 1=scheme 2=authority 3=path 4=query 5=fragment - local i=1 c + run printf -v "$1[0]" '%s' "$entire_url" + # This loop tests whether the component exists first -- if it + # doesn't, the special variable $UC_BLANK is used in the spot + # instead. Bash doesn't have a useful way of differentiating an + # /unset/ element of an array, versus an /empty/ element. + # The only exception is that 'path' component, which always exists + # in a URL (I think the simplest URL possible is '/', the empty + # path). + local i=1 # begin at 1 -- the full URL is [0]. for c in scheme authority path query fragment; do if [[ "${!c}" || "$c" == path ]]; then - printf -v "$1[$i]" '%s' "${!c}" + run printf -v "$1[$i]" '%s' "${!c}" else - printf -v "$1[$i]" '%s' "$UC_BLANK" + run printf -v "$1[$i]" '%s' "$UC_BLANK" fi ((i += 1)) done - printf -v "$1[0]" '%s' "$url" -} -# Join a URL array (NAME) back into a string. -ujoin() { # ujoin NAME:ARRAY - local -n U="$1" +} - if ucdef U[1]; then - printf -v U[0] "%s:" "${U[1]}" +# Join a URL array, split with `usplit', back into a string, assigning +# it to the 0th element of the array. +ujoin() { # ujoin URL_ARRAY + # Here's the documentation for the '-n' flag: + # + # Give each name the nameref attribute, making it a name reference + # to another variable. That other variable is defined by the value of + # name. All references, assignments, and attribute modifications to + # name, except for those using or changing the -n attribute itself, + # are performed on the variable referenced by name's value. The + # nameref attribute cannot be applied to array variables. + # + # Pretty handy for passing-by-name! Except that last part -- "The + # nameref attribute cannot be applied to array variables." However, + # I've found a clever hack -- you can use 'printf -v' to print the + # value to the array element. + local -n URL_ARRAY="$1" + + # For each possible URL component, check if it exists with `ucdef'. + # If it does, append it (with the correct component delimiter) to + # URL_ARRAY[0]. + if ucdef URL_ARRAY[1]; then + printf -v URL_ARRAY[0] "%s:" "${URL_ARRAY[1]}" fi - if ucdef U[2]; then - printf -v U[0] "${U[0]}//%s" "${U[2]}" + if ucdef URL_ARRAY[2]; then + printf -v URL_ARRAY[0] "${URL_ARRAY[0]}//%s" "${URL_ARRAY[2]}" fi - printf -v U[0] "${U[0]}%s" "${U[3]}" + # The path component is required. + printf -v URL_ARRAY[0] "${URL_ARRAY[0]}%s" "${URL_ARRAY[3]}" - if ucdef U[4]; then - printf -v U[0] "${U[0]}?%s" "${U[4]}" + if ucdef URL_ARRAY[4]; then + printf -v URL_ARRAY[0] "${URL_ARRAY[0]}?%s" "${URL_ARRAY[4]}" fi - if ucdef U[5]; then - printf -v U[0] "${U[0]}#%s" "${U[5]}" + if ucdef URL_ARRAY[5]; then + printf -v URL_ARRAY[0] "${URL_ARRAY[0]}#%s" "${URL_ARRAY[5]}" fi - log d "${U[0]}" + log d "${URL_ARRAY[0]}" } # `ucdef' checks whether a URL component is blank or not -- if a component @@ -466,26 +501,39 @@ ujoin() { # ujoin NAME:ARRAY # not going to really be in a URL). I tried really hard to differentiate an # unset array element from a simply empty one, but like, as far as I could tell, # you can't do that in Bash. -ucdef() { # ucdef NAME - [[ "${!1}" != "$UC_BLANK" ]] +ucdef() { # ucdef COMPONENT + local component="$1" + [[ "${!component}" != "$UC_BLANK" ]] } # `ucblank' determines whether a URL component is blank (""), as opposed to # undefined. -ucblank() { # ucblank NAME - [[ -z "${!1}" ]] +ucblank() { # ucblank COMPONENT + local component="$1" + [[ -z "${!component}" ]] } # `ucset' sets one component of a URL array and setting the 0th element to the # new full URL. Use it instead of directly setting the array element with U[x], # because U[0] will fall out of sync with the rest of the contents. -ucset() { # ucset NAME VALUE - run eval "${1}='$2'" - run ujoin "${1/\[*\]/}" +ucset() { # ucset URL_ARRAY_INDEX NEW_VALUE + local url_array_component="$1" # Of form 'URL_ARRAY[INDEX]' + local value="$2" + + # Assign $value to $url_array_component. + # + # Wrapped in an 'eval' for the extra layer of indirection. + run eval "${url_array_component}='$value'" + + # Rejoin the URL_ARRAY with the changed value. + # + # The substitution here strips the array index subscript (i.e., + # URL[4] => URL), passing the name of the full array to `ujoin'. + run ujoin "${url_array_component/\[*\]/}" } -# [1]: encode a URL using percent-encoding. -uencode() { # uencode URL:STRING +# [1]: Encode a URL using percent-encoding. +uencode() { # uencode URL local LC_ALL=C for ((i = 0; i < ${#1}; i++)); do : "${1:i:1}" @@ -497,14 +545,14 @@ uencode() { # uencode URL:STRING printf '\n' } -# [1]: decode a percent-encoded URL. -udecode() { # udecode URL:STRING +# [1]: Decode a percent-encoded URL. +udecode() { # udecode URL : "${1//+/ }" printf '%b\n' "${_//%/\\x}" } -# Implement [2] § 5.2.4, "Remove Dot Segments" -pundot() { # pundot PATH:STRING +# Implement [2]: 5.2.4, "Remove Dot Segments". +pundot() { # pundot PATH local input="$1" local output while [[ "$input" ]]; do @@ -527,28 +575,28 @@ pundot() { # pundot PATH:STRING printf '%s\n' "${output//\/\//\//}" } -# Implement [2] § 5.2.3, "Merge Paths" -pmerge() { # pmerge BASE:ARRAY REFERENCE:ARRAY - local -n b="$1" - local -n r="$2" +# Implement [2] Section 5.2.3, "Merge Paths". +pmerge() { # pmerge BASE_PATH REFERENCE_PATH + local -n base_path="$1" + local -n reference_path="$2" - if ucblank r[3]; then - printf '%s\n' "${b[3]//\/\//\//}" + if ucblank reference_path[3]; then + printf '%s\n' "${base_path[3]//\/\//\//}" return fi - if ucdef b[2] && ucblank b[3]; then - printf '/%s\n' "${r[3]//\/\//\//}" + if ucdef base_path[2] && ucblank base_path[3]; then + printf '/%s\n' "${reference_path[3]//\/\//\//}" else local bp="" - if [[ "${b[3]}" == */* ]]; then - bp="${b[3]%/*}" + if [[ "${base_path[3]}" == */* ]]; then + bp="${base_path[3]%/*}" fi - printf '%s/%s\n' "${bp%/}" "${r[3]#/}" + printf '%s/%s\n' "${bp%/}" "${reference_path[3]#/}" fi } -# `utransform' implements [2]6 § 5.2.2, "Transform Resources." +# `utransform' implements [2]6 Section 5.2.2, "Transform Resources." # # That section conveniently lays out a pseudocode algorithm describing how URL # resources should be transformed from one to another. This function just @@ -624,19 +672,21 @@ utransform() { # utransform TARGET:ARRAY BASE:STRING REFERENCE:STRING # ################################################################################ -# Request a resource from a gemini server - see [3] §§ 2, 4. +# Request a resource from a gemini server - see [3] Sections 2, 4. gemini_request() { # gemini_request URL local -a url - usplit url "$1" + run usplit url "$1" + log debug "${url[@]}" # Remove user info from the URL. # # URLs can technically be of the form ://:@ - # (see [2], § 3.2, "Authority"). I don't know of any Gemini servers + # (see [2] Section 3.2, "Authority"). I don't know of any Gemini servers # that use the or parts, so `gemini_request' just strips # them from the requested URL. This will need to be changed if servers # decide to use this method of authentication. - ucset url[2] "${url[2]#*@}" + log debug "Removing user info from the URL" + run ucset url[2] "${url[2]#*@}" # Determine the port to request. # @@ -645,6 +695,7 @@ gemini_request() { # gemini_request URL # port can be specified after the domain, separated with a colon. The # user can also request a different default port, for whatever reason, # by setting the variable $BOLLUX_GEMINI_PORT. + log debug "Determining the port to request" local port if [[ "${url[2]}" == *:* ]]; then port="${url[2]#*:}" @@ -680,7 +731,7 @@ gemini_request() { # gemini_request URL run "${ssl_cmd[@]}" <<<"$url" } -# Handle the gemini response - see [3] § 3. +# Handle the gemini response - see [3] Section 3. gemini_response() { # gemini_response URL local code meta # received on the first line of the response local title # determined by a clunky heuristic, see read loop: (2*) @@ -700,12 +751,12 @@ gemini_response() { # gemini_response URL # `download', below), but I'm not sure how to remedy that issue either. # It requires more research. while read -t "$BOLLUX_TIMEOUT" -r code meta || - { (($? > 128)) && die 99 "Timeout."; }; do + { (($? > 128)) && die 99 "Timeout."; }; do break done log d "[$code] $meta" - # Branch depending on the status code. See [3], Appendix 1. + # Branch depending on the status code. See [3] Appendix 1. # # Notes: # - All codes other than 3* (Redirects) reset the REDIRECTS counter. @@ -735,7 +786,7 @@ gemini_response() { # gemini_response URL # # This while loop reads through the file looking for a line # starting with `#', which is a level-one heading in text/gemini - # (see [3], § 5). It assumes that the first such heading is the + # (see [3] Section 5). It assumes that the first such heading is the # title of the page, and uses that title for the terminal title # and for the history. local pretitle @@ -771,7 +822,7 @@ gemini_response() { # gemini_response URL # distinction. I'm not sure what the difference would be in # practice, anyway. # - # Per [4], bollux limits the number of redirects a page is + # Per [4] bollux limits the number of redirects a page is # allowed to make (by default, five). Change `$BOLLUX_MAXREDIR' # to customize that limit. ((REDIRECTS += 1)) @@ -788,7 +839,7 @@ gemini_response() { # gemini_response URL run blastoff "$meta" # TODO: confirm redirect ;; (4*) # TEMPORARY ERROR - # Since the 4* codes ([3], Appendix 1) are all server issues, + # Since the 4* codes ([3] Appendix 1) are all server issues, # bollux can treat them all basically the same. This is an area # that could use some expansion. local desc="Temporary error" @@ -862,7 +913,7 @@ gemini_response() { # gemini_response URL gopher_request() { # gopher_request URL local url="$1" - # [7] § 2.1 + # [7] Section 2.1 [[ "$url" =~ gopher://([^/?#:]*)(:([0-9]+))?(/((.))?(/?.*))?$ ]] local server="${BASH_REMATCH[1]}" \ port="${BASH_REMATCH[3]:-$BOLLUX_GOPHER_PORT}" \ @@ -881,7 +932,7 @@ gopher_request() { # gopher_request URL # Handle a server response. gopher_response() { # gopher_response URL local url="$1" pre=false - # [7] § 2.1 + # [7] Section 2.1 # # Note that this duplicates the code in `gopher_request'. There might # be a good way to thread this data through so that it's not computed @@ -896,7 +947,7 @@ gopher_response() { # gopher_response URL # basically, each line in a gophermap starts with a character, its type, # and then is followed by a series of tab-separated fields describing # where that type is and how to display it. The full list of original - # line types can be found in [6] § 3.8, though the types have also been + # line types can be found in [6] Section 3.8, though the types have also been # extended over the years. Since bollux can only display types that are # text-ish, it only concerns itself with those in this case statement. # All the others are simply downloaded. @@ -930,7 +981,7 @@ gopher_response() { # gopher_response URL fi ;; (*) # Anything else - # The list at [6] § 3.8 includes the following (noted where it + # The list at [6] Section 3.8 includes the following (noted where it # might be good to differently handle them in the future): # # 2. Item is a CSO phone-book server ***** @@ -955,7 +1006,7 @@ gopher_response() { # gopher_response URL # Convert a gophermap naively to a gemini page. # -# Based strongly on [8], but bash-ified. Due to the properties of link lines in +# Based strongly on [8] but bash-ified. Due to the properties of link lines in # gemini, many of the item types in `gemini_reponse' can be linked to the proper # protocol handlers here -- so if a user is trying to reach a TCP link through # gopher, bollux won't have to handle it, for example.* @@ -1013,7 +1064,7 @@ gopher_convert() { pre=false fi printf '=> telnet://%s:%s/%s%s %s\n' \ - "$server" "$port" "$type" "$path" "$label" + "$server" "$port" "$type" "$path" "$label" ;; (*) # other type if $pre; then @@ -1021,7 +1072,7 @@ gopher_convert() { pre=false fi printf '=> gopher://%s:%s/%s%s %s\n' \ - "$server" "$port" "$type" "$path" "$label" + "$server" "$port" "$type" "$path" "$label" ;; esac done @@ -1043,7 +1094,8 @@ gopher_convert() { # display the fetched content display() { # display METADATA [TITLE] local -a less_cmd - local i mime charset + local mime charset + # split header line local -a hdr IFS=';' read -ra hdr <<<"$1" @@ -1156,16 +1208,6 @@ END fi } -# normalize files -normalize() { - shopt -s extglob - while IFS= read -r; do - # normalize line endings - printf '%s\n' "${REPLY//$'\r'?($'\n')/}" - done - shopt -u extglob -} - # typeset a text/gemini document typeset_gemini() { local pre=false @@ -1203,7 +1245,7 @@ typeset_gemini() { ;; (alt | both) $pre && PRE_LINE_FORCE=true \ - gemini_pre "${REPLY#\`\`\`}" + gemini_pre "${REPLY#\`\`\`}" ;; esac continue @@ -1240,13 +1282,13 @@ gemini_link() { printf "\e[${C_SIGIL}m%${S_MARGIN}s ${C_RESET}" "$s" printf "\e[${C_LINK_NUMBER}m[%d]${C_RESET} " "$ln" fold_line -n -B "\e[${C_LINK_TITLE}m" -A "${C_RESET}" \ - -l "$((${#ln} + 3))" -m "${T_MARGIN}" \ - "$WIDTH" "$(trim_string "$t")" + -l "$((${#ln} + 3))" -m "${T_MARGIN}" \ + "$WIDTH" "$(trim_string "$t")" fold_line -B " \e[${C_LINK_URL}m" \ - -A "${C_RESET}" \ - -l "$((${#ln} + 3 + ${#t}))" \ - -m "$((T_MARGIN + ${#ln} + 2))" \ - "$WIDTH" "$a" + -A "${C_RESET}" \ + -l "$((${#ln} + 3 + ${#t}))" \ + -m "$((T_MARGIN + ${#ln} + 2))" \ + "$WIDTH" "$a" else gemini_pre "$1" fi @@ -1264,7 +1306,7 @@ gemini_header() { printf "\e[${C_SIGIL}m%${S_MARGIN}s ${C_RESET}" "$s" fold_line -B "\e[${hdrfmt}m" -A "${C_RESET}" -m "${T_MARGIN}" \ - "$WIDTH" "$t" + "$WIDTH" "$t" else gemini_pre "$1" fi @@ -1279,7 +1321,7 @@ gemini_list() { printf "\e[${C_SIGIL}m%${S_MARGIN}s ${C_RESET}" "$s" fold_line -B "\e[${C_LIST}m" -A "${C_RESET}" -m "$T_MARGIN" \ - "$WIDTH" "$t" + "$WIDTH" "$t" else gemini_pre "$1" fi @@ -1294,7 +1336,7 @@ gemini_quote() { printf "\e[${C_SIGIL}m%${S_MARGIN}s ${C_RESET}" "$s" fold_line -B "\e[${C_QUOTE}m" -A "${C_RESET}" -m "$T_MARGIN" \ - "$WIDTH" "$t" + "$WIDTH" "$t" else gemini_pre "$1" fi @@ -1304,7 +1346,7 @@ gemini_text() { if ! ${2-false}; then printf "%${S_MARGIN}s " ' ' fold_line -m "$T_MARGIN" \ - "$WIDTH" "$1" + "$WIDTH" "$1" else gemini_pre "$1" fi @@ -1411,7 +1453,7 @@ handle_keypress() { # handle_keypress CODE run blastoff -u "$REPLY" ;; (54) # ` - change alt-text visibility and refresh - run cycle_list T_PRE_DISPLAY , + run list_cycle T_PRE_DISPLAY , run blastoff "$BOLLUX_URL" ;; (55) # 55-57 -- still available for binding @@ -1457,7 +1499,19 @@ extract_links() { done } -# download $BOLLUX_URL +# Download a file. +# +# Any non-otherwise-handled MIME type will be downloaded using this function. +# It uses 'dd' to download the resource to a temporary file, then attempts to +# move it to $BOLLUX_DOWNDIR (by default, $PWD). If that's not possible (either +# because the target file already exists or the 'mv' invocation fails for some +# reason), `download' logs the error and alerts the user where the temporary +# file is saved. +# +# `download' works by reading the end of the pipe from `display', which means +# that sometimes, due to something with the way bash or while or ... something +# ... chunks the data, sometimes binary data gets corrupted. This is an area +# that requires more research. download() { tn="$(mktemp)" log x "Downloading: '$BOLLUX_URL' => '$tn'..." @@ -1472,60 +1526,141 @@ download() { fi } -# initialize bollux -bollux_init() { - # Trap cleanup - trap bollux_cleanup INT QUIT EXIT - # State - REDIRECTS=0 - set -f - # History - declare -a HISTORY # history is kept in an array - HN=0 # position of history in the array - run mkdir -p "${BOLLUX_HISTFILE%/*}" - # Remove $BOLLUX_LESSKEY and re-generate keybindings (to catch rebinds) - run rm -f "$BOLLUX_LESSKEY" - mklesskey -} - -# clean up on exit -bollux_cleanup() { - # Stubbed in case of need in future - : -} +# HISTORY ##################################################################### +# +# While bollux saves history to a file ($BOLLUX_HISTFILE), it doesn't /do/ +# anything with the history that's been saved. When I do implement the history +# functionality, it'll probably be on top of a file:// protocol, which will make +# it very simple to also implement bookmarks and the previewing of pages. In +# fact, I should be able to implement this change by the weekend (2021-03-07). +# +############################################################################### -# append a URL to history +# Append a URL to history. history_append() { # history_append URL TITLE - BOLLUX_URL="$1" - # date/time, url, title (best guess) - run printf '%(%FT%T)T\t%s\t%s\n' -1 "$1" "$2" >>"$BOLLUX_HISTFILE" - HISTORY[$HN]="$BOLLUX_URL" + local url="$1" + local title="$2" + + # Print the URL and its title (if given) to $BOLLUX_HISTFILE. + local fmt='' + fmt+='%(%FT%T)T\t' # %(_)T calls directly to 'strftime'. + if (( $# == 2 )); then + fmt+='%s\t' # $url + fmt+='%s\n' # $title + else + fmt+='%s%s\n' # printf needs a field for every argument. + fi + run printf -- "$fmt" -1 "$url" "$title" >>"$BOLLUX_HISTFILE" + + # Add the URL to the HISTORY array and increment the pointer. + HISTORY[$HN]="$url" ((HN += 1)) + + # Update $BOLLUX_URL. + BOLLUX_URL="$url" } -# move back in history (session) +# Move back in session history. history_back() { log d "HN=$HN" + # We need to subtract 2 from HN because it automatically increases by + # one with each call to `history_append'. If we subtract 1, we'll just + # be at the end of the array again, reloading the page. ((HN -= 2)) + if ((HN < 0)); then HN=0 log e "Beginning of history." return 1 fi + run blastoff "${HISTORY[$HN]}" } -# move forward in history (session) +# Move forward in session history. history_forward() { log d "HN=$HN" + if ((HN >= ${#HISTORY[@]})); then HN="${#HISTORY[@]}" log e "End of history." return 1 fi + run blastoff "${HISTORY[$HN]}" } +# Load a URL. +# +# I was feeling fancy when I named this function -- a more descriptive name +# would be 'bollux_goto' or something. +blastoff() { # blastoff [-u] URL + local u + + # `blastoff' assumes a "well-formed" URL by default -- i.e., a URL with + # a protocol string and no extraneous whitespace. Since bollux can't + # trust the user to input a proper URL at a prompt, nor capsule authors + # to fully-form their URLs, so the -u flag is necessary for those + # use-cases. Otherwise, bollux knows the URL is well-formed -- or + # should be, due to the Gemini specification. + if [[ "$1" == "-u" ]]; then + u="$(run uwellform "$2")" + else + u="$1" + fi + + # After ensuring the URL is well-formed, `blastoff' needs to transform + # it according to the transform rules of RFC 3986 (see Section 5.2.2), which + # turns relative references into absolute references that bollux can use + # in its request to the server. That's followed by a check that the + # protocol is set, defaulting to Gemini if it isn't. + # + # Implementation detail: because Bash is really stupid when it comes to + # arrays, the URL functions u* (see below) work with an array defined + # with `local -a' and passed by name, not by value. Thus, the + # `urltransform url ...' instead of `urltransform "${url[@]}"' or + # similar. In addition, the `ucdef' and `ucset' functions take the name + # of the array element as parameters, not the element itself. + local -a url + run utransform url "$BOLLUX_URL" "$u" + if ! ucdef url[1]; then + run ucset url[1] "$BOLLUX_PROTO" + fi + + # To try and keep `bollux' as extensible as possible, I've written it + # only to expect two functions for every protocol it supports: + # `x_request' and `x_response', where `x' is the name of the protocol + # (the first element of the built `url' array). `declare -F' looks only + # for functions in the current scope, failing if it doesn't exist. + # + # In between `x_request' and `x_response', `blastoff' normalizes the + # line endings to UNIX-style (LF) for ease of display. + { + if declare -F "${url[1]}_request" >/dev/null 2>&1; then + run "${url[1]}_request" "$url" + else + die 99 "No request handler for '${url[1]}'" + fi + } | run normalize | { + if declare -F "${url[1]}_response" >/dev/null 2>&1; then + run "${url[1]}_response" "$url" + else + log d \ + "No response handler for '${url[1]}';" \ + " passing thru" + passthru + fi + } +} + +# $BASH_SOURCE is an array that stores the "stack" of source calls in bash. If +# the first element of that array is "bollux", that means the user called this +# script, instead of sourcing it. In that case, and ONLY in that case, should +# bollux actually enter the main loop of the program. Otherwise, allow the +# sourcing environment to simply source this script. +# +# This is basically the equivalent of python's 'if __name__ == "__main__":' +# block. if [[ "${BASH_SOURCE[0]}" == "$0" ]]; then ${DEBUG:-false} && set -x run bollux "$@" -- cgit 1.4.1-21-gabe81