From c274c4f7234bca5d0e4768902be051199d97b5d0 Mon Sep 17 00:00:00 2001 From: Case Duckworth Date: Wed, 3 Mar 2021 12:56:42 -0600 Subject: Keep documenting --- bollux | 432 ++++++++++++++++++++++++++++++++++++++++------------------------- 1 file changed, 270 insertions(+), 162 deletions(-) diff --git a/bollux b/bollux index a4e9584..46c381b 100755 --- a/bollux +++ b/bollux @@ -23,17 +23,28 @@ # things. That's a major project though, and I'm scared. # # The following works were referenced when writing this, and I've tried to -# credit them in comments below. Following each link, I'll include a "short -# code" that I'll use to reference them in those comments, if necessary to keep -# them shorter than 80 characters. +# credit them in comments below. Further in the commentary on this script, I'll +# include the following link numbers to refer to these documents, in order to +# keep the line length as short as possible. # -# [1]: https://github.com/dylanaraps/pure-bash-bible [PBB] -# [2]: https://tools.ietf.org/html/rfc3986 [URLspec] -# [3]: https://gemini.circumlunar.space/docs/specification.html [GEMspec] -# [4]: https://tools.ietf.org/html/rfc1436 [GOPHERprotocol] -# [5]: https://tools.ietf.org/html/rfc4266 [GOPHERurl] -# [6]: [GOPHER_GEMINI]: +# [1]: Pure Bash Bible +# https://github.com/dylanaraps/pure-bash-bible +# [2]: URL Specification +# https://tools.ietf.org/html/rfc3986 +# [3]: Gemini Specification +# https://gemini.circumlunar.space/docs/specification.html +# [4]: Gemini Best Practices +# https://gemini.circumlunar.space/docs/best-practices.gmi +# [5]: Gemini FAQ +# https://gemini.circumlunar.space/docs/faq.gmi +# [6]: Gopher Specification +# https://tools.ietf.org/html/rfc1436 +# [7]: Gopher URLs +# https://tools.ietf.org/html/rfc4266 +# [8]: Gophermap to Gemini script (by tomasino) # https://github.com/jamestomasino/dotfiles-minimal/blob/master/bin/gophermap2gemini.awk +# [9]: OpenSSL `s_client' online manual +# 
https://www.openssl.org/docs/manmaster/man1/openssl-s_client.html # # Code: @@ -90,14 +101,14 @@ trap bollux_quit SIGINT # Bash built-in replacement for `sleep` # -# PBB: #use-read-as-an-alternative-to-the-sleep-command +# [1]: #use-read-as-an-alternative-to-the-sleep-command sleep() { # sleep SECONDS read -rt "$1" <> <(:) || : } # Trim leading and trailing whitespace from a string. # -# PBB: #trim-leading-and-trailing-white-space-from-string +# [1]: #trim-leading-and-trailing-white-space-from-string trim_string() { # trim_string STRING : "${1#"${1%%[![:space:]]*}"}" : "${_%"${_##*[![:space:]]}"}" @@ -130,14 +141,14 @@ log() { # log LEVEL MESSAGE local fmt case "$1" in - [dD]*) # debug + ([dD]*) # debug [[ "$BOLLUX_LOGLEVEL" == DEBUG ]] || return fmt=34 ;; - [eE]*) # error + ([eE]*) # error fmt=31 ;; - *) fmt=1 ;; + (*) fmt=1 ;; esac shift @@ -190,14 +201,14 @@ bollux() { bollux_args() { while getopts :hvq OPT; do case "$OPT" in - h) + (h) bollux_usage exit ;; - v) BOLLUX_LOGLEVEL=DEBUG ;; - q) BOLLUX_LOGLEVEL=QUIET ;; - :) die 1 "Option -$OPTARG requires an argument" ;; - *) die 1 "Unknown option: -$OPTARG" ;; + (v) BOLLUX_LOGLEVEL=DEBUG ;; + (q) BOLLUX_LOGLEVEL=QUIET ;; + (:) die 1 "Option -$OPTARG requires an argument" ;; + (*) die 1 "Unknown option: -$OPTARG" ;; esac done shift $((OPTIND - 1)) @@ -337,7 +348,8 @@ blastoff() { # blastoff [-u] URL } } -# URLS: https://tools.ietf.org/html/rfc3986 #################################### +# URLS ######################################################################### +# https://tools.ietf.org/html/rfc3986 [2] # # Most of these functions are Bash implementations of functionality laid out in # the linked RFC specification. I'll refer to the section numbers above each @@ -369,8 +381,8 @@ uwellform() { # Split a URL into its constituent parts, placing them all in the given array. 
# # The regular expression given at the top of the function ($re) is taken -# directly from RFC 3986, Appendix B -- and if the URL provided doesn't match -# it, the function bails. +# directly from [2] Appendix B -- and if the URL provided doesn't match it, the +# function bails. # # `usplit' takes advantage of bash's regex abilities: when the regex comparison # operator `=~' is used, bash populates the array $BASH_REMATCH with the groups @@ -432,8 +444,6 @@ ujoin() { # ujoin NAME:ARRAY log d "${U[0]}" } -# Three small utility functions for dealing with URL components. -# # `ucdef' checks whether a URL component is blank or not -- if a component # doesn't exist, `usplit' writes $UC_BLANK there instead (which is :?: by # default, though it really doesn't matter much *what* it is, as long as it's @@ -458,6 +468,75 @@ ucset() { # ucset NAME VALUE run ujoin "${1/\[*\]/}" } +# [1]: encode a URL using percent-encoding. +uencode() { # uencode URL:STRING + local LC_ALL=C + for ((i = 0; i < ${#1}; i++)); do + : "${1:i:1}" + case "$_" in + ([a-zA-Z0-9.~_-]) printf '%s' "$_" ;; + (*) printf '%%%02X' "'$_" ;; + esac + done + printf '\n' +} + +# [1]: decode a percent-encoded URL. +udecode() { # udecode URL:STRING + : "${1//+/ }" + printf '%b\n' "${_//%/\\x}" +} + +# Implement [2] § 5.2.4, "Remove Dot Segments" +pundot() { # pundot PATH:STRING + local input="$1" + local output + while [[ "$input" ]]; do + if [[ "$input" =~ ^\.\.?/ ]]; then + input="${input#${BASH_REMATCH[0]}}" + elif [[ "$input" =~ ^/\.(/|$) ]]; then + input="/${input#${BASH_REMATCH[0]}}" + elif [[ "$input" =~ ^/\.\.(/|$) ]]; then + input="/${input#${BASH_REMATCH[0]}}" + [[ "$output" =~ /?[^/]+$ ]] + output="${output%${BASH_REMATCH[0]}}" + elif [[ "$input" == . || "$input" == .. 
]]; then + input= + else + [[ $input =~ ^(/?[^/]*)(/?.*)$ ]] || return 1 + output="$output${BASH_REMATCH[1]}" + input="${BASH_REMATCH[2]}" + fi + done + printf '%s\n' "${output//\/\//\//}" +} + +# Implement [2] § 5.2.3, "Merge Paths" +pmerge() { # pmerge BASE:ARRAY REFERENCE:ARRAY + local -n b="$1" + local -n r="$2" + + if ucblank r[3]; then + printf '%s\n' "${b[3]//\/\//\//}" + return + fi + + if ucdef b[2] && ucblank b[3]; then + printf '/%s\n' "${r[3]//\/\//\//}" + else + local bp="" + if [[ "${b[3]}" == */* ]]; then + bp="${b[3]%/*}" + fi + printf '%s/%s\n' "${bp%/}" "${r[3]#/}" + fi +} + +# `utransform' implements [2] § 5.2.2, "Transform References." +# +# That section conveniently lays out a pseudocode algorithm describing how URL +# resources should be transformed from one to another. This function just +# implements that pseudocode in Bash, using the helper functions defined above. utransform() { # utransform TARGET:ARRAY BASE:STRING REFERENCE:STRING local -a B R # base, reference local -n T="$1" # target @@ -520,128 +599,136 @@ utransform() { # utransform TARGET:ARRAY BASE:STRING REFERENCE:STRING ujoin T } -pundot() { # pundot PATH:STRING - local input="$1" - local output - while [[ "$input" ]]; do - if [[ "$input" =~ ^\.\.?/ ]]; then - input="${input#${BASH_REMATCH[0]}}" - elif [[ "$input" =~ ^/\.(/|$) ]]; then - input="/${input#${BASH_REMATCH[0]}}" - elif [[ "$input" =~ ^/\.\.(/|$) ]]; then - input="/${input#${BASH_REMATCH[0]}}" - [[ "$output" =~ /?[^/]+$ ]] - output="${output%${BASH_REMATCH[0]}}" - elif [[ "$input" == . || "$input" == .. 
]]; then - input= - else - [[ $input =~ ^(/?[^/]*)(/?.*)$ ]] || return 1 - output="$output${BASH_REMATCH[1]}" - input="${BASH_REMATCH[2]}" - fi - done - printf '%s\n' "${output//\/\//\//}" -} - -pmerge() { - local -n b="$1" - local -n r="$2" - - if ucblank r[3]; then - printf '%s\n' "${b[3]//\/\//\//}" - return - fi - - if ucdef b[2] && ucblank b[3]; then - printf '/%s\n' "${r[3]//\/\//\//}" - else - local bp="" - if [[ "${b[3]}" == */* ]]; then - bp="${b[3]%/*}" - fi - printf '%s/%s\n' "${bp%/}" "${r[3]#/}" - fi -} - -# PBB -uencode() { # uencode URL:STRING - local LC_ALL=C - for ((i = 0; i < ${#1}; i++)); do - : "${1:i:1}" - case "$_" in - [a-zA-Z0-9.~_-]) - printf '%s' "$_" - ;; - *) - printf '%%%02X' "'$_" - ;; - esac - done - printf '\n' -} - -# PBB -udecode() { # udecode URL:STRING - : "${1//+/ }" - printf '%b\n' "${_//%/\\x}" -} +# GEMINI ####################################################################### +# https://gemini.circumlunar.space/docs/specification.html [3] +# +# The reason we're all here, folks. Gemini is a new protocol that aims to be a +# middle ground between Gopher and HTTP, blah blah. You know the spiel. I know +# the spiel. It's great stuff! +# +################################################################################ -# GEMINI -# https://gemini.circumlunar.space/docs/specification.html +# Request a resource from a gemini server - see [3] §§ 2, 4. gemini_request() { # gemini_request URL local -a url usplit url "$1" - # get rid of userinfo + # Remove user info from the URL. + # + # URLs can technically be of the form PROTOCOL://USER:PASSWORD@DOMAIN + # (see [2], § 3.2, "Authority"). I don't know of any Gemini servers + # that use the USER or PASSWORD parts, so `gemini_request' just strips + # them from the requested URL. This will need to be changed if servers + # decide to use this method of authentication. ucset url[2] "${url[2]#*@}" + # Determine the port to request. 
+ # + # The default port for Gemini is 1965 (the year of the first crewed Gemini + # space mission), but some servers use a different port. In a URL, a + # port can be specified after the domain, separated with a colon. The + # user can also request a different default port, for whatever reason, + # by setting the variable $BOLLUX_GEMINI_PORT. local port if [[ "${url[2]}" == *:* ]]; then port="${url[2]#*:}" ucset url[2] "${url[2]%:*}" else - port=1965 # TODO variablize + port="$BOLLUX_GEMINI_PORT" fi + + # Build the SSL command to request the resource. + # + # This is the beating heart of bollux, the command that does all the + # important work of actually fetching the gemini content the user wants + # to read. I've broken it out into an array for ease of editing (and + # now, commenting!). local ssl_cmd=( + # `s_client' is OpenSSL's reference client implementation. In the + # manual [9] it says not to use it, but who reads the manual, + # anyway? openssl s_client - -crlf -quiet -connect "${url[2]}:$port" - -servername "${url[2]}" # SNI - -no_ssl3 -no_tls1 -no_tls1_1 # disable old TLS/SSL versions + -crlf # Automatically add CR+LF to line + -quiet # Don't print all the cert stuff + # -ign_eof # `-quiet' implies `-ign_eof' + -connect "${url[2]}:$port" # The server and port to connect + -servername "${url[2]}" # SNI: Server Name Indication + -no_ssl3 -no_tls1 -no_tls1_1 # disable old TLS/SSL versions ) + # Actually request the resource. + # + # I could probably use 'printf '%s\r\n' "$url" | run "${ssl_cmd[@]}", + # and maybe I should. I wrote this little line a while ago. run "${ssl_cmd[@]}" <<<"$url" } +# Handle the gemini response - see [3] § 3. gemini_response() { # gemini_response URL - local code meta - local title - url="$1" + local code meta # received on the first line of the response + local title # determined by a clunky heuristic, see read loop: (2*) + local url="$1" # the currently-visited URL. 
- # we need a loop here so it waits for the first line + # Read the first line. + # + # The first line of a Gemini response is the "header line," which is of + # the format "STATUS METADATA\r\n". I use a `while' loop using `read' + # with a timeout to handle non-responsive servers. Technically, + # METADATA shouldn't exceed 1024 bytes, but I can't think of a good way + # to break at that point -- so bollux is not quite spec-compliant in + # this regard. + # + # Additionally, there are sometimes bugs with caching and + # byte-shifting(?) when trying to download a binary file (see + # `download', below), but I'm not sure how to remedy that issue either. + # It requires more research. while read -t "$BOLLUX_TIMEOUT" -r code meta || { (($? > 128)) && die 99 "Timeout."; }; do break done - log d "[$code] $meta" + # Branch depending on the status code. See [3], Appendix 1. + # + # Notes: + # - All codes other than 3* (Redirects) reset the REDIRECTS counter. + # - I branch on the first digit of the status code, instead of both, to + # minimize the amount of duplicated code I need to write. case "$code" in - 1*) # input + (1*) # INPUT + # Gemini allows GET-style requests, and the INPUT family of + # response codes facilitate them. `10' is for standard input, + # and `11' is for sensitive information, like passwords. REDIRECTS=0 BOLLUX_URL="$url" case "$code" in - 10) run prompt "$meta" ;; - 11) run prompt "$meta" -s ;; # password input + (10) run prompt "$meta" ;; + (11) run prompt "$meta" -s ;; # sensitive input esac + run history_append "$url" "${title:-}" run blastoff "?$(uencode "$REPLY")" ;; - 2*) # OK + (2*) # OK + # The `20' family of requests is like HTTP's `200' family: it + # means that the request worked and the server is sending the + # requested content. REDIRECTS=0 BOLLUX_URL="$url" - # read ahead to find a title + # Janky heuristic to guess the title of a page. 
+ # + # This while loop reads through the file looking for a line + # starting with `#', which is a level-one heading in text/gemini + # (see [3], § 5). It assumes that the first such heading is the + # title of the page, and uses that title for the terminal title + # and for the history. local pretitle while read -r; do + # Since looping through the file consumes it (that is, + # the file pointer (I think?) moves away from the + # beginning of the file), the content we've read so far + # must be saved in a `pretitle' variable, so it can be + # printed later with the rest of the page. pretitle="$pretitle$REPLY"$'\n' if [[ "$REPLY" =~ ^#[[:space:]]*(.*) ]]; then title="${BASH_REMATCH[1]}" @@ -649,35 +736,55 @@ gemini_response() { # gemini_response URL fi done run history_append "$url" "${title:-}" - # read the body out and pipe it to display + # Print the pretitle and the rest of the document (`passthru' is + # a pure-bash rewrite of `cat'), and pipe it through `display' + # for typesetting. { printf '%s' "$pretitle" passthru } | run display "$meta" "${title:-}" ;; - 3*) # redirect + (3*) # REDIRECT + # Redirects are a fundamental part of any hypertext framework, + # and if I remember correctly, one of the main reasons + # solderpunk and others began thinking about gemini (the others + # being TLS and URLs, I believe). + # + # Note that although [3] specifies both a temporary (30) and + # permanent (31) redirect, bollux isn't smart enough to make a + # distinction. I'm not sure what the difference would be in + # practice, anyway. + # + # Per [4], bollux limits the number of redirects a page is + # allowed to make (by default, five). Change `$BOLLUX_MAXREDIR' + # to customize that limit. ((REDIRECTS += 1)) if ((REDIRECTS > BOLLUX_MAXREDIR)); then die $((100 + code)) "Too many redirects!" fi BOLLUX_URL="$url" + # Another discussion on [4] pertains to the value of alerting + # the user to (A) a cross-domain redirect, or even (B) all + # redirects. 
I have yet to implement that particular + # functionality, and even when I do implement it I don't think + # (B) will be the default. Perhaps (A) though. No notification + # will also be an option, however. run blastoff "$meta" # TODO: confirm redirect ;; - 4*) # temporary error + (4*) # TEMPORARY ERROR REDIRECTS=0 die "$((100 + code))" "Temporary error [$code]: $meta" ;; - 5*) # permanent error + (5*) # PERMANENT ERROR REDIRECTS=0 die "$((100 + code))" "Permanent error [$code]: $meta" ;; - 6*) # certificate error + (6*) # CERTIFICATE ERROR REDIRECTS=0 log d "Not implemented: Client certificates" - # TODO: recheck the speck die "$((100 + code))" "[$code] $meta" ;; - *) + (*) [[ -z "${code-}" ]] && die 100 "Empty response code." die "$((100 + code))" "Unknown response code: $code." ;; @@ -720,16 +827,16 @@ gopher_response() { # gopher_response URL log d "TYPE='$type'" case "$type" in - 0) # text + (0) # text run display text/plain ;; - 1) # menu + (1) # menu run gopher_convert | run display text/gemini ;; - 3) # failure + (3) # failure die 203 "GOPHER: failed" ;; - 7) # search + (7) # search if [[ "$url" =~ $'\t' ]]; then run gopher_convert | run display text/gemini else @@ -737,19 +844,12 @@ gopher_response() { # gopher_response URL run blastoff "$url $REPLY" fi ;; - *) # something else + (*) # something else run download "$url" ;; esac } -# 'cat' but in pure bash -passthru() { - while IFS= read -r; do - printf '%s\n' "$REPLY" - done -} - # convert gophermap to text/gemini (probably naive) gopher_convert() { local type label path server port regex @@ -768,19 +868,19 @@ gopher_convert() { continue fi case "$type" in - .) # end of file + (.) # end of file printf '.\n' break ;; - i) # label + (i) # label case "$label" in - '#'* | '*'[[:space:]]*) + ('#'* | '*'[[:space:]]*) if $pre; then printf '%s\n' '```' pre=false fi ;; - *) + (*) if ! 
$pre; then printf '%s\n' '```' pre=true @@ -789,14 +889,14 @@ gopher_convert() { esac printf '%s\n' "$label" ;; - h) # html link + (h) # html link if $pre; then printf '%s\n' '```' pre=false fi printf '=> %s %s\n' "${path:4}" "$label" ;; - T) # telnet link + (T) # telnet link if $pre; then printf '%s\n' '```' pre=false @@ -804,7 +904,7 @@ gopher_convert() { printf '=> telnet://%s:%s/%s%s %s\n' \ "$server" "$port" "$type" "$path" "$label" ;; - *) # other type + (*) # other type if $pre; then printf '%s\n' '```' pre=false @@ -822,6 +922,14 @@ gopher_convert() { exec 9>&- } + +# 'cat' but in pure bash +passthru() { + while IFS= read -r; do + printf '%s\n' "$REPLY" + done +} + # display the fetched content display() { # display METADATA [TITLE] local -a less_cmd @@ -839,7 +947,7 @@ display() { # display METADATA [TITLE] for ((i = 1; i <= "${#hdr[@]}"; i++)); do h="${hdr[$i]}" case "$h" in - *charset=*) charset="${h#*=}" ;; + (*charset=*) charset="${h#*=}" ;; esac done @@ -849,7 +957,7 @@ display() { # display METADATA [TITLE] log debug "mime='$mime'; charset='$charset'" case "$mime" in - text/*) + (text/*) set_title "$title${title:+ - }bollux" # render ANSI color escapes and don't wrap pre-formatted blocks less_cmd=(less -RS) @@ -886,7 +994,7 @@ display() { # display METADATA [TITLE] run "${less_cmd[@]}" && bollux_quit } || run handle_keypress "$?" 
;; - *) run download "$BOLLUX_URL" ;; + (*) run download "$BOLLUX_URL" ;; esac } @@ -896,8 +1004,8 @@ less_prompt_escape() { # less_prompt_escape STRING for ((i = 0; i < ${#1}; i++)); do : "${1:i:1}" case "$_" in - [\?:\.%\\]) printf '\%s' "$_" ;; - *) printf '%s' "$_" ;; + ([\?:\.%\\]) printf '\%s' "$_" ;; + (*) printf '%s' "$_" ;; esac done printf '\n' @@ -965,7 +1073,7 @@ typeset_gemini() { while IFS= read -r; do case "$REPLY" in - '```'*) + ('```'*) PRE_LINE_FORCE=false if $pre; then pre=false @@ -973,28 +1081,28 @@ typeset_gemini() { pre=true fi case "${T_PRE_DISPLAY%%,*}" in - pre) + (pre) : ;; - alt | both) + (alt | both) $pre && PRE_LINE_FORCE=true \ gemini_pre "${REPLY#\`\`\`}" ;; esac continue ;; - '=>'*) + ('=>'*) : $((ln += 1)) gemini_link "$REPLY" $pre "$ln" ;; - '#'*) gemini_header "$REPLY" $pre ;; - '*'[[:space:]]*) + ('#'*) gemini_header "$REPLY" $pre ;; + ('*'[[:space:]]*) gemini_list "$REPLY" $pre ;; - '>'*) + ('>'*) gemini_quote "$REPLY" $pre ;; - *) gemini_text "$REPLY" $pre ;; + (*) gemini_text "$REPLY" $pre ;; esac done } @@ -1103,25 +1211,25 @@ fold_line() { # fold_line [OPTIONS...] WIDTH TEXT OPTIND=0 while getopts nm:f:l:B:A: OPT; do case "$OPT" in - n) # -n = no trailing newline + (n) # -n = no trailing newline newline=false ;; - m) # -m MARGIN = margin for all lines + (m) # -m MARGIN = margin for all lines margin_all="$OPTARG" ;; - f) # -f MARGIN = margin for first line + (f) # -f MARGIN = margin for first line margin_first="$OPTARG" ;; - l) # -l LENGTH = length of line before starting fold + (l) # -l LENGTH = length of line before starting fold ll="$OPTARG" ;; - B) # -B BEFORE = text to insert before each line + (B) # -B BEFORE = text to insert before each line before="$OPTARG" ;; - A) # -A AFTER = text to insert after each line + (A) # -A AFTER = text to insert after each line after="$OPTARG" ;; - *) return 1 ;; + (*) return 1 ;; esac done shift "$((OPTIND - 1))" @@ -1159,37 +1267,37 @@ fold_line() { # fold_line [OPTIONS...] 
WIDTH TEXT # use the exit code from less (see mklesskey) to do things handle_keypress() { # handle_keypress CODE case "$1" in - 48) # o - open a link -- show a menu of links on the page + (48) # o - open a link -- show a menu of links on the page run select_url "$BOLLUX_PAGESRC" ;; - 49) # g - goto a url -- input a new url + (49) # g - goto a url -- input a new url prompt GO run blastoff -u "$REPLY" ;; - 50) # [ - back in the history + (50) # [ - back in the history run history_back || { sleep 0.5 run blastoff "$BOLLUX_URL" } ;; - 51) # ] - forward in the history + (51) # ] - forward in the history run history_forward || { sleep 0.5 run blastoff "$BOLLUX_URL" } ;; - 52) # r - re-request the current resource + (52) # r - re-request the current resource run blastoff "$BOLLUX_URL" ;; - 53) # G - goto a url (pre-filled with current) + (53) # G - goto a url (pre-filled with current) run prompt -u GO run blastoff -u "$REPLY" ;; - 54) # ` - change alt-text visibility and refresh + (54) # ` - change alt-text visibility and refresh run cycle_list T_PRE_DISPLAY , run blastoff "$BOLLUX_URL" ;; - 55) # 55-57 -- still available for binding + (55) # 55-57 -- still available for binding die "$?" "less(1) error" ;; esac @@ -1206,8 +1314,8 @@ select_url() { # select_url FILE PS3="OPEN> " select u in "${MAPFILE[@]}"; do case "$REPLY" in - q) bollux_quit ;; - [^0-9]*) run blastoff -u "$REPLY" && break ;; + (q) bollux_quit ;; + ([^0-9]*) run blastoff -u "$REPLY" && break ;; esac run blastoff "${u%%[[:space:]]*}" && break done