From afb216344735397615466174b8ca184691ea66f8 Mon Sep 17 00:00:00 2001
From: Case Duckworth
Date: Tue, 2 Mar 2021 12:57:38 -0600
Subject: Add commentary

---
 bollux | 229 ++++++++++++++++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 178 insertions(+), 51 deletions(-)

diff --git a/bollux b/bollux
index 26dc316..a7c5f93 100644
--- a/bollux
+++ b/bollux
@@ -3,11 +3,45 @@
 # Author: Case Duckworth
 # License: MIT
 # Version: 0.4.0
+#
+# Commentary:
+#
+# The impetus for this program came from a Mastodon conversation I had where
+# someone mentioned the "simplest possible Gemini client" was this:
+#
+# openssl  s_client -ign_eof -quiet -connect $server:1965 <<< "$url"
+#
+# That's still at the heart of this program (see `gemini_request'): `bollux' is
+# basically a half-functioning convenience wrapper around that openssl call.
+# The first versions of `bollux' used `gawk' and a lot of other tools on top of
+# bash, but after reading Dylan Araps' Pure Bash Bible[1] and other works, I
+# decided to make as much of it in Bash as possible.  Thus, currently `bollux'
+# requires `bash' v. 4+, `less' (a recent, non-busybox version), `dd' for
+# downloads, `openssl' for requests, and `iconv' to convert pages to UTF-8.
+# Future versions will hopefully have a pager fully implemented in bash, so that
+# I won't have to worry about less's weird incompatibilities and keybinding
+# things.  That's a major project though, and I'm scared.
+#
+# The following works were referenced when writing this, and I've tried to
+# credit them in comments below.  Following each link, I'll include a "short
+# code" that I'll use to reference them in those comments, if necessary to keep
+# them shorter than 80 characters.
+#
+# [1]: https://github.com/dylanaraps/pure-bash-bible [PBB]
+# [2]: https://tools.ietf.org/html/rfc3986 [URLspec]
+# [3]: https://gemini.circumlunar.space/docs/specification.html [GEMspec]
+# [4]: https://tools.ietf.org/html/rfc1436 [GOPHERprotocol]
+# [5]: https://tools.ietf.org/html/rfc4266 [GOPHERurl]
+# [6]: [GOPHER_GEMINI]:
+# https://github.com/jamestomasino/dotfiles-minimal/blob/master/bin/gophermap2gemini.awk
+#
+# Code:
 
 # Program information
 PRGN="${0##*/}"			# Easiest way to get the script name
 VRSN=0.4.1			# I /try/ to follow semver?  IDK.
 
+# Print a useful help message (`bollux -h').
 bollux_usage() {
 	cat <<END
 $PRGN (v. $VRSN): a bash gemini client
@@ -24,12 +58,19 @@ parameters:
 END
 }
 
+# UTILITY FUNCTIONS ############################################################
+
+# Run a command, but log it first.
+#
+# See `log' for the available levels.
 run() { # run COMMAND...
+	# I have to add a `trap' here for SIGINT (C-c) to work properly.
 	trap bollux_quit SIGINT
 	log debug "$*"
 	"$@"
 }
 
+# Exit with an error and a message describing it.
 die() { # die EXIT_CODE MESSAGE
 	local ec="$1"
 	shift
@@ -37,20 +78,35 @@ die() { # die EXIT_CODE MESSAGE
 	exit "$ec"
 }
 
-# builtin replacement for `sleep`
-# https://github.com/dylanaraps/pure-bash-bible#use-read-as-an-alternative-to-the-sleep-command
+# Exit with success, printing a fun message.
+#
+# The default message is from the wonderful show "Cowboy Bebop."
+bollux_quit() {
+	printf '\e[1m%s\e[0m:\t\e[3m%s\e[0m\n' "$PRGN" "$BOLLUX_BYEMSG"
+	exit
+}
+# SIGINT is C-c, and I want to make sure bollux quits when it's typed.
+trap bollux_quit SIGINT
+
+# Bash built-in replacement for `sleep`
+#
+# PBB: #use-read-as-an-alternative-to-the-sleep-command
 sleep() { # sleep SECONDS
 	read -rt "$1" <> <(:) || :
 }
 
-# https://github.com/dylanaraps/pure-bash-bible/
+# Trim leading and trailing whitespace from a string.
+#
+# PBB: #trim-leading-and-trailing-white-space-from-string
 trim_string() { # trim_string STRING
 	: "${1#"${1%%[![:space:]]*}"}"
 	: "${_%"${_##*[![:space:]]}"}"
 	printf '%s\n' "$_"
 }
 
-# cycle a variable, e.g. from 'one,two,three' => 'two,three,one'
+# Cycle a variable.
+#
+# e.g. list=one,two,three; 'cycle_list list ,' => list=two,three,one
 cycle_list() { # cycle_list LIST DELIM
 	local list="${!1}" delim="$2"
 	local first="${list%%${delim}*}"
@@ -58,12 +114,17 @@ cycle_list() { # cycle_list LIST DELIM
 	printf -v "$1" '%s%s%s' "${rest}" "${delim}" "${first}"
 }
 
-# determine the first element of a list, e.g. 'one,two,three' => 'one'
+# Determine the first element of a delimited list.
+#
+# e.g. list=one,two,three; 'first list ,' => 'one'
 first() { # first LIST DELIM
 	local list="${!1}" delim="$2"
 	printf '%s\n' "${list%%${delim}*}"
 }
 
+# Log a message to stderr (&2).
+#
+# TODO: document
 log() { # log LEVEL MESSAGE
 	[[ "$BOLLUX_LOGLEVEL" == QUIET ]] && return
 	local fmt
@@ -83,22 +144,49 @@ log() { # log LEVEL MESSAGE
 	printf >&2 '\e[%sm%s:%s:\e[0m\t%s\n' "$fmt" "$PRGN" "${FUNCNAME[1]}" "$*"
 }
 
-# main entry point
+# Set the terminal title.
+set_title() { # set_title STRING
+	printf '\e]2;%s\007' "$*"
+}
+
+# Prompt the user for input.
+#
+# This is a thin wrapper around `read', a bash built-in.  Because of the
+# way bollux messes around with stdin and stdout, I need to read directly from
+# the TTY with this function.
+prompt() { # prompt [-u] PROMPT [READ_ARGS...]
+	local read_cmd=(read -e -r)
+	if [[ "$1" == "-u" ]]; then
+		read_cmd+=(-i "$BOLLUX_URL")
+		shift
+	fi
+	local prompt="$1"
+	shift
+	read_cmd+=(-p "$prompt> ")
+	"${read_cmd[@]}" </dev/tty "$@"
+}
+
+# MAIN BOLLUX DISPATCH FUNCTIONS ###############################################
+
+# Main entry point into `bollux'.
+#
+# See the `if' block at the bottom of this script.
 bollux() {
 	run bollux_config    # TODO: figure out better config method
 	run bollux_args "$@" # and argument parsing
 	run bollux_init
 
+	# If the user hasn't configured a home page, $BOLLUX_URL will be blank.
+	# So, prompt the user where to go.
 	if [[ ! "${BOLLUX_URL:+x}" ]]; then
 		run prompt GO BOLLUX_URL
 	fi
-
 	log d "BOLLUX_URL='$BOLLUX_URL'"
 
-	run blastoff -u "$BOLLUX_URL"
+	run blastoff -u "$BOLLUX_URL" # Visit the specified URL.
 }
 
-# process command-line arguments
+# Process command-line arguments.
 bollux_args() {
 	while getopts :hvq OPT; do
 		case "$OPT" in
@@ -113,14 +201,26 @@ bollux_args() {
 		esac
 	done
 	shift $((OPTIND - 1))
+
+	# If there's a leftover argument, it's the URL to visit.
 	if (($# == 1)); then
 		BOLLUX_URL="$1"
 	fi
 }
 
-# process config file and set variables
+# Source the configuration file and set remaining variables.
+#
+# Since `bollux_config' is loaded before `bollux_args', there's no way to
+# specify a configuration file from the command line.  I run `bollux_args'
+# second so that command-line options (mostly $BOLLUX_URL) can supersede
+# config-file options, and I'm not sure how to rectify the situation.
+#
+# Anyway, the config file `bollux.conf' is just a bash file that's sourced in
+# this function.  After that, I use a little bash trick to set all the remaining
+# variables to default values with `: "${VAR:=value}"'.
 bollux_config() {
-	: "${BOLLUX_CONFIG:=${XDG_CONFIG_HOME:-$HOME/.config}/bollux/bollux.conf}"
+	: "${BOLLUX_CONF_DIR:=${XDG_CONFIG_HOME:-$HOME/.config}/bollux}"
+	: "${BOLLUX_CONFIG:=$BOLLUX_CONF_DIR/bollux.conf}"
 
 	if [ -f "$BOLLUX_CONFIG" ]; then
 		log debug "Loading config file '$BOLLUX_CONFIG'"
@@ -145,7 +245,7 @@ bollux_config() {
 	: "${KEY_FORWARD:=']'}" # go forward in the history
 	: "${KEY_REFRESH:=r}"	# refresh the page
 	: "${KEY_CYCLE_PRE:=p}" # cycle T_PRE_DISPLAY
-	: "${BOLLUX_CUSTOM_LESSKEY:=${XDG_CONFIG_HOME:-$HOME/.config}/bollux/bollux.lesskey}"
+	: "${BOLLUX_CUSTOM_LESSKEY:=$BOLLUX_CONF_DIR/bollux.lesskey}"
 	## files
 	: "${BOLLUX_DATADIR:=${XDG_DATA_HOME:-$HOME/.local/share}/bollux}"
 	: "${BOLLUX_DOWNDIR:=.}"                       # where to save downloads
@@ -154,7 +254,8 @@ bollux_config() {
 	BOLLUX_HISTFILE="$BOLLUX_DATADIR/history"      # where to save history
 	## typesetting
 	: "${T_MARGIN:=4}"                 # left and right margin
-	: "${T_WIDTH:=0}"                  # width of the viewport -- 0 = get term width
+	: "${T_WIDTH:=0}"                  # width of the view port
+					   # 0 = get term width
 	: "${T_PRE_DISPLAY:=both,pre,alt}" # how to view PRE blocks
 	# colors -- these will be wrapped in \e[ __ m
 	C_RESET='\e[0m'         # reset
@@ -169,59 +270,63 @@ bollux_config() {
 	: "${C_QUOTE:=3}"       # quote formatting
 	: "${C_PRE:=0}"         # preformatted text formatting
 	## state
-	UC_BLANK=':?:'
+	UC_BLANK=':?:'		# internal use only, should be non-URL chars
 }
 
-# quit happily
-bollux_quit() {
-	printf '\e[1m%s\e[0m:\t\e[3m%s\e[0m\n' "$PRGN" "$BOLLUX_BYEMSG"
-	exit
-}
-# trap C-c
-trap bollux_quit SIGINT
 
-# set the terminal title
-set_title() { # set_title STRING
-	printf '\e]2;%s\007' "$*"
-}
-
-# prompt for input
-prompt() { # prompt [-u] PROMPT [READ_ARGS...]
-	local read_cmd=(read -e -r)
-	if [[ "$1" == "-u" ]]; then
-		read_cmd+=(-i "$BOLLUX_URL")
-		shift
-	fi
-	local prompt="$1"
-	shift
-	read_cmd+=(-p "$prompt> ")
-	"${read_cmd[@]}" </dev/tty "$@"
-}
-
-# load a URL
+# Load a URL.
+#
+# I was feeling fancy when I named this function -- a more descriptive name
+# would be 'bollux_goto' or something.
 blastoff() { # blastoff [-u] URL
 	local u
 
+	# `blastoff' assumes a "well-formed" URL by default -- i.e., a URL with
+	# a protocol string and no extraneous whitespace.  Since bollux can't
+	# trust the user to input a proper URL at a prompt, nor capsule authors
+	# to fully form their URLs, the -u flag is necessary for those
+	# use-cases.  Otherwise, bollux knows the URL is well-formed -- or
+	# should be, due to the Gemini specification.
 	if [[ "$1" == "-u" ]]; then
 		u="$(run uwellform "$2")"
 	else
 		u="$1"
 	fi
 
+	# After ensuring the URL is well-formed, `blastoff' needs to transform
+	# it according to the transform rules of RFC 3986 (see §5.2.2), which
+	# turns relative references into absolute references that bollux can use
+	# in its request to the server.  That's followed by a check that the
+	# protocol is set, defaulting to Gemini if it isn't.
+	#
+	# Implementation detail: because Bash is really stupid when it comes to
+	# arrays, the URL functions u* (see below) work with an array defined
+	# with `local -a' and passed by name, not by value.  Thus, the
+	# `utransform url ...' instead of `utransform "${url[@]}"' or
+	# similar.  In addition, the `ucdef' and `ucset' functions take the name
+	# of the array element as parameters, not the element itself.
 	local -a url
 	run utransform url "$BOLLUX_URL" "$u"
 	if ! ucdef url[1]; then
 		run ucset url[1] "$BOLLUX_PROTO"
 	fi
 
+	# To try and keep `bollux' as extensible as possible, I've written it
+	# only to expect two functions for every protocol it supports:
+	# `x_request' and `x_response', where `x' is the name of the protocol
+	# (the first element of the built `url' array).  `declare -F' looks only
+	# for functions in the current scope, failing if one doesn't exist.
+	#
+	# In between `x_request' and `x_response', `blastoff' normalizes the
+	# line endings to UNIX-style (LF) for ease of display.
 	{
-		if declare -Fp "${url[1]}_request" >/dev/null 2>&1; then
+		if declare -F "${url[1]}_request" >/dev/null 2>&1; then
 			run "${url[1]}_request" "$url"
 		else
 			die 99 "No request handler for '${url[1]}'"
 		fi
 	} | run normalize | {
-		if declare -Fp "${url[1]}_response" >/dev/null 2>&1; then
+		if declare -F "${url[1]}_response" >/dev/null 2>&1; then
 			run "${url[1]}_response" "$url"
 		else
 			log d \
@@ -232,8 +337,23 @@ blastoff() { # blastoff [-u] URL
 	}
 }
 
-# URLS
-## https://tools.ietf.org/html/rfc3986
+# URLS: https://tools.ietf.org/html/rfc3986 ####################################
+#
+# Most of these functions are Bash implementations of functionality laid out in
+# the linked RFC specification.  I'll refer to the section numbers above each
+# function.
+#
+# In addition, most of these functions take arrays or array elements passed /by
+# name/, instead of /value/ -- i.e., instead of calling `usplit $url', call
+# `usplit url'.  Passing values by name is necessary because of Bash's weird
+# array handling.
+#
+################################################################################
+
+# Make sure a URL is "well-formed:" add a default protocol if it's missing and
+# trim whitespace.
+#
+# Useful for URLs that were probably input by humans.
 uwellform() {
 	local u="$1"
 
@@ -246,6 +366,13 @@ uwellform() {
 	printf '%s\n' "$u"
 }
 
+# Split a URL into its constituent parts, placing them all in the given array.
+#
+# The regular expression given at the top of the function ($re) is taken
+# directly from RFC 3986, Appendix B -- and if the URL provided doesn't match
+# it, the function bails.
+#
+# `usplit' takes advantage ... [CONTINUE HERE]
 usplit() { # usplit NAME:ARRAY URL:STRING
 	local re='^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?'
 	[[ $2 =~ $re ]] || return $?
@@ -408,7 +535,7 @@ pmerge() {
 	fi
 }
 
-# https://github.com/dylanaraps/pure-bash-bible/
+# PBB
 uencode() { # uencode URL:STRING
 	local LC_ALL=C
 	for ((i = 0; i < ${#1}; i++)); do
@@ -425,7 +552,7 @@ uencode() { # uencode URL:STRING
 	printf '\n'
 }
 
-# https://github.com/dylanaraps/pure-bash-bible/
+# PBB
 udecode() { # udecode URL:STRING
 	: "${1//+/ }"
 	printf '%b\n' "${_//%/\\x}"
@@ -598,7 +725,7 @@ passthru() {
 # convert gophermap to text/gemini (probably naive)
 gopher_convert() {
 	local type label path server port regex
-	# cf. https://github.com/jamestomasino/dotfiles-minimal/blob/master/bin/gophermap2gemini.awk
+	# [GOPHER_GEMINI]
 	while IFS= read -r; do
 		printf -v regex '(.)([^\t]*)(\t([^\t]*)\t([^\t]*)\t([^\t]*))?'
 		if [[ "$REPLY" =~ $regex ]]; then
@@ -753,7 +880,9 @@ mklesskey() { # mklesskey
 	if [[ -f "$BOLLUX_CUSTOM_LESSKEY" ]]; then
 		log d "Using custom lesskey: '$BOLLUX_CUSTOM_LESSKEY'"
 		BOLLUX_LESSKEY="${BOLLUX_CUSTOM_LESSKEY}"
-	elif [[ ! -f "$BOLLUX_LESSKEY" ]]; then
+	elif [[ -f "$BOLLUX_LESSKEY" ]]; then
+		log d "Found lesskey: '$BOLLUX_LESSKEY'"
+	else
 		log d "Generating lesskey: '$BOLLUX_LESSKEY'"
 		lesskey -o "$BOLLUX_LESSKEY" - <<END
 #command
@@ -771,8 +900,6 @@ l right-scroll
 ? status   # 'status' will show a little help thing.
 = noaction
 END
-	else
-		log d "Found lesskey: '$BOLLUX_LESSKEY'"
 	fi
 }
 
@@ -1104,7 +1231,7 @@ bollux_init() {
 	HN=0               # position of history in the array
 	run mkdir -p "${BOLLUX_HISTFILE%/*}"
 	# Remove $BOLLUX_LESSKEY and re-generate keybindings (to catch rebinds)
-	run rm "$BOLLUX_LESSKEY"
+	run rm -f "$BOLLUX_LESSKEY"
 	mklesskey
 }
 
-- 
cgit 1.4.1-21-gabe81