#!/usr/bin/env bash ################################################################################ # BOLLUX: a bash gemini client # Author: Case Duckworth # License: MIT # Version: 0.4.1 # # Commentary: # # The impetus for this program came from a Mastodon conversation I had where # someone mentioned the "simplest possible Gemini client" was this: # # openssl s_client -ign_eof -quiet -connect $server:1965 <<< "$url" # # That's still at the heart of this program (see `gemini_request'): `bollux' is # basically a half-functioning convenience wrapper around that openssl call. # The first versions of `bollux' used `gawk' and a lot of other tools on top of # bash, but after reading Dylan Araps' Pure Bash Bible[1] and other works, I # decided to make as much of it in Bash as possible. Thus, currently `bollux' # requires `bash' v. 4+, `less' (a recent, non-busybox version), `dd' for # downloads, `openssl' for requests, and `iconv' to convert pages to UTF-8. # Future versions will hopefully have a pager fully implemented in bash, so that # I won't have to worry about less's weird incompatibilities and keybinding # things. That's a major project though, and I'm scared. # # The following works were referenced when writing this, and I've tried to # credit them in comments below. Further in the commentary on this script, I'll # include the following link numbers to refer to these documents, in order to # keep the line length as short as possible. 
#
# [1]: Pure Bash Bible
#      https://github.com/dylanaraps/pure-bash-bible
# [2]: URL Specification
#      https://tools.ietf.org/html/rfc3986
# [3]: Gemini Specification
#      https://gemini.circumlunar.space/docs/specification.html
# [4]: Gemini Best Practices
#      https://gemini.circumlunar.space/docs/best-practices.gmi
# [5]: Gemini FAQ
#      https://gemini.circumlunar.space/docs/faq.gmi
# [6]: Gopher Specification
#      https://tools.ietf.org/html/rfc1436
# [7]: Gopher URLs
#      https://tools.ietf.org/html/rfc4266
# [8]: Gophermap to Gemini script (by tomasino)
#      https://github.com/jamestomasino/dotfiles-minimal/blob/master/bin/gophermap2gemini.awk
# [9]: OpenSSL `s_client' online manual
#      https://www.openssl.org/docs/manmaster/man1/openssl-s_client.html
#
################################################################################
# Code:

# Program information
PRGN="${0##*/}" # Easiest way to get the script name
VRSN=0.4.1      # I /try/ to follow semver?  IDK.

# Print a useful help message (`bollux -h').
#
# NOTE(review): this definition is damaged in the copy under review.  The text
# between `cat <' and the sigil list on the same line is missing, so the usage
# message is gone and the colour defaults below (which read like the tail of a
# configuration routine) have ended up inside this function.  The surviving
# tokens are kept exactly as found; restore the lost span from a pristine copy
# before relying on this function.
bollux_usage() {
	cat <, #, ##, ###, *, ```)
	: "${C_LINK_NUMBER:=1}" # link number
	: "${C_LINK_TITLE:=4}"  # link title
	: "${C_LINK_URL:=36}"   # link URL
	: "${C_HEADER1:=1;4}"   # header 1 formatting
	: "${C_HEADER2:=1}"     # header 2 formatting
	: "${C_HEADER3:=3}"     # header 3 formatting
	: "${C_LIST:=0}"        # list formatting
	: "${C_QUOTE:=3}"       # quote formatting
	: "${C_PRE:=0}"         # preformatted text formatting
	## state
	UC_BLANK=':?:' # internal use only, should be non-URL chars
}

# Initialize bollux state
bollux_init() {
	# Run `bollux_cleanup' on SIGQUIT and whenever the shell exits, and
	# `bollux_quit' on interrupt (C-c).
	trap bollux_cleanup QUIT EXIT
	trap bollux_quit INT
	# Disable pathname expansion.
	#
	# It's very unlikely the user will want to navigate to a file when
	# answering the GO prompt.
	set -f
	# Initialize state
	#
	# Other than $REDIRECTS, bollux's mutable state includes
	# $BOLLUX_URL, but that's initialized elsewhere (possibly even by
	# the user)
	REDIRECTS=0
	# History
	#
	# See also `history_append', `history_back', `history_forward'
	#
	# `-g' matters here: a plain `declare' inside a function creates an
	# array that is local to `bollux_init' and disappears on return.
	declare -ga HISTORY # history is kept in an array
	HN=0                # position of history in the array
	run mkdir -p "${BOLLUX_HISTFILE%/*}"
	# Remove $BOLLUX_LESSKEY and re-generate keybindings (to catch rebinds)
	run rm -f "$BOLLUX_LESSKEY"
}

# Cleanup on exit
bollux_cleanup() {
	# Stubbed in case of need in future
	:
}

# Exit with success, printing a fun message.
#
# The default message is from the wonderful show "Cowboy Bebop."
bollux_quit() {
	printf '\e[1m%s\e[0m:\t\e[3m%s\e[0m\n' "$PRGN" "$BOLLUX_BYEMSG"
	exit 0
}

# UTILITY FUNCTIONS ############################################################

# Run a command, but log it first.
#
# The command line is logged at debug level (attributed to the caller of `run',
# hence LOG_FUNC=2) and then executed; the return status is the command's.
#
# See `log' for the available levels.
run() { # run COMMAND...
	# I have to add a `trap' here for SIGINT to work properly.
	trap bollux_quit SIGINT
	LOG_FUNC=2 log debug "> $*"
	"$@"
}

# Log a message to stderr (&2).
#
# `log' in this script can take 3 different parameters: `d', `e', and `x', where
# `x' is any other string (though I usually use `x'), followed by the message to
# log.  Most messages are either `d' (debug) level or `x' (diagnostic) level,
# meaning I want to show them only when bollux is called with `-v' (verbose) or
# all the time.  The levels are somewhat arbitrary, like I suspect all logging
# levels are, but you can read the rest of bollux to see what I've chosen to
# classify as what.
#
# Returns 0 even when the message is suppressed: many functions end with a
# `log d' call, and a suppressed message must not make them look like failures.
log() { # log LEVEL MESSAGE...
	# 'QUIET' means don't log anything.
	[[ "$BOLLUX_LOGLEVEL" == QUIET ]] && return 0
	local fmt # ANSI escape code

	case "$1" in
	[dD]*) # Debug level -- only print if bollux -v.
		[[ "$BOLLUX_LOGLEVEL" == DEBUG ]] || return 0
		fmt=34 # Blue
		;;
	[eE]*) # Error level -- always print.
		fmt=31 # Red
		;;
	*) # Diagnostic level -- print unless QUIET.
		fmt=1 # Bold
		;;
	esac
	shift

	printf >&2 '\e[%sm%s:%-16s:\e[0m %s\n' \
		"$fmt" "$PRGN" "${FUNCNAME[${LOG_FUNC:-1}]}" "$*"
}

# Exit with an error and a message describing it.
die() { # die EXIT_CODE MESSAGE
	local exit_code="$1"
	shift
	log error "$*"
	exit "$exit_code"
}

# Trim leading and trailing whitespace from a string.
#
# [1]: #trim-leading-and-trailing-white-space-from-string
trim_string() { # trim_string STRING
	local str="$1"
	# Remove the longest leading run of whitespace ...
	str="${str#"${str%%[![:space:]]*}"}"
	# ... and the longest trailing one.
	str="${str%"${str##*[![:space:]]}"}"
	printf '%s\n' "$str"
}

# Cycle a variable in a list given a delimiter.
#
# e.g. 'list_cycle one,two,three ,' => 'two,three,one'
#
# LIST is the /name/ of the variable holding the list; it is rewritten in
# place.  A list that doesn't contain DELIM is left as it is.
list_cycle() { # list_cycle LIST DELIM
	# I could've set up `list_cycle' to use an array instead of a delimited
	# string, but the one variable this function is used for is
	# T_PRE_DISPLAY, which is user-configurable.  I wanted it to be as easy
	# to configure for users who might not immediately know the bash array
	# syntax, but can figure out 'variable=value' without much thought.
	local list="${!1}" # Pass the list by name, not value
	local delim="$2"   # The delimiter of the string
	# A single-element list has nothing to rotate.
	[[ "$list" == *"$delim"* ]] || return 0
	# The delimiter is quoted so it is matched literally, not as a glob.
	local first="${list%%"$delim"*}" # The first element
	local rest="${list#*"$delim"}"   # The rest of the elements
	# -v prints to the variable specified.
	printf -v "$1" '%s%s%s' "$rest" "$delim" "$first"
}

# Set the terminal title.
set_title() { # set_title TITLE...
	printf '\e]2;%s\007' "$*"
}

# Prompt the user for input.
#
# This is a thin wrapper around `read', a bash built-in.  Because of the
# way bollux messes around with stdin and stdout, I need to read directly from
# the TTY with this function.  With `-u' the prompt is pre-filled with
# $BOLLUX_URL; any READ_ARGS are handed to `read' unchanged, and with no
# variable name among them the answer lands in $REPLY.
#
# NOTE(review): the last statement of this function was damaged in the copy
# under review (it read `"${read_cmd[@]}" <(:) || :', which can never read
# from the terminal).  It has been rebuilt from the contract described above;
# compare against a pristine copy.
prompt() { # prompt [-u] PROMPT [READ_ARGS...]
	# `-e' gets the line "interactively", so it can see history and stuff
	# `-r' reads a "raw" string, i.e., without backslash escaping
	local read_cmd=(read -e -r)
	if [[ "$1" == "-u" ]]; then
		# `-i TEXT' uses TEXT as the initial text for `read'
		read_cmd+=(-i "$BOLLUX_URL")
		shift
	fi
	local prompt="$1" # How to prompt the user
	shift
	read_cmd+=(-p "$prompt> ")
	"${read_cmd[@]}" "$@" </dev/tty
}

# Normalize files.
#
# Copies stdin to stdout with every carriage return removed, so CRLF (and
# stray CR) line endings become plain LF.  A final line without a newline is
# kept and terminated.
normalize() {
	local line
	while IFS= read -r line || [[ -n "$line" ]]; do
		# `read' has already removed the LF, so only CR is left to strip.
		printf '%s\n' "${line//$'\r'/}"
	done
}

# URLS #########################################################################
# https://tools.ietf.org/html/rfc3986 [2]
#
# Most of these functions are Bash implementations of functionality laid out in
# the linked RFC specification.  I'll refer to the section numbers above each
# function.
#
# In addition, most of these functions take arrays or array elements passed /by
# name/, instead of /value/ -- i.e., instead of calling `usplit $url', call
# `usplit url'.  Passing values by name is necessary because of Bash's weird
# array handling.
#
################################################################################

# Make sure a URL is "well-formed:" trim whitespace and add a default protocol
# if it's missing.
#
# Useful for URLs that were probably input by humans.
uwellform() { # uwellform URL
	local url
	# Trim first: otherwise leading blanks end up between the scheme and
	# the host, where no later trim can reach them.
	url="$(trim_string "$1")"

	if [[ "$url" != *://* ]]; then
		url="$BOLLUX_PROTO://$url"
	fi

	printf '%s\n' "$url"
}

# Split a URL into its constituent parts, placing them all in the given array.
#
# The regular expression given at the top of the function ($re) is taken
# directly from [2] Appendix B -- and if the URL provided doesn't match it, the
# function bails.
#
# `usplit' takes advantage of bash's regex abilities: when the regex comparison
# operator `=~' is used, bash populates the array $BASH_REMATCH with the groups
# matched, and ${BASH_REMATCH[0]} is the entirety of the match.  So `usplit'
# takes the matched URL, splits it using the regex, then assigns each part to an
# element of the url array NAME by using `printf -v', which prints to a
# variable.
#
# Returns 1, after logging an error, if URL doesn't match the regex.
usplit() { # usplit URL_ARRAY URL
	# Note: URL_ARRAY isn't assigned in `usplit', because it should
	# already exist.  Pass /only/ the name of URL_ARRAY to this
	# function, not its contents.
	local re='^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?'
	local u="$2"
	if ! [[ "$u" =~ $re ]]; then
		log error "usplit: '$2' doesn't match '$re'"
		return 1
	fi

	# ShellCheck doesn't see that I'm using these variables in the `for'
	# loop below, because I'm not technically using them /as/ variables, but
	# as names to the variables.  The ${!c} formation in the `printf' call
	# below performs a reverse lookup on the name to get the actual data.

	# shellcheck disable=2034
	local entire_url="${BASH_REMATCH[0]}" \
		scheme="${BASH_REMATCH[2]}" \
		authority="${BASH_REMATCH[4]}" \
		path="${BASH_REMATCH[5]}" \
		query="${BASH_REMATCH[7]}" \
		fragment="${BASH_REMATCH[9]}"

	# Iterate through the 5 components of a URL and assign them to elements
	# of URL_ARRAY, as follows:
	# 0=url 1=scheme 2=authority 3=path 4=query 5=fragment

	run printf -v "$1[0]" '%s' "$entire_url"
	# This loop tests whether the component exists first -- if it
	# doesn't, the special variable $UC_BLANK is used in the spot
	# instead.  Bash doesn't have a useful way of differentiating an
	# /unset/ element of an array, versus an /empty/ element.
	# The only exception is that 'path' component, which always exists
	# in a URL (I think the simplest URL possible is '/', the empty
	# path).
	local i=1 # begin at 1 -- the full URL is [0].
	local c   # name of the component being copied
	for c in scheme authority path query fragment; do
		if [[ "${!c}" || "$c" == path ]]; then
			run printf -v "$1[$i]" '%s' "${!c}"
		else
			run printf -v "$1[$i]" '%s' "$UC_BLANK"
		fi
		((i += 1))
	done
}

# Join a URL array, split with `usplit', back into a string, assigning
# it to the 0th element of the array.
ujoin() { # ujoin URL_ARRAY
	# Here's the documentation for local's '-n' flag:
	#
	# Give each name the nameref attribute, making it a name reference
	# to another variable.  That other variable is defined by the value of
	# name.  All references, assignments, and attribute modifications to
	# name, except for those using or changing the -n attribute itself,
	# are performed on the variable referenced by name's value.  The
	# nameref attribute cannot be applied to array variables.
	#
	# Pretty handy for passing-by-name!
	local -n URL_ARRAY="$1"

	# The URL is assembled in a scratch variable and stored once at the
	# end.  Two things this avoids: URL text is never used as a `printf'
	# format (percent-encoded URLs are full of `%'), and a stale element 0
	# can't leak into the result when the scheme is undefined.
	local joined=""

	# For each possible URL component, check if it exists with `ucdef'.
	# If it does, append it (with the correct component delimiter).
	if ucdef 'URL_ARRAY[1]'; then
		joined+="${URL_ARRAY[1]}:"
	fi

	if ucdef 'URL_ARRAY[2]'; then
		joined+="//${URL_ARRAY[2]}"
	elif [[ "${URL_ARRAY[1]}" == file ]]; then
		# Need special casing for file: protocol, whose authority may
		# be empty but whose `//' is still written:
		# https://datatracker.ietf.org/doc/html/rfc1738#section-3.10
		joined+="//"
	fi

	# The path component is required.
	joined+="${URL_ARRAY[3]}"

	if ucdef 'URL_ARRAY[4]'; then
		joined+="?${URL_ARRAY[4]}"
	fi

	if ucdef 'URL_ARRAY[5]'; then
		joined+="#${URL_ARRAY[5]}"
	fi

	URL_ARRAY[0]="$joined"
	log d "${URL_ARRAY[0]}"
}

# `ucdef' checks whether a URL component is blank or not -- if a component
# doesn't exist, `usplit' writes $UC_BLANK there instead (which is :?: by
# default, though it really doesn't matter much *what* it is, as long as it's
# not going to really be in a URL).  I tried really hard to differentiate an
# unset array element from a simply empty one, but like, as far as I could tell,
# you can't do that in Bash.
#
# COMPONENT is the /name/ of the element (e.g. `url[2]'); returns 0 if it is
# defined.
ucdef() { # ucdef COMPONENT
	local component="$1"
	[[ "${!component}" != "$UC_BLANK" ]]
}

# `ucblank' determines whether a URL component is blank (""), as opposed to
# undefined.  COMPONENT is passed by name, as for `ucdef'.
ucblank() { # ucblank COMPONENT
	local component="$1"
	[[ -z "${!component}" ]]
}

# `ucset' sets one component of a URL array and then sets the 0th element to
# the new full URL.  Use it instead of directly setting the array element with
# U[x], because otherwise U[0] will fall out of sync with the rest of the
# contents.
ucset() { # ucset URL_ARRAY_INDEX NEW_VALUE
	local url_array_component="$1" # Of form 'URL_ARRAY[INDEX]'
	local value="$2"

	# Assign $value to $url_array_component.
	#
	# `printf -v' writes to the named element without the value ever being
	# parsed as shell code, so a quote in a URL can't break out of the
	# assignment the way it could with `eval'.
	run printf -v "$url_array_component" '%s' "$value"

	# Rejoin the URL_ARRAY with the changed value.
	#
	# The substitution here strips the array index subscript (i.e.,
	# URL[4] => URL), passing the name of the full array to `ujoin'.
	run ujoin "${url_array_component/\[*\]/}"
}

# [1]: Encode a URL using percent-encoding.
#
# Everything except ASCII letters, digits and `.~_-' is written as %XX, byte
# by byte (hence the C locale).
uencode() { # uencode URL
	local LC_ALL=C
	local i ch
	for ((i = 0; i < ${#1}; i++)); do
		ch="${1:i:1}"
		case "$ch" in
		[a-zA-Z0-9.~_-])
			printf '%s' "$ch"
			;;
		*)
			# A leading quote makes printf use the character's code.
			printf '%%%02X' "'$ch"
			;;
		esac
	done
	printf '\n'
}

# [1]: Decode a percent-encoded URL.
udecode() { # udecode URL
	# `+' means a space ...
	local encoded="${1//+/ }"
	# ... backslashes already in the input are protected, so `%b' below
	# only ever interprets the escapes produced here ...
	encoded="${encoded//\\/\\\\}"
	# ... and every `%XX' becomes `\xXX', which `%b' turns into the byte.
	printf '%b\n' "${encoded//%/\\x}"
}

# Implement [2]: 5.2.4, "Remove Dot Segments".
#
# Prints PATH with its `.' and `..' segments resolved and doubled slashes
# collapsed.  Returns 1 if the remaining input can't be split into segments.
pundot() { # pundot PATH
	local input="$1"
	local output=""
	local last # the output segment dropped by a `..'
	# Kept in variables so the substitution at the end is unambiguous.
	local slashes='//' slash='/'
	while [[ "$input" ]]; do
		if [[ "$input" =~ ^\.\.?/ ]]; then
			# A. leading `../' or `./': drop it.
			input="${input#"${BASH_REMATCH[0]}"}"
		elif [[ "$input" =~ ^/\.(/|$) ]]; then
			# B. leading `/./' or a final `/.': replace with `/'.
			input="/${input#"${BASH_REMATCH[0]}"}"
		elif [[ "$input" =~ ^/\.\.(/|$) ]]; then
			# C. leading `/../' or a final `/..': replace with `/' and
			# remove the last segment from the output.  The segment is
			# quoted in the removal so glob characters in it are
			# matched literally.
			input="/${input#"${BASH_REMATCH[0]}"}"
			last=""
			if [[ "$output" =~ /?[^/]+$ ]]; then
				last="${BASH_REMATCH[0]}"
			fi
			output="${output%"$last"}"
		elif [[ "$input" == . || "$input" == .. ]]; then
			# D. nothing but `.' or `..' left: drop it.
			input=
		else
			# E. move the first segment (with its leading slash, if
			# any) from the input to the output.
			[[ $input =~ ^(/?[^/]*)(/?.*)$ ]] || return 1
			output="$output${BASH_REMATCH[1]}"
			input="${BASH_REMATCH[2]}"
		fi
	done
	printf '%s\n' "${output//$slashes/$slash}"
}

# Implement [2] Section 5.2.3, "Merge Paths".
#
# Both arguments are the /names/ of URL arrays as filled in by `usplit'; the
# merged path is printed on stdout.
pmerge() { # pmerge BASE_PATH REFERENCE_PATH
	local -n base_path="$1"
	local -n reference_path="$2"
	local slashes='//' slash='/'

	# An empty reference path means "the base path".
	if ucblank 'reference_path[3]'; then
		printf '%s\n' "${base_path[3]//$slashes/$slash}"
		return
	fi

	if ucdef 'base_path[2]' && ucblank 'base_path[3]'; then
		# The base has an authority but no path: the reference hangs
		# directly off the root.
		printf '/%s\n' "${reference_path[3]//$slashes/$slash}"
	else
		# Otherwise replace everything after the base's last slash
		# with the reference.
		local bp=""
		if [[ "${base_path[3]}" == */* ]]; then
			bp="${base_path[3]%/*}"
		fi
		printf '%s/%s\n' "${bp%/}" "${reference_path[3]#/}"
	fi
}

# `utransform' implements [2] Section 5.2.2, "Transform References."
#
# That section conveniently lays out a pseudocode algorithm describing how URL
# resources should be transformed from one to another.  This function just
# implements that pseudocode in Bash, using the helper functions defined above.
utransform() { # utransform TARGET:ARRAY BASE:STRING REFERENCE:STRING
	local -a B R       # base, reference
	local -n T="$1"    # target
	local i

	usplit B "$2"
	usplit R "$3"

	# initialize T
	for ((i = 1; i <= 5; i++)); do
		T[$i]="$UC_BLANK"
	done

	# 0=url 1=scheme 2=authority 3=path 4=query 5=fragment
	if ucdef 'R[1]'; then
		# The reference is absolute: take everything from it.
		T[1]="${R[1]}"
		if ucdef 'R[2]'; then
			T[2]="${R[2]}"
		fi
		if ucdef 'R[3]'; then
			T[3]="$(pundot "${R[3]}")"
		fi
		if ucdef 'R[4]'; then
			T[4]="${R[4]}"
		fi
	else
		if ucdef 'R[2]'; then
			# Network-path reference (`//host/...'): only the scheme
			# comes from the base.
			T[2]="${R[2]}"
			if ucdef 'R[3]'; then
				T[3]="$(pundot "${R[3]}")"
			fi
			if ucdef 'R[4]'; then
				T[4]="${R[4]}"
			fi
		else
			if ucblank 'R[3]'; then
				# Same document, perhaps with a different query.
				T[3]="${B[3]}"
				if ucdef 'R[4]'; then
					T[4]="${R[4]}"
				else
					T[4]="${B[4]}"
				fi
			else
				if [[ "${R[3]}" == /* ]]; then
					T[3]="$(pundot "${R[3]}")"
				else
					T[3]="$(pmerge B R)"
					T[3]="$(pundot "${T[3]}")"
				fi
				if ucdef 'R[4]'; then
					T[4]="${R[4]}"
				fi
			fi
			T[2]="${B[2]}"
		fi
		T[1]="${B[1]}"
	fi
	if ucdef 'R[5]'; then
		T[5]="${R[5]}"
	fi

	ujoin T
}

# GEMINI #######################################################################
# https://gemini.circumlunar.space/docs/specification.html [3]
#
# The reason we're all here, folks.  Gemini is a new protocol that aims to be a
# middle ground between Gopher and HTTP, blah blah.  You know the spiel.  I know
# the spiel.  It's great stuff!
#
################################################################################

# Request a resource from a gemini server - see [3] Sections 2, 4.
#
# The server's raw response (header line and body) is written to stdout.
gemini_request() { # gemini_request URL
	local -a url
	run usplit url "$1"
	log debug "${url[@]}"

	# Remove user info from the URL.
	#
	# URLs can technically be of the form
	# <protocol>://<user>:<password>@<domain> (see [2] Section 3.2,
	# "Authority").  I don't know of any Gemini servers that use the
	# <user> or <password> parts, so `gemini_request' just strips them
	# from the requested URL.  This will need to be changed if servers
	# decide to use this method of authentication.
	log debug "Removing user info from the URL"
	run ucset 'url[2]' "${url[2]#*@}"

	# Determine the port to request.
	#
	# The default port for Gemini is 1965 (the year of the first Gemini
	# space mission), but some servers use a different port.  In a URL, a
	# port can be specified after the domain, separated with a colon.  The
	# user can also request a different default port, for whatever reason,
	# by setting the variable $BOLLUX_GEMINI_PORT.
	#
	# Only a trailing `:DIGITS' counts as a port, so the colons inside an
	# IPv6 literal are left alone, and an empty port falls back to the
	# default.
	log debug "Determining the port to request"
	local port="$BOLLUX_GEMINI_PORT"
	if [[ "${url[2]}" =~ ^(.*):([0-9]*)$ ]]; then
		local host="${BASH_REMATCH[1]}"
		port="${BASH_REMATCH[2]:-$BOLLUX_GEMINI_PORT}"
		ucset 'url[2]' "$host"
	fi

	# Build the SSL command to request the resource.
	#
	# This is the beating heart of bollux, the command that does all the
	# important work of actually fetching the gemini content the user wants
	# to read.  I've broken it out into an array for ease of editing (and
	# now, commenting!).
	local ssl_cmd=(
		# `s_client' is OpenSSL's reference client implementation.  In
		# the manual [9] it says not to use it, but who reads the
		# manual, anyway?
		openssl s_client
		-crlf  # Automatically add CR+LF to line
		-quiet # Don't print all the cert stuff
		# -ign_eof                  # `-quiet' implies `-ign_eof'
		-connect "${url[2]}:$port"  # The server and port to connect
		-servername "${url[2]}"     # SNI: Server Name Identification
		-no_ssl3 -no_tls1 -no_tls1_1 # disable old TLS/SSL versions
	)

	# Actually request the resource.
	#
	# I could probably use 'printf '%s\r\n' "$url" | run "${ssl_cmd[@]}",
	# and maybe I should.  I wrote this little line a while ago.
	run "${ssl_cmd[@]}" <<<"${url[0]}"
}

# Handle the gemini response - see [3] Section 3.
#
# The response is read from stdin; URL is the address it was fetched from.
gemini_response() { # gemini_response URL
	local code meta  # received on the first line of the response
	local title      # determined by a clunky heuristic, see read loop: (2*)
	local url="$1"   # the currently-visited URL.

	# Read the first line.
	#
	# The first line of a Gemini response is the "header line," which is of
	# the format "STATUS METADATA\r\n".  `read' gets a timeout to handle
	# non-responsive servers: a status above 128 means it timed out, while
	# any other failure (e.g. an empty response) is dealt with by the
	# `case' below.  Technically, METADATA shouldn't exceed 1024 bytes, but
	# I can't think of a good way to break at that point -- so bollux is
	# not quite spec-compliant in this regard.
	#
	# Additionally, there are sometimes bugs with caching and
	# byte-shifting(?) when trying to download a binary file (see
	# `download', below), but I'm not sure how to remedy that issue either.
	# It requires more research.
	read -t "$BOLLUX_TIMEOUT" -r code meta || {
		(($? > 128)) && die 99 "Timeout."
	}

	log d "[$code] $meta"

	# Branch depending on the status code.  See [3] Appendix 1.
	#
	# Notes:
	# - All codes other than 3* (Redirects) reset the REDIRECTS counter.
	# - I branch on the first digit of the status code, instead of both, to
	#   minimize the amount of duplicated code I need to write.
	case "$code" in
	1*) # INPUT
		# Gemini allows GET-style requests, and the INPUT family of
		# response codes facilitate them.  `10' is for standard input,
		# and `11' is for sensitive information, like passwords.  Any
		# other 1x code is treated like `10', so the user is always
		# asked before a query is sent.
		REDIRECTS=0
		BOLLUX_URL="$url"
		case "$code" in
		11) run prompt "$meta" -s ;; # sensitive input
		*) run prompt "$meta" ;;
		esac
		run history_append "$url" "${title:-}"
		run blastoff "?$(uencode "$REPLY")"
		;;
	2*) # OK
		# The `20' family of requests is like HTTP's `200' family: it
		# means that the request worked and the server is sending the
		# requested content.
		REDIRECTS=0
		BOLLUX_URL="$url"
		# Janky heuristic to guess the title of a page.
		#
		# This while loop reads through the file looking for a line
		# starting with `#', which is a heading in text/gemini (see [3]
		# Section 5).  It assumes that the first such heading is the
		# title of the page, and uses that title for the terminal title
		# and for the history.
		local pretitle=""
		while read -r || [[ -n "$REPLY" ]]; do
			# Since looping through the file consumes it (that is,
			# the file pointer (I think?) moves away from the
			# beginning of the file), the content we've read so far
			# must be saved in a `pretitle' variable, so it can be
			# printed later with the rest of the page.
			pretitle+="$REPLY"$'\n'
			if [[ "$REPLY" =~ ^#[[:space:]]*(.*) ]]; then
				title="${BASH_REMATCH[1]}"
				break
			fi
		done
		run history_append "$url" "${title:-}"
		# Print the pretitle and the rest of the document (`passthru' is
		# a pure-bash rewrite of `cat'), and pipe it through `display'
		# for typesetting.
		{
			printf '%s' "$pretitle"
			passthru
		} | run display "$meta" "${title:-}"
		;;
	3*) # REDIRECT
		# Redirects are a fundamental part of any hypertext framework,
		# and if I remember correctly, one of the main reasons
		# solderpunk and others began thinking about gemini (the others
		# being TLS and URLs, I believe).
		#
		# Note that although [3] specifies both a temporary (30) and
		# permanent (31) redirect, bollux isn't smart enough to make a
		# distinction.  I'm not sure what the difference would be in
		# practice, anyway.
		#
		# Per [4] bollux limits the number of redirects a page is
		# allowed to make (by default, five).  Change `$BOLLUX_MAXREDIR'
		# to customize that limit.
		((REDIRECTS += 1))
		if ((REDIRECTS > BOLLUX_MAXREDIR)); then
			die $((100 + code)) "Too many redirects!"
		fi
		BOLLUX_URL="$url"
		# Another discussion on [4] pertains to the value of alerting
		# the user to (A) a cross-domain redirect, or even (B) all
		# redirects.  I have yet to implement that particular
		# functionality, and even when I do implement it I don't think
		# (B) will be the default.  Perhaps (A) though.  No notification
		# will also be an option, however.
		run blastoff "$meta" # TODO: confirm redirect
		;;
	4*) # TEMPORARY ERROR
		# Since the 4* codes ([3] Appendix 1) are all server issues,
		# bollux can treat them all basically the same.  This is an area
		# that could use some expansion.
		local desc="Temporary error"
		case "$code" in
		41) desc+=" (server unavailable)" ;;
		42) desc+=" (CGI error)" ;;
		43) desc+=" (proxy error)" ;;
		44) desc+=" (slow down)" ;; # could be particularly improved
		esac
		REDIRECTS=0
		die "$((100 + code))" "$desc [$code]: $meta"
		;;
	5*) # PERMANENT ERROR
		# The situation with the 5* codes is basically similar to the 4*
		# codes.  It could maybe use more thought as to what behavior to
		# implement.  Maybe adding the (bad) requests to history,
		# subject to configuration?
		local desc="Permanent failure"
		case "$code" in
		51) desc+=" (not found)" ;;
		52) desc+=" (gone)" ;;
		53) desc+=" (proxy request refused)" ;;
		# For some reason, codes 54--58 inclusive aren't used.
		59) desc+=" (bad request)" ;;
		esac
		REDIRECTS=0
		die "$((100 + code))" "$desc [$code]: $meta"
		;;
	6*) # CERTIFICATE ERROR (TODO)
		# Dealing with certificates is honestly the most important
		# feature missing from bollux to get it to 1.0.  Right now,
		# bollux deals with 6* status codes identically to 4* and 5*
		# codes.  This is not ideal, in the slightest.
		local desc="Client certificate required"
		case "$code" in
		61) desc+=" (certificate not authorized)" ;;
		62) desc+=" (certificate not valid)" ;;
		esac
		REDIRECTS=0
		log d "Not implemented: Client certificates"
		die "$((100 + code))" "$desc [$code]: $meta"
		;;
	*) # UNKNOWN
		# Just in case we get a weird, un-spec-compliant status code.
		[[ -z "${code-}" ]] && die 100 "Empty response code."
		# The "code" may not even be a number, so only do arithmetic on
		# it when it is one (forcing base 10 in case of leading zeros).
		local status=100
		if [[ "$code" =~ ^[0-9]+$ ]]; then
			status=$((100 + 10#$code))
		fi
		die "$status" "Unknown response code: $code."
		;;
	esac
}

# GOPHER #######################################################################
# https://tools.ietf.org/html/rfc1436 protocol
# https://tools.ietf.org/html/rfc4266 url
#
# Gopher is the grand-daddy of gemini (or maybe just weird uncle? hm..),
# invented in 1991 as a fancier FTP.  There's been a sort of resurgence in it as
# a consequence of the shittifying of the WWW, but it's shown its age (which is
# why Gemini was born).  But why am I telling you this?  You're reading the
# source code of a Gemini browser!
# You're a meganerd just like me.  Welcome to
# the club, kid.
#
# Since gopher is so old, it actually has two RFCs: RFC 1436 [6] for the
# protocol itself, and RFC 4266 [7] for the URL format (gopher predates the
# URL!).  However, requesting and handling responses is still fundamentally the
# same to gemini, so it was pretty easy to implement this.  I don't think bollux
# handles all the possible item types, but it should get the main ones.
#
################################################################################

# Request a resource.
#
# The server's response is written to stdout.  Returns 1, after logging an
# error, if URL isn't a gopher URL or the connection can't be opened.
gopher_request() { # gopher_request URL
	local url="$1"

	# [7] Section 2.1
	if ! [[ "$url" =~ gopher://([^/?#:]*)(:([0-9]+))?(/((.))?(/?.*))?$ ]]; then
		log error "Not a gopher URL: '$url'"
		return 1
	fi
	local server="${BASH_REMATCH[1]}" \
		port="${BASH_REMATCH[3]:-$BOLLUX_GOPHER_PORT}" \
		type="${BASH_REMATCH[6]:-1}" \
		path="${BASH_REMATCH[7]}"

	log d "URL='$url' SERVER='$server' TYPE='$type' PATH='$path'"

	# Bash has this really neat feature where it can open a TCP socket
	# directly.  bollux uses that feature here to ask the server for the
	# resource and then `passthru' it to the next thing.
	if ! exec 9<>"/dev/tcp/$server/$port"; then
		log error "Can't connect to '$server:$port'"
		return 1
	fi
	printf '%s\r\n' "$path" >&9
	passthru <&9
}

# Handle a server response.
#
# The response is read from stdin; URL is the address it was fetched from and
# decides, through its item type, what is done with the data.
gopher_response() { # gopher_response URL
	# `pre' and `cur_server' are deliberately left visible to
	# `gopher_convert', which reads them through dynamic scoping.
	local url="$1" pre=false

	# [7] Section 2.1
	#
	# Note that this duplicates the code in `gopher_request'.  There might
	# be a good way to thread this data through so that it's not computed
	# twice.
	[[ "$url" =~ gopher://([^/?#:]*)(:([0-9]+))?(/((.))?(/?.*))?$ ]]
	local cur_server="${BASH_REMATCH[1]}"
	local type="${BASH_REMATCH[6]:-1}"

	run history_append "$url" "" # gopher doesn't really have titles, huh

	# Gopher has a concept of 'line types', or maybe 'item types' --
	# basically, each line in a gophermap starts with a character, its type,
	# and then is followed by a series of tab-separated fields describing
	# where that type is and how to display it.  The full list of original
	# line types can be found in [6] Section 3.8, though the types have also
	# been extended over the years.  Since bollux can only display types
	# that are text-ish, it only concerns itself with those in this case
	# statement.  All the others are simply downloaded.
	case "$type" in
	0) # Item is a file
		# Since gopher doesn't send MIME-type information in-band, we
		# just assume it's text/plain, and try to convert it later to
		# UTF-8 with `iconv'.
		run display text/plain
		;;
	1) # Item is a directory [gophermap]
		# Since I've already written all the code to typeset gemini
		# well, it's easier to convert a gophermap to text/gemini and
		# display it than to write a whole new gophermap typesetter.
		run gopher_convert | run display text/gemini
		;;
	3) # Error
		# I don't know all the gopher error cases, and the spec is
		# pretty quiet on them.  So bollux just signals failure and
		# bails.
		die 203 "GOPHER: failed"
		;;
	7) # Item is an Index-Search server
		# Gopher search queries are separated from their resources by a
		# TAB.  It's wild.
		if [[ "$url" =~ $'\t' ]]; then
			run gopher_convert | run display text/gemini
		else
			run prompt 'SEARCH'
			# The TAB is spelled out so it can't be mistaken for (or
			# reformatted into) a space, which would make the test
			# above fail forever.
			run blastoff "$url"$'\t'"$REPLY"
		fi
		;;
	*) # Anything else
		# The list at [6] Section 3.8 includes the following (noted
		# where it might be good to differently handle them in the
		# future):
		#
		# 2. Item is a CSO phone-book server *****
		# 4. Item is a BinHexed Macintosh file
		# 5. Item is DOS binary archive of some sort
		# 6. Item is a UNIX uuencoded file
		# 8. Item points to a text-based telnet session *****
		# 9. Item is a binary file! [exclamation point sic. -- ed.]
		# +. Item is a redundant server *****
		# T. Item points to a text-based tn3270 session
		# g. Item is a GIF format graphics file
		# I. Item is some kind of image file
		#
		# As mentioned, there are other line types floating around as
		# well.  Since I don't browse gopher much, there's not much
		# personal motivation to extend `gopher_response'; however pull
		# requests are always welcome.
		run download "$url"
		;;
	esac
}

# Convert a gophermap naively to a gemini page.
#
# Based strongly on [8] but bash-ified.  Due to the properties of link lines in
# gemini, many of the item types in `gopher_response' can be linked to the
# proper protocol handlers here -- so if a user is trying to reach a TCP link
# through gopher, bollux won't have to handle it, for example.*
#
# * Ideally -- right now, bollux simply errors out on all unknown protocols.
#   More research needs to be done into how to farm out to `xdg-open' or a
#   similar generic opener.
#
# Reads the gophermap on stdin and writes text/gemini to stdout.  Expects
# `pre' (are we inside a preformatted block?) and `cur_server' (the host to
# assume for lines that don't name one) from the caller, `gopher_response'.
gopher_convert() {
	local type label path server port
	# TYPE, LABEL and -- optionally -- TAB-separated PATH, SERVER, PORT.
	local regex=$'(.)([^\t]*)(\t([^\t]*)\t([^\t]*)\t([^\t]*))?'
	# Don't treat an unset `pre' as "true" if there's no caller to set it.
	: "${pre:=false}"

	while IFS= read -r || [[ -n "$REPLY" ]]; do
		# Gopher lines end in CRLF; don't let the CR leak into the
		# last field.
		REPLY="${REPLY%$'\r'}"
		if [[ "$REPLY" =~ $regex ]]; then
			type="${BASH_REMATCH[1]}"
			label="${BASH_REMATCH[2]}"
			path="${BASH_REMATCH[4]:-/}"
			server="${BASH_REMATCH[5]:-$cur_server}"
			# 70 is the port assigned to gopher in [6].
			port="${BASH_REMATCH[6]:-${BOLLUX_GOPHER_PORT:-70}}"
		else
			log e "CAN'T PARSE LINE"
			printf '%s\n' "$REPLY"
			continue
		fi

		case "$type" in
		.) # end of file
			printf '.\n'
			break
			;;
		i) # label
			# Labels that look like gemini headings or list items
			# are shown as such; everything else is kept inside a
			# preformatted block.
			case "$label" in
			'#'* | '*'[[:space:]]*)
				if $pre; then
					printf '%s\n' '```'
					pre=false
				fi
				;;
			*)
				if ! $pre; then
					printf '%s\n' '```'
					pre=true
				fi
				;;
			esac
			printf '%s\n' "$label"
			;;
		*) # some kind of link
			# Links can't live inside a preformatted block.
			if $pre; then
				printf '%s\n' '```'
				pre=false
			fi
			case "$type" in
			h) # html link -- the path is of the form `URL:...'
				printf '=> %s %s\n' "${path:4}" "$label"
				;;
			T) # telnet link
				printf '=> telnet://%s:%s/%s%s %s\n' \
					"$server" "$port" "$type" "$path" "$label"
				;;
			*) # other type
				printf '=> gopher://%s:%s/%s%s %s\n' \
					"$server" "$port" "$type" "$path" "$label"
				;;
			esac
			;;
		esac
	done
	if $pre; then
		printf '%s\n' '```'
	fi
	# close the connection
	exec 9<&-
	exec 9>&-
}

# HANDLING CONTENT #############################################################
#
# After fetching the resource requested by the user, bollux needs to display or
# otherwise 'give' the resource to the user for consumption.
#
################################################################################

# display the fetched content
#
# METADATA is the `;'-separated header value: a MIME type optionally followed
# by parameters, of which only `charset' is used.  text/* bodies (read from
# stdin) are converted to UTF-8, saved to $BOLLUX_PAGESRC, typeset and paged
# with less(1); anything else is handed to `download'.
display() { # display METADATA [TITLE]
	local -a less_cmd hdr
	local mime charset title param
	local helpline submime typesetter

	# split header line
	IFS=';' read -ra hdr <<<"$1"

	# title is optional but nice looking
	if (($# == 2)); then
		title="$2"
	fi

	mime="$(trim_string "${hdr[0],,}")"
	# Everything after the first field is a parameter.
	for param in "${hdr[@]:1}"; do
		case "$param" in
		*charset=*)
			charset="${param#*=}"
			# drop stray whitespace and quotes around the value
			charset="${charset//[[:space:]\"]/}"
			;;
		esac
	done

	[[ -z "$mime" ]] && mime="text/gemini"
	[[ -z "$charset" ]] && charset="utf-8"

	log debug "mime='$mime'; charset='$charset'"

	case "$mime" in
	text/*)
		set_title "$title${title:+ - }bollux"

		# Build the `less' command
		less_cmd=(less)
		# Render ANSI color escapes ONLY (as opposed to `-r', which
		# renders all escapes)
		less_cmd+=(-R)
		# Don't wrap text.  `fold_line' takes care of wrapping normal
		# text, and pre-formatted text shouldn't wrap.
		less_cmd+=(-S)
		# Load the keybindings (see `lesskey').
		mklesskey && less_cmd+=(-k "$BOLLUX_LESSKEY")

		helpline="${KEY_OPEN}:open, "
		helpline+="${KEY_GOTO}/"
		helpline+="${KEY_GOTO_FROM}:goto, "
		helpline+="${KEY_BACK}:back, "
		helpline+="${KEY_FORWARD}:forward, "
		helpline+="${KEY_REFRESH}:refresh"

		less_cmd+=(
			# 'status'line (a literal `$' terminates a less prompt)
			-Pm"$(less_prompt_escape "$BOLLUX_URL") - bollux\$"
			# helpline
			-P="$(less_prompt_escape "$helpline")\$"
			# start with statusline
			-m
			# float content to the top
			+k
		)

		# Use `typeset_SUBTYPE' when such a function exists.
		submime="${mime#*/}"
		if declare -Fp "typeset_$submime" &>/dev/null; then
			typesetter="typeset_$submime"
		else
			typesetter="passthru"
		fi

		# less exits 0 on a normal quit; the custom keybindings make it
		# exit with other codes, which `handle_keypress' interprets.
		{
			run iconv -f "${charset^^}" -t "UTF-8" |
				run tee "$BOLLUX_PAGESRC" |
				run "$typesetter" |
				run "${less_cmd[@]}" && bollux_quit
		} || run handle_keypress "$?"
		;;
	*) run download "$BOLLUX_URL" ;;
	esac
}

# escape strings for the less prompt
#
# Prints STRING with a backslash before each of `?', `:', `.', `%' and `\',
# followed by a newline.
less_prompt_escape() { # less_prompt_escape STRING
	local i ch
	for ((i = 0; i < ${#1}; i++)); do
		ch="${1:i:1}"
		case "$ch" in
		[\?:\.%\\]) printf '\\%s' "$ch" ;;
		*) printf '%s' "$ch" ;;
		esac
	done
	printf '\n'
}

## Generate a lesskey(1) file for custom keybinds
# After less 582, less itself can read lesskey configuration files, rendering
# the lesskey *program* deprecated.  The exact message is, apparently, this:
#
#   NOTE: lesskey is deprecated.
#   It is no longer necessary to run lesskey,
#   when using less version 582 and later.
#
# Now I don't have less 582 available on my machine (Debian of course!), so I'm
# relying on a report from 'bencollver' on tildegit.  (Thanks for the report,
# ben!)
#
# That being said, this may not work.  *Please test!* --- acdw 2022-08-09
#
# PS. Here's a link to less's homepage in case something else comes up:
# https://www.greenwoodsoftware.com/less/
#
# If $BOLLUX_CUSTOM_LESSKEY is a file, it is used as-is.  Otherwise
# $BOLLUX_LESSKEY is (re)generated when it is missing or when its type doesn't
# match what this function writes for the installed less: the plain source for
# less >= 582, a file compiled with lesskey(1) otherwise.
mklesskey() { # mklesskey
	local less_version="" lesskey_ft="" lesskey_src
	local lesskey_make=false
	local -i rc=0

	if [[ -f "$BOLLUX_CUSTOM_LESSKEY" ]]; then
		log d "Using custom lesskey: '$BOLLUX_CUSTOM_LESSKEY'"
		BOLLUX_LESSKEY="${BOLLUX_CUSTOM_LESSKEY}"
		return 0
	fi

	# The version is the second word of the first line of `less --version'.
	read -r _ less_version _ < <(less --version 2>/dev/null)
	less_version="${less_version%%[!0-9]*}"
	: "${less_version:=0}"

	if [[ -f "$BOLLUX_LESSKEY" ]]; then
		# -b keeps the file name out of the output, so a path containing
		# "text" or "application" can't confuse the match below.
		lesskey_ft="$(file -b -i "$BOLLUX_LESSKEY")"
		if [[ ("$lesskey_ft" == *application* && "$less_version" -ge 582) ||
			("$lesskey_ft" == *text* && "$less_version" -lt 582) ]]; then
			mv "$BOLLUX_LESSKEY" "${BOLLUX_LESSKEY}.bak"
			log e "Moved incompatible lesskey '$BOLLUX_LESSKEY' to '${BOLLUX_LESSKEY}.bak'."
			lesskey_make=true
		fi
	else
		lesskey_make=true
	fi

	if ! $lesskey_make; then
		log d "Lesskey found: $BOLLUX_LESSKEY"
		return 0
	fi

	log d "Generating lesskey..."
	lesskey_src="$(mktemp)" || return
	# Each `quit N' makes less exit with the character code of N (48 for
	# `0', 49 for `1', ...), which `handle_keypress' dispatches on.
	printf '%s\n' \
		'#command' \
		"${KEY_OPEN} quit 0 # 48 open a link" \
		"${KEY_GOTO} quit 1 # 49 goto a url" \
		"${KEY_BACK} quit 2 # 50 back" \
		"${KEY_FORWARD} quit 3 # 51 forward" \
		"${KEY_REFRESH} quit 4 # 52 re-request / download" \
		"${KEY_GOTO_FROM} quit 5 # 53 goto a url (pre-filled)" \
		"${KEY_CYCLE_PRE} quit 6 # 54 cycle T_PRE_DISPLAY and refresh" \
		'# other keybinds' \
		'\40 forw-screen-force' \
		'h left-scroll' \
		'l right-scroll' \
		"? status # 'status' will show a little help thing." \
		'= noaction' \
		>"$lesskey_src"

	if ((less_version >= 582)); then
		mv "$lesskey_src" "$BOLLUX_LESSKEY"
	else
		lesskey -o "$BOLLUX_LESSKEY" "$lesskey_src" || rc=$?
		rm -f -- "$lesskey_src"
		return "$rc"
	fi
}

# typeset a text/gemini document
#
# Reads the document on stdin and writes the formatted page to stdout.  Sets
# the globals WIDTH and S_MARGIN, which the gemini_* line formatters read.
typeset_gemini() {
	local pre=false
	local ln=0 # link number

	if ((T_WIDTH == 0)); then
		# With `checkwinsize' on, bash refreshes LINES and COLUMNS after
		# a non-builtin command finishes; the subshell is that command.
		shopt -s checkwinsize
		(
			:
			:
		)
		log d "LINES=$LINES; COLUMNS=$COLUMNS"
		T_WIDTH=$COLUMNS
	fi
	WIDTH=$((T_WIDTH - T_MARGIN))
	# default if dumb; a zero width would make `fold_line' divide by zero
	((WIDTH < 1)) && WIDTH=80
	S_MARGIN=$((T_MARGIN - 1)) # spacing

	log d "T_WIDTH=$T_WIDTH"
	log d "WIDTH=$WIDTH"
	log d "$T_PRE_DISPLAY"

	# The `||' clause keeps a final line that lacks a trailing newline.
	while IFS= read -r || [[ -n "$REPLY" ]]; do
		case "$REPLY" in
		'```'*)
			PRE_LINE_FORCE=false
			if $pre; then
				pre=false
			else
				pre=true
			fi
			# On an opening fence, show its alt text if configured.
			# The assignment prefix only lasts for this one call.
			case "${T_PRE_DISPLAY%%,*}" in
			pre) : ;;
			alt | both)
				$pre && PRE_LINE_FORCE=true \
					gemini_pre "${REPLY#\`\`\`}"
				;;
			esac
			continue
			;;
		'=>'*)
			((ln += 1))
			gemini_link "$REPLY" $pre "$ln"
			;;
		'#'*) gemini_header "$REPLY" $pre ;;
		'*'[[:space:]]*) gemini_list "$REPLY" $pre ;;
		'>'*) gemini_quote "$REPLY" $pre ;;
		*) gemini_text "$REPLY" $pre ;;
		esac
	done
}

# Format a link line: sigil, link number, title, then the URL.
# gemini_link LINE PRE NUMBER -- when PRE is true, or LINE isn't a valid link,
# LINE is printed as preformatted text instead.
gemini_link() {
	local re="^(=>)[[:blank:]]*([^[:blank:]]+)[[:blank:]]*(.*)"
	local s t a # sigil, text, annotation(url)
	local ln="$3"
	if ! ${2-false} && [[ "$1" =~ $re ]]; then
		s="${BASH_REMATCH[1]}"
		a="${BASH_REMATCH[2]}"
		t="${BASH_REMATCH[3]}"
		# A link without a title shows its URL as the title.
		if [[ -z "$t" ]]; then
			t="$a"
			a=
		fi

		printf "\e[${C_SIGIL}m%${S_MARGIN}s ${C_RESET}" "$s"
		printf "\e[${C_LINK_NUMBER}m[%d]${C_RESET} " "$ln"
		fold_line -n -B "\e[${C_LINK_TITLE}m" -A "${C_RESET}" \
			-l "$((${#ln} + 3))" -m "${T_MARGIN}" \
			"$WIDTH" "$(trim_string "$t")"
		fold_line -B " \e[${C_LINK_URL}m" \
			-A "${C_RESET}" \
			-l "$((${#ln} + 3 + ${#t}))" \
			-m "$((T_MARGIN + ${#ln} + 2))" \
			"$WIDTH" "$a"
	else
		gemini_pre "$1"
	fi
}

# Format a header line using the colour in C_HEADER<level>.
# gemini_header LINE PRE
gemini_header() {
	local re="^(#+)[[:blank:]]*(.*)"
	local s t a # sigil, text, annotation(lvl)
	if ! ${2-false} && [[ "$1" =~ $re ]]; then
		s="${BASH_REMATCH[1]}"
		a="${#BASH_REMATCH[1]}"
		t="${BASH_REMATCH[2]}"

		# Indirect expansion instead of `eval'; a level without a
		# matching C_HEADER<n> variable simply gets no formatting.
		local hdrvar="C_HEADER$a"
		local hdrfmt="${!hdrvar-}"

		printf "\e[${C_SIGIL}m%${S_MARGIN}s ${C_RESET}" "$s"
		fold_line -B "\e[${hdrfmt}m" -A "${C_RESET}" -m "${T_MARGIN}" \
			"$WIDTH" "$t"
	else
		gemini_pre "$1"
	fi
}

# Format a list item.
# gemini_list LINE PRE
gemini_list() {
	local re="^(\*)[[:blank:]]*(.*)"
	local s t # sigil, text
	if ! ${2-false} && [[ "$1" =~ $re ]]; then
		s="${BASH_REMATCH[1]}"
		t="${BASH_REMATCH[2]}"

		printf "\e[${C_SIGIL}m%${S_MARGIN}s ${C_RESET}" "$s"
		fold_line -B "\e[${C_LIST}m" -A "${C_RESET}" -m "$T_MARGIN" \
			"$WIDTH" "$t"
	else
		gemini_pre "$1"
	fi
}

# Format a quote line.
# gemini_quote LINE PRE
gemini_quote() {
	local re="^(>)[[:blank:]]*(.*)"
	local s t # sigil, text
	if ! ${2-false} && [[ "$1" =~ $re ]]; then
		s="${BASH_REMATCH[1]}"
		t="${BASH_REMATCH[2]}"

		printf "\e[${C_SIGIL}m%${S_MARGIN}s ${C_RESET}" "$s"
		fold_line -B "\e[${C_QUOTE}m" -A "${C_RESET}" -m "$T_MARGIN" \
			"$WIDTH" "$t"
	else
		gemini_pre "$1"
	fi
}

# Format a line of ordinary text.
# gemini_text LINE PRE
gemini_text() {
	if ! ${2-false}; then
		printf "%${S_MARGIN}s " ' '
		fold_line -m "$T_MARGIN" \
			"$WIDTH" "$1"
	else
		gemini_pre "$1"
	fi
}

# Print preformatted text, dependent on $T_PRE_DISPLAY and $PRE_LINE_FORCE
# gemini_pre LINE
gemini_pre() {
	# An unset or empty PRE_LINE_FORCE counts as true, which is what the
	# bare `$PRE_LINE_FORCE' command this replaces evaluated to.
	if [[ alt != "${T_PRE_DISPLAY%%,*}" ]] || ${PRE_LINE_FORCE:-true}; then
		printf "\e[${C_SIGIL}m%${S_MARGIN}s " '```'
		printf "\e[${C_PRE}m%s${C_RESET}\n" "$1"
	fi
}

# wrap lines on words to WIDTH
#
# TEXT is split on whitespace and re-joined with single spaces, breaking the
# line before a word that would reach WIDTH columns.  ANSI colour escapes inside
# a word don't count towards its length.
fold_line() { # fold_line [OPTIONS...] WIDTH TEXT
	# see getopts, below, for options
	local newline=true
	local -i margin_all=0 margin_first=0 width ll=0 wl=0 wn=0
	local before="" after="" plain="" word OPT
	local extglob_was_off=false
	OPTIND=1
	while getopts nm:f:l:B:A: OPT; do
		case "$OPT" in
		n) # -n = no trailing newline
			newline=false
			;;
		m) # -m MARGIN = margin for all lines
			margin_all="$OPTARG"
			;;
		f) # -f MARGIN = margin for first line
			margin_first="$OPTARG"
			;;
		l) # -l LENGTH = length of line before starting fold
			ll="$OPTARG"
			;;
		B) # -B BEFORE = text to insert before each line
			before="$OPTARG"
			;;
		A) # -A AFTER = text to insert after each line
			after="$OPTARG"
			;;
		*) return 1 ;;
		esac
	done
	shift "$((OPTIND - 1))"

	width="$1"
	# Guard the modulo below against a zero (or nonsensical) width.
	((width > 0)) || width=80
	ll=$((ll % width))

	# Deliberate word splitting of TEXT into the positional parameters.
	#shellcheck disable=2086
	set -- $2

	if ((margin_first > 0 && ll == 0)); then
		printf "%${margin_first}s" " "
	fi
	if [[ -n "$before" ]]; then
		printf '%b' "$before"
	fi

	# The pattern that strips colour escapes needs extglob.  Enable it once
	# for the whole loop and put the caller's setting back afterwards.
	shopt -q extglob || extglob_was_off=true
	shopt -s extglob
	for word; do
		((wn += 1))
		plain="${word//$'\x1b'\[*([0-9;])m/}"
		wl=$((${#plain} + 1))
		if (((ll + wl) >= width)); then
			# %b expands the escapes in BEFORE/AFTER without ever
			# treating them as part of the format string.
			printf '%b\n%*s%b' "$after" "$margin_all" ' ' "$before"
			ll=$wl
		else
			((ll += wl))
		fi
		printf '%s' "$word"
		((wn != $#)) && printf ' '
	done
	$extglob_was_off && shopt -u extglob

	[[ -n "$after" ]] && printf '%b' "$after"
	$newline && printf '\n'
}

# use the exit code from less (see mklesskey) to do things
handle_keypress() { # handle_keypress CODE
	case "$1" in
	48) # o - open a link -- show a menu of links on the page
		run select_url "$BOLLUX_PAGESRC"
		;;
	49) # g - goto a url -- input a new url
		prompt GO
		run blastoff -u "$REPLY"
		;;
	50) # [ - back in the history
		run history_back || {
			sleep 0.5
			run blastoff "$BOLLUX_URL"
		}
		;;
	51) # ] - forward in the history
		run history_forward || {
			sleep 0.5
			run blastoff "$BOLLUX_URL"
		}
		;;
	52) # r - re-request the current resource
		run blastoff "$BOLLUX_URL"
		;;
	53) # G - goto a url (pre-filled with current)
		run prompt -u GO
		run blastoff -u "$REPLY"
		;;
	54) # ` - change alt-text visibility and refresh
		run list_cycle T_PRE_DISPLAY ,
		run blastoff "$BOLLUX_URL"
		;;
	*) # 55-57 are still available for binding; anything else is an error
		# Report the code that was passed in: by this point `$?' no
		# longer holds the exit status of less.
		die "$1" "less(1) error"
		;;
	esac
}

# select a URL from a text/gemini file
select_url() { # select_url FILE
	local u
	run mapfile -t < <(extract_links <"$1")
	if ((${#MAPFILE[@]} == 0)); then
		log e "No links on this page!"
		sleep 0.5
		run blastoff "$BOLLUX_URL"
		# nothing to choose from
		return
	fi
	PS3="OPEN> "
	select u in "${MAPFILE[@]}"; do
		case "$REPLY" in
		q) bollux_quit ;;
		[^0-9]*) run blastoff -u "$REPLY" && break ;;
		esac
		run blastoff "${u%%[[:space:]]*}" && break
	# NOTE(review): the redirection on `done' was garbled in the text under
	# review; reading the menu choice from the terminal is assumed here --
	# confirm against the upstream source.
	done </dev/tty
}

# NOTE(review): the text between the end of `select_url' and the quoted message
# below was garbled in the text under review.  This presumably included the
# `extract_links' function called above and the beginning of the comment that
# introduces `download' -- restore both from the upstream source.
#
# > by the time we're ready to save a non-text/* resource, it's already
# > corrupted beyond repair.  One possibile solution, attached, is to request it
# > again, presume the reply is 20 and save the data to the filesystem.  What do
# > you think?
#
# I think this is great, thanks!!!
download() {
	local temp_data temp_name final_name
	local -i first_byte

	# The binary file has been corrupted by normalize, which strips 0x0d
	# bytes.  Something also drops NULL bytes.  So, we'll discard this data
	cat >/dev/null

	# Now it's time to re-download the binary file
	temp_data="$(mktemp)"
	log x "Downloading: '$BOLLUX_URL' => '$temp_data'..."
	gemini_request "$BOLLUX_URL" | dd status=progress >"$temp_data"

	# Now $temp_data holds both the header and the data.  tail(1) wants the
	# 1-based offset of the first byte to keep, i.e. the size of the header
	# line (newline included) plus one.  Counting with wc(1) measures bytes,
	# so a header with multi-byte characters can't skew the offset.
	first_byte=$(($(head -n 1 "$temp_data" | wc -c) + 1))

	temp_name="$(mktemp)"
	if tail -c "+$first_byte" "$temp_data" >"$temp_name"; then
		rm -- "$temp_data"
	else
		log error "Error removing the header from '$temp_data'."
		# Don't go on to save a truncated file; the raw response is
		# still available in $temp_data.
		rm -f -- "$temp_name"
		return 1
	fi

	final_name="$BOLLUX_DOWNDIR/${BOLLUX_URL##*/}"
	if [[ -f "$final_name" ]]; then
		# Never overwrite an existing file: leave the download where
		# it is and say so.
		log x "Saved '$temp_name'."
	elif mv "$temp_name" "$final_name"; then
		log x "Saved '$final_name'."
	else
		log error "Error saving '$final_name': downloaded to '$temp_name'."
	fi
}

# HISTORY #####################################################################
#
# While bollux saves history to a file ($BOLLUX_HISTFILE), it doesn't /do/
# anything with the history that's been saved.  When I do implement the history
# functionality, it'll probably be on top of a file:// protocol, which will make
# it very simple to also implement bookmarks and the previewing of pages.  In
# fact, I should be able to implement this change by the weekend (2021-03-07).
#
###############################################################################

# Append a URL to history.
history_append() { # history_append URL TITLE
	local url="$1"
	local title="$2"

	# Print the URL and its title (if given) to $BOLLUX_HISTFILE.
	local fmt=''
	fmt+='%(%FT%T)T\t' # %(_)T calls directly to 'strftime'.
	if (($# == 2)); then
		fmt+='%s\t' # $url
		fmt+='%s\n' # $title
	else
		fmt+='%s%s\n' # printf needs a field for every argument.
	fi
	# The -1 argument makes %(...)T use the current time.
	run printf -- "$fmt" -1 "$url" "$title" >>"$BOLLUX_HISTFILE"

	# Add the URL to the HISTORY array and increment the pointer.
	HISTORY[$HN]="$url"
	((HN += 1))

	# Update $BOLLUX_URL.
	BOLLUX_URL="$url"
}

# Move back in session history.
# Returns 1 (leaving HN at 0) when already at the beginning.
history_back() {
	log d "HN=$HN"
	# We need to subtract 2 from HN because it automatically increases by
	# one with each call to `history_append'.  If we subtract 1, we'll just
	# be at the end of the array again, reloading the page.
	((HN -= 2))
	if ((HN < 0)); then
		HN=0
		log e "Beginning of history."
		return 1
	fi
	run blastoff "${HISTORY[$HN]}"
}

# Move forward in session history.
# Returns 1 when already at the end.
history_forward() {
	log d "HN=$HN"
	if ((HN >= ${#HISTORY[@]})); then
		HN="${#HISTORY[@]}"
		log e "End of history."
		return 1
	fi
	run blastoff "${HISTORY[$HN]}"
}

# Load a URL.
#
# I was feeling fancy when I named this function -- a more descriptive name
# would be 'bollux_goto' or something.
blastoff() { # blastoff [-u] URL
	local u
	local -a url
	local request_fn response_fn

	# By default the URL is taken to be "well-formed" -- it has a protocol
	# string and no extraneous whitespace -- which the Gemini specification
	# should guarantee.  Neither users typing at a prompt nor capsule
	# authors can be trusted that far, so for those cases -u sends the URL
	# through `uwellform' first.
	case "$1" in
	-u) u="$(run uwellform "$2")" ;;
	*) u="$1" ;;
	esac

	# Resolve the reference against the current URL following the transform
	# rules of RFC 3986 (Section 5.2.2), so that relative references become
	# absolute ones bollux can request.  If that leaves the protocol unset,
	# fall back to $BOLLUX_PROTO.
	#
	# Implementation detail: the u* functions work on an array that is
	# declared with `local -a' and handed over by NAME, not by value -- hence
	# `utransform url ...' rather than `utransform "${url[@]}"'.  Likewise,
	# `ucdef' and `ucset' take the name of an array element, not its value.
	run utransform url "$BOLLUX_URL" "$u"
	ucdef url[1] || run ucset url[1] "$BOLLUX_PROTO"

	# For extensibility, every supported protocol `x' provides just two
	# functions, `x_request' and `x_response', where `x' is the protocol
	# held in ${url[1]}.  `declare -F' succeeds only if a function of that
	# name is defined.
	request_fn="${url[1]}_request"
	response_fn="${url[1]}_response"

	# Between the two handlers, `normalize' converts the line endings to
	# UNIX-style (LF) for ease of display.
	{
		if ! declare -F "$request_fn" &>/dev/null; then
			die 99 "No request handler for '${url[1]}'"
		else
			run "$request_fn" "$url"
		fi
	} | run normalize | {
		if ! declare -F "$response_fn" &>/dev/null; then
			log d \
				"No response handler for '${url[1]}';" \
				" passing thru"
			passthru
		else
			run "$response_fn" "$url"
		fi
	}
}

# $BASH_SOURCE is an array holding the "stack" of source calls in bash.  Its
# first element equals $0 only when this file was executed directly rather than
# sourced.  In that case, and ONLY in that case, does bollux enter the main
# loop of the program; otherwise the sourcing environment simply gets the
# definitions above.
#
# This is basically the equivalent of python's 'if __name__ == "__main__":'
# block.
if [[ "$0" == "${BASH_SOURCE[0]}" ]]; then
	if ${DEBUG:-false}; then
		set -x
	fi
	run bollux "$@"
fi