From 38bac094bca2c3afd619d7f741bfd38e6ec4feba Mon Sep 17 00:00:00 2001
From: Case Duckworth
Date: Wed, 8 Jun 2022 17:29:22 -0500
Subject: Fix download method

Thanks White_Rabbit!
---
 bollux | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 50 insertions(+), 10 deletions(-)

diff --git a/bollux b/bollux
index 4a8db07..08d2038 100755
--- a/bollux
+++ b/bollux
@@ -1512,18 +1512,58 @@ extract_links() {
 # `download' works by reading the end of the pipe from `display', which means
 # that sometimes, due to something with the way bash or while or ... something
 # ... chunks the data, sometimes binary data gets corrupted. This is an area
-# that requires more research.
+# that requires more research. UPDATE 2022-06-08: White_Rabbit has figured out
+# a solution! From their email:
+#
+# > by the time we're ready to save a non-text/* resource, it's already
+# > corrupted beyond repair. One possibile solution, attached, is to request it
+# > again, presume the reply is 20 and save the data to the filesystem. What do
+# > you think?
+#
+# I think this is great, thanks!!!
+#
+# Reads (and discards) the corrupted body on stdin, re-requests $BOLLUX_URL,
+# and saves the reply body under $BOLLUX_DOWNDIR. Returns 1 if the reply is
+# not a success or the header cannot be stripped.
 download() {
-	tn="$(mktemp)"
-	log x "Downloading: '$BOLLUX_URL' => '$tn'..."
-	dd status=progress >"$tn"
-	fn="$BOLLUX_DOWNDIR/${BOLLUX_URL##*/}"
-	if [[ -f "$fn" ]]; then
-		log x "Saved '$tn'."
-	elif mv "$tn" "$fn"; then
-		log x "Saved '$fn'."
+	local temp_data temp_name final_name header header_bytes
+	# The binary file has been corrupted by normalize, which strips 0x0d
+	# bytes. Something also drops NULL bytes. So, we'll discard this data
+	cat >/dev/null
+	# Now it's time to re-download the binary file
+	temp_data="$(mktemp)"
+	log x "Downloading: '$BOLLUX_URL' => '$temp_data'..."
+	gemini_request "$BOLLUX_URL" | dd status=progress >"$temp_data"
+
+	# Now $temp_data holds both the header and the data. Only a 2x
+	# (success) reply carries a body worth saving.
+	header="$(head -n 1 "$temp_data")"
+	header="${header%$'\r'}"
+	if [[ "$header" != 2* ]]; then
+		log error "Error downloading '$BOLLUX_URL': got '$header'."
+		rm -f "$temp_data"
+		return 1
+	fi
+	# Count the header in bytes with wc rather than ${#header}: the
+	# latter counts characters and misses the trailing 0x0A. tail -c
+	# wants the 1-based offset of the first body byte, hence the + 1.
+	header_bytes="$(head -n 1 "$temp_data" | wc -c)"
+	temp_name="$(mktemp)"
+	if tail -c "+$((header_bytes + 1))" "$temp_data" >"$temp_name"; then
+		rm "$temp_data"
+	else
+		log error "Error removing the header from '$temp_data'."
+		rm -f "$temp_name"
+		return 1
+	fi
+
+	final_name="$BOLLUX_DOWNDIR/${BOLLUX_URL##*/}"
+	if [[ -f "$final_name" ]]; then
+		log x "Saved '$temp_name'."
+	elif mv "$temp_name" "$final_name"; then
+		log x "Saved '$final_name'."
 	else
-		log error "Error saving '$fn': downloaded to '$tn'."
+		log error "Error saving '$final_name': downloaded to '$temp_name'."
 	fi
 }
 
-- 
cgit 1.4.1-21-gabe81