diff options
author | Case Duckworth | 2022-06-08 17:29:22 -0500 |
---|---|---|
committer | Case Duckworth | 2022-06-08 17:29:22 -0500 |
commit | 38bac094bca2c3afd619d7f741bfd38e6ec4feba (patch) | |
tree | 7786ec8360dba93b0b016c94253909448464cb45 | |
parent | Special-case file: protocol (empty string allowed) (diff) | |
download | bollux-38bac094bca2c3afd619d7f741bfd38e6ec4feba.tar.gz bollux-38bac094bca2c3afd619d7f741bfd38e6ec4feba.zip |
Fix download method
Thanks White_Rabbit!
-rwxr-xr-x | bollux | 46 |
1 file changed, 36 insertions, 10 deletions
diff --git a/bollux b/bollux index 4a8db07..08d2038 100755 --- a/bollux +++ b/bollux | |||
@@ -1512,18 +1512,44 @@ extract_links() { | |||
1512 | # `download' works by reading the end of the pipe from `display', which means | 1512 | # `download' works by reading the end of the pipe from `display', which means |
1513 | # that sometimes, due to something with the way bash or while or ... something | 1513 | # that sometimes, due to something with the way bash or while or ... something |
1514 | # ... chunks the data, sometimes binary data gets corrupted. This is an area | 1514 | # ... chunks the data, sometimes binary data gets corrupted. This is an area |
1515 | # that requires more research. | 1515 | # that requires more research. UPDATE 2022-06-08: White_Rabbit has figured out |
1516 | # a solution! From their email: | ||
1517 | # | ||
1518 | # > by the time we're ready to save a non-text/* resource, it's already | ||
1519 | # > corrupted beyond repair. One possible solution, attached, is to request it | ||
1520 | # > again, presume the reply is 20 and save the data to the filesystem. What do | ||
1521 | # > you think? | ||
1522 | # | ||
1523 | # I think this is great, thanks!!! | ||
1516 | download() { | 1524 | download() { |
1517 | tn="$(mktemp)" | 1525 | # The binary file has been corrupted by normalize, which strips 0x0d |
1518 | log x "Downloading: '$BOLLUX_URL' => '$tn'..." | 1526 | # bytes. Something also drops NULL bytes. So, we'll discard this data |
1519 | dd status=progress >"$tn" | 1527 | cat >/dev/null |
1520 | fn="$BOLLUX_DOWNDIR/${BOLLUX_URL##*/}" | 1528 | # Now it's time to re-download the binary file |
1521 | if [[ -f "$fn" ]]; then | 1529 | temp_data="$(mktemp)" |
1522 | log x "Saved '$tn'." | 1530 | log x "Downloading: '$BOLLUX_URL' => '$temp_data'..." |
1523 | elif mv "$tn" "$fn"; then | 1531 | gemini_request "$BOLLUX_URL" | dd status=progress >"$temp_data" |
1524 | log x "Saved '$fn'." | 1532 | |
1533 | # Now $temp_data holds both the header and the data | ||
1534 | HEADER=$(head -1 "$temp_data") | ||
1535 | # ${#HEADER} is one byte short of the header's real length, because | ||
1536 | # command substitution strips the trailing 0x0A. tail --bytes=+N starts | ||
1537 | # output at byte N (1-indexed), so the body begins at ${#HEADER} + 2. | ||
1538 | let FIRST_BYTE=$((${#HEADER} + 2)) | ||
1539 | temp_name="$(mktemp)" | ||
1540 | if tail --bytes=+$FIRST_BYTE "$temp_data" >"$temp_name"; then | ||
1541 | rm "$temp_data" | ||
1542 | else | ||
1543 | log error "Error removing the header from '$temp_data'." | ||
1544 | fi | ||
1545 | |||
1546 | final_name="$BOLLUX_DOWNDIR/${BOLLUX_URL##*/}" | ||
1547 | if [[ -f "$final_name" ]]; then | ||
1548 | log x "Saved '$temp_name'." | ||
1549 | elif mv "$temp_name" "$final_name"; then | ||
1550 | log x "Saved '$final_name'." | ||
1525 | else | 1551 | else |
1526 | log error "Error saving '$fn': downloaded to '$tn'." | 1552 | log error "Error saving '$final_name': downloaded to '$temp_name'." |
1527 | fi | 1553 | fi |
1528 | } | 1554 | } |
1529 | 1555 | ||