#!/bin/sh
# get-feed.sh -- print `feed "TITLE" 'URL'` directives for a URL.
#
# Usage: get-feed.sh URL
#
# If URL points at an RSS/Atom feed, one directive is printed for it.
# If URL points at a web page, sfeed_web is used to discover the feeds the
# page advertises and one directive is printed per discovered feed.
#
# Requires: curl, awk, sed, cut, mktemp and sfeed_web.
#
# Provenance: scripts/get-feed.sh (mode 100755, blob f000fee) as added by
# commit 6aa64d77e0f8ef2e42bdb90bc5ed3f6557e3ed81, "Initial commit",
# Case Duckworth, Sun, 17 Jul 2022 23:10:22 -0500 (cgit 1.4.1-21-gabe81).

main() {
  # get-feed.sh URL(str) => DIRECTIVE(feed_directive)
  #
  # Returns 0 on success, 1 on download/detection failure, 2 on bad usage.
  # Errors are reported with `return` so the status of the final
  # `main "$@"` call becomes the exit status of the script.
  if [ "$#" -lt 1 ] || [ -z "$1" ]; then
    echo >&2 "Usage: get-feed.sh URL"
    return 2
  fi
  url="$1"

  wp="$(mktemp "${TMPDIR:-/tmp}/get-feed.XXXXXX")" || {
    echo >&2 "Could not create a temporary file."
    return 1
  }
  # Remove the download on every exit path; turning signals into a normal
  # exit makes sure the EXIT trap also runs in shells such as dash.
  trap 'rm -f -- "$wp"' EXIT
  trap 'exit 1' HUP INT TERM

  # -f: treat HTTP errors as failures instead of parsing an error page.
  if ! curl -fsL "$url" >"$wp"; then
    echo >&2 "Could not download \"$url\"."
    return 1
  fi

  case "$url" in
    *html) # We know it's a webpage
      type=html
      ;;
    *xml) # We know it's a feed
      type=xml
      ;;
    *) # Not sure: sniff the first non-blank line, case-insensitively
      type="$(awk 'NF { print tolower($0); exit }' "$wp")"
      ;;
  esac

  case "$type" in
    *xml* | *"<rss"* | *"<feed"*) # a feed
      title="$(get_title_xml <"$wp")"
      output_feed "$title" "$url"
      ;;
    *html*) # a webpage
      # sfeed_web prints "URL<TAB>content-type" per advertised feed; the
      # page URL is passed as base URL so relative links become fetchable.
      sfeed_web "$url" <"$wp" | cut -f1 |
        while IFS= read -r u; do
          [ -n "$u" ] || continue
          title="$(curl -fsL "$u" | get_title_xml)"
          output_feed "$title" "$u"
        done
      ;;
    *)
      echo >&2 "Don't know type \"$type\"."
      return 1
      ;;
  esac
}

output_feed() {
  ## output_feed TITLE(str) URL(str) => FEED_DIRECTIVE(str)
  #
  # Prints: feed "TITLE" 'URL'
  # TITLE and URL come from remote documents, so characters that would end
  # or subvert the surrounding quotes are escaped: \ " $ and ` in TITLE get
  # a backslash, and ' in URL becomes '\''.
  # NOTE(review): this assumes the directive is read back with shell
  # quoting rules (e.g. pasted into an sfeedrc) -- confirm with the consumer.
  printf "feed \"%s\" '%s'\n" \
    "$(printf '%s\n' "$1" | sed 's/[\\"$`]/\\&/g')" \
    "$(printf '%s\n' "$2" | sed "s/'/'\\\\''/g")"
}

get_title_xml() {
  ## get_title_xml < FILE => TITLE(str)
  #
  # Reads RSS or Atom XML on stdin and prints the feed-level title on a
  # single line (nothing if there is none).  Only a title that appears
  # inside <channel>/<feed> and before the first <item>/<entry> counts, so
  # post titles are never reported.  Multi-line titles are joined with
  # single spaces, CDATA wrappers are removed; entities are left as-is.
  awk '
# A trailing space lets "[ \t>]" also match a tag name that ends the line.
{ $0 = $0 " " }

# Skip everything before the feed container opens.
!in_feed {
  if (!match($0, /<(channel|feed)[ \t>]/)) next
  in_feed = 1
  $0 = substr($0, RSTART + RLENGTH)
}

# Look for the opening title tag.  Text from the first item/entry onwards
# is cut off first; if no title precedes it there is no feed-level title.
!in_title {
  if (match($0, /<(item|entry)[ \t>]/)) {
    $0 = substr($0, 1, RSTART - 1)
    last = 1
  }
  if (match($0, /<title([ \t][^>]*)?>/)) {
    in_title = 1
    $0 = substr($0, RSTART + RLENGTH)
  } else if (last) {
    exit
  } else {
    next
  }
}

# Collect title text up to the closing tag, then stop reading.
in_title {
  if (match($0, /<\/title>/)) {
    $0 = substr($0, 1, RSTART - 1)
    done = 1
  }
  gsub(/<!\[CDATA\[|\]\]>/, "")
  sub(/^[ \t\r]+/, "")
  sub(/[ \t\r]+$/, "")
  if ($0 != "") title = (title == "" ? $0 : title " " $0)
  if (done) exit
}

END { if (title != "") print title }
'
}

main "$@"