#!/bin/sh
# get-feed.sh URL(str) => DIRECTIVE(feed_directive)
#
# Fetch URL and print one `feed "TITLE" 'URL'` line per feed found:
#   - if URL is itself a feed: a single line for URL;
#   - if URL is a web page: one line per feed URL that sfeed_web reports.
#
# Requires: curl, awk, mktemp, head, cut; sfeed_web only for web pages.
# Exit status: 0 on success, 1 on fetch/type errors, 2 on usage errors.
#
# Variables are intentionally not declared `local`: the script targets
# plain /bin/sh, where `local` is not guaranteed.

main() {
  ## main URL(str) => DIRECTIVE(feed_directive)
  # Uses `return` rather than `exit`: main is the last command of the
  # script, so its return status becomes the script's exit status.
  if [ "$#" -lt 1 ] || [ -z "$1" ]; then
    echo >&2 "Usage: get-feed.sh URL"
    return 2
  fi
  url="$1"

  wp="$(mktemp /tmp/get-feed.XXXXXX)" || {
    echo >&2 "Could not create a temporary file."
    return 1
  }
  # Remove the downloaded page on every exit path; turning signals into
  # a normal exit makes sure the EXIT trap also runs when interrupted.
  trap 'rm -f -- "$wp"' EXIT
  trap 'exit 1' HUP INT TERM

  # -f: treat HTTP errors as failures instead of parsing an error page.
  if ! curl -fsL "$url" >"$wp"; then
    echo >&2 "Could not fetch \"$url\"."
    return 1
  fi

  # Decide what was downloaded: trust the URL suffix when it is
  # conclusive, otherwise sniff the first line of the document.
  case "$url" in
    *html) # We know it's a webpage
      type=html
      ;;
    *xml) # We know it's a feed
      type=xml
      ;;
    *) # Not sure
      type="$(head -n1 "$wp")"
      ;;
  esac

  case "$type" in
    *xml*) # a feed
      title="$(get_title_xml <"$wp")"
      output_feed "$title" "$url"
      ;;
    *html*) # a webpage
      # sfeed_web prints one discovered feed per line with the feed URL
      # in the first tab-separated field (hence `cut -f1`). The page URL
      # is passed as base URL so relative feed links come back absolute.
      sfeed_web "$url" <"$wp" | cut -f1 | while IFS= read -r u; do
        [ -n "$u" ] || continue
        # Best effort: a feed whose title cannot be fetched is still
        # listed, with an empty title.
        title="$(curl -sL "$u" | get_title_xml)"
        output_feed "$title" "$u"
      done
      ;;
    *)
      echo >&2 "Don't know type \"$type\"."
      return 1
      ;;
  esac
}

output_feed() {
  ## output_feed TITLE(str) URL(str) => FEED_DIRECTIVE(str)
  # Print a single directive line: feed "TITLE" 'URL'
  # TITLE and URL are emitted verbatim (no quote escaping).
  printf "feed \"%s\" '%s'\n" "$1" "$2"
}

get_title_xml() {
  ## get_title_xml < FILE => TITLE(str)
  # Print the text of the first <title> that follows <channel> and
  # precedes the first <item>; print nothing if there is none.
  # A title spanning several lines is joined into one line with single
  # spaces, and surrounding blanks (including CR) are trimmed.
  #
  # NOTE(review): in the reviewed copy the tag patterns of this awk
  # program had been reduced to empty regexes (//); they are restored
  # here as <channel>, <item> and <title>. Confirm against upstream.
  awk '
    # Skip everything up to and including the opening <channel> tag.
    !in_channel {
      pos = index($0, "<channel>")
      if (!pos) next
      in_channel = 1
      $0 = substr($0, pos + length("<channel>"))
    }

    # Look for the opening <title>. Meeting an <item> first means the
    # channel has no title of its own, so stop instead of picking up
    # the title of an entry.
    !in_title {
      tpos = index($0, "<title>")
      ipos = match($0, /<item([ \t>]|$)/) ? RSTART : 0
      if (ipos && (!tpos || ipos < tpos)) exit
      if (!tpos) next
      in_title = 1
      $0 = substr($0, tpos + length("<title>"))
    }

    # Inside the title: collect text up to the closing tag.
    {
      epos = index($0, "</title>")
      if (epos) $0 = substr($0, 1, epos - 1)
      sub(/^[ \t\r]*/, "")
      sub(/[ \t\r]*$/, "")
      if (length($0)) out = (out == "" ? $0 : out " " $0)
      if (epos) exit
    }

    END { if (out != "") print out }
  '
}

main "$@"