about summary refs log tree commit diff stats
path: root/scripts
diff options
context:
space:
mode:
Diffstat (limited to 'scripts')
-rwxr-xr-x scripts/get-feed.sh 59
1 files changed, 59 insertions, 0 deletions
diff --git a/scripts/get-feed.sh b/scripts/get-feed.sh
new file mode 100755
index 0000000..f000fee
--- /dev/null
+++ b/scripts/get-feed.sh
@@ -0,0 +1,59 @@
1#!/bin/sh
2
main() {
	# get-feed.sh URL(str) => DIRECTIVE(feed_directive)
	#
	# Download URL and print one feed directive per feed found:
	#   - if URL is a feed, print a directive for URL itself;
	#   - if URL is a web page, print a directive for every feed link that
	#     sfeed_web discovers in it.
	# The kind of document is guessed from the URL suffix, falling back to
	# the first line of the downloaded body.
	#
	# Exits 1 when URL is missing, cannot be fetched, or is of unknown type.
	url="${1:-}"
	if [ -z "$url" ]; then
		echo >&2 "Usage: get-feed.sh URL"
		exit 1
	fi

	wp="$(mktemp "${TMPDIR:-/tmp}/get-feed.XXXXXX")" || exit 1
	# Remove the download on every exit path.  Signals are turned into a
	# plain exit so the EXIT trap also runs in shells that skip it when
	# killed by a signal.
	trap 'rm -f -- "$wp"' EXIT
	trap 'exit 1' HUP INT TERM

	if ! curl -sL "$url" >"$wp"; then
		echo >&2 "Couldn't fetch \"$url\"."
		exit 1
	fi

	case "$url" in
	*html) # We know it's a webpage
		type=html
		;;
	*xml) # We know it's a feed
		type=xml
		;;
	*) # Not sure: sniff the first line of the body
		type="$(head -n1 "$wp")"
		;;
	esac

	case "$type" in
	*xml* | *'<rss'* | *'<feed'* | *'<rdf:RDF'*) # a feed
		title="$(get_title_xml <"$wp")"
		output_feed "$title" "$url"
		;;
	*html* | *HTML*) # a webpage
		# sfeed_web prints "URL<TAB>content-type" per discovered feed; the
		# page URL is passed as base URL so relative links can be fetched.
		sfeed_web "$url" <"$wp" | cut -f1 |
			while IFS= read -r u; do
				title="$(curl -sL "$u" | get_title_xml)"
				output_feed "$title" "$u"
			done
		;;
	*)
		echo >&2 "Don't know type \"$type\"."
		exit 1
		;;
	esac
}
37
output_feed() {
	## output_feed TITLE(str) URL(str) => FEED_DIRECTIVE(str)
	#
	# Print the line: feed "TITLE" 'URL'
	#
	# TITLE comes from a remote document, so the characters that are
	# special inside double quotes (\ " $ `) are backslash-escaped, and any
	# single quote in URL is written as '\''.  Values without those
	# characters are printed exactly as before.
	# NOTE(review): this assumes the directive is later read by a shell
	# (e.g. pasted into an sfeedrc) -- confirm against the consumer.
	of_title="$(printf '%s' "$1" | sed 's/[\\"$`]/\\&/g')"
	of_url="$(printf '%s' "$2" | sed "s/'/'\\\\''/g")"
	printf "feed \"%s\" '%s'\n" "$of_title" "$of_url"
}
42
get_title_xml() {
	## get_title_xml < FILE => TITLE(str)
	#
	# Read a feed document on stdin and print the feed-level title: the
	# first <title> element that follows <channel> (RSS) or <feed> (Atom)
	# and precedes the first <item>/<entry>.  Prints nothing when no such
	# title exists.
	#
	# The title is found by tag position rather than by line, so it works
	# for single-line documents, titles that span lines, <title> tags with
	# attributes and CDATA-wrapped titles.  Whitespace runs (including CR)
	# are collapsed to one space and the ends are trimmed.
	awk '
# Collect the document head: everything up to and including the line
# holding the first item/entry.  Each line gets a trailing space so a
# tag name at end of line is still followed by a delimiter.
!seen_item {
	head = head $0 " "
	if (($0 " ") ~ /<(item|entry)[ \t>]/) seen_item = 1
}
END {
	# Drop the first item/entry and whatever follows it.
	if (match(head, /<(item|entry)[ \t>]/))
		head = substr(head, 1, RSTART - 1)
	# Only look inside the feed container.
	if (!match(head, /<(channel|feed)[ \t>]/)) exit
	head = substr(head, RSTART + RLENGTH)
	# First title element, with or without attributes.
	if (!match(head, /<title([ \t][^>]*)?>/)) exit
	head = substr(head, RSTART + RLENGTH)
	if (!match(head, /<\/title>/)) exit
	title = substr(head, 1, RSTART - 1)
	# Unwrap CDATA, then normalise whitespace.
	sub(/^[ \t\r]*<!\[CDATA\[/, "", title)
	sub(/\]\]>[ \t\r]*$/, "", title)
	gsub(/[ \t\r]+/, " ", title)
	sub(/^ /, "", title)
	sub(/ $/, "", title)
	if (title != "") print title
}
'
}
58
# Script entry point: run main with all command-line arguments, unchanged.
main "$@"