diff options
Diffstat (limited to 'scripts')
-rwxr-xr-x | scripts/get-feed.sh | 59 |
1 files changed, 59 insertions, 0 deletions
diff --git a/scripts/get-feed.sh b/scripts/get-feed.sh new file mode 100755 index 0000000..f000fee --- /dev/null +++ b/scripts/get-feed.sh | |||
@@ -0,0 +1,59 @@ | |||
1 | #!/bin/sh | ||
2 | |||
3 | main() { | ||
4 | # get-feed.sh URL(str) => DIRECTIVE(feed_directive) | ||
5 | url="$1" | ||
6 | wp="$(mktemp /tmp/get-feed.XXXXXX)" | ||
7 | curl -sL "$url" >"$wp" | ||
8 | case "$url" in | ||
9 | *html) # We know it's a webpage | ||
10 | type=html | ||
11 | ;; | ||
12 | *xml) # We know it's a feed | ||
13 | type=xml | ||
14 | ;; | ||
15 | *) # Not sure | ||
16 | type="$(head -n1 "$wp")" | ||
17 | ;; | ||
18 | esac | ||
19 | case "$type" in | ||
20 | *xml*) # a feed | ||
21 | title="$(get_title_xml <"$wp")" | ||
22 | output_feed "$title" "$url" | ||
23 | ;; | ||
24 | *html*) # a webpage | ||
25 | cat "$wp" | sfeed_web | cut -f1 | | ||
26 | while read u; do | ||
27 | title="$(curl -sL "$u" | get_title_xml)" | ||
28 | output_feed "$title" "$u" | ||
29 | done | ||
30 | ;; | ||
31 | *) | ||
32 | echo >&2 "Don't know type \"$type\"." | ||
33 | exit 1 | ||
34 | ;; | ||
35 | esac | ||
36 | } | ||
37 | |||
38 | output_feed() { | ||
39 | ## output_feed TITLE(str) URL(str) => FEED_DIRECTIVE(str) | ||
40 | printf "feed \"%s\" '%s'\n" "$1" "$2" | ||
41 | } | ||
42 | |||
43 | get_title_xml() { | ||
44 | ## get_title_xml < FILE => TITLE(str) | ||
45 | awk ' | ||
46 | /<channel>/ { channel = 1; }/<item>/{ channel = 0; } | ||
47 | channel && $0 ~ /<title>/ { title = 1; } | ||
48 | title { | ||
49 | if (match($0,/<\/title>/)) title = 0; | ||
50 | gsub(/<\/?title>/,""); | ||
51 | sub(/^[ \t]*/,""); | ||
52 | sub(/[ \t]*$/,""); | ||
53 | print; | ||
54 | } | ||
55 | channel && $0 ~ /<\/title>/ { title = 0; } | ||
56 | ' | ||
57 | } | ||
58 | |||
59 | main "$@" | ||