blob: f000fee9cbf8c86d0c97462d51a238f33f9d75fb (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
|
#!/bin/sh
main() {
  # get-feed.sh URL(str) => DIRECTIVE(feed_directive)
  #
  # Download URL and print one feed directive (see output_feed) per feed.
  #   - URL ends in "xml", or the first downloaded line contains "xml":
  #     the document is treated as a feed and its title is read with
  #     get_title_xml.
  #   - URL ends in "html", or the first downloaded line contains "html":
  #     the document is treated as a web page; every URL that sfeed_web
  #     reports (first tab-separated field) is downloaded in turn and its
  #     title extracted.
  # "xml" is tested before "html" when the type comes from the first line.
  # Exits 1 with a message on stderr when the URL is missing, the download
  # fails, or the type cannot be determined.
  if [ -z "${1:-}" ]; then
    echo >&2 "usage: get-feed.sh URL"
    exit 1
  fi
  url="$1"
  wp="$(mktemp /tmp/get-feed.XXXXXX)" || exit 1
  # The download is scratch data only: remove it on every exit path.
  # Signals are converted into a normal exit so the EXIT trap runs for
  # them as well.
  trap 'rm -f -- "$wp"' EXIT
  trap 'exit 1' HUP INT TERM
  if ! curl -sL "$url" >"$wp"; then
    echo >&2 "Could not download \"$url\"."
    exit 1
  fi
  case "$url" in
  *html) # We know it's a webpage
    type=html
    ;;
  *xml) # We know it's a feed
    type=xml
    ;;
  *) # Not sure: sniff the first line of the document
    type="$(head -n1 "$wp")"
    ;;
  esac
  case "$type" in
  *xml*) # a feed
    title="$(get_title_xml <"$wp")"
    output_feed "$title" "$url"
    ;;
  *html*) # a webpage
    # The loop runs in a pipeline subshell; nothing it assigns is needed
    # afterwards. -r keeps backslashes in discovered URLs intact.
    sfeed_web <"$wp" | cut -f1 |
      while read -r u; do
        title="$(curl -sL "$u" | get_title_xml)"
        output_feed "$title" "$u"
      done
    ;;
  *)
    echo >&2 "Don't know type \"$type\"."
    exit 1
    ;;
  esac
}
output_feed() {
  ## output_feed TITLE(str) URL(str) => FEED_DIRECTIVE(str)
  # Writes one line to stdout: the word feed, TITLE in double quotes, then
  # URL in single quotes. Neither argument is escaped.
  printf 'feed "%s" '\''%s'\''\n' "$1" "$2"
}
get_title_xml() {
  ## get_title_xml < FILE => TITLE(str)
  #
  # Read an RSS document on stdin and print the channel title on one line.
  # Only the first <title> found after <channel> and before the first
  # <item> is used, so titles of nested elements (such as <image>) and of
  # items are ignored. Only the text between <title> and </title> is kept;
  # a title spread over several lines is joined with single spaces and
  # surrounding blanks, tabs and carriage returns are dropped.
  # Prints nothing when no such title exists (for instance when the
  # document has no <channel> element).
  awk '
    {
      line = $0
      if (!channel) {
        # Skip everything up to and including the <channel> tag.
        i = index(line, "<channel>")
        if (i == 0) next
        channel = 1
        line = substr(line, i + 9)
      }
      # Items carry their own <title>: ignore the input from the first
      # <item> onwards and stop after this line.
      i = index(line, "<item>")
      if (i > 0) { line = substr(line, 1, i - 1); last = 1 }
      if (!found) {
        i = index(line, "<title>")
        if (i > 0) { found = 1; line = substr(line, i + 7) }
      }
      if (found) {
        i = index(line, "</title>")
        if (i > 0) { line = substr(line, 1, i - 1); last = 1 }
        sub(/^[ \t\r]*/, "", line)
        sub(/[ \t\r]*$/, "", line)
        if (line != "") out = (out == "" ? line : out " " line)
      }
      # exit still runs the END rule, which prints the collected title.
      if (last) exit
    }
    END { if (found) print out }
  '
}
# Entry point: run main with the script's command-line arguments.
main "$@"
|