about summary refs log tree commit diff stats
path: root/scripts/get-feed.sh
blob: f000fee9cbf8c86d0c97462d51a238f33f9d75fb (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
#!/bin/sh

main() {
	# get-feed.sh URL(str) => DIRECTIVE(feed_directive)
	#
	# Downloads URL and prints one feed directive (see output_feed) per feed:
	#   - URL is a feed: its title is extracted with get_title_xml and a
	#     single directive is printed for URL itself.
	#   - URL is a web page: the page is piped through sfeed_web, the first
	#     tab-separated field of every output line is taken as a feed URL,
	#     and each of those is fetched and printed as a directive.
	# The document kind is guessed from the URL suffix, falling back to the
	# first line of the downloaded body.
	#
	# Exits 1 when URL is missing, the temporary file cannot be created, the
	# download fails, or the document type is not recognized.
	if [ "$#" -lt 1 ] || [ -z "$1" ]; then
		echo >&2 "usage: get-feed.sh URL"
		exit 1
	fi
	url="$1"
	wp="$(mktemp /tmp/get-feed.XXXXXX)" || {
		echo >&2 "Can't create a temporary file."
		exit 1
	}
	# Remove the download on every exit path. The signal traps turn
	# HUP/INT/TERM into a regular exit so the EXIT trap runs there too.
	trap 'rm -f -- "$wp"' EXIT
	trap 'exit 1' HUP INT TERM
	if ! curl -sL "$url" >"$wp"; then
		echo >&2 "Can't fetch \"$url\"."
		exit 1
	fi
	case "$url" in
	*html) # We know it's a webpage
		type=html
		;;
	*xml) # We know it's a feed
		type=xml
		;;
	*) # Not sure: sniff the first line of the body
		type="$(head -n1 "$wp")"
		;;
	esac
	# Matching is case-insensitive so that e.g. "<!DOCTYPE HTML ...>" is
	# recognized; "<rss"/"<feed" cover feeds served without an XML declaration.
	case "$type" in
	*"<rss"* | *"<feed"* | *[Xx][Mm][Ll]*) # a feed
		title="$(get_title_xml <"$wp")"
		output_feed "$title" "$url"
		;;
	*[Hh][Tt][Mm][Ll]*) # a webpage
		# -r keeps backslashes in discovered URLs intact.
		sfeed_web <"$wp" | cut -f1 |
			while read -r u; do
				[ -n "$u" ] || continue
				title="$(curl -sL "$u" | get_title_xml)"
				output_feed "$title" "$u"
			done
		;;
	*)
		echo >&2 "Don't know type \"$type\"."
		exit 1
		;;
	esac
	# Clean up right away on the normal path; the EXIT trap covers the rest.
	rm -f -- "$wp"
}

output_feed() {
	## output_feed TITLE(str) URL(str) => FEED_DIRECTIVE(str)
	##
	## Writes a single line to stdout of the form:  feed "TITLE" 'URL'
	## NOTE(review): TITLE and URL are inserted verbatim, so quote characters
	## inside them are not escaped -- confirm the consumer tolerates that.
	printf '%s\n' "feed \"$1\" '$2'"
}

get_title_xml() {
	## get_title_xml < FILE => TITLE(str)
	##
	## Reads an RSS or Atom document on stdin and prints the feed-level title
	## on one line; prints nothing when no title is found.
	##
	## The title is the text of the first <title> element that follows the
	## <channel> (RSS) or <feed> (Atom) start tag and precedes the first
	## <item>/<entry>. Tags may carry attributes and may share a line with
	## other markup (single-line feeds work). CDATA markers are removed, line
	## breaks inside the title are folded into single spaces and surrounding
	## blanks are trimmed. XML entities are left undecoded.
	awk '
# Return s with every occurrence of the literal string tok removed.
function strip(s, tok,    i) {
	while ((i = index(s, tok)) > 0)
		s = substr(s, 1, i - 1) substr(s, i + length(tok))
	return s
}

# Buffer the document only up to (and including) the line holding the first
# item/entry: the feed title lives in the header, and this keeps the buffer
# small for large feeds. Remaining input is read and ignored.
!seen_item {
	line = $0 "\n"
	head = head line
	if (line ~ /<(item|entry)[ \t\r\n>\/]/)
		seen_item = 1
}

END {
	# Keep what lies between the channel/feed start tag and the first item.
	if (!match(head, /<(channel|feed)([ \t\r\n][^>]*)?>/))
		exit
	head = substr(head, RSTART + RLENGTH)
	if (match(head, /<(item|entry)[ \t\r\n>\/]/))
		head = substr(head, 1, RSTART - 1)

	# First title element of the header; a self-closing or unterminated
	# element yields no title.
	if (!match(head, /<title([ \t\r\n\/][^>]*)?>/))
		exit
	tag = substr(head, RSTART, RLENGTH)
	head = substr(head, RSTART + RLENGTH)
	if (tag ~ /\/>$/ || !match(head, /<\/title[ \t\r\n]*>/))
		exit
	title = substr(head, 1, RSTART - 1)

	title = strip(strip(title, "<![CDATA["), "]]>")
	gsub(/[ \t]*[\r\n][ \t\r\n]*/, " ", title)
	sub(/^[ \t]+/, "", title)
	sub(/[ \t]+$/, "", title)
	if (title != "")
		print title
}
'
}

# Script entry point: pass all command-line arguments through to main.
main "$@"