about summary refs log tree commit diff stats
path: root/sfeedrc
blob: 9b15324dc80dd50c63d5d3c6a406a83eb1218ae0 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
# -*- sh -*-

# SFEED="$HOME/.sfeed"
# Browser-like User-Agent string; fetch() passes it to curl.
USER_AGENT='Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:78.0) Gecko/20100101 Firefox/78.0'
# sfeedpath="$SFEED/feeds"
# One job per online CPU.  nproc is not available everywhere, so fall back
# to getconf and finally to a fixed value rather than leaving maxjobs empty.
maxjobs="$(nproc 2>/dev/null || getconf _NPROCESSORS_ONLN 2>/dev/null || echo 8)"

### Feeds #######################################################

# Top-level feed list: runs each feeds_* group below, in this order.
# unsubscribe_feeds is not called from here.
feeds() {
	## feed NAME FEED_URL BASE_URL ENCODING # defined by sfeedrc(5).
	# I want to add some extra fields after these.
	feeds_planets
	feeds_youtube
	feeds_podcasts
	feeds_friends
	feeds_people
	feeds_news
	feeds_smolweb
	feeds_comics
	feeds_misc
	feeds_me
	feeds_from_agafnd
	feeds_from_elioat
}

# Aggregator ("planet") feeds plus the Emacs subreddit.
# The "r/emacs" name matches the `r/*' branch in filter().
feeds_planets() {
	feed "Planet Emacs" "https://planet.emacslife.com/atom.xml" "https://planet.emacslife.com/" ""
	feed "r/emacs" "https://i.reddit.com/r/emacs.rss" "https://i.reddit.com/r/emacs" ""
	feed "Planet Lisp" "https://planet.lisp.org/rss20.xml" "https://planet.lisp.org/" ""
	feed "Planet Scheme" "https://planet.scheme.org/atom.xml" "https://planet.scheme.org/" ""
}

# yt_feed TITLE CHANNEL_ID
# Subscribe to a YouTube channel: the feed name is TITLE with a " [yt]"
# suffix, and both the feed URL and the base URL are derived from CHANNEL_ID.
yt_feed() {
	# Rewrite the positional parameters to (feed name, channel id).
	set -- "$1 [yt]" "$2"
	feed "$1" \
		"https://www.youtube.com/feeds/videos.xml?channel_id=$2" \
		"https://youtube.com/channel/$2"
}

# YouTube channel subscriptions, one `yt_feed TITLE CHANNEL_ID' per channel.
feeds_youtube() {
	yt_feed "3Blue1Brown" "UCYO_jab_esuFRV4b17AJtAw"
	yt_feed "AB - Ancienne Belgique" "UCUVAw2kdxJlcfCdEcdgXv5A"
	yt_feed "Adam Ragusea" "UC9_p50tH3WmMslWRWKnM7dQ"
	yt_feed "Babish Culinary Universe" "UCJHA_jMfCvEnv-3kRjTCQXw"
	yt_feed "Baggers" "UCMV8p6Lb-bd6UZtTc_QD4zA"
	yt_feed "Case Duckworth" "UC92gRJdnUYklVu4pvj9n0Lw"
	yt_feed "Claire Saffitz x Dessert Person" "UCvw6Y1kr_8bp6B5m1dqNyiw"
	yt_feed "Computerphile" "UC9-y-6csu5WGm29I7JiwpnA"
	yt_feed "EBRPL Career Center" "UCIvntuaxP7PyaJDeHE_9E8Q"
	yt_feed "EmacsConf and Emacs hangouts" "UCwuyodzTl_KdEKNuJmeo99A"
	yt_feed "Gavin Freeborn" "UCJetJ7nDNLlEzDLXv7KIo0w"
	yt_feed "Henry Homesweet" "UCZqjwc1Wy5t1rsviYYsJiYg"
	yt_feed "Howard Abrams" "UCVHICXXtKG7rZgtC5xonNdQ"
	yt_feed "Ignite Talks" "UCZotK8ZPTUNLMeW5Q6T0cKg"
	yt_feed "Jake B" "UCBMMB7Yi0eyFuY95Qn2o0Yg"
	yt_feed "James Tomasino" "UCbTp1BYjpuhDRG5OmgIT8iw"
	yt_feed "jan Misali" "UCJOh5FKisc0hUlEeWFBlD-w"
	yt_feed "Joshua Weissman" "UChBEbMKI1eCcejTtmI32UEw"
	yt_feed "J Duckworth Animations" "UCtAEaNVrNxAUy2VSRPD_PYQ"
	yt_feed "John Kitchin" "UCQp2VLAOlvq142YN3JO3y8w"
	yt_feed "karthik" "UCbh_g91w0T6OYp40xFrtnhA"
	yt_feed "Ken Forkish" "UCvVvFZd0e86bLbd5FdgYiUg"
	yt_feed "Lex Fridman" "UCSHZKyawb77ixDdsGog4iWA"
	yt_feed "LockPickingLawyer" "UCm9K6rby98W8JigLoZOh6FQ"
	yt_feed "Maangchi" "UC8gFadPgK2r1ndqLI04Xvvw"
	yt_feed "Mike Zamansky" "UCxkMDXQ5qzYOgXPRnOBrp1w"
	yt_feed "MIT OpenCourseWare" "UCEBb1b_L6zDS3xTUrIALZOw"
	yt_feed "My Analog Journal" "UC8TZwtZ17WKFJSmwTZQpBTA"
	yt_feed "Nat's What I Reckon" "UCEFW1E8QzP-hKxjO2Rj68wg"
	yt_feed "Now You See It" "UCWTFGPpNQ0Ms6afXhaWDiRw"
	yt_feed "Numberphile" "UCoxcjq-8xIDTYp3uz647V5A"
	yt_feed "Philosophy Tube" "UC2PA-AKmVpU6NKCGtZq_rKQ"
	yt_feed "PronunciationManual" "UCqDSLtXeZsGc3dtVb5MW13g"
	yt_feed "Protesilaos Stavrou" "UC0uTPqBCFIpZxlz_Lv1tk_g"
	yt_feed "RailCowGirl" "UCj-Xm8j6WBgKY8OG7s9r2vQ"
	yt_feed "Simone Giertz" "UC3KEoMzNz8eYnwBC34RaKCQ"
	yt_feed "Steve Yegge" "UC2RCcnTltR3HMQOYVqwmweA"
	yt_feed "System Crafters" "UCAiiOTio8Yu69c3XnR7nQBQ"
	yt_feed "Tasting History with Max Miller" "UCsaGKqPZnGp_7N80hcHySGQ"
	yt_feed "Technology Connections" "UCy0tKL1T7wFoYcxCe0xjN6Q"
	yt_feed "Too Many Zooz" "UCtjXVqMVzBIgU0SO8AV0vPg"
	yt_feed "Townsends" "UCxr2d4As312LulcajAkKJYw"
	yt_feed "Unitarian Church of Baton Rouge" "UClrqHvbiFM-1hn931ZmAPFw"
	yt_feed "Vulf" "UCtWuB1D_E3mcyYThA9iKggQ"
	yt_feed "WFTDA: Women's Flat Track Derby Association" "UC7eMWpvytqd3gYAqxTl9w7g"
	yt_feed "Zach Anner" "UCPTVYxUoYWhNa8J7GzIGnyQ"
	yt_feed "Andrew Tropin" "UCuj_loxODrOPxSsXDfJmpng"
	yt_feed "Animist" "UCweDxCT5Fiykk3uHqPKqLWg"
	yt_feed "Chris Were Digital" "UCAPR27YUyxmgwm3Wc2WSHLw"
	yt_feed "Esoterica" "UCoydhtfFSk1fZXNRnkGnneQ"
	yt_feed "FOSDEM" "UC9NuJImUbaSNKiwF2bdSfAw"
}

# Feeds picked up from agafnd's recommendation page (URL below):
# a few YouTube channels, one podcast and some miscellaneous sites.
feeds_from_agafnd() {
	# https://tilde.town/~agafnd/good-rss.html
	yt_feed "bill wurtz" "UCq6aw03lNILzV96UvEAASfQ"
	yt_feed "Atomic Shrimp" "UCSl5Uxu2LyaoAoMMGp6oTJA"
	yt_feed "brian david gilbert" "UCakAg8hC_RFJm4RI3DlD7SA"
	yt_feed "Jonni Phillips" "UC1NbRaGNot6kNEL3Jsa7SRA"
	# podcasts
	feed "Lingthusiasm" "https://feeds.soundcloud.com/users/soundcloud:users:237055046/sounds.rss" "https://lingthusiasm.com/"
	# other stuff
	feed "Buttersafe" "https://feeds.feedburner.com/buttersafe" "https://www.buttersafe.com/"
	feed "Going Medieval" "https://going-medieval.com/feed/"
	feed "Scooby Doo Mistakes" "https://scoobydoomistakes.tumblr.com/rss" "https://scoobydoomistakes.tumblr.com/"
}

# Feeds imported from elioat's list.  Only name and feed URL are given;
# base URL and encoding are left out.
#
# "Ben Hoyt's technical writing" is intentionally not listed here: the same
# name and URL are already subscribed in feeds_people, and two feeds with one
# name share one feed file, so the duplicate only caused a second fetch.
feeds_from_elioat() {
	# feed <name> <feedurl> [basesiteurl] [encoding]
	feed "antoine.studio" "https://antoine.studio/rss.xml"
	feed "apenwarr" "https://apenwarr.ca/log/rss.php"
	feed "bert.org" "https://bert.org/feed.xml"
	feed "Buried Treasure" "https://buried-treasure.org/feed/"

	feed "catapult" "https://catapult.co/rss"
	# NOTE(review): this URL looks like the site root rather than a feed -- confirm.
	feed "codex99" "http://codex99.com"
	feed "compudanzas log" "https://compudanzas.net/atom.xml"
	feed "DustyCloud Brainstorms" "http://dustycloud.org/blog/index.xml"
	feed "Earthly" "https://earthly.dev/blog/feed.xml"
	feed "Ella Hoeppner" "https://www.ellahoeppner.com/blog.rss"
	feed "Esoteric Codes" "https://esoteric.codes/rss"
	feed "Grimgrains" "https://grimgrains.com/links/rss.xml"
	feed "hisaac.net" "https://hisaac.net/feed.xml"
	feed "Hundred Rabbits" "http://100r.co/links/rss.xml"
	feed "Hypercritical" "https://hypercritical.co/feeds/main"
	feed "joshua stein" "https://jcs.org/rss"
	feed "journal.stuffwithstuff.com" "http://journal.stuffwithstuff.com/rss.xml"
	feed "Julia Evans" "https://jvns.ca/atom.xml"
	feed "Kicks Condor [RSS]" "https://www.kickscondor.com/rss.xml"
	feed "kottke.org" "http://feeds.kottke.org/main"
	feed "literary monster" "https://literary.monster/index.xml"
	feed "Linus Lee" "https://thesephist.com/index.xml"
	feed "Meg Conley" "https://www.megconley.com/rss/"
	feed "minimalist baker" "https://minimalistbaker.com/feed/"
	feed "llimllib notes" "https://notes.billmill.org/atom.xml"
	feed "petermolnar.net" "https://petermolnar.net/feed/"
	feed "Piper Haywood" "https://piperhaywood.com/feed/"
	feed "Rainbow Plant Life" "https://rainbowplantlife.com/feed/"
	feed "Robin Rendle" "https://robinrendle.com/feed.xml"
	feed "sogrady" "https://sogrady.org/feed/"
	feed "Sibylla Bostoniensis" "https://siderea.dreamwidth.org/data/rss"
	feed "The only good system is a sound system" "https://ftrv.se/posts.atom"
	feed "uxn" "https://git.sr.ht/~rabbits/uxn/log/main/rss.xml"
	feed "uxnfth" "https://git.sr.ht/~binarycat/uxnfth/log/front/rss.xml"
	feed "weaver.skepti.ch" "https://weaver.skepti.ch/atom.xml"
	feed "whippet" "https://thewhippet.org/archive/rss/"
	feed "worgle" "https://git.sr.ht/~pbatch/worgle/log/master/rss.xml"
	feed "r/Worldnews" "https://www.reddit.com/r/worldnews/.rss"
	feed "zep [Lexaloffle Blog Feed]" "https://www.lexaloffle.com/bbs/feed.php?uid=1"
}

# Podcast subscriptions.
feeds_podcasts() {
	feed "Tilde Whirl Tildeverse Podcast" "https://tilde.town/~dozens/podcast/rss.xml" "https://tilde.town/~dozens/podcast/" ""
	feed "trash cat tech cat" "https://podcast.librepunk.club/tctc/ogg.xml" "https://podcast.librepunk.club/tctc/" ""
	feed "Hacker Public Radio" "https://hackerpublicradio.org/hpr_ogg_rss.php" "https://hackerpublicradio.org/" ""
	feed "Levar Burton Reads" "https://feeds.simplecast.com/LDNgBXht"
}

# Blogs and sites run by friends.
# Each feed name appears once: a repeated name maps to the same feed file,
# so a second entry would only trigger a redundant fetch of that file.
feeds_friends() {
	feed "lipu pi jan Niko" "https://tilde.town/~nihilazo/index.xml" "https://tilde.town/~nihilazo/" ""
	feed "chrismanbrown.gitlab.io (dozens)" "https://chrismanbrown.gitlab.io/rss.xml" "https://chrismanbrown.gitlab.io/" ""
	feed "Dozens and Dragons" "https://dozensanddragons.neocities.org/rss.xml" "https://dozensanddragons.neocities.org/" ""
	feed "dozens: Society For Putting Things On Top Of Other Things" "https://society.neocities.org/rss.xml" "https://society.neocities.org/" ""
	feed "dozens: vgnfdblg" "https://supervegan.neocities.org/feed.xml" "https://supervegan.neocities.org/" ""
	feed "dozens: backgammon" "http://tilde.town/~dozens/backgammon/rss.xml" "http://tilde.town/~dozens/backgammon/" ""
	feed "dozens: It's Pro Toad and Superb Owl" "https://git.tilde.town/dozens/protoadandsuperbowl/raw/branch/master/feed.xml" "https://git.tilde.town/dozens/protoadandsuperbowl/" ""
	feed "dozens dreams" "https://tilde.team/~dozens/dreams/rss.xml" "https://tilde.team/~dozens/dreams/" ""
	feed "dozens: write.as" "https://write.tildeverse.org/dozens/feed/" "https://write.tildeverse.org/dozens/" ""
	feed "dozens css art" "http://tilde.town/~dozens/cssart/feed.xml" "http://tilde.town/~dozens/cssart/" ""
	feed "Benjamin Wil" "https://benjaminwil.info/feed.xml" "https://benjaminwil.info/" ""
	feed "(lambda (x) (create x))" "http://lambdacreate.com/static/feed.rss" "http://lambdacreate.com/" ""
	feed "m455.casa" "https://m455.casa/feed.rss" "https://m455.casa/" ""
	feed "Oatmeal" "https://eli.li/feed.rss" "https://eli.li/" ""
	feed "RSRSSS" "https://envs.net/~lucidiot/rsrsss/feed.xml" "https://envs.net/~lucidiot/rsrsss/" ""
	feed "Tomasino Blog" "https://blog.tomasino.org/index.xml" "https://blog.tomasino.org/" ""
	feed "Tomasino Labs" "https://labs.tomasino.org/index.xml" "https://labs.tomasino.org/" ""
	feed "Will's Blog" "https://wflewis.com/feed.xml" "https://wflewis.com/" ""
	feed "Rick Carlino's Blog" "https://rickcarlino.com/rss/feed.rss" "https://rickcarlino.com/rss/" ""
	feed "Causal Agency" "https://text.causal.agency/feed.atom" "https://text.causal.agency/" ""
	feed "Benoit Joly" "https://blog.benoitj.ca/posts/index.xml" "https://blog.benoitj.ca/posts/" ""
	feed "linkbudz" "https://linkbudz.m455.casa/feed.rss" "https://linkbudz.m455.casa/" ""
	feed "Alex Schroeder" "https://alexschroeder.ch/wiki/feed/full/" "https://alexschroeder.ch/" ""
	feed "Björn Wärmedal" "https://warmedal.se/~bjorn/atom.xml" "https://warmedal.se/~bjorn/" ""
	feed "a rickety bridge of impossible crossing" "https://bluelander.bearblog.dev/feed/" "https://bluelander.bearblog.dev/" ""
	feed "javapool updates" "https://tilde.town/~m455/javapool.rss" "" ""
	feed "coolguy.website" "https://coolguy.website/rss/index.xml" "https://coolguy.website/"
	feed "brainshit" "https://brainshit.fr/rss" "https://brainshit.fr"
	feed 'Idiomdrottning' 'https://idiomdrottning.org/blog' "https://idiomdrottning.org"
	feed "Rick Carlino's Recent News Feed" "https://news.rickcarlino.com/rss.rss"
}

# Personal blogs of individual authors.
feeds_people() {
	feed "ajroach" "http://ajroach42.com/feed.xml" "http://ajroach42.com/" ""
	feed "rachelbythebay" "https://rachelbythebay.com/w/atom.xml" "https://rachelbythebay.com/" ""
	feed "Codemadness" "https://codemadness.org/atom.xml"
	feed "Ben Hoyt's technical writing" "https://benhoyt.com/writings/rss.xml" "https://benhoyt.com/writings/"
}

# News and link aggregators.
# Several names here are matched by filter() and merge():
# "Hacker News" and "NPR" in filter(), "Tildes" in merge().
feeds_news() {
	feed "Hacker News" "https://news.ycombinator.com/rss" "https://news.ycombinator.com/" ""
	feed "lobste.rs" "https://lobste.rs/rss" "https://lobste.rs/" ""
	feed "tilde news" "https://tilde.news/rss" "https://tilde.news/" ""
	feed "Tildes Atom feed" "https://tildes.net/topics.atom" "https://tildes.net/" ""
	feed "NPR" "https://feeds.npr.org/1001/rss.xml" "https://text.npr.org/" ""
}

# Small-web / tildeverse community feeds.
feeds_smolweb() {
	feed "~town friday postcard" "https://tilde.town/~lucidiot/fridaypostcard.xml" ""
	feed "Cosmic Voyage" "https://cosmic.voyage/rss.xml" "https://cosmic.voyage/" ""
	feed "plan.cat" "https://plan.cat/rss" "https://plan.cat/" ""
}

# Web comics.
# "Saturday Morning Breakfast Cereal" is matched by name in filter(), which
# strips that prefix from its items.
feeds_comics() {
	feed "Cat and Girl" "https://catandgirl.com/feed/" "https://catandgirl.com/" ""
	feed "Dinosaur Comics!" "https://qwantz.com/rssfeed.php" "https://qwantz.com/" ""
	feed "False Knees" "https://falseknees.tumblr.com/rss" "https://falseknees.tumblr.com/" ""
	feed "Saturday Morning Breakfast Cereal" "https://www.smbc-comics.com/comic/rss" "https://www.smbc-comics.com/" ""
	feed "xkcd" "https://xkcd.com/atom.xml" "https://xkcd.com/" ""
}

# Feeds that do not fit any of the other groups.
feeds_misc() {
	feed "Crystalverse" "https://crystalverse.com/feed/" "https://crystalverse.com/" ""
	feed "Hetzner" "https://status.hetzner.com/en.atom" "https://status.hetzner.com/" ""
	feed "LOW-TECH MAGAZINE" "https://feeds2.feedburner.com/typepad/krisdedecker/lowtechmagazineenglish" "https://www.lowtechmagazine.com/" ""
	feed "500-mile email" "https://500mile.email/feed.xml" "https://500mile.email/"
	feed "computers are bad" "https://computer.rip/rss.xml" "https://computer.rip/"
	feed "Eggs Unlimited (5)" "https://eggs.call-cc.org/rss-5.xml"
}

# Empty feed group: subscribes to nothing and reports success.
feeds_me() {
	return 0
}

### Remove feeds ################################################

# Feeds that are no longer followed, kept for reference.
# feeds() does not call this function, so nothing listed here is subscribed
# through it.
unsubscribe_feeds() {
	yt_feed "Jelle's Marble Runs" "UCYJdpnjuSWVOLgGT9fIzL0g"
	yt_feed "LOOK MUM NO COMPUTER" "UCafxR2HWJRmMfSdyZXvZMTw"
	yt_feed "Ska Tune Network" "UCji2l5wcs6GoYJY1GgG_slQ"
	yt_feed "Thought Slime" "UCrr7y8rEXb7_RiVniwvzk9w"
	yt_feed "STRANGE ÆONS" "UCrq3JYirgV-BLluzTF6X_7A"
	yt_feed "NileRed" "UCFhXFikryT4aFcLkLw2LBLA"
	yt_feed "Cathode Ray Dude" "UCXnNibvR_YIdyPs8PZIBoEw"
	feed "Kill James Bond!" "https://feed.podbean.com/killjamesbondpod/feed.xml"
	feed "Jo's Boys" "https://anchor.fm/s/840fb444/podcast/rss"
	feed "Boonta Vista" "http://feeds.soundcloud.com/users/soundcloud:users:307723090/sounds.rss"
	feed "Yesterday's Print" "https://yesterdaysprint.tumblr.com/rss"
	feed "Threatening Music Notation" "https://nitter.kavin.rocks/ThreatNotation/rss"
	feed "arstechnica features" "https://feeds.arstechnica.com/arstechnica/features"
	feed "ars ludi" "https://arsludi.lamemage.com/index.php/feed/"
	feed "Carlos Carrasco" "https://carloscarrasco.com/index.xml"
	feed "dracula daily" "https://draculadaily.substack.com/feed"
	feed "DRIVE™: The SciFi Comic, by Dave Kellett" "http://cdn.drivecomic.com/rss.xml"
	feed "EcoClipper" "https://ecoclipper.org/feed/"
	feed "esolangs wiki" "https://esolangs.org/w/api.php?hidebots=1&urlversion=1&days=7&limit=50&action=feedrecentchanges&feedformat=atom"
	feed "Flashing Palely in the Margins" "https://www.inthemargins.ca/feed.rss"
	feed "Gamestudies.org" "http://gamestudies.org/rss.php"
	feed "gbadev.org" "https://rss.gbadev.org/feed.php"
	feed "handmade.network" "https://handmade.network/atom"
	feed "Indigenous Environmental Network" "https://www.ienearth.org/feed/"
	feed "milofultz" "https://milofultz.com/atom.xml"
	feed "N O D E" "https://n-o-d-e.net/rss/rss.xml"
	feed "bill mill" "https://billmill.org/Atom"
	feed "nullrouted.space" "https://nullrouted.space/feed/"
	feed "orllewin" "https://orllewin.uk/category/blog/feed/"
	feed "Pinboard (unread bookmarks for eli-oat)" "https://feeds.pinboard.in/rss/u:eli-oat/"
	feed "Rock Paper Shotgun" "https://www.rockpapershotgun.com/feed"
}

### Filter ######################################################

# filter NAME
# Post-process the TAB-separated items of one feed, stdin to stdout.
# A per-feed fixup is selected by matching NAME; whatever it prints is then
# piped through filter_add_empties, filter_html_entities and
# filter_filter_links, which apply to every feed.
filter() {
	case "$1" in
	# Filter items based on feed name.
	*'Hacker News'*)
		# Hacker news is stupid and doesn't include IDs, and
		# instead includes a comments tag in its body.
		# Field 6 is set to the href of the anchor that starts field 4.
		gawk 'BEGIN {FS="\t"; OFS="\t"} {
$6 = $4
sub(/^<a href="/,"", $6)
sub(/">.*$/,"", $6)
print
}'
		;;
	*'Claire Saffitz'*) sed 's@ | Dessert Person@@' ;;
	*Computerphile*) sed 's@ - Computerphile@@' ;;
	*NPR*) sed 's@www\.npr\.org@text.npr.org@' ;;
	r_* | r/*)
		# Reddit doesn't include a link as its item IDs, but instead
		# something of the form `t3_xxxxx', where xxxxx can be glommed
		# into a URL to get to the comments.
		#
		# This must be gawk, as in the Hacker News branch: the
		# three-argument match() that captures the [link] target is a
		# gawk extension and a syntax error in other awks.
		#
		# NOTE(review): field 6 is built by appending to the whole of
		# field 3; a plain comments URL may have been intended -- confirm.
		gawk -v SUBREDDIT="$1" -v ALT=i.reddit.com \
			'BEGIN { FS="\t"; OFS="\t"; }
			{ $6 = $3 ALT "/" SUBREDDIT "/" substr($6, 4); }
			{ if (match($4,/<a href="([^"]*)">\[link\]<\/a>/,ln)) $3 = ln[1]; }
			{ gsub(/www\.reddit\.com/, ALT, $0); }
			{ print; }'
		;;
	'Saturday Morning Breakfast Cereal')
		sed 's@Saturday Morning Breakfast Cereal - @@'
		;;
	*) cat ;;
	esac |
		filter_add_empties |
		filter_html_entities |
		filter_filter_links
}

# Give items with an empty title (field 2) the placeholder "[empty]" and
# print exactly the first nine TAB-separated fields of every item.
filter_add_empties() {
	awk 'BEGIN{FS="\t";OFS=FS;}
# Compare with the empty string; a truthiness test would also replace
# numeric-looking titles such as "0".
$2 == "" { $2 = "[empty]" }
{ print $1,$2,$3,$4,$5,$6,$7,$8,$9; }
'
}

# Rewrite every www.youtube.com watch URL on stdin to its /embed/ form.
filter_embed_youtube() {
	sed -e 's|www\.youtube\.com/watch?v=|www.youtube.com/embed/|g'
}

# Clean up the link (field 3) and enclosure (field 8) of every item and
# drop 1px tracking images from the content (field 4).
# URLs that are not http, https or gopher are blanked.
filter_filter_links() {
	# shorten feedburner links and strip tracking parameters and pixels
	awk 'BEGIN { FS= "\t"; OFS = "\t"; }
function filterlink(s) {
# protocol must start with http, https or gopher.
if (match(s, /^(http|https|gopher):\/\//) == 0) {
return "";
}
# shorten feedburner links.
# The substring is taken from s itself, so the enclosure is shortened
# from its own URL and not from the link field.
if (match(s, /^(http|https):\/\/[^\/]+\/~r\/.*\/~3\/[^\/]+\//)) {
s = substr(s, RSTART, RLENGTH);
}
# strip tracking parameters
# urchin, facebook, piwik, webtrekk and generic.
gsub(/\?(ad|campaign|fbclid|pk|tm|utm|wt)_([^&]+)/, "?", s);
gsub(/&(ad|campaign|fbclid|pk|tm|utm|wt)_([^&]+)/, "", s);
gsub(/\?&/, "?", s);
gsub(/[\?&]+$/, "", s);
return s
}
{
$3 = filterlink($3); # link
$8 = filterlink($8); # enclosure
# try to remove tracking pixels: <img/> tags with 1px width or height.
gsub("<img[^>]*(width|height)[[:space:]]*=[[:space:]]*[\"'"'"' ]?1[\"'"'"' ]?[^0-9>]+[^>]*>", "", $4);
print $0;
}'
}

# Decode a handful of HTML entities on every line of stdin: numeric
# double- and single-quote entities become plain quotes, the typographic
# apostrophe becomes a plain one, and &amp; becomes &.
filter_html_entities() {
	awk -v apos="'" '{
		gsub(/&#34;|&#x22;/, "\"")
		gsub(/&#39;|&#x27;|’/, apos)
		# &amp; goes last so that text like "&amp;#34;" ends up as "&#34;".
		gsub(/&amp;/, "\\&")
		print
	}'
}

# Fetch #########################################################

# fetch NAME URL FEEDFILE
# Download URL with curl and write the response body to stdout; NAME and
# FEEDFILE are not used.  curl follows at most 3 redirects, gives up after
# 15 seconds and fails on HTTP errors.
# If NOFETCH is set to "true", nothing is downloaded and fetch returns at once.
fetch() { # fetch(name, url, feedfile)
	# NOFETCH is run as a command; unset it defaults to `false'.
	"${NOFETCH:-false}" && return
	# The header value must not carry extra quote characters: they would
	# become part of the header name and the User-Agent would not be set.
	curl -s -L \
		--max-redirs 3 \
		--header "User-Agent: $USER_AGENT" \
		--fail \
		--max-time 15 \
		"$2"
}

# Merge ########################################################

# merge NAME OLDFILE NEWFILE
# Combine the stored items (OLDFILE) with the freshly fetched ones (NEWFILE)
# and print the de-duplicated result on stdout.  Which fields count as the
# item key depends on NAME.
merge() {
	# merge(name, oldfile, newfile)
	case "$1" in
	*'Tildes'* | *r_* | r/*)
		# Tildes changes its IDs and titles fairly regularly, which is really
		# annoying and adds false doubles.  So here, remove the last
		# element of the path name (the title), and sort ONLY on
		# TIMESTAMP.
		#
		# Reddit also sometimes does weird things, so let's just use
		# timestamps for that too.  Reddit feeds are matched under both
		# spellings of their name, `r_...' and `r/...', like in filter().
		cat "$2" "$3" |
			sort -t "$(printf '\t')" -k1,1 -u
		;;
	*)
		# Otherwise, sort on ID and URL.  This minimizes the risk of
		# false doubles.
		sort -t "$(printf '\t')" -u -k6,6 -k3,3 "$2" "$3" 2>/dev/null
		;;
	esac
}

# Local Variables:
# truncate-lines: t
# End: