# -*- sh -*-
# sfeedrc(5) configuration: feed lists plus custom fetch/filter/merge hooks.

# SFEED="$HOME/.sfeed"
USER_AGENT='Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:78.0) Gecko/20100101 Firefox/78.0'

# sfeedpath="$SFEED/feeds"
# nproc is not installed everywhere; fall back to a fixed job count.
maxjobs="$(nproc 2>/dev/null || echo 8)"

### Feeds #######################################################

# feeds: entry point listing every subscribed feed group.
feeds() {
    ## feed NAME FEED_URL BASE_URL ENCODING
    # defined by sfeedrc(5).
    # I want to add some extra fields after these.
    feeds_planets
    feeds_youtube
    feeds_podcasts
    feeds_friends
    feeds_people
    feeds_news
    feeds_smolweb
    feeds_comics
    feeds_misc
    feeds_me
    feeds_from_agafnd
    feeds_from_elioat
}

feeds_planets() {
    feedx "Planet Emacs" "https://planet.emacslife.com/atom.xml" "https://planet.emacslife.com/" ""
    feedx "r/emacs" "https://i.reddit.com/r/emacs.rss" "https://i.reddit.com/r/emacs" ""
    feedx "Planet Lisp" "https://planet.lisp.org/rss20.xml" "https://planet.lisp.org/" ""
    feedx "Planet Scheme" "https://planet.scheme.org/atom.xml" "https://planet.scheme.org/" ""
}

feeds_podcasts() {
    feedx "Tilde Whirl Tildeverse Podcast" "https://tilde.town/~dozens/podcast/rss.xml" "https://tilde.town/~dozens/podcast/" ""
    feedx "trash cat tech cat" "https://podcast.librepunk.club/tctc/ogg.xml" "https://podcast.librepunk.club/tctc/" ""
    feedx "Hacker Public Radio" "https://hackerpublicradio.org/hpr_ogg_rss.php" "https://hackerpublicradio.org/" ""
    feedx "Levar Burton Reads" "https://feeds.simplecast.com/LDNgBXht"
    feedx "A Problem Squared" "https://feeds.libsyn.com/230510/rss"
    feedx "Lowtech Radio Gazeete" "https://lambdacreate.com/static/ltrg/feed.xml" "https://lambdacreate.com/podcast/ltrg"
    yt_feed "iilluminaughtii" "UCpDmn2FfVYdPIDwRTcf5-OA"
    feedx "Maintenance Phase" "https://feeds.buzzsprout.com/1411126.rss" "https://www.maintenancephase.com/"
    feedx "Switched on Pop" "https://feeds.megaphone.fm/switchedonpop" "https://switchedonpop.com/"
    feedx "You Are Good" "https://feeds.blubrry.com/feeds/whyaredads.xml" ""
}

feeds_youtube() {
    yt_feed "NPR Music" "UC4eYXhJI4-7wSWc8UNRwD4A"
    yt_feed "3Blue1Brown" "UCYO_jab_esuFRV4b17AJtAw"
    yt_feed "AB - Ancienne Belgique" "UCUVAw2kdxJlcfCdEcdgXv5A"
    yt_feed "Adam Ragusea" "UC9_p50tH3WmMslWRWKnM7dQ"
    yt_feed "Babish Culinary Universe" "UCJHA_jMfCvEnv-3kRjTCQXw"
    yt_feed "Baggers" "UCMV8p6Lb-bd6UZtTc_QD4zA"
    yt_feed "Case Duckworth" "UC92gRJdnUYklVu4pvj9n0Lw"
    yt_feed "Claire Saffitz x Dessert Person" "UCvw6Y1kr_8bp6B5m1dqNyiw"
    yt_feed "Computerphile" "UC9-y-6csu5WGm29I7JiwpnA"
    yt_feed "EBRPL Career Center" "UCIvntuaxP7PyaJDeHE_9E8Q"
    yt_feed "EmacsConf and Emacs hangouts" "UCwuyodzTl_KdEKNuJmeo99A"
    yt_feed "Gavin Freeborn" "UCJetJ7nDNLlEzDLXv7KIo0w"
    yt_feed "Henry Homesweet" "UCZqjwc1Wy5t1rsviYYsJiYg"
    yt_feed "Howard Abrams" "UCVHICXXtKG7rZgtC5xonNdQ"
    yt_feed "Ignite Talks" "UCZotK8ZPTUNLMeW5Q6T0cKg"
    yt_feed "Jake B" "UCBMMB7Yi0eyFuY95Qn2o0Yg"
    yt_feed "James Tomasino" "UCbTp1BYjpuhDRG5OmgIT8iw"
    yt_feed "jan Misali" "UCJOh5FKisc0hUlEeWFBlD-w"
    yt_feed "Joshua Weissman" "UChBEbMKI1eCcejTtmI32UEw"
    yt_feed "J Duckworth Animations" "UCtAEaNVrNxAUy2VSRPD_PYQ"
    yt_feed "John Kitchin" "UCQp2VLAOlvq142YN3JO3y8w"
    yt_feed "karthik" "UCbh_g91w0T6OYp40xFrtnhA"
    yt_feed "Ken Forkish" "UCvVvFZd0e86bLbd5FdgYiUg"
    yt_feed "Lex Fridman" "UCSHZKyawb77ixDdsGog4iWA"
    yt_feed "LockPickingLawyer" "UCm9K6rby98W8JigLoZOh6FQ"
    yt_feed "Maangchi" "UC8gFadPgK2r1ndqLI04Xvvw"
    yt_feed "Mike Zamansky" "UCxkMDXQ5qzYOgXPRnOBrp1w"
    yt_feed "MIT OpenCourseWare" "UCEBb1b_L6zDS3xTUrIALZOw"
    yt_feed "My Analog Journal" "UC8TZwtZ17WKFJSmwTZQpBTA"
    yt_feed "Nat's What I Reckon" "UCEFW1E8QzP-hKxjO2Rj68wg"
    yt_feed "Now You See It" "UCWTFGPpNQ0Ms6afXhaWDiRw"
    yt_feed "Numberphile" "UCoxcjq-8xIDTYp3uz647V5A"
    yt_feed "Philosophy Tube" "UC2PA-AKmVpU6NKCGtZq_rKQ"
    yt_feed "PronunciationManual" "UCqDSLtXeZsGc3dtVb5MW13g"
    yt_feed "Protesilaos Stavrou" "UC0uTPqBCFIpZxlz_Lv1tk_g"
    yt_feed "RailCowGirl" "UCj-Xm8j6WBgKY8OG7s9r2vQ"
    yt_feed "Simone Giertz" "UC3KEoMzNz8eYnwBC34RaKCQ"
    yt_feed "Steve Yegge" "UC2RCcnTltR3HMQOYVqwmweA"
    yt_feed "System Crafters" "UCAiiOTio8Yu69c3XnR7nQBQ"
    yt_feed "Tasting History with Max Miller" "UCsaGKqPZnGp_7N80hcHySGQ"
    yt_feed "Technology Connections" "UCy0tKL1T7wFoYcxCe0xjN6Q"
    yt_feed "Too Many Zooz" "UCtjXVqMVzBIgU0SO8AV0vPg"
    yt_feed "Townsends" "UCxr2d4As312LulcajAkKJYw"
    yt_feed "Unitarian Church of Baton Rouge" "UClrqHvbiFM-1hn931ZmAPFw"
    yt_feed "Vulf" "UCtWuB1D_E3mcyYThA9iKggQ"
    yt_feed "WFTDA: Women's Flat Track Derby Association" "UC7eMWpvytqd3gYAqxTl9w7g"
    yt_feed "Zach Anner" "UCPTVYxUoYWhNa8J7GzIGnyQ"
    yt_feed "Andrew Tropin" "UCuj_loxODrOPxSsXDfJmpng"
    yt_feed "Animist" "UCweDxCT5Fiykk3uHqPKqLWg"
    yt_feed "Chris Were Digital" "UCAPR27YUyxmgwm3Wc2WSHLw"
    yt_feed "Esoterica" "UCoydhtfFSk1fZXNRnkGnneQ"
    yt_feed "FOSDEM" "UC9NuJImUbaSNKiwF2bdSfAw"
}

feeds_from_agafnd() {
    # https://tilde.town/~agafnd/good-rss.html
    yt_feed "bill wurtz" "UCq6aw03lNILzV96UvEAASfQ"
    yt_feed "Atomic Shrimp" "UCSl5Uxu2LyaoAoMMGp6oTJA"
    yt_feed "brian david gilbert" "UCakAg8hC_RFJm4RI3DlD7SA"
    yt_feed "Jonni Phillips" "UC1NbRaGNot6kNEL3Jsa7SRA"
    # podcasts
    feedx "Lingthusiasm" "https://feeds.soundcloud.com/users/soundcloud:users:237055046/sounds.rss" "https://lingthusiasm.com/"
    # other stuff
    feedx "Buttersafe" "https://feeds.feedburner.com/buttersafe" "https://www.buttersafe.com/"
    feedx "Going Medieval" "https://going-medieval.com/feed/"
    feedx "Scooby Doo Mistakes" "https://scoobydoomistakes.tumblr.com/rss" "https://scoobydoomistakes.tumblr.com/"
}

feeds_from_elioat() {
    # feed <name> <feedurl> [basesiteurl] [encoding]
    feedx "antoine.studio" "https://antoine.studio/rss.xml"
    feedx "apenwarr" "https://apenwarr.ca/log/rss.php"
    feedx "bert.org" "https://bert.org/feed.xml"
    # "Ben Hoyt's technical writing" is already subscribed in feeds_people
    # (same name and URL), so it is not repeated here.
    feedx "Buried Treasure" "https://buried-treasure.org/feed/"
    feedx "catapult" "https://catapult.co/rss"
    feedx "codex99" "http://codex99.com"
    feedx "compudanzas log" "https://compudanzas.net/atom.xml"
    feedx "DustyCloud Brainstorms" "http://dustycloud.org/blog/index.xml"
    feedx "Earthly" "https://earthly.dev/blog/feed.xml"
    feedx "Ella Hoeppner" "https://www.ellahoeppner.com/blog.rss"
    feedx "Esoteric Codes" "https://esoteric.codes/rss"
    feedx "Grimgrains" "https://grimgrains.com/links/rss.xml"
    feedx "hisaac.net" "https://hisaac.net/feed.xml"
    feedx "Hundred Rabbits" "http://100r.co/links/rss.xml"
    feedx "Hypercritical" "https://hypercritical.co/feeds/main"
    feedx "joshua stein" "https://jcs.org/rss"
    feedx "journal.stuffwithstuff.com" "http://journal.stuffwithstuff.com/rss.xml"
    feedx "Julia Evans" "https://jvns.ca/atom.xml"
    feedx "Kicks Condor [RSS]" "https://www.kickscondor.com/rss.xml"
    feedx "kottke.org" "http://feeds.kottke.org/main"
    feedx "literary monster" "https://literary.monster/index.xml"
    feedx "Linus Lee" "https://thesephist.com/index.xml"
    feedx "Meg Conley" "https://www.megconley.com/rss/"
    feedx "minimalist baker" "https://minimalistbaker.com/feed/"
    feedx "llimllib notes" "https://notes.billmill.org/atom.xml"
    feedx "petermolnar.net" "https://petermolnar.net/feed/"
    feedx "Piper Haywood" "https://piperhaywood.com/feed/"
    feedx "Rainbow Plant Life" "https://rainbowplantlife.com/feed/"
    feedx "Robin Rendle" "https://robinrendle.com/feed.xml"
    feedx "sogrady" "https://sogrady.org/feed/"
    feedx "Sibylla Bostoniensis" "https://siderea.dreamwidth.org/data/rss"
    feedx "The only good system is a sound system" "https://ftrv.se/posts.atom"
    feedx "weaver.skepti.ch" "https://weaver.skepti.ch/atom.xml"
    feedx "whippet" "https://thewhippet.org/archive/rss/"
    feedx "zep [Lexaloffle Blog Feed]" "https://www.lexaloffle.com/bbs/feed.php?uid=1"
}

feeds_dozens() {
    feedx "Dozens and Dragons" "https://dozensanddragons.neocities.org/rss.xml" "https://dozensanddragons.neocities.org/" ""
    feedx "dozens: Society For Putting Things On Top Of Other Things" "https://society.neocities.org/rss.xml" "https://society.neocities.org/" ""
    feedx "dozens: vgnfdblg" "https://supervegan.neocities.org/feed.xml" "https://supervegan.neocities.org/" ""
    feedx "dozens: backgammon" "http://tilde.town/~dozens/backgammon/rss.xml" "http://tilde.town/~dozens/backgammon/" ""
    feedx "dozens: It's Pro Toad and Superb Owl" "https://git.tilde.town/dozens/protoadandsuperbowl/raw/branch/master/feed.xml" "https://git.tilde.town/dozens/protoadandsuperbowl/" ""
    feedx "dozens dreams" "https://tilde.team/~dozens/dreams/rss.xml" "https://tilde.team/~dozens/dreams/" ""
    feedx "dozens: write.as" "https://write.tildeverse.org/dozens/feed/" "https://write.tildeverse.org/dozens/" ""
    feedx "dozens css art" "http://tilde.town/~dozens/cssart/feed.xml" "http://tilde.town/~dozens/cssart/" ""
    feedx "Cleric+Thief: episodes" "https://blog.miso.town/atom?url=http://tilde.town/~dozens/clericthief/" "https://tilde.town/~dozens/clericthief/index.html"
    feedx "Cleric+Thief: journal" "https://journal.miso.town/atom?url=http://tilde.town/~dozens/clericthief/journal.html" "https://tilde.town/~dozens/clericthief/journal.html"
}

feeds_friends() {
    feedx "elly.town" "https://elly.town/feed.xml" "https://elly.town/"
    feeds_dozens
    feedx "kirch" "https://jkirchartz.com/feed.xml" "https://jkirchartz.com/"
    feedx "lipu pi jan Niko" "https://tilde.town/~nihilazo/index.xml" "https://tilde.town/~nihilazo/" ""
    feedx "chrismanbrown.gitlab.io (dozens)" "https://chrismanbrown.gitlab.io/rss.xml" "https://chrismanbrown.gitlab.io/" ""
    feedx "Benjamin Wil" "https://benjaminwil.info/feed.xml" "https://benjaminwil.info/" ""
    feedx "(lambda (x) (create x))" "http://lambdacreate.com/static/feed.rss" "http://lambdacreate.com/" ""
    feedx "m455.casa" "https://m455.casa/feed.rss" "https://m455.casa/" ""
    feedx "Oatmeal" "https://eli.li/feed.rss" "https://eli.li/" ""
    feedx "RSRSSS" "https://envs.net/~lucidiot/rsrsss/feed.xml" "https://envs.net/~lucidiot/rsrsss/" ""
    feedx "Tomasino Blog" "https://blog.tomasino.org/index.xml" "https://blog.tomasino.org/" ""
    feedx "Tomasino Labs" "https://labs.tomasino.org/index.xml" "https://labs.tomasino.org/" ""
    feedx "Will's Blog" "https://wflewis.com/feed.xml" "https://wflewis.com/" ""
    feedx "Rick Carlino's Blog" "https://rickcarlino.com/rss/feed.rss" "https://rickcarlino.com/rss/" ""
    feedx "Causal Agency" "https://text.causal.agency/feed.atom" "https://text.causal.agency/" ""
    feedx "Benoit Joly" "https://blog.benoitj.ca/posts/index.xml" "https://blog.benoitj.ca/posts/" ""
    feedx "linkbudz" "https://linkbudz.m455.casa/feed.rss" "https://linkbudz.m455.casa/" ""
    feedx "Alex Schroeder" "https://alexschroeder.ch/wiki/feed/full/" "https://alexschroeder.ch/" ""
    feedx "Björn Wärmedal" "https://warmedal.se/~bjorn/atom.xml" "https://warmedal.se/~bjorn/" ""
    feedx "a rickety bridge of impossible crossing" "https://bluelander.bearblog.dev/feed/" "https://bluelander.bearblog.dev/" ""
    feedx "javapool updates" "https://tilde.town/~m455/javapool.rss" "" ""
    feedx "coolguy.website" "https://coolguy.website/rss/index.xml" "https://coolguy.website/"
    feedx "brainshit" "https://brainshit.fr/rss" "https://brainshit.fr"
    feedx 'Idiomdrottning' 'https://idiomdrottning.org/blog' "https://idiomdrottning.org"
    # "Rick Carlino's Blog" used to be listed a second time here with the
    # same name and URL; the entry above (which has a base URL) is kept.
    feedx "Rick Carlino's Recent News Feed" "https://news.rickcarlino.com/rss.rss"
}

feeds_people() {
    feedx "zserge" "https://zserge.com/rss.xml" "https://zserge.com/"
    feedx "nerdypepper's μblog" "https://peppe.rs/index.xml" "https://peppe.rs/"
    feedx "unix sheikh" "https://unixsheikh.com/feed.rss" "https://unixsheikh.com/"
    feedx "artemis.sh" "https://artemis.sh/feed.xml" "https://artemis.sh/"
    feedx "ajroach" "http://ajroach42.com/feed.xml" "http://ajroach42.com/" ""
    feedx "rachelbythebay" "https://rachelbythebay.com/w/atom.xml" "https://rachelbythebay.com/" ""
    feedx "Codemadness" "https://codemadness.org/atom.xml"
    feedx "Ben Hoyt's technical writing" "https://benhoyt.com/writings/rss.xml" "https://benhoyt.com/writings/"
    feedx "Tom Van Winkle's Return to Gaming" "https://lichvanwinkle.blogspot.com/feeds/posts/default" "https://lichvanwinkle.blogspot.com/"
    feedx "Chris Wellons" "https://nullprogram.com/feed/" "https://nullprogram.com/"
    feedx "maya.land" "https://maya.land/feed.xml" "https://maya.land/"
    feedx "Steve Losh" "https://stevelosh.com/rss.xml" "https://stevelosh.com/"
    feedx "Atomized (Ian Eure)" "http://atomized.org/rss.xml" "http://atomized.org/"
}

feeds_news() {
    feedx "lobste.rs" "https://lobste.rs/rss" "https://lobste.rs/" ""
    feedx "tilde news" "https://tilde.news/rss" "https://tilde.news/" ""
    feedx "Tildes Atom feed" "https://tildes.net/topics.atom" "https://tildes.net/" ""
    feedx "NPR" "https://feeds.npr.org/1001/rss.xml" "https://text.npr.org/" ""
    feeds_local
}

feeds_local() {
    feedx "Louisiana Considered" "https://omny.fm/shows/louisiana-considered/playlists/podcast.rss" "https://www.wrkf.org/show/louisiana-considered"
}

feeds_smolweb() {
    feedx "~town friday postcard" "https://tilde.town/~lucidiot/fridaypostcard.xml" ""
    feedx "Cosmic Voyage" "https://cosmic.voyage/rss.xml" "https://cosmic.voyage/" ""
    feedx "plan.cat" "https://plan.cat/rss" "https://plan.cat/" ""
    feedx "The Plain Text Project" "https://plaintextproject.online/feed.xml" "https://plaintextproject.online/"
}

feeds_comics() {
    feedx "webcomic name" "https://webcomicname.com/rss" "https://webcomicname.com/"
    feedx "Sarah's Scribbles" "https://sarahcandersen.com/rss" "https://sarahcandersen.com/"
    feedx "Foxes in Love" "https://foxes-in-love.tumblr.com/rss" "https://foxes-in-love.tumblr.com/"
    feedx "Elder Cactus" "https://www.eldercactus.com/rss.xml" "https://www.eldercactus.com/"
    feedx "COOELST CAT COMCIX" "https://coolmxmuffin.tumblr.com/rss" "https://coolmxmuffin.tumblr.com/"
    feedx "Cat and Girl" "https://catandgirl.com/feed/" "https://catandgirl.com/" ""
    feedx "Dinosaur Comics!" "https://qwantz.com/rssfeed.php" "https://qwantz.com/" ""
    feedx "False Knees" "https://falseknees.tumblr.com/rss" "https://falseknees.tumblr.com/" ""
    feedx "Saturday Morning Breakfast Cereal" "https://www.smbc-comics.com/comic/rss" "https://www.smbc-comics.com/" ""
    feedx "xkcd" "https://xkcd.com/atom.xml" "https://xkcd.com/" ""
}

feeds_misc() {
    feedx "Word of the Day" "https://www.merriam-webster.com/wotd/feed/rss2" "https://www.merriam-webster.com/word-of-the-day/"
    feedx "Crystalverse" "https://crystalverse.com/feed/" "https://crystalverse.com/" ""
    feedx "Hetzner" "https://status.hetzner.com/en.atom" "https://status.hetzner.com/" ""
    feedx "LOW-TECH MAGAZINE" "https://feeds2.feedburner.com/typepad/krisdedecker/lowtechmagazineenglish" "https://www.lowtechmagazine.com/" ""
    feedx "500-mile email" "https://500mile.email/feed.xml" "https://500mile.email/"
    feedx "computers are bad" "https://computer.rip/rss.xml" "https://computer.rip/"
    feedx "Eggs Unlimited (5)" "https://eggs.call-cc.org/rss-5.xml"
}

# Not referenced from feeds() at the moment.
feeds_sotw() {
    feedx "noa" "http://noa.gaiwan.org/sotw/feed.xml" "http://noa.gaiwan.org/"
    feedx "agafnd" "https://tilde.town/~agafnd/sotw/sotw.xml" "https://tilde.town/~agafnd/sotw/"
}

feeds_me() {
    feedx "acdw.casa" "https://acdw.casa/feed.xml"
}

### Feed helpers ################################################

feedx() { # feedx [OPTIONS] FEED_ARGS...
    ## Wrapper around `feed' to allow for more metadata
    # Options:
    #   -t TAGS   comma-separated list of tags
    #
    # Reset the option state on every call: without this, the tags and the
    # getopts index of a previous call (e.g. from yt_feed) leak into the
    # next one, mis-tagging the feed and shifting away its name and URL.
    OPTIND=1
    tags=""
    while getopts t: opt; do
        case "$opt" in
        t) tags="$OPTARG" ;; # tags should be a comma-separated list
        *) ;;
        esac
    done
    shift $((OPTIND - 1))
    # `feed' (well, really `_feed') expects only 4 arguments, so we can pass
    # more after that.
    feed "$1" "$2" "$3" "$4" \
        "$tags"
}

yt_feed() { # yt_feed TITLE CHANNEL_ID
    # Subscribe to a YouTube channel feed, tagged `yt'.
    feedx -t yt "$1 [yt]" \
        "https://www.youtube.com/feeds/videos.xml?channel_id=$2" \
        "https://youtube.com/channel/$2"
}

### Remove feeds ################################################

# Feeds that are no longer subscribed; not referenced from feeds().
unsubscribe_feeds() {
    feedx "r/Worldnews" "https://www.reddit.com/r/worldnews/.rss"
    feedx "Web3 is going just great" "https://web3isgoinggreat.com/feed.xml" "https://web3isgoinggreat.com/"
    feedx "Hacker News" "https://news.ycombinator.com/rss" "https://news.ycombinator.com/" ""
    feedx "uxn" "https://git.sr.ht/~rabbits/uxn/log/main/rss.xml"
    feedx "uxnfth" "https://git.sr.ht/~binarycat/uxnfth/log/front/rss.xml"
    feedx "worgle" "https://git.sr.ht/~pbatch/worgle/log/master/rss.xml"
    yt_feed "Jelle's Marble Runs" "UCYJdpnjuSWVOLgGT9fIzL0g"
    yt_feed "LOOK MUM NO COMPUTER" "UCafxR2HWJRmMfSdyZXvZMTw"
    yt_feed "Ska Tune Network" "UCji2l5wcs6GoYJY1GgG_slQ"
    yt_feed "Thought Slime" "UCrr7y8rEXb7_RiVniwvzk9w"
    yt_feed "STRANGE ÆONS" "UCrq3JYirgV-BLluzTF6X_7A"
    yt_feed "NileRed" "UCFhXFikryT4aFcLkLw2LBLA"
    yt_feed "Cathode Ray Dude" "UCXnNibvR_YIdyPs8PZIBoEw"
    feedx "Kill James Bond!" "https://feed.podbean.com/killjamesbondpod/feed.xml"
    feedx "Jo's Boys" "https://anchor.fm/s/840fb444/podcast/rss"
    feedx "Boonta Vista" "http://feeds.soundcloud.com/users/soundcloud:users:307723090/sounds.rss"
    feedx "Yesterday's Print" "https://yesterdaysprint.tumblr.com/rss"
    feedx "Threatening Music Notation" "https://nitter.kavin.rocks/ThreatNotation/rss"
    feedx "arstechnica features" "https://feeds.arstechnica.com/arstechnica/features"
    feedx "ars ludi" "https://arsludi.lamemage.com/index.php/feed/"
    feedx "Carlos Carrasco" "https://carloscarrasco.com/index.xml"
    feedx "dracula daily" "https://draculadaily.substack.com/feed"
    feedx "DRIVE™: The SciFi Comic, by Dave Kellett" "http://cdn.drivecomic.com/rss.xml"
    feedx "EcoClipper" "https://ecoclipper.org/feed/"
    feedx "esolangs wiki" "https://esolangs.org/w/api.php?hidebots=1&urlversion=1&days=7&limit=50&action=feedrecentchanges&feedformat=atom"
    feedx "Flashing Palely in the Margins" "https://www.inthemargins.ca/feed.rss"
    feedx "Gamestudies.org" "http://gamestudies.org/rss.php"
    feedx "gbadev.org" "https://rss.gbadev.org/feed.php"
    feedx "handmade.network" "https://handmade.network/atom"
    feedx "Indigenous Environmental Network" "https://www.ienearth.org/feed/"
    feedx "milofultz" "https://milofultz.com/atom.xml"
    feedx "N O D E" "https://n-o-d-e.net/rss/rss.xml"
    feedx "bill mill" "https://billmill.org/Atom"
    feedx "nullrouted.space" "https://nullrouted.space/feed/"
    feedx "orllewin" "https://orllewin.uk/category/blog/feed/"
    feedx "Pinboard (unread bookmarks for eli-oat)" "https://feeds.pinboard.in/rss/u:eli-oat/"
    feedx "Rock Paper Shotgun" "https://www.rockpapershotgun.com/feed"
}

### Filter ######################################################

filter() { # filter(name)
    # Reads tab-separated items on stdin and writes the cleaned-up items to
    # stdout. A per-feed fix-up is selected by feed name, then every feed
    # goes through the shared filter_* stages.
    case "$1" in # Filter items based on feed name.
    *'Hacker News'*)
        # Hacker news is stupid and doesn't include IDs, and
        # instead includes a comments tag in its body.
        # Use the first href found in the content (field 4) as the ID
        # (field 6); the ID is left alone when no href is present.
        awk 'BEGIN { FS = "\t"; OFS = "\t" }
        {
            if (match($4, /href="[^"]+"/))
                $6 = substr($4, RSTART + 6, RLENGTH - 7)
            print
        }'
        ;;
    *'Claire Saffitz'*)
        sed 's@ | Dessert Person@@'
        ;;
    *Computerphile*)
        sed 's@ - Computerphile@@'
        ;;
    *NPR*)
        sed 's@www\.npr\.org@text.npr.org@'
        ;;
    r_* | r/*)
        # Reddit doesn't include a link as its item IDs, but instead
        # something of the form `t3_xxxxx', where xxxxx can be glommed
        # into a URL to get to the comments.
        # The item link (field 3) is replaced by the target of the
        # "[link]" anchor in the content when there is one.
        gawk -v SUBREDDIT="$1" -v ALT=i.reddit.com '
        BEGIN { FS = "\t"; OFS = "\t"; sub("^r_", "r/", SUBREDDIT) }
        {
            $6 = "https://" ALT "/" SUBREDDIT "/comments/" substr($6, 4)
            if (match($4, /<a href="([^"]*)">\[link\]<\/a>/, ln))
                $3 = ln[1]
            gsub(/www\.reddit\.com/, ALT, $0)
            print
        }'
        ;;
    'Saturday Morning Breakfast Cereal')
        sed 's@Saturday Morning Breakfast Cereal - @@'
        ;;
    *)
        cat
        ;;
    esac |
        filter_add_empties |
        filter_html_entities |
        filter_filter_links
}

filter_add_empties() {
    # Give untitled items the title "[empty]" and always emit 9 fields.
    # The title is compared against the empty string so that a title that
    # is literally "0" is kept.
    awk 'BEGIN { FS = "\t"; OFS = FS }
    {
        if ($2 == "") $2 = "[empty]"
        print $1, $2, $3, $4, $5, $6, $7, $8, $9
    }'
}

filter_embed_youtube() {
    # replace youtube links with embed links
    sed 's@www\.youtube\.com/watch?v=@www.youtube.com/embed/@g'
}

filter_filter_links() {
    # shorten feedburner links and strip tracking parameters and pixels
    awk 'BEGIN { FS = "\t"; OFS = "\t" }
    function filterlink(s) {
        # protocol must start with http, https or gopher.
        if (match(s, /^(http|https|gopher):\/\//) == 0) {
            return ""
        }

        # shorten feedburner links; cut the argument itself so that this
        # also works for the enclosure and not only for the link field.
        if (match(s, /^(http|https):\/\/[^\/]+\/~r\/.*\/~3\/[^\/]+\//)) {
            s = substr(s, RSTART, RLENGTH)
        }

        # strip tracking parameters
        # urchin, facebook, piwik, webtrekk and generic.
        gsub(/\?(ad|campaign|fbclid|pk|tm|utm|wt)_([^&]+)/, "?", s)
        gsub(/&(ad|campaign|fbclid|pk|tm|utm|wt)_([^&]+)/, "", s)

        gsub(/\?&/, "?", s)
        gsub(/[?&]+$/, "", s)

        return s
    }
    {
        $3 = filterlink($3) # link
        $8 = filterlink($8) # enclosure

        # try to remove tracking pixels: <img/> tags with 1px width or height.
        gsub("<img[^>]*(width|height)[[:space:]]*=[[:space:]]*[\"\047 ]?1[\"\047 ]?[^0-9>]+[^>]*>", "", $4)

        print $0
    }'
}

filter_html_entities() {
    # convert HTML entities into dumb counterparts
    # (and weird shit to normal shit, too)
    # \047 is the single quote character.
    awk '{
        gsub(/&quot;/, "\"")
        gsub(/&#34;/, "\"")
        gsub(/&#39;/, "\047")
        gsub(/&apos;/, "\047")
        gsub(/’|&rsquo;|&#8217;/, "\047")
        gsub(/&amp;/, "\\&") # MUST BE LAST!
        print
    }'
}

# Fetch #########################################################

fetch() { # fetch(name, url, feedfile)
    # Writes the feed at URL to stdout. When NOFETCH is set to `true',
    # nothing is downloaded and the function returns successfully.
    if "${NOFETCH:-false}"; then
        return 0
    fi
    # The header value must not carry quotes of its own, otherwise they
    # are sent to the server as part of the header.
    curl -s -L \
        --max-redirs 3 \
        --header "User-Agent: $USER_AGENT" \
        --fail \
        --max-time 15 \
        "$2"
}

# Merge ########################################################

merge() { # merge(name, oldfile, newfile)
    # Writes the merged, de-duplicated items of both files to stdout.
    # Fields are tab-separated; the separator is generated with printf so
    # that it cannot silently turn into a space when the file is edited.
    case "$1" in
    *'Tildes'* | *r_* | r/*)
        # Tildes changes its IDs and titles fairly regularly, which is really
        # annoying and adds false doubles. So here, remove the last
        # element of the path name (the title), and sort ONLY on
        # TIMESTAMP.
        #
        # Reddit also sometimes does weird things, so let's just use
        # timestamps for that too.
        #
        # cat is deliberate: a missing old file only loses that input
        # instead of making sort give up.
        cat "$2" "$3" | sort -t "$(printf '\t')" -k1,1 -u
        ;;
    *)
        # Otherwise, sort on ID and URL. This minimizes the risk of
        # false doubles.
        sort -t "$(printf '\t')" -u -k6,6 -k3,3 "$2" "$3" 2>/dev/null
        ;;
    esac
}

# Local Variables:
# truncate-lines: t
# End: