From a5b4863d9702a13e96665d118f9b974bb59ae7d6 Mon Sep 17 00:00:00 2001 From: Case Duckworth Date: Thu, 2 May 2024 22:55:16 -0500 Subject: First commit --- .gitignore | 6 +++ feed.scm | 50 ++++++++++++++++++++ ingest.scm | 154 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ ok | 16 +++++++ planet.egg | 19 ++++++++ util.scm | 22 +++++++++ 6 files changed, 267 insertions(+) create mode 100644 .gitignore create mode 100644 feed.scm create mode 100644 ingest.scm create mode 100644 ok create mode 100644 planet.egg create mode 100644 util.scm diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..792343d --- /dev/null +++ b/.gitignore @@ -0,0 +1,6 @@ +*.build.sh +*.install.sh +*.so +*.link +*.o +*.import.scm diff --git a/feed.scm b/feed.scm new file mode 100644 index 0000000..dae8e2e --- /dev/null +++ b/feed.scm @@ -0,0 +1,50 @@ +;;; (planet feed) --- internal feed representation + +(declare (module (planet feed)) + (import scheme (chicken base) + (chicken time posix) + uri-common) + (export feed make-feed feed? + feed-url feed-title feed-updated feed-meta feed-items) + (export item make-item item? + item-link item-title item-updated item-meta item-content) + (export attachment make-attachment attachment? + attachment-url attachment-size attachment-mimetype)) + +(define-record-type feed + (make-feed url + title + updated + meta + items) + feed? + (url feed-url (setter feed-url)) ; URI + (title feed-title (setter feed-title)) ; String + (updated feed-updated (setter feed-updated)) ; Date + (meta feed-meta (setter feed-meta)) ; Alist: String -> String (?) + (items feed-items (setter feed-items)) ; Items + ) + +(define-record-type item + (make-item link + title + updated + meta + content + attachments) + item? + (link item-link (setter item-link)) ; URI + (title item-title (setter item-title)) ; String + (updated item-updated (setter item-updated)) ; Date + (meta item-meta (setter item-meta)) ; Alist: String -> String + (content item-content (setter item-content)) ; String + (attachments item-attachments (setter item-attachments)) ; List: Attachment* + ) + +(define-record-type attachment + ;; Basically RSS + (make-attachment url size mimetype) + attachment? + (url attachment-url (setter attachment-url)) + (size attachment-size (setter attachment-size)) + (mimetype attachment-mimetype (setter attachment-mimetype))) diff --git a/ingest.scm b/ingest.scm new file mode 100644 index 0000000..3b65c75 --- /dev/null +++ b/ingest.scm @@ -0,0 +1,154 @@ +;;; (planet ingest) --- injest feeds + +(declare (module (planet ingest)) + (import scheme (chicken base) + (chicken io) + (chicken port) + (chicken time posix) + (planet feed) + (planet util) + http-client + uri-common + rss + rfc3339 + sxpath + (prefix atom atom:))) + +;;; Public API + +(define-public (download-and-ingest-feed url) + (ingest-feed (download-feed url) url)) + +(define-public (ingest-feed fd #!optional url) + (cond + ((rss->feed fd) => (cut ingest/rss <> url)) + ((atom->feed fd) => (cut ingest/atom <> url)) + (else (error "Unknown feed type" fd)))) + +(define-public (download-feed url) + (with-input-from-request url #f read-string)) + +;;; RSS +(define (rss->feed x) + (or (and (rss:feed? x) x) + (false-on-error + (lambda () (with-input-from-string x rss:read))))) + +(define (ingest/rss fd #!optional url) + (let ((chan (rss:feed-channel fd)) + (items (map item/rss->item (rss:feed-items fd)))) + (make-feed url ; url object -- xxx; brittle + (rss:item-title chan) + (rfc822->rfc3339 (rss:item-attribute chan 'lastBuildDate)) + (rss:item-attributes chan) + items))) + +(define (item/rss->item it) + (assert (rss:item? it) "Not an rss:item" it) + (let ((attachments (map enclosure->attachment rss:item-enclosures))) + (make-item (rss:item-link it) + (rss:item-title it) + (rfc822->rfc3339 (rss:item-attribute it 'pubDate)) + (rss:item-attributes it) + (rss:item-description it) + rss:item-enclosures + attachments))) + +(define (enclosure->attachment enc) + (make-attachment rss:enclosure-url + rss:enclosure-length + rss:enclosure-type)) + +(define (rfc822->rfc3339 tm) + (let loop ((fmts + ;; these don't capture the timezone -- afaik, a limitation of + ;; strptime + '("%a, %d %b %y %H:%M:%S" + "%d %b %y %H:%M:%S" + "%a, %e %b %y %H:%M:%S" + "%e %b %y %H:%M:%S" + "%a, %d %b %y %H:%M" + "%d %b %y %H:%M" + "%a, %e %b %y %H:%M" + "%e %b %y %H:%M" + "%a, %d %b %Y %H:%M:%S" + "%d %b %Y %H:%M:%S" + "%a, %e %b %Y %H:%M:%S" + "%e %b %Y %H:%M:%S" + "%a, %d %b %Y %H:%M" + "%d %b %Y %H:%M" + "%a, %e %b %Y %H:%M" + "%e %b %Y %H:%M"))) + (cond + ((null? fmts) tm) ; fallback + ((string->time tm (car fmts)) + => (lambda (tm) (time->rfc3339 tm))) + (else (loop (cdr fmts)))))) + +;;; Atom + +(define (atom->feed fd) + (or (and (atom:feed? fd) fd) + (false-on-error + (lambda () (call-with-input-string fd atom:read-atom-feed))))) + +(define (ingest/atom fd #!optional url) + (let ((items (map item/atom->item (atom:feed-entries fd)))) + (make-feed url + (atom:feed-title fd) + (string->rfc3339 (atom:feed-updated fd)) + (map (lambda (pair) (cons (car pair) ((cdr pair) fd))) + ;; have to build meta + `((authors . ,atom:feed-authors) + (categories . ,atom:feed-categories) + (contributors . ,atom:feed-contributors) + (generator . ,atom:feed-generator) + (icon . ,atom:feed-icon) + (id . ,atom:feed-id) + (links . ,atom:feed-links) + (logo . ,atom:feed-logo) + (rights . ,atom:feed-rights) + (subtitle . ,atom:feed-subtitle))) + items))) + +(define (item/atom->item it) + (assert (atom:entry? it) "Not an Atom entry" it) + (make-item (item-link/atom it) + (atom:entry-title it) + (atom:entry-updated it) + (map (lambda (pair) (cons (car pair) ((cdr pair) it))) + `((authors . ,atom:entry-authors) + (categories . ,atom:entry-categories) + (content . ,atom:entry-content) + (contributors . ,atom:entry-contributors) + (id . ,atom:entry-id) + (links . ,atom:entry-links) + (published . ,atom:entry-published) + (rights . ,atom:entry-rights) + (source . ,atom:entry-source) + (summary . ,atom:entry-summary))) + (item-content/atom it) + #f ; afaik atom doesn't do enclosures + )) + +(define (item-link/atom it) + ;; Find the of IT, or return the first + (let ((alt ((sxpath '(// (atom:link (@ rel (equal? "alternate"))))) + it))) + (if (null? alt) + (car (atom:entry-links it)) + (car alt)))) + +(define (item-content/atom it) + ;; Return entry-content or entry-summary if content is external or binary + (let ((c (atom:entry-content it))) + (case (atom:content-kind c) + ((text html textual) (atom:content-text c)) + ((xhtml) (atom:content-xhtml c)) + ((xml) (atom:content-xml c)) + ((binary external) + (let ((sum (atom:entry-summary it))) + (case (atom:summary-type sum) + ((xhtml) (atom:summary-xhtml sum)) + (else (atom:summary-text sum))))) + (else (error "Bad content type for item" it))))) diff --git a/ok b/ok new file mode 100644 index 0000000..43a98a2 --- /dev/null +++ b/ok @@ -0,0 +1,16 @@ +# planet! -*- sh -*- + +build(){ # build the library/whatever + ok chicken-install -n +} + +clean(){ # clean up ignored files + ignore=.gitignore + test -f "$ignore" || return 1 + while read -r glob + do find . -iname "$glob" | + while read -r file + do ok rm "$file" + done + done < "$ignore" +} diff --git a/planet.egg b/planet.egg new file mode 100644 index 0000000..8c1616e --- /dev/null +++ b/planet.egg @@ -0,0 +1,19 @@ +((author "Case Duckworth") + (synopsis "A planet!") + (license "BSD3") + (dependencies module-declarations utf8 + http-client openssl + uri-common + rss + atom) + (component-options + (csc-options -X utf8 -X module-declarations)) + (components + (extension planet.feed + (source feed.scm)) + (extension planet.ingest + (source ingest.scm) + (component-dependencies planet.feed + planet.util)) + (extension planet.util + (source util.scm)))) diff --git a/util.scm b/util.scm new file mode 100644 index 0000000..0a8906c --- /dev/null +++ b/util.scm @@ -0,0 +1,22 @@ +(declare (module (planet util)) + (import scheme + (chicken base) + (chicken condition)) + (export define-public)) + +(define-syntax define-public + (syntax-rules () + ((define-public (name . arg) forms ...) + (begin (export name) + (define (name . arg) forms ...))) + ((define-public (name args ...) forms ...) + (begin (export name) + (define (name args ...) forms ...))) + ((define-public name value) + (begin (export name) + (define name value))))) + +(define-public (false-on-error thunk) + (call/cc (lambda (k) + (with-exception-handler (lambda (x) (k #f)) + thunk)))) -- cgit 1.4.1-21-gabe81