summary refs log tree commit diff stats
diff options
context:
space:
mode:
authorCase Duckworth2024-05-02 22:55:16 -0500
committerCase Duckworth2024-05-02 22:55:16 -0500
commita5b4863d9702a13e96665d118f9b974bb59ae7d6 (patch)
tree874c396bd745e3569c6d8c49e54a95d213eac238
downloadplanet-a5b4863d9702a13e96665d118f9b974bb59ae7d6.tar.gz
planet-a5b4863d9702a13e96665d118f9b974bb59ae7d6.zip
First commit main
-rw-r--r--.gitignore6
-rw-r--r--feed.scm50
-rw-r--r--ingest.scm154
-rw-r--r--ok16
-rw-r--r--planet.egg19
-rw-r--r--util.scm22
6 files changed, 267 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..792343d --- /dev/null +++ b/.gitignore
@@ -0,0 +1,6 @@
1*.build.sh
2*.install.sh
3*.so
4*.link
5*.o
6*.import.scm
diff --git a/feed.scm b/feed.scm new file mode 100644 index 0000000..dae8e2e --- /dev/null +++ b/feed.scm
@@ -0,0 +1,50 @@
1;;; (planet feed) --- internal feed representation
2
3(declare (module (planet feed))
4 (import scheme (chicken base)
5 (chicken time posix)
6 uri-common)
7 (export feed make-feed feed?
8 feed-url feed-title feed-updated feed-meta feed-items)
9 (export item make-item item?
10 item-link item-title item-updated item-meta item-content)
11 (export attachment make-attachment attachment?
12 attachment-url attachment-size attachment-mimetype))
13
14(define-record-type feed
15 (make-feed url
16 title
17 updated
18 meta
19 items)
20 feed?
21 (url feed-url (setter feed-url)) ; URI
22 (title feed-title (setter feed-title)) ; String
23 (updated feed-updated (setter feed-updated)) ; Date
24 (meta feed-meta (setter feed-meta)) ; Alist: String -> String (?)
25 (items feed-items (setter feed-items)) ; Items
26 )
27
28(define-record-type item
29 (make-item link
30 title
31 updated
32 meta
33 content
34 attachments)
35 item?
36 (link item-link (setter item-link)) ; URI
37 (title item-title (setter item-title)) ; String
38 (updated item-updated (setter item-updated)) ; Date
39 (meta item-meta (setter item-meta)) ; Alist: String -> String
40 (content item-content (setter item-content)) ; String
41 (attachments item-attachments (setter item-attachments)) ; List: Attachment*
42 )
43
44(define-record-type attachment
45 ;; Basically RSS <enclosure>
46 (make-attachment url size mimetype)
47 attachment?
48 (url attachment-url (setter attachment-url))
49 (size attachment-size (setter attachment-size))
50 (mimetype attachment-mimetype (setter attachment-mimetype)))
diff --git a/ingest.scm b/ingest.scm new file mode 100644 index 0000000..3b65c75 --- /dev/null +++ b/ingest.scm
@@ -0,0 +1,154 @@
1;;; (planet ingest) --- injest feeds
2
3(declare (module (planet ingest))
4 (import scheme (chicken base)
5 (chicken io)
6 (chicken port)
7 (chicken time posix)
8 (planet feed)
9 (planet util)
10 http-client
11 uri-common
12 rss
13 rfc3339
14 sxpath
15 (prefix atom atom:)))
16
17;;; Public API
18
19(define-public (download-and-ingest-feed url)
20 (ingest-feed (download-feed url) url))
21
22(define-public (ingest-feed fd #!optional url)
23 (cond
24 ((rss->feed fd) => (cut ingest/rss <> url))
25 ((atom->feed fd) => (cut ingest/atom <> url))
26 (else (error "Unknown feed type" fd))))
27
28(define-public (download-feed url)
29 (with-input-from-request url #f read-string))
30
31;;; RSS
32(define (rss->feed x)
33 (or (and (rss:feed? x) x)
34 (false-on-error
35 (lambda () (with-input-from-string x rss:read)))))
36
37(define (ingest/rss fd #!optional url)
38 (let ((chan (rss:feed-channel fd))
39 (items (map item/rss->item (rss:feed-items fd))))
40 (make-feed url ; url object -- xxx; brittle
41 (rss:item-title chan)
42 (rfc822->rfc3339 (rss:item-attribute chan 'lastBuildDate))
43 (rss:item-attributes chan)
44 items)))
45
46(define (item/rss->item it)
47 (assert (rss:item? it) "Not an rss:item" it)
48 (let ((attachments (map enclosure->attachment rss:item-enclosures)))
49 (make-item (rss:item-link it)
50 (rss:item-title it)
51 (rfc822->rfc3339 (rss:item-attribute it 'pubDate))
52 (rss:item-attributes it)
53 (rss:item-description it)
54 rss:item-enclosures
55 attachments)))
56
57(define (enclosure->attachment enc)
58 (make-attachment rss:enclosure-url
59 rss:enclosure-length
60 rss:enclosure-type))
61
62(define (rfc822->rfc3339 tm)
63 (let loop ((fmts
64 ;; these don't capture the timezone -- afaik, a limitation of
65 ;; strptime
66 '("%a, %d %b %y %H:%M:%S"
67 "%d %b %y %H:%M:%S"
68 "%a, %e %b %y %H:%M:%S"
69 "%e %b %y %H:%M:%S"
70 "%a, %d %b %y %H:%M"
71 "%d %b %y %H:%M"
72 "%a, %e %b %y %H:%M"
73 "%e %b %y %H:%M"
74 "%a, %d %b %Y %H:%M:%S"
75 "%d %b %Y %H:%M:%S"
76 "%a, %e %b %Y %H:%M:%S"
77 "%e %b %Y %H:%M:%S"
78 "%a, %d %b %Y %H:%M"
79 "%d %b %Y %H:%M"
80 "%a, %e %b %Y %H:%M"
81 "%e %b %Y %H:%M")))
82 (cond
83 ((null? fmts) tm) ; fallback
84 ((string->time tm (car fmts))
85 => (lambda (tm) (time->rfc3339 tm)))
86 (else (loop (cdr fmts))))))
87
88;;; Atom
89
90(define (atom->feed fd)
91 (or (and (atom:feed? fd) fd)
92 (false-on-error
93 (lambda () (call-with-input-string fd atom:read-atom-feed)))))
94
95(define (ingest/atom fd #!optional url)
96 (let ((items (map item/atom->item (atom:feed-entries fd))))
97 (make-feed url
98 (atom:feed-title fd)
99 (string->rfc3339 (atom:feed-updated fd))
100 (map (lambda (pair) (cons (car pair) ((cdr pair) fd)))
101 ;; have to build meta
102 `((authors . ,atom:feed-authors)
103 (categories . ,atom:feed-categories)
104 (contributors . ,atom:feed-contributors)
105 (generator . ,atom:feed-generator)
106 (icon . ,atom:feed-icon)
107 (id . ,atom:feed-id)
108 (links . ,atom:feed-links)
109 (logo . ,atom:feed-logo)
110 (rights . ,atom:feed-rights)
111 (subtitle . ,atom:feed-subtitle)))
112 items)))
113
114(define (item/atom->item it)
115 (assert (atom:entry? it) "Not an Atom entry" it)
116 (make-item (item-link/atom it)
117 (atom:entry-title it)
118 (atom:entry-updated it)
119 (map (lambda (pair) (cons (car pair) ((cdr pair) it)))
120 `((authors . ,atom:entry-authors)
121 (categories . ,atom:entry-categories)
122 (content . ,atom:entry-content)
123 (contributors . ,atom:entry-contributors)
124 (id . ,atom:entry-id)
125 (links . ,atom:entry-links)
126 (published . ,atom:entry-published)
127 (rights . ,atom:entry-rights)
128 (source . ,atom:entry-source)
129 (summary . ,atom:entry-summary)))
130 (item-content/atom it)
131 #f ; afaik atom doesn't do enclosures
132 ))
133
134(define (item-link/atom it)
135 ;; Find the <link rel="alternate"> of IT, or return the first <link>
136 (let ((alt ((sxpath '(// (atom:link (@ rel (equal? "alternate")))))
137 it)))
138 (if (null? alt)
139 (car (atom:entry-links it))
140 (car alt))))
141
142(define (item-content/atom it)
143 ;; Return entry-content or entry-summary if content is external or binary
144 (let ((c (atom:entry-content it)))
145 (case (atom:content-kind c)
146 ((text html textual) (atom:content-text c))
147 ((xhtml) (atom:content-xhtml c))
148 ((xml) (atom:content-xml c))
149 ((binary external)
150 (let ((sum (atom:entry-summary it)))
151 (case (atom:summary-type sum)
152 ((xhtml) (atom:summary-xhtml sum))
153 (else (atom:summary-text sum)))))
154 (else (error "Bad content type for item" it)))))
diff --git a/ok b/ok new file mode 100644 index 0000000..43a98a2 --- /dev/null +++ b/ok
@@ -0,0 +1,16 @@
1# planet! -*- sh -*-
2
3build(){ # build the library/whatever
4 ok chicken-install -n
5}
6
7clean(){ # clean up ignored files
8 ignore=.gitignore
9 test -f "$ignore" || return 1
10 while read -r glob
11 do find . -iname "$glob" |
12 while read -r file
13 do ok rm "$file"
14 done
15 done < "$ignore"
16}
diff --git a/planet.egg b/planet.egg new file mode 100644 index 0000000..8c1616e --- /dev/null +++ b/planet.egg
@@ -0,0 +1,19 @@
1((author "Case Duckworth")
2 (synopsis "A planet!")
3 (license "BSD3")
4 (dependencies module-declarations utf8
5 http-client openssl
6 uri-common
7 rss
8 atom)
9 (component-options
10 (csc-options -X utf8 -X module-declarations))
11 (components
12 (extension planet.feed
13 (source feed.scm))
14 (extension planet.ingest
15 (source ingest.scm)
16 (component-dependencies planet.feed
17 planet.util))
18 (extension planet.util
19 (source util.scm))))
diff --git a/util.scm b/util.scm new file mode 100644 index 0000000..0a8906c --- /dev/null +++ b/util.scm
@@ -0,0 +1,22 @@
1(declare (module (planet util))
2 (import scheme
3 (chicken base)
4 (chicken condition))
5 (export define-public))
6
7(define-syntax define-public
8 (syntax-rules ()
9 ((define-public (name . arg) forms ...)
10 (begin (export name)
11 (define (name . arg) forms ...)))
12 ((define-public (name args ...) forms ...)
13 (begin (export name)
14 (define (name args ...) forms ...)))
15 ((define-public name value)
16 (begin (export name)
17 (define name value)))))
18
19(define-public (false-on-error thunk)
20 (call/cc (lambda (k)
21 (with-exception-handler (lambda (x) (k #f))
22 thunk))))