diff options
author | Case Duckworth | 2024-05-02 22:55:16 -0500 |
---|---|---|
committer | Case Duckworth | 2024-05-02 22:55:16 -0500 |
commit | a5b4863d9702a13e96665d118f9b974bb59ae7d6 (patch) | |
tree | 874c396bd745e3569c6d8c49e54a95d213eac238 | |
download | planet-main.tar.gz planet-main.zip |
First commit main
-rw-r--r-- | .gitignore | 6 | ||||
-rw-r--r-- | feed.scm | 50 | ||||
-rw-r--r-- | ingest.scm | 154 | ||||
-rw-r--r-- | ok | 16 | ||||
-rw-r--r-- | planet.egg | 19 | ||||
-rw-r--r-- | util.scm | 22 |
6 files changed, 267 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..792343d --- /dev/null +++ b/.gitignore | |||
@@ -0,0 +1,6 @@ | |||
1 | *.build.sh | ||
2 | *.install.sh | ||
3 | *.so | ||
4 | *.link | ||
5 | *.o | ||
6 | *.import.scm | ||
diff --git a/feed.scm b/feed.scm new file mode 100644 index 0000000..dae8e2e --- /dev/null +++ b/feed.scm | |||
@@ -0,0 +1,50 @@ | |||
1 | ;;; (planet feed) --- internal feed representation | ||
2 | |||
3 | (declare (module (planet feed)) | ||
4 | (import scheme (chicken base) | ||
5 | (chicken time posix) | ||
6 | uri-common) | ||
7 | (export feed make-feed feed? | ||
8 | feed-url feed-title feed-updated feed-meta feed-items) | ||
9 | (export item make-item item? | ||
10 | item-link item-title item-updated item-meta item-content) | ||
11 | (export attachment make-attachment attachment? | ||
12 | attachment-url attachment-size attachment-mimetype)) | ||
13 | |||
14 | (define-record-type feed | ||
15 | (make-feed url | ||
16 | title | ||
17 | updated | ||
18 | meta | ||
19 | items) | ||
20 | feed? | ||
21 | (url feed-url (setter feed-url)) ; URI | ||
22 | (title feed-title (setter feed-title)) ; String | ||
23 | (updated feed-updated (setter feed-updated)) ; Date | ||
24 | (meta feed-meta (setter feed-meta)) ; Alist: String -> String (?) | ||
25 | (items feed-items (setter feed-items)) ; Items | ||
26 | ) | ||
27 | |||
28 | (define-record-type item | ||
29 | (make-item link | ||
30 | title | ||
31 | updated | ||
32 | meta | ||
33 | content | ||
34 | attachments) | ||
35 | item? | ||
36 | (link item-link (setter item-link)) ; URI | ||
37 | (title item-title (setter item-title)) ; String | ||
38 | (updated item-updated (setter item-updated)) ; Date | ||
39 | (meta item-meta (setter item-meta)) ; Alist: String -> String | ||
40 | (content item-content (setter item-content)) ; String | ||
41 | (attachments item-attachments (setter item-attachments)) ; List: Attachment* | ||
42 | ) | ||
43 | |||
44 | (define-record-type attachment | ||
45 | ;; Basically RSS <enclosure> | ||
46 | (make-attachment url size mimetype) | ||
47 | attachment? | ||
48 | (url attachment-url (setter attachment-url)) | ||
49 | (size attachment-size (setter attachment-size)) | ||
50 | (mimetype attachment-mimetype (setter attachment-mimetype))) | ||
diff --git a/ingest.scm b/ingest.scm new file mode 100644 index 0000000..3b65c75 --- /dev/null +++ b/ingest.scm | |||
@@ -0,0 +1,154 @@ | |||
1 | ;;; (planet ingest) --- injest feeds | ||
2 | |||
3 | (declare (module (planet ingest)) | ||
4 | (import scheme (chicken base) | ||
5 | (chicken io) | ||
6 | (chicken port) | ||
7 | (chicken time posix) | ||
8 | (planet feed) | ||
9 | (planet util) | ||
10 | http-client | ||
11 | uri-common | ||
12 | rss | ||
13 | rfc3339 | ||
14 | sxpath | ||
15 | (prefix atom atom:))) | ||
16 | |||
17 | ;;; Public API | ||
18 | |||
19 | (define-public (download-and-ingest-feed url) | ||
20 | (ingest-feed (download-feed url) url)) | ||
21 | |||
22 | (define-public (ingest-feed fd #!optional url) | ||
23 | (cond | ||
24 | ((rss->feed fd) => (cut ingest/rss <> url)) | ||
25 | ((atom->feed fd) => (cut ingest/atom <> url)) | ||
26 | (else (error "Unknown feed type" fd)))) | ||
27 | |||
28 | (define-public (download-feed url) | ||
29 | (with-input-from-request url #f read-string)) | ||
30 | |||
31 | ;;; RSS | ||
32 | (define (rss->feed x) | ||
33 | (or (and (rss:feed? x) x) | ||
34 | (false-on-error | ||
35 | (lambda () (with-input-from-string x rss:read))))) | ||
36 | |||
37 | (define (ingest/rss fd #!optional url) | ||
38 | (let ((chan (rss:feed-channel fd)) | ||
39 | (items (map item/rss->item (rss:feed-items fd)))) | ||
40 | (make-feed url ; url object -- xxx; brittle | ||
41 | (rss:item-title chan) | ||
42 | (rfc822->rfc3339 (rss:item-attribute chan 'lastBuildDate)) | ||
43 | (rss:item-attributes chan) | ||
44 | items))) | ||
45 | |||
46 | (define (item/rss->item it) | ||
47 | (assert (rss:item? it) "Not an rss:item" it) | ||
48 | (let ((attachments (map enclosure->attachment rss:item-enclosures))) | ||
49 | (make-item (rss:item-link it) | ||
50 | (rss:item-title it) | ||
51 | (rfc822->rfc3339 (rss:item-attribute it 'pubDate)) | ||
52 | (rss:item-attributes it) | ||
53 | (rss:item-description it) | ||
54 | rss:item-enclosures | ||
55 | attachments))) | ||
56 | |||
57 | (define (enclosure->attachment enc) | ||
58 | (make-attachment rss:enclosure-url | ||
59 | rss:enclosure-length | ||
60 | rss:enclosure-type)) | ||
61 | |||
62 | (define (rfc822->rfc3339 tm) | ||
63 | (let loop ((fmts | ||
64 | ;; these don't capture the timezone -- afaik, a limitation of | ||
65 | ;; strptime | ||
66 | '("%a, %d %b %y %H:%M:%S" | ||
67 | "%d %b %y %H:%M:%S" | ||
68 | "%a, %e %b %y %H:%M:%S" | ||
69 | "%e %b %y %H:%M:%S" | ||
70 | "%a, %d %b %y %H:%M" | ||
71 | "%d %b %y %H:%M" | ||
72 | "%a, %e %b %y %H:%M" | ||
73 | "%e %b %y %H:%M" | ||
74 | "%a, %d %b %Y %H:%M:%S" | ||
75 | "%d %b %Y %H:%M:%S" | ||
76 | "%a, %e %b %Y %H:%M:%S" | ||
77 | "%e %b %Y %H:%M:%S" | ||
78 | "%a, %d %b %Y %H:%M" | ||
79 | "%d %b %Y %H:%M" | ||
80 | "%a, %e %b %Y %H:%M" | ||
81 | "%e %b %Y %H:%M"))) | ||
82 | (cond | ||
83 | ((null? fmts) tm) ; fallback | ||
84 | ((string->time tm (car fmts)) | ||
85 | => (lambda (tm) (time->rfc3339 tm))) | ||
86 | (else (loop (cdr fmts)))))) | ||
87 | |||
88 | ;;; Atom | ||
89 | |||
90 | (define (atom->feed fd) | ||
91 | (or (and (atom:feed? fd) fd) | ||
92 | (false-on-error | ||
93 | (lambda () (call-with-input-string fd atom:read-atom-feed))))) | ||
94 | |||
95 | (define (ingest/atom fd #!optional url) | ||
96 | (let ((items (map item/atom->item (atom:feed-entries fd)))) | ||
97 | (make-feed url | ||
98 | (atom:feed-title fd) | ||
99 | (string->rfc3339 (atom:feed-updated fd)) | ||
100 | (map (lambda (pair) (cons (car pair) ((cdr pair) fd))) | ||
101 | ;; have to build meta | ||
102 | `((authors . ,atom:feed-authors) | ||
103 | (categories . ,atom:feed-categories) | ||
104 | (contributors . ,atom:feed-contributors) | ||
105 | (generator . ,atom:feed-generator) | ||
106 | (icon . ,atom:feed-icon) | ||
107 | (id . ,atom:feed-id) | ||
108 | (links . ,atom:feed-links) | ||
109 | (logo . ,atom:feed-logo) | ||
110 | (rights . ,atom:feed-rights) | ||
111 | (subtitle . ,atom:feed-subtitle))) | ||
112 | items))) | ||
113 | |||
114 | (define (item/atom->item it) | ||
115 | (assert (atom:entry? it) "Not an Atom entry" it) | ||
116 | (make-item (item-link/atom it) | ||
117 | (atom:entry-title it) | ||
118 | (atom:entry-updated it) | ||
119 | (map (lambda (pair) (cons (car pair) ((cdr pair) it))) | ||
120 | `((authors . ,atom:entry-authors) | ||
121 | (categories . ,atom:entry-categories) | ||
122 | (content . ,atom:entry-content) | ||
123 | (contributors . ,atom:entry-contributors) | ||
124 | (id . ,atom:entry-id) | ||
125 | (links . ,atom:entry-links) | ||
126 | (published . ,atom:entry-published) | ||
127 | (rights . ,atom:entry-rights) | ||
128 | (source . ,atom:entry-source) | ||
129 | (summary . ,atom:entry-summary))) | ||
130 | (item-content/atom it) | ||
131 | #f ; afaik atom doesn't do enclosures | ||
132 | )) | ||
133 | |||
134 | (define (item-link/atom it) | ||
135 | ;; Find the <link rel="alternate"> of IT, or return the first <link> | ||
136 | (let ((alt ((sxpath '(// (atom:link (@ rel (equal? "alternate"))))) | ||
137 | it))) | ||
138 | (if (null? alt) | ||
139 | (car (atom:entry-links it)) | ||
140 | (car alt)))) | ||
141 | |||
142 | (define (item-content/atom it) | ||
143 | ;; Return entry-content or entry-summary if content is external or binary | ||
144 | (let ((c (atom:entry-content it))) | ||
145 | (case (atom:content-kind c) | ||
146 | ((text html textual) (atom:content-text c)) | ||
147 | ((xhtml) (atom:content-xhtml c)) | ||
148 | ((xml) (atom:content-xml c)) | ||
149 | ((binary external) | ||
150 | (let ((sum (atom:entry-summary it))) | ||
151 | (case (atom:summary-type sum) | ||
152 | ((xhtml) (atom:summary-xhtml sum)) | ||
153 | (else (atom:summary-text sum))))) | ||
154 | (else (error "Bad content type for item" it))))) | ||
diff --git a/ok b/ok new file mode 100644 index 0000000..43a98a2 --- /dev/null +++ b/ok | |||
@@ -0,0 +1,16 @@ | |||
1 | # planet! -*- sh -*- | ||
2 | |||
3 | build(){ # build the library/whatever | ||
4 | ok chicken-install -n | ||
5 | } | ||
6 | |||
7 | clean(){ # clean up ignored files | ||
8 | ignore=.gitignore | ||
9 | test -f "$ignore" || return 1 | ||
10 | while read -r glob | ||
11 | do find . -iname "$glob" | | ||
12 | while read -r file | ||
13 | do ok rm "$file" | ||
14 | done | ||
15 | done < "$ignore" | ||
16 | } | ||
diff --git a/planet.egg b/planet.egg new file mode 100644 index 0000000..8c1616e --- /dev/null +++ b/planet.egg | |||
@@ -0,0 +1,19 @@ | |||
1 | ((author "Case Duckworth") | ||
2 | (synopsis "A planet!") | ||
3 | (license "BSD3") | ||
4 | (dependencies module-declarations utf8 | ||
5 | http-client openssl | ||
6 | uri-common | ||
7 | rss | ||
8 | atom) | ||
9 | (component-options | ||
10 | (csc-options -X utf8 -X module-declarations)) | ||
11 | (components | ||
12 | (extension planet.feed | ||
13 | (source feed.scm)) | ||
14 | (extension planet.ingest | ||
15 | (source ingest.scm) | ||
16 | (component-dependencies planet.feed | ||
17 | planet.util)) | ||
18 | (extension planet.util | ||
19 | (source util.scm)))) | ||
diff --git a/util.scm b/util.scm new file mode 100644 index 0000000..0a8906c --- /dev/null +++ b/util.scm | |||
@@ -0,0 +1,22 @@ | |||
1 | (declare (module (planet util)) | ||
2 | (import scheme | ||
3 | (chicken base) | ||
4 | (chicken condition)) | ||
5 | (export define-public)) | ||
6 | |||
7 | (define-syntax define-public | ||
8 | (syntax-rules () | ||
9 | ((define-public (name . arg) forms ...) | ||
10 | (begin (export name) | ||
11 | (define (name . arg) forms ...))) | ||
12 | ((define-public (name args ...) forms ...) | ||
13 | (begin (export name) | ||
14 | (define (name args ...) forms ...))) | ||
15 | ((define-public name value) | ||
16 | (begin (export name) | ||
17 | (define name value))))) | ||
18 | |||
19 | (define-public (false-on-error thunk) | ||
20 | (call/cc (lambda (k) | ||
21 | (with-exception-handler (lambda (x) (k #f)) | ||
22 | thunk)))) | ||