diff options
author | Case Duckworth | 2023-07-07 23:59:04 -0500 |
---|---|---|
committer | Case Duckworth | 2023-07-07 23:59:04 -0500 |
commit | f1cf20ac8a05a8571deca7fcd1a5118f3fcd77fb (patch) | |
tree | 5d3efab6a82d23c0bb506d0785f73a58e362a92c | |
download | boudin-f1cf20ac8a05a8571deca7fcd1a5118f3fcd77fb.tar.gz boudin-f1cf20ac8a05a8571deca7fcd1a5118f3fcd77fb.zip |
Initial commit
-rwxr-xr-x | boudin.scm | 414 | ||||
-rw-r--r-- | test.html | 19 |
2 files changed, 433 insertions, 0 deletions
diff --git a/boudin.scm b/boudin.scm new file mode 100755 index 0000000..737cbf1 --- /dev/null +++ b/boudin.scm | |||
@@ -0,0 +1,414 @@ | |||
1 | #!/bin/sh | ||
2 | #| -*- scheme -*- | ||
3 | exec csi -R r7rs -s "$0" "$@" | ||
4 | boudin --- a little static site generator | ||
5 | |# | ||
6 | |||
7 | (import (chicken file) | ||
8 | (chicken file posix) | ||
9 | (chicken irregex) | ||
10 | (chicken pathname) | ||
11 | (chicken port) | ||
12 | (chicken process-context) | ||
13 | (chicken random) | ||
14 | (chicken string) | ||
15 | (chicken time posix) | ||
16 | (chicanery) | ||
17 | (atom) | ||
18 | (html-parser) | ||
19 | (srfi 37) | ||
20 | (srfi 152) | ||
21 | (sxpath)) | ||
22 | |||
23 | ;;; Transformations | ||
24 | ;; A static site generator can be thought of largely as two sets of | ||
25 | ;; transformations: one transforming given input content to output content, and | ||
26 | ;; another transforming source paths to destination paths. Since both, for my | ||
27 | ;; purposes, are strings, I have a generic function that can perform both | ||
28 | ;; transformations. | ||
29 | |||
30 | (define (transform str . procs) #| string (string -> string) ... -> string | ||
31 |   Thread STR through PROCS from left to right and return the final value. | ||
32 |   Each PROC receives the previous PROC's result (the first receives STR) | ||
33 |   and must map a string to a string.  With no PROCS, STR is returned. |# | ||
34 |   (let loop ((result str) (remaining procs)) | ||
35 |     (if (pair? remaining) (loop ((car remaining) result) (cdr remaining)) | ||
36 |         result))) | ||
37 | |||
38 | ;;; Path transformations | ||
39 | |||
40 | (define (indexify path) #| path -> path | ||
41 |   Turn PATH into a directory holding an index page: drop PATH's extension | ||
42 |   and append "/index.html", so "a/b.html" becomes "a/b/index.html". |# | ||
43 |   (let ((directory (pathname-strip-extension path)) | ||
44 |         (file "index") (extension "html")) | ||
45 |     (make-pathname directory file extension))) | ||
46 | |||
47 | (define (transform-path path outdir) #| path path -> path | ||
48 |   Map source PATH to its destination: normalize it, move it into OUTDIR, | ||
49 |   then point it at an index.html inside a directory named after the file. |# | ||
50 |   (define (relocate p) | ||
51 |     (pathname-replace-directory p outdir)) | ||
52 |   (transform path normalize-pathname relocate indexify)) | ||
53 | |||
54 | ;;; Content transformations | ||
55 | |||
56 | (define (split-paragraphs str) #| string -> (list string ...) | ||
57 |   Split STR into paragraphs. | ||
58 |   A paragraph is a contiguous run of non-empty lines; paragraphs are separated | ||
59 |   by one or more empty lines.  Every line kept in a paragraph retains a | ||
60 |   trailing "\n".  Runs of empty lines (including leading and trailing ones) | ||
61 |   are collapsed, so the result never contains an empty paragraph. |# | ||
62 |   (define (flush par acc) ; push the pending paragraph, if any, onto ACC | ||
63 |     (if (null? par) | ||
64 |         acc | ||
65 |         (cons (apply string-append (reverse par)) acc))) | ||
66 |   (let loop ((lines (string-split str "\n")) | ||
67 |              (par '())   ; lines of the current paragraph, newest first | ||
68 |              (acc '()))  ; finished paragraphs, newest first | ||
69 |     (cond | ||
70 |      ((null? lines)                 ; no more input: flush and finish | ||
71 |       (reverse (flush par acc))) | ||
72 |      ((equal? (car lines) "")       ; paragraph break | ||
73 |       (loop (cdr lines) '() (flush par acc))) | ||
74 |      (else                          ; ordinary line: extend the paragraph | ||
75 |       (loop (cdr lines) | ||
76 |             (cons (string-append (car lines) "\n") par) | ||
77 |             acc))))) | ||
78 | |||
79 | (define (wrap-paragraphs str) #| string -> string | ||
80 |   Wrap the naked paragraphs of STR in <p> tags and join the result. | ||
81 |   A 'naked' paragraph is one whose first character, after trimming leading | ||
82 |   whitespace, is not '<'.  Paragraphs that already start with markup are | ||
83 |   passed through as-is; paragraphs that are empty after trimming are dropped. |# | ||
84 |   (let loop ((pars (map string-trim (split-paragraphs str))) | ||
85 |              (acc '()))             ; output fragments, newest first | ||
86 |     (cond | ||
87 |      ((null? pars) | ||
88 |       (apply string-append (reverse acc))) | ||
89 |      ((zero? (string-length (car pars))) | ||
90 |       (loop (cdr pars) acc)) | ||
91 |      ;; char=? rather than eq?: R7RS leaves eq? on characters unspecified. | ||
92 |      ((char=? #\< (string-ref (car pars) 0)) | ||
93 |       (loop (cdr pars) | ||
94 |             (cons (car pars) acc))) | ||
95 |      (else | ||
96 |       (loop (cdr pars) | ||
97 |             (cons (string-append "<p>" (car pars) "</p>\n") | ||
98 |                   acc)))))) | ||
99 | |||
100 | (define (expand-string str) #| string -> string | ||
101 | Expand STR by passing it in a port to CHICKEN's #<# string interpolation. | ||
102 | Yes, this is as cursed as it sounds. | ||
103 | NOTE(review): the text is passed through `read' and `eval' below, so any #(...) or #{...} in STR runs as Scheme code; only pass trusted input. | ||
104 | To make it slightly less so, all # are duplicated to escape them, except for | ||
105 | those before ( and {. To escape /those/, double them. |# | ||
106 | (let* ((escaped (irregex-replace/all | ||
107 | '(or (: #\# #\# (look-ahead (or #\{ #\())) ; "##" before ( or {: stays "##" | ||
108 | (: #\# (look-ahead (~ #\{ #\())) ; "#" before any other character: doubled | ||
109 | (: #\# eos)) ; "#" at the very end of the string: doubled | ||
110 | str | ||
111 | "##")) | ||
112 | (delim (let loop ((attempt (number->string (pseudo-random-real)))) ; heredoc tag: a random number retried until it is not found in STR | ||
113 | (if (irregex-search attempt str) ; NOTE(review): ATTEMPT is given to irregex as a pattern, not a literal -- presumably only causes extra retries; confirm | ||
114 | (loop (number->string (pseudo-random-real))) | ||
115 | attempt))) | ||
116 | (template (make-concatenated-port ; "#<#" DELIM newline, the escaped text, newline DELIM newline | ||
117 | (open-input-string (string-append "#<#" delim "\n")) | ||
118 | (open-input-string escaped) | ||
119 | (open-input-string (string-append "\n" delim "\n")))) | ||
120 | (expanded (let ((x (open-output-string))) | ||
121 | (display (eval (read template) ; read one datum from the port and evaluate it | ||
122 | (interaction-environment)) | ||
123 | x) | ||
124 | (get-output-string x)))) | ||
125 | (irregex-replace/all '(: "#<unspecified>" ; strip "#<unspecified>" and any whitespace that follows it | ||
126 | (* whitespace)) | ||
127 | expanded | ||
128 | ""))) | ||
129 | |||
130 | (define (transform-content content) #| string -> string | ||
131 | Transform CONTENT according to boudin's needs. | ||
132 | This is the raw html, and will still need to be processed to extract metadata | ||
133 | and to be further wrapped in a template. |# | ||
134 | (transform content | ||
135 | expand-string | ||
136 | wrap-paragraphs)) | ||
137 | |||
138 | ;;; Pages | ||
139 | ;; A <page> is a record type that wraps the two transformations outlined above. | ||
140 | ;; It also includes the extracted metadata from the page for processing. | ||
141 | |||
142 | (define-record-type <page> ; field notes below describe what file->page stores | ||
143 | (make-page url meta source dest source-path dest-path) | ||
144 | page? | ||
145 | (url page-url (setter page-url)) ; directory part of the path transformed against (site-base-url) | ||
146 | (meta page-meta (setter page-meta)) ; result of extract-meta on DEST | ||
147 | (source page-source) ; text read from the source file (no setter) | ||
148 | (dest page-dest (setter page-dest)) ; sxml from html->sxml of the transformed content | ||
149 | (source-path page-source-path) ; the input file name (no setter) | ||
150 | (dest-path page-dest-path (setter page-dest-path))) ; path transformed against (output-directory) | ||
151 | |||
152 | (define (%read-port port) ; port -> string: read PORT in chunks, close it, return all text read | ||
153 | (let ((chunk-size 512)) ; characters requested per read-string call | ||
154 | (let loop ((next (read-string chunk-size port)) | ||
155 | (blank? #f) ; becomes #t once an empty chunk has been seen; never reset | ||
156 | (acc '())) ; chunks read so far, newest first | ||
157 | (cond | ||
158 | ((or (eof-object? next) ; stop at end of input ... | ||
159 | (and blank? (equal? next ""))) ; ... or at an empty chunk after an earlier empty one | ||
160 | (close-input-port port) ; NOTE: PORT is closed here even though the caller supplied it | ||
161 | (apply string-append (reverse acc))) | ||
162 | ((equal? next "") ; first empty chunk: remember it and keep reading | ||
163 | (loop (read-string chunk-size port) | ||
164 | #t | ||
165 | (cons next acc))) | ||
166 | (else | ||
167 | (loop (read-string chunk-size port) | ||
168 | blank? | ||
169 | (cons next acc))))))) | ||
170 | |||
171 | (define read-port ; [port] -> string: slurp PORT, or the current input port | ||
172 |   (case-lambda | ||
173 |     ((port) (%read-port port)) | ||
174 |     (() (%read-port (current-input-port))))) | ||
175 | |||
176 | (define (file->page file) #| string -> <page> | ||
177 |   Read FILE, transform its content and parse the result into an sxml tree. | ||
178 |   The returned <page> holds that tree along with the metadata extracted | ||
179 |   from it, the raw source text, and the source and destination paths. |# | ||
180 |   (let* ((raw (with-input-from-file file read-port)) | ||
181 |          (tree (html->sxml (transform-content raw))) | ||
182 |          (url (pathname-directory (transform-path file (site-base-url)))) | ||
183 |          (target (transform-path file (output-directory)))) | ||
184 |     (make-page url | ||
185 |                (extract-meta tree) | ||
186 |                raw tree | ||
187 |                file target))) | ||
188 | |||
189 | (define (extract-meta tree) #| sxml -> alist | ||
190 |   Extract metadata from every comment in TREE, at any nesting depth. | ||
191 |   Each comment line of the form "key: value" yields a (key . value) pair of | ||
192 |   trimmed strings; only the first ":" splits, so values may contain colons. | ||
193 |   Lines without a key (e.g. blank lines) are skipped.  Returns a single flat | ||
194 |   alist in document order. |# | ||
195 |   (define (line->pair line)          ; "key: value" -> (key . value) or #f | ||
196 |     (let* ((kv (string-split line ":")) | ||
197 |            (key (if (pair? kv) (string-trim-both (car kv)) ""))) | ||
198 |       (and (not (equal? key "")) | ||
199 |            (cons key | ||
200 |                  (string-trim-both (string-intersperse (cdr kv) ":")))))) | ||
201 |   (define (comment->pairs text acc)  ; push TEXT's pairs onto ACC | ||
202 |     (let loop ((lines (string-split text "\n")) (acc acc)) | ||
203 |       (if (null? lines) | ||
204 |           acc | ||
205 |           (let ((kv (line->pair (car lines)))) | ||
206 |             (loop (cdr lines) (if kv (cons kv acc) acc)))))) | ||
207 |   (define (walk tree acc)            ; ACC holds pairs, newest first | ||
208 |     (cond | ||
209 |      ((not (pair? tree)) acc) | ||
210 |      ((and (pair? (car tree)) (eq? (caar tree) '*COMMENT*)) | ||
211 |       (walk (cdr tree) (comment->pairs (cadar tree) acc))) | ||
212 |      ((pair? (car tree))             ; element: collect from its children | ||
213 |       (walk (cdr tree) (walk (car tree) acc))) | ||
214 |      (else (walk (cdr tree) acc)))) | ||
215 |   (reverse (walk tree '()))) | ||
216 | |||
217 | (define (meta-ref meta key default) #| alist string any -> any | ||
218 |   Look KEY up in META and return the associated value; when META has no | ||
219 |   entry for KEY, return DEFAULT instead.  DEFAULT is deliberately mandatory. |# | ||
220 |   (cond ((assoc key meta) => cdr) | ||
221 |         (else default))) | ||
222 | |||
223 | (define (page-meta-ref page key default) #| <page> string any -> any | ||
224 |   Return the value stored under KEY in PAGE's metadata alist, or DEFAULT | ||
225 |   when PAGE's metadata has no such key. | ||
226 |   |# | ||
227 |   (meta-ref (page-meta page) key default)) | ||
228 | |||
229 | ;;; Time | ||
230 | ;; Time really only matters in feeds ... but it really does matter. So I need a | ||
231 | ;; few helper functions. | ||
232 | |||
233 | (define publish-time ; fixed once at load, so it is consistent across a run | ||
234 |   (let ((now (seconds->utc-time))) | ||
235 |     (make-parameter (time->string now "%FT%TZ")))) | ||
236 | |||
237 | (define (page-mtime page) #| <page> -> time-string or #f | ||
238 |   Return the mtime of PAGE's source file as a UTC "%FT%TZ" timestamp (the | ||
239 |   format `publish-time' uses), or #f if PAGE has no existing source file. |# | ||
240 |   (let ((file (page-source-path page))) | ||
241 |     (and file | ||
242 |          (file-exists? file) | ||
243 |          (time->string (seconds->utc-time (file-modification-time file)) | ||
244 |                        "%FT%TZ")))) | ||
245 | |||
246 | (define (page-guess-updated page) #| <page> -> time-string | ||
247 |   Guess the "updated" timestamp of PAGE.  Prefer the "date" metadata field | ||
248 |   when it parses (read as local time, rendered as UTC "%FT%TZ"); otherwise | ||
249 |   fall back to the source file's mtime, then to the run's publish time. |# | ||
250 |   (let* ((meta-date (page-meta-ref page "date" #f)) | ||
251 |          ;; Most specific format first: strptime-style parsing can succeed on | ||
252 |          ;; a prefix, so the bare date would otherwise hide any time of day. | ||
253 |          (parsed (and meta-date | ||
254 |                       (or (string->time meta-date "%Y-%m-%d%n%I:%M%n%p") | ||
255 |                           (string->time meta-date "%Y-%m-%d%n%H:%M") | ||
256 |                           (string->time meta-date "%Y-%m-%d"))))) | ||
257 |     (cond (parsed (time->string (seconds->utc-time (local-time->seconds parsed)) | ||
258 |                                 "%FT%TZ")) | ||
259 |           ((page-mtime page))        ; already a string: must not be re-parsed | ||
260 |           (else (publish-time))))) | ||
261 | |||
262 | ;;; Templating | ||
263 | ;; Templating uses sxml to define a layout for pages and indices (index.html, | ||
264 | ;; feed.xml). Sxml's "stylesheets" can be used to extract metadata out of html | ||
265 | ;; comments and to further process the document. | ||
266 | |||
267 | ;; Each template has a default, but the user can override by defining templates | ||
268 | ;; in .config.scm (see below). All templates are parameters holding procedures | ||
269 | ;; that take one or more <page> records and return a string. | ||
270 | |||
271 | (define page-template ; parameter holding a procedure: <page> -> html string | ||
272 |   (make-parameter | ||
273 |    (lambda (page) | ||
274 |      (let ((doc-title (page-meta-ref page "title" "[untitled]")) | ||
275 |            (heading (page-meta-ref page "title" #f))) ; #f: omit the <h1> | ||
276 |        (sxml->html | ||
277 |         `(html (@ (lang "en")) | ||
278 |                (head (title ,doc-title)) | ||
279 |                (body ,(if heading `(h1 ,heading) "") ,@(cdr (page-dest page))))))))) | ||
280 | |||
281 | (define index-template ; parameter holding a procedure: <page> ... -> html string | ||
282 |   (make-parameter | ||
283 |    (lambda pages | ||
284 |      ;; One list item per page, linking to its URL; the link text is the | ||
285 |      ;; page's "title" metadata, falling back to the source file's base name. | ||
286 |      (define (page->item pg) | ||
287 |        (let ((fallback (pathname-file (page-source-path pg)))) | ||
288 |          `(li (a (@ (href ,(page-url pg))) | ||
289 |                  ,(page-meta-ref pg "title" fallback))))) | ||
290 |      (sxml->html | ||
291 |       `(html (@ (lang "en")) | ||
292 |              (head (title ,(site-name))) | ||
293 |              (body (h1 ,(site-name)) | ||
294 |                    (ul | ||
295 |                     ,@(map page->item pages)))))))) | ||
296 | |||
297 | (define feed-template ; parameter holding a procedure: <page> ... -> string (the Atom document) | ||
298 | (make-parameter | ||
299 | (lambda pages | ||
300 | (with-output-to-string ; capture whatever write-atom-doc prints | ||
301 | (lambda () | ||
302 | (write-atom-doc | ||
303 | (make-atom-doc | ||
304 | (make-feed | ||
305 | title: (make-title (site-name)) | ||
306 | id: (site-base-url) | ||
307 | updated: (publish-time) ; I don't like these semantics .. (the feed is stamped with this run's time) | ||
308 | authors: (list (make-author name: (site-author) | ||
309 | uri: (site-base-url))) | ||
310 | links: (list (make-link type: 'html | ||
311 | uri-language: "en" | ||
312 | uri: (site-base-url)) | ||
313 | (make-link relation: "self" | ||
314 | type: "application/atom+xml" | ||
315 | uri: (make-pathname ; the feed's own address: feed.xml under (site-base-url) | ||
316 | (site-base-url) "feed" "xml"))) | ||
317 | rights: (make-rights (site-rights)) | ||
318 | generator: (make-generator "Boudin" | ||
319 | uri: "https://git.acdw.net/boudin" | ||
320 | version: "0.1.0") | ||
321 | entries: (map (lambda (pg) ; one entry per page | ||
322 | (make-entry | ||
323 | title: (make-title | ||
324 | (page-meta-ref pg "title" "[untitled]")) | ||
325 | links: (list (make-link type: 'html | ||
326 | uri: (page-url pg))) | ||
327 | id: (page-url pg) | ||
328 | updated: (page-guess-updated pg) | ||
329 | ;;published: | ||
330 | content: | ||
331 | `(atom:content (@ (type "html")) | ||
332 | ,(cdr (page-dest pg))))) ; NOTE(review): inserted as one nested list (, not ,@) -- confirm the atom egg expects this | ||
333 | pages))))))))) | ||
334 | |||
335 | ;;; Collecting pages from a directory | ||
336 | |||
337 | (define (collect-pages dir ext) ; path string -> (list <page> ...) | ||
338 |   (let ((pattern (make-pathname dir "*" ext))) ; glob pattern: any file in DIR with extension EXT | ||
339 |     (map file->page (glob pattern)))) | ||
340 | |||
341 | ;;; Publishing | ||
342 | |||
343 | (define (apply-template template pages) ; PAGES: a list of pages, or a single page | ||
344 |   (if (list? pages) | ||
345 |       (apply template pages) ; spread the list into TEMPLATE's arguments | ||
346 |       (template pages))) | ||
347 | |||
348 | ;;; Configuration | ||
349 | |||
350 | (define output-directory ; passed to transform-path by file->page to build each page's dest-path | ||
351 | (make-parameter "out/")) | ||
352 | |||
353 | (define site-name ; used as title and heading by index-template, and as the feed title | ||
354 | (make-parameter "[A boudin web site]")) | ||
355 | |||
356 | (define site-base-url ; used to build page URLs, and as the feed's id and link targets | ||
357 | (make-parameter "https://example.com/")) | ||
358 | |||
359 | (define site-author ; name of the feed's author | ||
360 | (make-parameter "nobody")) | ||
361 | |||
362 | (define site-rights ; NOTE: (site-author) is read once, when this definition is evaluated; later changes to site-author do not alter this default | ||
363 | (make-parameter (string-append "(C) " (site-author)))) | ||
364 | |||
365 | ;;; Options & Operands (SRFI 37) | ||
366 | |||
367 | (define opt/help ; -h, --help: print the usage text to stderr and exit | ||
368 |   (let ((show-usage | ||
369 |          (lambda () | ||
370 |            (print "Usage: boudin [OPTIONS]\n" | ||
371 |                   "Options:\n" | ||
372 |                   "-h, --help show this help and exit\n" | ||
373 |                   "-C dir, --directory dir\n" | ||
374 |                   " build site in DIR instead of current directory")))) | ||
375 |     (option '(#\h "help") | ||
376 |             #f                        ; no required argument | ||
377 |             #f                        ; no optional argument | ||
378 |             (lambda _                 ; (opt name arg seeds ...), all unused | ||
379 |               (with-output-to-port (current-error-port) show-usage) | ||
380 |               (exit))))) | ||
381 | |||
382 | (define opt/directory ; -C DIR, --directory DIR: change into DIR before building | ||
383 |   (option '(#\C "directory") #t #f   ; the argument is required | ||
384 |           (lambda (opt name arg seeds) | ||
385 |             (unless (directory-exists? arg) | ||
386 |               (error "Directory doesn't exist" arg)) | ||
387 |             (change-directory arg) | ||
388 |             seeds))) | ||
389 | |||
390 | (define (process-args args) #| (list string ...) -> seeds | ||
391 |   Fold the command-line ARGS with SRFI 37's args-fold. | ||
392 |   Known options run their handlers; an unknown option or any operand | ||
393 |   signals an error.  The initial seed is the empty list. |# | ||
394 |   (args-fold args | ||
395 |              (list opt/help opt/directory) | ||
396 |              ;; Unrecognized option proc (option name arg seeds ...). | ||
397 |              ;; Formals must be distinct: repeating `_' is an error in R7RS. | ||
398 |              (lambda (_opt name _arg _seeds) | ||
399 |                (error "Unrecognized option" name)) | ||
400 |              ;; Operand proc (operand seeds ...) | ||
401 |              (lambda (name _seeds) (error "Bad operand" name)) | ||
402 |              '())) | ||
403 | |||
404 | ;;; Main entry point | ||
405 | |||
406 | (define (main args) ; entry point; ARGS is the list of command-line arguments | ||
407 | (process-args args) | ||
408 | ;; Nothing is built yet: after option handling, main just returns #f. | ||
409 | #f) | ||
410 | |||
411 | (cond-expand ; call main only when one of the features below is present | ||
412 | ((or chicken-script compiling) ; presumably: run as a script via csi -s, or compiled -- confirm | ||
413 | (main (command-line-arguments))) | ||
414 | (else)) ; otherwise do nothing | ||
diff --git a/test.html b/test.html new file mode 100644 index 0000000..6348f0a --- /dev/null +++ b/test.html | |||
@@ -0,0 +1,19 @@ | |||
1 | <!-- | ||
2 | title: foo bar | ||
3 | subtitle: uhhh: the thing | ||
4 | --> | ||
5 | |||
6 | <h1>some html</h1> | ||
7 | <h2>(without p tags)</h2> | ||
8 | |||
9 | Here is a test paragraph. <a href="#">example link</a>. | ||
10 | |||
11 | Here's another. I wonder if it'll just do the thing .. or whatever. Maybe I | ||
12 | should try to make it multiple lines, as well. | ||
13 | |||
14 | <ul> | ||
15 | <li> | ||
16 | one plus two is #(+ 1 2). | ||
17 | </li> | ||
18 | <li>two</li> | ||
19 | </ul> | ||