diff options
author | Case Duckworth | 2023-08-15 23:33:17 -0500 |
---|---|---|
committer | Case Duckworth | 2023-08-15 23:33:17 -0500 |
commit | d4830cdd422258a7c91a5ed07af50f8c208a29ee (patch) | |
tree | bacdf4124ef9b9467ea64c6d098a5cd78426912a /boudin.scm | |
parent | Etc (diff) | |
download | boudin-d4830cdd422258a7c91a5ed07af50f8c208a29ee.tar.gz boudin-d4830cdd422258a7c91a5ed07af50f8c208a29ee.zip |
A new start (again)
Diffstat (limited to 'boudin.scm')
-rw-r--r--[-rwxr-xr-x] | boudin.scm | 521 |
1 files changed, 8 insertions, 513 deletions
diff --git a/boudin.scm b/boudin.scm index 28727ff..7bd741d 100755..100644 --- a/boudin.scm +++ b/boudin.scm | |||
@@ -1,519 +1,14 @@ | |||
1 | #!/bin/sh | 1 | ;;; (boudin) --- A small tasty ssg |
2 | #| -*- scheme -*- | ||
3 | exec csi -R r7rs -s "$0" "$@" | ||
4 | boudin --- a little static site generator | ||
5 | |# | ||
6 | 2 | ||
7 | (import (chicken file) | 3 | (import (boudin)) |
8 | (chicken file posix) | ||
9 | (chicken irregex) | ||
10 | (chicken pathname) | ||
11 | (chicken port) | ||
12 | (chicken process-context) | ||
13 | (chicken random) | ||
14 | (chicken string) | ||
15 | (chicken time posix) | ||
16 | (chicanery) | ||
17 | (atom) | ||
18 | (html-parser) | ||
19 | (scss) | ||
20 | (srfi 37) | ||
21 | (srfi 152) | ||
22 | (sxml-serializer) | ||
23 | (sxpath)) | ||
24 | 4 | ||
25 | ;;; Utilities | 5 | (define foo (render-string "#,hello from Boudin!" |
6 | (interaction-environment))) | ||
26 | 7 | ||
27 | (define el sxml->html) | 8 | (define (main . args) |
28 | 9 | (print foo) | |
29 | ;;; Transformations | 10 | (for-each print args)) |
30 | ;; A static site generator can be thought of largely as two sets of | ||
31 | ;; transformations: one transforming given input content to output content, and | ||
32 | ;; another transforming source paths to destination paths. Since both, for my | ||
33 | ;; purposes, are strings, I have a generic function that can perform both | ||
34 | ;; transformations. | ||
35 | |||
36 | (define (transform str . procs) #| string (string ->string) ... -> string | ||
37 | Apply PROCS to STR, left-to-right, and return the result. | ||
38 | Each PROC will be called with its predecessor's output, and should take a | ||
39 | string as input and return a string. |# | ||
40 | (if (null? procs) | ||
41 | str | ||
42 | (apply transform ((car procs) str) (cdr procs)))) | ||
43 | |||
44 | ;;; Path transformations | ||
45 | |||
46 | (define (indexify path) #| path -> path | ||
47 | Replace the PATH's extension with "/index.html". | ||
48 | |# | ||
49 | (make-pathname (pathname-strip-extension path) | ||
50 | "index" | ||
51 | "html")) | ||
52 | |||
53 | (define (slugify path) | ||
54 | (transform path | ||
55 | normalize-pathname | ||
56 | ;; XXX: this should be much more robust | ||
57 | (lambda (p) (pathname-replace-directory p "/")) | ||
58 | pathname-strip-extension | ||
59 | (lambda (p) (string-append p "/")))) | ||
60 | |||
61 | (define (transform-path path outdir) #| path => path | ||
62 | Transform PATH according to boudin's needs. |# | ||
63 | (transform path | ||
64 | normalize-pathname | ||
65 | (lambda (p) (pathname-replace-directory p outdir)) | ||
66 | indexify)) | ||
67 | |||
68 | ;;; Content transformations | ||
69 | |||
70 | (define (split-paragraphs str) #| string -> (list string ...) | ||
71 | Split STR into paragraphs. | ||
72 | A paragraph is a contiguous series of text lines separated from other | ||
73 | paragraphs by at least 2 newline \n characters. This procedure collapses | ||
74 | inter-paragraph space. |# | ||
75 | (let loop ((lines (string-split str "\n")) | ||
76 | (par '()) | ||
77 | (acc '())) | ||
78 | (cond | ||
79 | ((and (null? lines) ; base case: no more lines | ||
80 | (null? par)) ; ... or pending paragraph | ||
81 | (reverse acc)) | ||
82 | ((null? lines) ; add the final paragraph | ||
83 | (loop '() '() (cons (apply string-append (reverse par)) acc))) | ||
84 | ((equal? (car lines) "") ; paragraph break | ||
85 | (loop (cdr lines) | ||
86 | '() | ||
87 | (cons (apply string-append (reverse par)) acc))) | ||
88 | (else ; line break | ||
89 | (loop (cdr lines) | ||
90 | (cons (string-append (car lines) "\n") par) | ||
91 | acc))))) | ||
92 | |||
93 | (define (wrap-paragraphs str) #| string -> string | ||
94 | Wrap naked paragraphs of STR in <p> tags. | ||
95 | A 'naked' paragraph is one that doesn't begin with '<' (after optional | ||
96 | beginning whitespace). |# | ||
97 | (let loop ((pars (map string-trim (split-paragraphs str))) | ||
98 | (acc '())) | ||
99 | (cond | ||
100 | ((null? pars) | ||
101 | (apply string-append (reverse acc))) | ||
102 | ((zero? (string-length (car pars))) | ||
103 | (loop (cdr pars) | ||
104 | acc)) | ||
105 | ((eq? #\< (string-ref (car pars) 0)) | ||
106 | (loop (cdr pars) | ||
107 | (cons (car pars) | ||
108 | acc))) | ||
109 | (else | ||
110 | (loop (cdr pars) | ||
111 | (cons (string-append "<p>" (car pars) "</p>\n") | ||
112 | acc)))))) | ||
113 | |||
114 | (define (preprocess-text str) #| string -> string | ||
115 | Preprocess STR before passing it to `expand-string'. | ||
116 | For example, replace all '#' with '##' unless followed by a '{' or '('. | ||
117 | |# | ||
118 | (set! str (irregex-replace/all | ||
119 | '(or (: #\# #\# (look-ahead (or #\{ #\())) | ||
120 | (: #\# (look-ahead (~ #\{ #\())) | ||
121 | (: #\# eos)) | ||
122 | str | ||
123 | "##")) | ||
124 | #; (set! str (irregex-replace/all ; XXX: missing end paren | ||
125 | '(: #\@ #\() | ||
126 | str | ||
127 | "#(sxml->html `(")) | ||
128 | ;; Return transformed string | ||
129 | str) | ||
130 | |||
131 | (define (expand-string str) #| string -> string | ||
132 | Expand STR by passing it in a port to CHICKEN's #<# string interpolation. | ||
133 | Yes, this is as cursed as it sounds. | ||
134 | |||
135 | To make it slightly less so, all # are duplicated to escape them, except for | ||
136 | those before ( and {. To escape /those/, double them. |# | ||
137 | (let* ((delim (let loop ((attempt (number->string (pseudo-random-real)))) | ||
138 | (if (irregex-search attempt str) | ||
139 | (loop (number->string (pseudo-random-real))) | ||
140 | attempt))) | ||
141 | (template (make-concatenated-port | ||
142 | (open-input-string (string-append "#<#" delim "\n")) | ||
143 | (open-input-string (preprocess-text str)) | ||
144 | (open-input-string (string-append "\n" delim "\n")))) | ||
145 | (expanded (with-output-to-string | ||
146 | (lambda () | ||
147 | (display (eval (read template) | ||
148 | (interaction-environment))))))) | ||
149 | (irregex-replace/all '(: "#<unspecified>" | ||
150 | (* whitespace)) | ||
151 | expanded | ||
152 | ""))) | ||
153 | |||
154 | (define (transform-content content) #| string -> string | ||
155 | Transform CONTENT according to boudin's needs. | ||
156 | This is the raw html, and will still need to be processed to extract metadata | ||
157 | and to be further wrapped in a template. |# | ||
158 | (transform content | ||
159 | expand-string | ||
160 | wrap-paragraphs)) | ||
161 | |||
162 | ;;; Pages | ||
163 | ;; A <page> is a record type that wraps the two transformations outlined above. | ||
164 | ;; It also includes the extracted metadata from the page for processing. | ||
165 | |||
166 | (define-record-type <page> | ||
167 | (make-page slug meta source dest source-path dest-path) | ||
168 | page? | ||
169 | (slug page-slug (setter page-slug)) | ||
170 | (meta page-meta (setter page-meta)) | ||
171 | (source page-source) | ||
172 | (dest page-dest (setter page-dest)) | ||
173 | (source-path page-source-path) | ||
174 | (dest-path page-dest-path (setter page-dest-path))) | ||
175 | |||
176 | (define (page-url page) | ||
177 | (normalize-pathname | ||
178 | (make-pathname (list (site-base-url) | ||
179 | (page-slug page)) | ||
180 | #f))) | ||
181 | |||
182 | (define (%read-port port) #| port -> string | ||
183 | Read PORT until it hits eof and return the results as a string. | ||
184 | |# | ||
185 | (let ((chunk-size 512)) | ||
186 | (let loop ((next (read-string chunk-size port)) | ||
187 | (blank? #f) | ||
188 | (acc '())) | ||
189 | (cond | ||
190 | ((or (eof-object? next) | ||
191 | (and blank? (equal? next ""))) | ||
192 | (close-input-port port) | ||
193 | (apply string-append (reverse acc))) | ||
194 | ((equal? next "") | ||
195 | (loop (read-string chunk-size port) | ||
196 | #t | ||
197 | (cons next acc))) | ||
198 | (else | ||
199 | (loop (read-string chunk-size port) | ||
200 | blank? | ||
201 | (cons next acc))))))) | ||
202 | |||
203 | (define read-port #| (optional port) -> string | ||
204 | Read PORT completely, returning the results as a string. | ||
205 | PORT defaults to `current-input-port'. | ||
206 | |# | ||
207 | (case-lambda | ||
208 | (() (%read-port (current-input-port))) | ||
209 | ((p) (%read-port p)))) | ||
210 | |||
211 | (define (file->page file) #| string -> <page> | ||
212 | Convert FILE to an sxml tree after transforming it. | ||
213 | This procedure returns both the sxml of the transformed content, but that | ||
214 | page's metadata, too. |# | ||
215 | (let* ((source (with-input-from-file file read-port)) | ||
216 | (dest (html->sxml (transform-content source)))) | ||
217 | (make-page (slugify file) | ||
218 | (extract-meta dest) | ||
219 | source | ||
220 | dest | ||
221 | file | ||
222 | (transform-path file (build-directory))))) | ||
223 | |||
224 | (define (extract-meta tree) #| sxml -> alist | ||
225 | Extract metadata from TREE's comments. | ||
226 | Returns an alist of (key . value) pairs where keys and values are strings. |# | ||
227 | (let loop ((tree tree) | ||
228 | (acc '())) | ||
229 | (cond | ||
230 | ((or (atom? tree) | ||
231 | (null? tree)) | ||
232 | (reverse acc)) | ||
233 | ((and (list? (car tree)) | ||
234 | (eq? (caar tree) '*COMMENT*)) | ||
235 | (loop (cdr tree) | ||
236 | (let* ((comment (string-trim-both (cadar tree))) | ||
237 | (lines (string-split comment "\n"))) | ||
238 | (map (lambda (l) | ||
239 | (let ((kv (string-split l ":"))) | ||
240 | (cons (string-trim-both (car kv)) | ||
241 | (string-trim | ||
242 | (string-intersperse (cdr kv) ":"))))) | ||
243 | lines)))) | ||
244 | ((list? (car tree)) | ||
245 | (loop (cdr tree) | ||
246 | (let ((subtree (loop (car tree) '()))) | ||
247 | (if (null? subtree) | ||
248 | acc | ||
249 | (cons subtree acc))))) | ||
250 | (else (loop (cdr tree) acc))))) | ||
251 | |||
252 | (define (meta-ref meta key default) #| alist string string -> <?> | ||
253 | Get KEY's value from META, or DEFAULT if it doesn't exist. | ||
254 | DEFAULT is required because I think it's a good idea to require it. |# | ||
255 | (let ((x (assoc key meta))) | ||
256 | (if x (cdr x) default))) | ||
257 | |||
258 | (define (page-meta-ref page key default) #| <page> string string -> <?> | ||
259 | Get KEY's value from PAGE's meta, or DEFAULT. | ||
260 | |# | ||
261 | (let ((meta (page-meta page))) | ||
262 | (meta-ref meta key default))) | ||
263 | |||
264 | ;;; Time | ||
265 | ;; Time really only matters in feeds ... but it really does matter. So I need a | ||
266 | ;; few helper functions. | ||
267 | |||
268 | (define publish-time ; this is a parameter so it's consistent across a run. | ||
269 | (make-parameter | ||
270 | (time->string (seconds->utc-time) "%FT%TZ"))) | ||
271 | |||
272 | (define (page-mtime page) #| <page> -> time-string | ||
273 | Grab the mtime field from PAGE's source file. |# | ||
274 | (let ((file (page-source-path page))) | ||
275 | (and file | ||
276 | (file-exists? file) | ||
277 | (time->string (seconds->utc-time | ||
278 | (file-modification-time | ||
279 | file)))))) | ||
280 | |||
281 | (define (page-guess-updated page) #| <page> -> time-string | ||
282 | Guess the "updated" property of PAGE. |# | ||
283 | (let ((meta-date (page-meta-ref page "date" #f))) | ||
284 | (if meta-date | ||
285 | ;; Attempt to parse the date metadata field. | ||
286 | (time->string (seconds->utc-time ; This double-conversion is /great/ | ||
287 | (local-time->seconds | ||
288 | (or (string->time meta-date "%Y-%m-%d") | ||
289 | (string->time meta-date "%Y-%m-%d%n%H:%M") | ||
290 | (string->time meta-date "%Y-%m-%d%n%I:%M%n%p") | ||
291 | ;; ... more ? | ||
292 | (or (page-mtime page) | ||
293 | (publish-time)))))) | ||
294 | (or (page-mtime page) | ||
295 | (publish-time))))) | ||
296 | |||
297 | ;;; Templating | ||
298 | ;; Templating uses sxml to define a layout for pages and indeces (index.html, | ||
299 | ;; feed.xml). Sxml's "stylesheets" can be used to extract metadata out of html | ||
300 | ;; comments and to further process the document. | ||
301 | |||
302 | ;; Each template has a default, but the user can override by defining templates | ||
303 | ;; in .config.scm (see below). All templates are function parameters that take | ||
304 | ;; a page's sxml tree (argument PAGE) and return a string. | ||
305 | |||
306 | (define page-template | ||
307 | (make-parameter | ||
308 | (lambda (page) | ||
309 | `(html (@ (lang "en")) | ||
310 | (head (title ,(page-meta-ref page "title" "[untitled]")) | ||
311 | (link (@ (href "../style.css") ; relative | ||
312 | (rel "stylesheet"))) | ||
313 | (meta (@ (name "viewport") | ||
314 | (content "initial-scale=1.0")))) | ||
315 | (body ,(let ((title (page-meta-ref page "title" #f))) | ||
316 | (if title `(h1 ,title) "")) | ||
317 | ,@(cdr (page-dest page))))))) | ||
318 | |||
319 | (define index-template | ||
320 | (make-parameter | ||
321 | (lambda pages | ||
322 | `(html (@ (lang "en")) | ||
323 | (head (title ,(site-name)) | ||
324 | (link (@ (href "./style.css") ; relative | ||
325 | (rel "stylesheet"))) | ||
326 | (meta (@ (name "viewport") | ||
327 | (content "initial-scale=1.0")))) | ||
328 | (body (h1 ,(site-name)) | ||
329 | (ul | ||
330 | ,@(map (lambda (pg) | ||
331 | `(li (a (@ (href ,(page-slug pg))) | ||
332 | ,(page-meta-ref pg | ||
333 | "title" | ||
334 | (pathname-file | ||
335 | (page-source-path pg)))))) | ||
336 | ;; TODO : sort pages | ||
337 | pages))))))) | ||
338 | |||
339 | (define feed-template | ||
340 | (make-parameter | ||
341 | (lambda pages | ||
342 | (make-atom-doc | ||
343 | (make-feed | ||
344 | title: (make-title (site-name)) | ||
345 | id: (site-base-url) | ||
346 | updated: (publish-time) ; I don't like these semantics .. | ||
347 | authors: (list (make-author name: (site-author) | ||
348 | uri: (site-base-url))) | ||
349 | links: (list (make-link type: 'html | ||
350 | uri-language: "en" | ||
351 | uri: (site-base-url)) | ||
352 | (make-link relation: "self" | ||
353 | type: "application/atom+xml" | ||
354 | uri: (make-pathname | ||
355 | (site-base-url) "feed" "xml"))) | ||
356 | rights: (make-rights (site-rights)) | ||
357 | generator: (make-generator "Boudin" | ||
358 | uri: "https://git.acdw.net/boudin" | ||
359 | version: "0.1.0") | ||
360 | entries: (map (lambda (pg) | ||
361 | (make-entry | ||
362 | title: (make-title | ||
363 | (page-meta-ref pg "title" "[untitled]")) | ||
364 | links: (list (make-link type: 'html | ||
365 | uri: (page-url pg))) | ||
366 | id: (page-url pg) | ||
367 | updated: (page-guess-updated pg) | ||
368 | ;;published: | ||
369 | content: (make-content | ||
370 | (sxml->html (page-dest pg)) | ||
371 | type: html) | ||
372 | #;`(atom:content "foo"))) | ||
373 | pages)))))) | ||
374 | |||
375 | ;;; Publishing | ||
376 | |||
377 | (define (write-style) | ||
378 | (print-log "writing style") | ||
379 | (with-output-to-file (make-pathname (build-directory) "style" "css") | ||
380 | (lambda () (write-css (site-style))))) | ||
381 | |||
382 | (define (write-page page) | ||
383 | (print-log "writing " (or (page-meta-ref page "title" #f) | ||
384 | (page-source-path page))) | ||
385 | (create-directory (pathname-directory (page-dest-path page)) 'parents) | ||
386 | (with-output-to-file (page-dest-path page) | ||
387 | (lambda () (sxml-display-as-html ((page-template) page))))) | ||
388 | |||
389 | (define (write-index pages) | ||
390 | (print-log "writing index") | ||
391 | (with-output-to-file (make-pathname (build-directory) "index" "html") | ||
392 | (lambda () (sxml-display-as-html (apply (index-template) pages))))) | ||
393 | |||
394 | (define (write-feed pages) | ||
395 | (print-log "writing feed") | ||
396 | (serialize-sxml (apply (feed-template) pages) | ||
397 | output: (make-pathname (build-directory) "feed" "xml") | ||
398 | cdata-section-elements: '(atom:content) | ||
399 | ns-prefixes: `((*default* . "http://www.w3.org/2005/Atom") | ||
400 | (*default* . "http://www.w3.org/1999/xhtml") | ||
401 | . ,(atom-ns-prefixes)) | ||
402 | allow-prefix-redeclarations: #t | ||
403 | )) | ||
404 | |||
405 | ;;; Configuration | ||
406 | |||
407 | ;; Build configuration | ||
408 | |||
409 | (define build-directory | ||
410 | (make-parameter "out/")) | ||
411 | |||
412 | (define build-config | ||
413 | (make-parameter "config.scm")) | ||
414 | |||
415 | ;; Site configuration | ||
416 | |||
417 | (define site-name | ||
418 | (make-parameter "[A boudin web site]")) | ||
419 | |||
420 | (define site-base-url | ||
421 | (make-parameter "https://example.com/")) | ||
422 | |||
423 | (define site-author | ||
424 | (make-parameter "nobody")) | ||
425 | |||
426 | (define site-rights | ||
427 | (make-parameter (string-append "(C) " (site-author)))) | ||
428 | |||
429 | (define site-style | ||
430 | (make-parameter | ||
431 | `(css+ (body (font "20px/1.4 sans-serif") | ||
432 | (max-width "48em") | ||
433 | (padding "1em") | ||
434 | (margin auto))))) | ||
435 | |||
436 | ;;; Options & Operands (SRFI 37) | ||
437 | |||
438 | (define (print-log . xs) | ||
439 | (with-output-to-port (current-error-port) | ||
440 | (lambda () (apply print xs)))) | ||
441 | |||
442 | (define (die error-code . xs) | ||
443 | (apply print-log xs) | ||
444 | (exit error-code)) | ||
445 | |||
446 | (define usage | ||
447 | (string-append | ||
448 | "Usage: boudin [OPTIONS] FILE ...\n" | ||
449 | "Options:\n" | ||
450 | "-h, --help show this help and exit\n" | ||
451 | "-o DIR, --output DIR build site to DIR (default: out/)\n" | ||
452 | "Operands:\n" | ||
453 | "FILE ... files to build\n")) | ||
454 | |||
455 | (define opt/help | ||
456 | (option '(#\h "help") ; Names | ||
457 | #f ; Required arg? | ||
458 | #f ; Optional arg? | ||
459 | (lambda _ ; Option proc (opt name arg seeds ...) | ||
460 | (die 0 usage)))) | ||
461 | |||
462 | (define opt/build-directory | ||
463 | (option '(#\o "output") #t #f | ||
464 | (lambda (opt name arg seeds) | ||
465 | (build-directory arg) | ||
466 | seeds))) | ||
467 | |||
468 | (define opt/build-config | ||
469 | (option '(#\c "config") #t #f | ||
470 | (lambda (opt name arg seeds) | ||
471 | (build-config arg) | ||
472 | seeds))) | ||
473 | |||
474 | (define (process-args args) | ||
475 | (let ((pages '())) | ||
476 | (args-fold args | ||
477 | ;; Options | ||
478 | (list opt/help | ||
479 | opt/build-directory | ||
480 | opt/build-config) | ||
481 | ;; Unrecognized option proc (option name arg seeds ...) | ||
482 | (lambda (_ name _ _) | ||
483 | (die 1 "Unrecognized option: -" name "\n" usage)) | ||
484 | ;; Operand proc (operand seeds ...) | ||
485 | (lambda (name seeds) | ||
486 | (if (file-exists? name) | ||
487 | (set! pages (cons name pages)) | ||
488 | (die 2 "File not found: " name)) | ||
489 | seeds) | ||
490 | ;; Seeds | ||
491 | '()) | ||
492 | pages)) | ||
493 | |||
494 | ;;; Main entry point | ||
495 | |||
496 | (define pages (make-parameter #f)) | ||
497 | |||
498 | (define (main args) | ||
499 | (parameterize ((pages (process-args args))) | ||
500 | (unless (pages) | ||
501 | (die 0 "No pages to process. Abort.")) | ||
502 | ;; Check for files, create directories | ||
503 | (if (file-exists? (build-config)) | ||
504 | (load (build-config)) | ||
505 | (print-log "No config.scm found; using default config")) | ||
506 | (create-directory (build-directory) 'parents) | ||
507 | ;; Convert pages to sxml. This needs to be done here because config.scm | ||
508 | ;; might define functions used by the pages. | ||
509 | (pages (map file->page (pages))) | ||
510 | ;; Build the site | ||
511 | (write-style) ; TODO: copy static assets (?) | ||
512 | (for-each write-page (pages)) | ||
513 | (write-index (pages)) | ||
514 | (write-feed (pages)))) | ||
515 | 11 | ||
516 | (cond-expand | 12 | (cond-expand |
517 | ((or chicken-script compiling) | 13 | (compiling (apply main (command-line))) |
518 | (main (command-line-arguments))) | ||
519 | (else)) | 14 | (else)) |