summary refs log tree commit diff stats
path: root/boudin.scm
diff options
context:
space:
mode:
authorCase Duckworth2023-08-15 23:33:17 -0500
committerCase Duckworth2023-08-15 23:33:17 -0500
commitd4830cdd422258a7c91a5ed07af50f8c208a29ee (patch)
treebacdf4124ef9b9467ea64c6d098a5cd78426912a /boudin.scm
parentEtc (diff)
downloadboudin-d4830cdd422258a7c91a5ed07af50f8c208a29ee.tar.gz
boudin-d4830cdd422258a7c91a5ed07af50f8c208a29ee.zip
A new start (again)
Diffstat (limited to 'boudin.scm')
-rw-r--r--[-rwxr-xr-x]boudin.scm521
1 files changed, 8 insertions, 513 deletions
diff --git a/boudin.scm b/boudin.scm index 28727ff..7bd741d 100755..100644 --- a/boudin.scm +++ b/boudin.scm
@@ -1,519 +1,14 @@
1#!/bin/sh 1;;; (boudin) --- A small tasty ssg
2#| -*- scheme -*-
3exec csi -R r7rs -s "$0" "$@"
4boudin --- a little static site generator
5|#
6 2
7(import (chicken file) 3(import (boudin))
8 (chicken file posix)
9 (chicken irregex)
10 (chicken pathname)
11 (chicken port)
12 (chicken process-context)
13 (chicken random)
14 (chicken string)
15 (chicken time posix)
16 (chicanery)
17 (atom)
18 (html-parser)
19 (scss)
20 (srfi 37)
21 (srfi 152)
22 (sxml-serializer)
23 (sxpath))
24 4
25;;; Utilities 5(define foo (render-string "#,hello from Boudin!"
6 (interaction-environment)))
26 7
27(define el sxml->html) 8(define (main . args)
28 9 (print foo)
29;;; Transformations 10 (for-each print args))
30;; A static site generator can be thought of largely as two sets of
31;; transformations: one transforming given input content to output content, and
32;; another transforming source paths to destination paths. Since both, for my
33;; purposes, are strings, I have a generic function that can perform both
34;; transformations.
35
36(define (transform str . procs) #| string (string ->string) ... -> string
37 Apply PROCS to STR, left-to-right, and return the result.
38 Each PROC will be called with its predecessor's output, and should take a
39 string as input and return a string. |#
40 (if (null? procs)
41 str
42 (apply transform ((car procs) str) (cdr procs))))
43
44;;; Path transformations
45
46(define (indexify path) #| path -> path
47 Replace the PATH's extension with "/index.html".
48 |#
49 (make-pathname (pathname-strip-extension path)
50 "index"
51 "html"))
52
53(define (slugify path)
54 (transform path
55 normalize-pathname
56 ;; XXX: this should be much more robust
57 (lambda (p) (pathname-replace-directory p "/"))
58 pathname-strip-extension
59 (lambda (p) (string-append p "/"))))
60
61(define (transform-path path outdir) #| path => path
62 Transform PATH according to boudin's needs. |#
63 (transform path
64 normalize-pathname
65 (lambda (p) (pathname-replace-directory p outdir))
66 indexify))
67
68;;; Content transformations
69
70(define (split-paragraphs str) #| string -> (list string ...)
71 Split STR into paragraphs.
72 A paragraph is a contiguous series of text lines separated from other
73 paragraphs by at least 2 newline \n characters. This procedure collapses
74 inter-paragraph space. |#
75 (let loop ((lines (string-split str "\n"))
76 (par '())
77 (acc '()))
78 (cond
79 ((and (null? lines) ; base case: no more lines
80 (null? par)) ; ... or pending paragraph
81 (reverse acc))
82 ((null? lines) ; add the final paragraph
83 (loop '() '() (cons (apply string-append (reverse par)) acc)))
84 ((equal? (car lines) "") ; paragraph break
85 (loop (cdr lines)
86 '()
87 (cons (apply string-append (reverse par)) acc)))
88 (else ; line break
89 (loop (cdr lines)
90 (cons (string-append (car lines) "\n") par)
91 acc)))))
92
93(define (wrap-paragraphs str) #| string -> string
94 Wrap naked paragraphs of STR in <p> tags.
95 A 'naked' paragraph is one that doesn't begin with '<' (after optional
96 beginning whitespace). |#
97 (let loop ((pars (map string-trim (split-paragraphs str)))
98 (acc '()))
99 (cond
100 ((null? pars)
101 (apply string-append (reverse acc)))
102 ((zero? (string-length (car pars)))
103 (loop (cdr pars)
104 acc))
105 ((eq? #\< (string-ref (car pars) 0))
106 (loop (cdr pars)
107 (cons (car pars)
108 acc)))
109 (else
110 (loop (cdr pars)
111 (cons (string-append "<p>" (car pars) "</p>\n")
112 acc))))))
113
114(define (preprocess-text str) #| string -> string
115 Preprocess STR before passing it to `expand-string'.
116 For example, replace all '#' with '##' unless followed by a '{' or '('.
117 |#
118 (set! str (irregex-replace/all
119 '(or (: #\# #\# (look-ahead (or #\{ #\()))
120 (: #\# (look-ahead (~ #\{ #\()))
121 (: #\# eos))
122 str
123 "##"))
124 #; (set! str (irregex-replace/all ; XXX: missing end paren
125 '(: #\@ #\()
126 str
127 "#(sxml->html `("))
128 ;; Return transformed string
129 str)
130
131(define (expand-string str) #| string -> string
132 Expand STR by passing it in a port to CHICKEN's #<# string interpolation.
133 Yes, this is as cursed as it sounds.
134
135 To make it slightly less so, all # are duplicated to escape them, except for
136 those before ( and {. To escape /those/, double them. |#
137 (let* ((delim (let loop ((attempt (number->string (pseudo-random-real))))
138 (if (irregex-search attempt str)
139 (loop (number->string (pseudo-random-real)))
140 attempt)))
141 (template (make-concatenated-port
142 (open-input-string (string-append "#<#" delim "\n"))
143 (open-input-string (preprocess-text str))
144 (open-input-string (string-append "\n" delim "\n"))))
145 (expanded (with-output-to-string
146 (lambda ()
147 (display (eval (read template)
148 (interaction-environment)))))))
149 (irregex-replace/all '(: "#<unspecified>"
150 (* whitespace))
151 expanded
152 "")))
153
154(define (transform-content content) #| string -> string
155 Transform CONTENT according to boudin's needs.
156 This is the raw html, and will still need to be processed to extract metadata
157 and to be further wrapped in a template. |#
158 (transform content
159 expand-string
160 wrap-paragraphs))
161
162;;; Pages
163;; A <page> is a record type that wraps the two transformations outlined above.
164;; It also includes the extracted metadata from the page for processing.
165
166(define-record-type <page>
167 (make-page slug meta source dest source-path dest-path)
168 page?
169 (slug page-slug (setter page-slug))
170 (meta page-meta (setter page-meta))
171 (source page-source)
172 (dest page-dest (setter page-dest))
173 (source-path page-source-path)
174 (dest-path page-dest-path (setter page-dest-path)))
175
176(define (page-url page)
177 (normalize-pathname
178 (make-pathname (list (site-base-url)
179 (page-slug page))
180 #f)))
181
182(define (%read-port port) #| port -> string
183 Read PORT until it hits eof and return the results as a string.
184 |#
185 (let ((chunk-size 512))
186 (let loop ((next (read-string chunk-size port))
187 (blank? #f)
188 (acc '()))
189 (cond
190 ((or (eof-object? next)
191 (and blank? (equal? next "")))
192 (close-input-port port)
193 (apply string-append (reverse acc)))
194 ((equal? next "")
195 (loop (read-string chunk-size port)
196 #t
197 (cons next acc)))
198 (else
199 (loop (read-string chunk-size port)
200 blank?
201 (cons next acc)))))))
202
203(define read-port #| (optional port) -> string
204 Read PORT completely, returning the results as a string.
205 PORT defaults to `current-input-port'.
206 |#
207 (case-lambda
208 (() (%read-port (current-input-port)))
209 ((p) (%read-port p))))
210
211(define (file->page file) #| string -> <page>
212 Convert FILE to an sxml tree after transforming it.
213 This procedure returns both the sxml of the transformed content, but that
214 page's metadata, too. |#
215 (let* ((source (with-input-from-file file read-port))
216 (dest (html->sxml (transform-content source))))
217 (make-page (slugify file)
218 (extract-meta dest)
219 source
220 dest
221 file
222 (transform-path file (build-directory)))))
223
224(define (extract-meta tree) #| sxml -> alist
225 Extract metadata from TREE's comments.
226 Returns an alist of (key . value) pairs where keys and values are strings. |#
227 (let loop ((tree tree)
228 (acc '()))
229 (cond
230 ((or (atom? tree)
231 (null? tree))
232 (reverse acc))
233 ((and (list? (car tree))
234 (eq? (caar tree) '*COMMENT*))
235 (loop (cdr tree)
236 (let* ((comment (string-trim-both (cadar tree)))
237 (lines (string-split comment "\n")))
238 (map (lambda (l)
239 (let ((kv (string-split l ":")))
240 (cons (string-trim-both (car kv))
241 (string-trim
242 (string-intersperse (cdr kv) ":")))))
243 lines))))
244 ((list? (car tree))
245 (loop (cdr tree)
246 (let ((subtree (loop (car tree) '())))
247 (if (null? subtree)
248 acc
249 (cons subtree acc)))))
250 (else (loop (cdr tree) acc)))))
251
252(define (meta-ref meta key default) #| alist string string -> <?>
253 Get KEY's value from META, or DEFAULT if it doesn't exist.
254 DEFAULT is required because I think it's a good idea to require it. |#
255 (let ((x (assoc key meta)))
256 (if x (cdr x) default)))
257
258(define (page-meta-ref page key default) #| <page> string string -> <?>
259 Get KEY's value from PAGE's meta, or DEFAULT.
260 |#
261 (let ((meta (page-meta page)))
262 (meta-ref meta key default)))
263
264;;; Time
265;; Time really only matters in feeds ... but it really does matter. So I need a
266;; few helper functions.
267
268(define publish-time ; this is a parameter so it's consistent across a run.
269 (make-parameter
270 (time->string (seconds->utc-time) "%FT%TZ")))
271
272(define (page-mtime page) #| <page> -> time-string
273 Grab the mtime field from PAGE's source file. |#
274 (let ((file (page-source-path page)))
275 (and file
276 (file-exists? file)
277 (time->string (seconds->utc-time
278 (file-modification-time
279 file))))))
280
281(define (page-guess-updated page) #| <page> -> time-string
282 Guess the "updated" property of PAGE. |#
283 (let ((meta-date (page-meta-ref page "date" #f)))
284 (if meta-date
285 ;; Attempt to parse the date metadata field.
286 (time->string (seconds->utc-time ; This double-conversion is /great/
287 (local-time->seconds
288 (or (string->time meta-date "%Y-%m-%d")
289 (string->time meta-date "%Y-%m-%d%n%H:%M")
290 (string->time meta-date "%Y-%m-%d%n%I:%M%n%p")
291 ;; ... more ?
292 (or (page-mtime page)
293 (publish-time))))))
294 (or (page-mtime page)
295 (publish-time)))))
296
297;;; Templating
298;; Templating uses sxml to define a layout for pages and indeces (index.html,
299;; feed.xml). Sxml's "stylesheets" can be used to extract metadata out of html
300;; comments and to further process the document.
301
302;; Each template has a default, but the user can override by defining templates
303;; in .config.scm (see below). All templates are function parameters that take
304;; a page's sxml tree (argument PAGE) and return a string.
305
306(define page-template
307 (make-parameter
308 (lambda (page)
309 `(html (@ (lang "en"))
310 (head (title ,(page-meta-ref page "title" "[untitled]"))
311 (link (@ (href "../style.css") ; relative
312 (rel "stylesheet")))
313 (meta (@ (name "viewport")
314 (content "initial-scale=1.0"))))
315 (body ,(let ((title (page-meta-ref page "title" #f)))
316 (if title `(h1 ,title) ""))
317 ,@(cdr (page-dest page)))))))
318
319(define index-template
320 (make-parameter
321 (lambda pages
322 `(html (@ (lang "en"))
323 (head (title ,(site-name))
324 (link (@ (href "./style.css") ; relative
325 (rel "stylesheet")))
326 (meta (@ (name "viewport")
327 (content "initial-scale=1.0"))))
328 (body (h1 ,(site-name))
329 (ul
330 ,@(map (lambda (pg)
331 `(li (a (@ (href ,(page-slug pg)))
332 ,(page-meta-ref pg
333 "title"
334 (pathname-file
335 (page-source-path pg))))))
336 ;; TODO : sort pages
337 pages)))))))
338
339(define feed-template
340 (make-parameter
341 (lambda pages
342 (make-atom-doc
343 (make-feed
344 title: (make-title (site-name))
345 id: (site-base-url)
346 updated: (publish-time) ; I don't like these semantics ..
347 authors: (list (make-author name: (site-author)
348 uri: (site-base-url)))
349 links: (list (make-link type: 'html
350 uri-language: "en"
351 uri: (site-base-url))
352 (make-link relation: "self"
353 type: "application/atom+xml"
354 uri: (make-pathname
355 (site-base-url) "feed" "xml")))
356 rights: (make-rights (site-rights))
357 generator: (make-generator "Boudin"
358 uri: "https://git.acdw.net/boudin"
359 version: "0.1.0")
360 entries: (map (lambda (pg)
361 (make-entry
362 title: (make-title
363 (page-meta-ref pg "title" "[untitled]"))
364 links: (list (make-link type: 'html
365 uri: (page-url pg)))
366 id: (page-url pg)
367 updated: (page-guess-updated pg)
368 ;;published:
369 content: (make-content
370 (sxml->html (page-dest pg))
371 type: html)
372 #;`(atom:content "foo")))
373 pages))))))
374
375;;; Publishing
376
377(define (write-style)
378 (print-log "writing style")
379 (with-output-to-file (make-pathname (build-directory) "style" "css")
380 (lambda () (write-css (site-style)))))
381
382(define (write-page page)
383 (print-log "writing " (or (page-meta-ref page "title" #f)
384 (page-source-path page)))
385 (create-directory (pathname-directory (page-dest-path page)) 'parents)
386 (with-output-to-file (page-dest-path page)
387 (lambda () (sxml-display-as-html ((page-template) page)))))
388
389(define (write-index pages)
390 (print-log "writing index")
391 (with-output-to-file (make-pathname (build-directory) "index" "html")
392 (lambda () (sxml-display-as-html (apply (index-template) pages)))))
393
394(define (write-feed pages)
395 (print-log "writing feed")
396 (serialize-sxml (apply (feed-template) pages)
397 output: (make-pathname (build-directory) "feed" "xml")
398 cdata-section-elements: '(atom:content)
399 ns-prefixes: `((*default* . "http://www.w3.org/2005/Atom")
400 (*default* . "http://www.w3.org/1999/xhtml")
401 . ,(atom-ns-prefixes))
402 allow-prefix-redeclarations: #t
403 ))
404
405;;; Configuration
406
407;; Build configuration
408
409(define build-directory
410 (make-parameter "out/"))
411
412(define build-config
413 (make-parameter "config.scm"))
414
415;; Site configuration
416
417(define site-name
418 (make-parameter "[A boudin web site]"))
419
420(define site-base-url
421 (make-parameter "https://example.com/"))
422
423(define site-author
424 (make-parameter "nobody"))
425
426(define site-rights
427 (make-parameter (string-append "(C) " (site-author))))
428
429(define site-style
430 (make-parameter
431 `(css+ (body (font "20px/1.4 sans-serif")
432 (max-width "48em")
433 (padding "1em")
434 (margin auto)))))
435
436;;; Options & Operands (SRFI 37)
437
438(define (print-log . xs)
439 (with-output-to-port (current-error-port)
440 (lambda () (apply print xs))))
441
442(define (die error-code . xs)
443 (apply print-log xs)
444 (exit error-code))
445
446(define usage
447 (string-append
448 "Usage: boudin [OPTIONS] FILE ...\n"
449 "Options:\n"
450 "-h, --help show this help and exit\n"
451 "-o DIR, --output DIR build site to DIR (default: out/)\n"
452 "Operands:\n"
453 "FILE ... files to build\n"))
454
455(define opt/help
456 (option '(#\h "help") ; Names
457 #f ; Required arg?
458 #f ; Optional arg?
459 (lambda _ ; Option proc (opt name arg seeds ...)
460 (die 0 usage))))
461
462(define opt/build-directory
463 (option '(#\o "output") #t #f
464 (lambda (opt name arg seeds)
465 (build-directory arg)
466 seeds)))
467
468(define opt/build-config
469 (option '(#\c "config") #t #f
470 (lambda (opt name arg seeds)
471 (build-config arg)
472 seeds)))
473
474(define (process-args args)
475 (let ((pages '()))
476 (args-fold args
477 ;; Options
478 (list opt/help
479 opt/build-directory
480 opt/build-config)
481 ;; Unrecognized option proc (option name arg seeds ...)
482 (lambda (_ name _ _)
483 (die 1 "Unrecognized option: -" name "\n" usage))
484 ;; Operand proc (operand seeds ...)
485 (lambda (name seeds)
486 (if (file-exists? name)
487 (set! pages (cons name pages))
488 (die 2 "File not found: " name))
489 seeds)
490 ;; Seeds
491 '())
492 pages))
493
494;;; Main entry point
495
496(define pages (make-parameter #f))
497
498(define (main args)
499 (parameterize ((pages (process-args args)))
500 (unless (pages)
501 (die 0 "No pages to process. Abort."))
502 ;; Check for files, create directories
503 (if (file-exists? (build-config))
504 (load (build-config))
505 (print-log "No config.scm found; using default config"))
506 (create-directory (build-directory) 'parents)
507 ;; Convert pages to sxml. This needs to be done here because config.scm
508 ;; might define functions used by the pages.
509 (pages (map file->page (pages)))
510 ;; Build the site
511 (write-style) ; TODO: copy static assets (?)
512 (for-each write-page (pages))
513 (write-index (pages))
514 (write-feed (pages))))
515 11
516(cond-expand 12(cond-expand
517 ((or chicken-script compiling) 13 (compiling (apply main (command-line)))
518 (main (command-line-arguments)))
519 (else)) 14 (else))