summary refs log tree commit diff stats
diff options
context:
space:
mode:
authorCase Duckworth2023-07-07 23:59:04 -0500
committerCase Duckworth2023-07-07 23:59:04 -0500
commitf1cf20ac8a05a8571deca7fcd1a5118f3fcd77fb (patch)
tree5d3efab6a82d23c0bb506d0785f73a58e362a92c
downloadboudin-f1cf20ac8a05a8571deca7fcd1a5118f3fcd77fb.tar.gz
boudin-f1cf20ac8a05a8571deca7fcd1a5118f3fcd77fb.zip
Initial commit
-rwxr-xr-xboudin.scm414
-rw-r--r--test.html19
2 files changed, 433 insertions, 0 deletions
diff --git a/boudin.scm b/boudin.scm new file mode 100755 index 0000000..737cbf1 --- /dev/null +++ b/boudin.scm
@@ -0,0 +1,414 @@
1#!/bin/sh
2#| -*- scheme -*-
3exec csi -R r7rs -s "$0" "$@"
4boudin --- a little static site generator
5|#
6
7(import (chicken file)
8 (chicken file posix)
9 (chicken irregex)
10 (chicken pathname)
11 (chicken port)
12 (chicken process-context)
13 (chicken random)
14 (chicken string)
15 (chicken time posix)
16 (chicanery)
17 (atom)
18 (html-parser)
19 (srfi 37)
20 (srfi 152)
21 (sxpath))
22
23;;; Transformations
24;; A static site generator can be thought of largely as two sets of
25;; transformations: one transforming given input content to output content, and
26;; another transforming source paths to destination paths. Since both, for my
27;; purposes, are strings, I have a generic function that can perform both
28;; transformations.
29
30(define (transform str . procs) #| string (string ->string) ... -> string
31 Apply PROCS to STR, left-to-right, and return the result.
32 Each PROC will be called with its predecessor's output, and should take a
33 string as input and return a string. |#
34 (if (null? procs)
35 str
36 (apply transform ((car procs) str) (cdr procs))))
37
38;;; Path transformations
39
40(define (indexify path) #| path -> path
41 Replace the PATH's extension with "/index.html".
42 |#
43 (make-pathname (pathname-strip-extension path)
44 "index"
45 "html"))
46
47(define (transform-path path outdir) #| path => path
48 Transform PATH according to boudin's needs. |#
49 (transform path
50 normalize-pathname
51 (lambda (p) (pathname-replace-directory p outdir))
52 indexify))
53
54;;; Content transformations
55
56(define (split-paragraphs str) #| string -> (list string ...)
57 Split STR into paragraphs.
58 A paragraph is a contiguous series of text lines separated from other
59 paragraphs by at least 2 newline \n characters. This procedure collapses
60 inter-paragraph space. |#
61 (let loop ((lines (string-split str "\n"))
62 (par '())
63 (acc '()))
64 (cond
65 ((and (null? lines) ; base case: no more lines
66 (null? par)) ; ... or pending paragraph
67 (reverse acc))
68 ((null? lines) ; add the final paragraph
69 (loop '() '() (cons (apply string-append (reverse par)) acc)))
70 ((equal? (car lines) "") ; paragraph break
71 (loop (cdr lines)
72 '()
73 (cons (apply string-append (reverse par)) acc)))
74 (else ; line break
75 (loop (cdr lines)
76 (cons (string-append (car lines) "\n") par)
77 acc)))))
78
79(define (wrap-paragraphs str) #| string -> string
80 Wrap naked paragraphs of STR in <p> tags.
81 A 'naked' paragraph is one that doesn't begin with '<' (after optional
82 beginning whitespace). |#
83 (let loop ((pars (map string-trim (split-paragraphs str)))
84 (acc '()))
85 (cond
86 ((null? pars)
87 (apply string-append (reverse acc)))
88 ((zero? (string-length (car pars)))
89 (loop (cdr pars)
90 acc))
91 ((eq? #\< (string-ref (car pars) 0))
92 (loop (cdr pars)
93 (cons (car pars)
94 acc)))
95 (else
96 (loop (cdr pars)
97 (cons (string-append "<p>" (car pars) "</p>\n")
98 acc))))))
99
100(define (expand-string str) #| string -> string
101 Expand STR by passing it in a port to CHICKEN's #<# string interpolation.
102 Yes, this is as cursed as it sounds.
103
104 To make it slightly less so, all # are duplicated to escape them, except for
105 those before ( and {. To escape /those/, double them. |#
106 (let* ((escaped (irregex-replace/all
107 '(or (: #\# #\# (look-ahead (or #\{ #\()))
108 (: #\# (look-ahead (~ #\{ #\()))
109 (: #\# eos))
110 str
111 "##"))
112 (delim (let loop ((attempt (number->string (pseudo-random-real))))
113 (if (irregex-search attempt str)
114 (loop (number->string (pseudo-random-real)))
115 attempt)))
116 (template (make-concatenated-port
117 (open-input-string (string-append "#<#" delim "\n"))
118 (open-input-string escaped)
119 (open-input-string (string-append "\n" delim "\n"))))
120 (expanded (let ((x (open-output-string)))
121 (display (eval (read template)
122 (interaction-environment))
123 x)
124 (get-output-string x))))
125 (irregex-replace/all '(: "#<unspecified>"
126 (* whitespace))
127 expanded
128 "")))
129
130(define (transform-content content) #| string -> string
131 Transform CONTENT according to boudin's needs.
132 This is the raw html, and will still need to be processed to extract metadata
133 and to be further wrapped in a template. |#
134 (transform content
135 expand-string
136 wrap-paragraphs))
137
138;;; Pages
139;; A <page> is a record type that wraps the two transformations outlined above.
140;; It also includes the extracted metadata from the page for processing.
141
142(define-record-type <page>
143 (make-page url meta source dest source-path dest-path)
144 page?
145 (url page-url (setter page-url))
146 (meta page-meta (setter page-meta))
147 (source page-source)
148 (dest page-dest (setter page-dest))
149 (source-path page-source-path)
150 (dest-path page-dest-path (setter page-dest-path)))
151
152(define (%read-port port)
153 (let ((chunk-size 512))
154 (let loop ((next (read-string chunk-size port))
155 (blank? #f)
156 (acc '()))
157 (cond
158 ((or (eof-object? next)
159 (and blank? (equal? next "")))
160 (close-input-port port)
161 (apply string-append (reverse acc)))
162 ((equal? next "")
163 (loop (read-string chunk-size port)
164 #t
165 (cons next acc)))
166 (else
167 (loop (read-string chunk-size port)
168 blank?
169 (cons next acc)))))))
170
171(define read-port
172 (case-lambda
173 (() (%read-port (current-input-port)))
174 ((p) (%read-port p))))
175
176(define (file->page file) #| string -> <page>
177 Convert FILE to an sxml tree after transforming it.
178 This procedure returns both the sxml of the transformed content, but that
179 page's metadata, too. |#
180 (let* ((source (with-input-from-file file read-port))
181 (dest (html->sxml (transform-content source))))
182 (make-page (pathname-directory (transform-path file (site-base-url)))
183 (extract-meta dest)
184 source
185 dest
186 file
187 (transform-path file (output-directory)))))
188
189(define (extract-meta tree) #| sxml -> alist
190 Extract metadata from TREE's comments.
191 Returns an alist of (key . value) pairs where keys and values are strings. |#
192 (let loop ((tree tree)
193 (acc '()))
194 (cond
195 ((or (atom? tree)
196 (null? tree))
197 (reverse acc))
198 ((and (list? (car tree))
199 (eq? (caar tree) '*COMMENT*))
200 (loop (cdr tree)
201 (let* ((comment (string-trim-both (cadar tree)))
202 (lines (string-split comment "\n")))
203 (map (lambda (l)
204 (let ((kv (string-split l ":")))
205 (cons (string-trim-both (car kv))
206 (string-trim
207 (string-intersperse (cdr kv) ":")))))
208 lines))))
209 ((list? (car tree))
210 (loop (cdr tree)
211 (let ((subtree (loop (car tree) '())))
212 (if (null? subtree)
213 acc
214 (cons subtree acc)))))
215 (else (loop (cdr tree) acc)))))
216
217(define (meta-ref meta key default) #| alist string string -> <?>
218 Get KEY's value from META, or DEFAULT if it doesn't exist.
219 DEFAULT is required because I think it's a good idea to require it. |#
220 (let ((x (assoc key meta)))
221 (if x (cdr x) default)))
222
223(define (page-meta-ref page key default) #| <page> string string -> <?>
224 Get KEY's value from PAGE's meta, or DEFAULT.
225 |#
226 (let ((meta (page-meta page)))
227 (meta-ref meta key default)))
228
229;;; Time
230;; Time really only matters in feeds ... but it really does matter. So I need a
231;; few helper functions.
232
233(define publish-time ; this is a parameter so it's consistent across a run.
234 (make-parameter
235 (time->string (seconds->utc-time) "%FT%TZ")))
236
237(define (page-mtime page) #| <page> -> time-string
238 Grab the mtime field from PAGE's source file. |#
239 (let ((file (page-source-path page)))
240 (and file
241 (file-exists? file)
242 (time->string (seconds->utc-time
243 (file-modification-time
244 file))))))
245
246(define (page-guess-updated page) #| <page> -> time-string
247 Guess the "updated" property of PAGE. |#
248 (let ((meta-date (page-meta-ref page "date" #f)))
249 (if meta-date
250 ;; Attempt to parse the date metadata field.
251 (time->string (seconds->utc-time ; This double-conversion is /great/
252 (local-time->seconds
253 (or (string->time meta-date "%Y-%m-%d")
254 (string->time meta-date "%Y-%m-%d%n%H:%M")
255 (string->time meta-date "%Y-%m-%d%n%I:%M%n%p")
256 ;; ... more ?
257 (or (page-mtime page)
258 (publish-time))))))
259 (or (page-mtime page)
260 (publish-time)))))
261
262;;; Templating
263;; Templating uses sxml to define a layout for pages and indeces (index.html,
264;; feed.xml). Sxml's "stylesheets" can be used to extract metadata out of html
265;; comments and to further process the document.
266
267;; Each template has a default, but the user can override by defining templates
268;; in .config.scm (see below). All templates are function parameters that take
269;; a page's sxml tree (argument PAGE) and return a string.
270
271(define page-template
272 (make-parameter
273 (lambda (page)
274 (sxml->html
275 `(html (@ (lang "en"))
276 (head (title ,(page-meta-ref page "title" "[untitled]")))
277 (body ,(let ((title (page-meta-ref page "title" #f)))
278 (if title `(h1 ,title) ""))
279 ,@(cdr (page-dest page))))))))
280
281(define index-template
282 (make-parameter
283 (lambda pages
284 (sxml->html
285 `(html (@ (lang "en"))
286 (head (title ,(site-name)))
287 (body (h1 ,(site-name))
288 (ul
289 ,@(map (lambda (pg)
290 `(li (a (@ (href ,(page-url pg)))
291 ,(page-meta-ref pg
292 "title"
293 (pathname-file
294 (page-source-path pg))))))
295 pages))))))))
296
297(define feed-template
298 (make-parameter
299 (lambda pages
300 (with-output-to-string
301 (lambda ()
302 (write-atom-doc
303 (make-atom-doc
304 (make-feed
305 title: (make-title (site-name))
306 id: (site-base-url)
307 updated: (publish-time) ; I don't like these semantics ..
308 authors: (list (make-author name: (site-author)
309 uri: (site-base-url)))
310 links: (list (make-link type: 'html
311 uri-language: "en"
312 uri: (site-base-url))
313 (make-link relation: "self"
314 type: "application/atom+xml"
315 uri: (make-pathname
316 (site-base-url) "feed" "xml")))
317 rights: (make-rights (site-rights))
318 generator: (make-generator "Boudin"
319 uri: "https://git.acdw.net/boudin"
320 version: "0.1.0")
321 entries: (map (lambda (pg)
322 (make-entry
323 title: (make-title
324 (page-meta-ref pg "title" "[untitled]"))
325 links: (list (make-link type: 'html
326 uri: (page-url pg)))
327 id: (page-url pg)
328 updated: (page-guess-updated pg)
329 ;;published:
330 content:
331 `(atom:content (@ (type "html"))
332 ,(cdr (page-dest pg)))))
333 pages)))))))))
334
335;;; Collecting pages from a directory
336
337(define (collect-pages dir ext)
338 (map file->page
339 (glob (make-pathname dir "*" ext))))
340
341;;; Publishing
342
343(define (apply-template template pages)
344 (apply template (if (list? pages)
345 pages
346 (list pages))))
347
348;;; Configuration
349
350(define output-directory
351 (make-parameter "out/"))
352
353(define site-name
354 (make-parameter "[A boudin web site]"))
355
356(define site-base-url
357 (make-parameter "https://example.com/"))
358
359(define site-author
360 (make-parameter "nobody"))
361
362(define site-rights
363 (make-parameter (string-append "(C) " (site-author))))
364
365;;; Options & Operands (SRFI 37)
366
367(define opt/help
368 (option '(#\h "help") ; Names
369 #f ; Required arg?
370 #f ; Optional arg?
371 (lambda _ ; Option proc (opt name arg seeds ...)
372 (with-output-to-port (current-error-port)
373 (lambda ()
374 (print "Usage: boudin [OPTIONS]\n"
375 "Options:\n"
376 "-h, --help show this help and exit\n"
377 "-C dir, --directory dir\n"
378 " build site in DIR instead of current directory"
379 )))
380 (exit))))
381
382(define opt/directory
383 (option '(#\C "directory") #t #f
384 (lambda (opt name arg seeds)
385 (if (directory-exists? arg)
386 (change-directory arg)
387 (error "Directory doesn't exist" arg))
388 seeds)))
389
390(define (process-args args)
391 (args-fold args
392 ;; Options
393 (list opt/help
394 opt/directory)
395 ;; Unrecognized option proc (option name arg seeds ...)
396 (lambda (_ name _ _)
397 (error "Unrecognized option" name))
398 ;; Operand proc (operand seeds ...)
399 (lambda (name _)
400 (error "Bad operand" name))
401 ;; Seeds
402 '()))
403
404;;; Main entry point
405
406(define (main args)
407 (process-args args)
408
409 #f)
410
411(cond-expand
412 ((or chicken-script compiling)
413 (main (command-line-arguments)))
414 (else))
diff --git a/test.html b/test.html new file mode 100644 index 0000000..6348f0a --- /dev/null +++ b/test.html
@@ -0,0 +1,19 @@
1<!--
2 title: foo bar
3 subtitle: uhhh: the thing
4-->
5
6<h1>some html</h1>
7<h2>(without p tags)</h2>
8
9Here is a test paragraph. <a href="#">example link</a>.
10
11Here's another. I wonder if it'll just do the thing .. or whatever. Maybe I
12should try to make it multiple lines, as well.
13
14<ul>
15 <li>
16 one plus two is #(+ 1 2).
17 </li>
18 <li>two</li>
19</ul>