summary refs log tree commit diff stats
path: root/boudin.scm
blob: 51bf8d5d431205250de11c71f134221791eb943c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
#!/bin/sh
#| -*- scheme -*-
exec csi -R r7rs -s "$0" "$@"
boudin --- a little static site generator
|#

(import (chicken file)
        (chicken file posix)
        (chicken irregex)
        (chicken pathname)
        (chicken port)
        (chicken process-context)
        (chicken random)
        (chicken string)
        (chicken time posix)
        (chicanery)
        (atom)
        (html-parser)
        (scss)
        (srfi 37)
        (srfi 152)
        (sxpath))

;;; Transformations
;; A static site generator can be thought of largely as two sets of
;; transformations: one transforming given input content to output content, and
;; another transforming source paths to destination paths.  Since both, for my
;; purposes, are strings, I have a generic function that can perform both
;; transformations.

(define (transform str . procs) #| string (string ->string) ... -> string
  Apply PROCS to STR, left-to-right, and return the result.
  Each PROC will be called with its predecessor's output, and should take a
  string as input and return a string. |#
  (if (null? procs)
      str
      (apply transform ((car procs) str) (cdr procs))))

;;; Path transformations

(define (indexify path) #| path -> path
  Replace the PATH's extension with "/index.html".
  |#
  (make-pathname (pathname-strip-extension path)
                 "index"
                 "html"))

(define (transform-path path outdir) #| path => path
  Transform PATH according to boudin's needs. |#
  (transform path
             normalize-pathname
             (lambda (p) (pathname-replace-directory p outdir))
             indexify))

;;; Content transformations

(define (split-paragraphs str) #| string -> (list string ...)
  Split STR into paragraphs.
  A paragraph is a contiguous series of text lines separated from other
  paragraphs by at least 2 newline \n characters.  This procedure collapses
  inter-paragraph space. |#
  (let loop ((lines (string-split str "\n"))
             (par '())
             (acc '()))
    (cond
     ((and (null? lines)                ; base case: no more lines
           (null? par))                 ; ... or pending paragraph
      (reverse acc))
     ((null? lines)                     ; add the final paragraph
      (loop '() '() (cons (apply string-append (reverse par)) acc)))
     ((equal? (car lines) "")           ; paragraph break
      (loop (cdr lines)
            '()
            (cons (apply string-append (reverse par)) acc)))
     (else                              ; line break
      (loop (cdr lines)
            (cons (string-append (car lines) "\n") par)
            acc)))))

(define (wrap-paragraphs str) #| string -> string
  Wrap naked paragraphs of STR in <p> tags.
  A 'naked' paragraph is one that doesn't begin with '<' (after optional
  beginning whitespace). |#
  (let loop ((pars (map string-trim (split-paragraphs str)))
             (acc '()))
    (cond
     ((null? pars)
      (apply string-append (reverse acc)))
     ((zero? (string-length (car pars)))
      (loop (cdr pars)
            acc))
     ((eq? #\< (string-ref (car pars) 0))
      (loop (cdr pars)
            (cons (car pars)
                  acc)))
     (else
      (loop (cdr pars)
            (cons (string-append "<p>" (car pars) "</p>\n")
                  acc))))))

(define (preprocess-text str) #| string -> string
  Preprocess STR before passing it to `expand-string'.
  For example, replace all '#' with '##' unless followed by a '{' or '('.
  |#
  (set! str (irregex-replace/all
             '(or (: #\# #\# (look-ahead (or #\{ #\()))
                  (: #\# (look-ahead (~ #\{ #\()))
                  (: #\# eos))
             str
             "##"))
  #;  (set! str (irregex-replace/all ; XXX: missing end paren
  '(: #\@ #\()
  str
  "#(sxml->html `("))
  ;; Return transformed string
  str)

(define (expand-string str) #| string -> string
  Expand STR by passing it in a port to CHICKEN's #<# string interpolation.
  Yes, this is as cursed as it sounds.

  To make it slightly less so, all # are duplicated to escape them, except for
  those before ( and {.  To escape /those/, double them. |#
  (let* ((delim (let loop ((attempt (number->string (pseudo-random-real))))
                  (if (irregex-search attempt str)
                      (loop (number->string (pseudo-random-real)))
                      attempt)))
         (template (make-concatenated-port
                    (open-input-string (string-append "#<#" delim "\n"))
                    (open-input-string (preprocess-text str))
                    (open-input-string (string-append "\n" delim "\n"))))
         (expanded (with-output-to-string
                     (lambda ()
                       (display (eval (read template)
                                      (interaction-environment)))))))
    (irregex-replace/all '(: "#<unspecified>"
                             (* whitespace))
                         expanded
                         "")))

(define (transform-content content) #| string -> string
  Transform CONTENT according to boudin's needs.
  This is the raw html, and will still need to be processed to extract metadata
  and to be further wrapped in a template. |#
  (transform content
             expand-string
             wrap-paragraphs))

;;; Pages
;; A <page> is a record type that wraps the two transformations outlined above.
;; It also includes the extracted metadata from the page for processing.

(define-record-type <page>
  (make-page url meta source dest source-path dest-path)
  page?
  (url page-url (setter page-url))
  (meta page-meta (setter page-meta))
  (source page-source)
  (dest page-dest (setter page-dest))
  (source-path page-source-path)
  (dest-path page-dest-path (setter page-dest-path)))

(define (%read-port port) #| port -> string
  Read PORT until it hits eof and return the results as a string.
  |#
  (let ((chunk-size 512))
    (let loop ((next (read-string chunk-size port))
               (blank? #f)
               (acc '()))
      (cond
       ((or (eof-object? next)
            (and blank? (equal? next "")))
        (close-input-port port)
        (apply string-append (reverse acc)))
       ((equal? next "")
        (loop (read-string chunk-size port)
              #t
              (cons next acc)))
       (else
        (loop (read-string chunk-size port)
              blank?
              (cons next acc)))))))

(define read-port #| (optional port) -> string
  Read PORT completely, returning the results as a string.
  PORT defaults to `current-input-port'.
  |#
  (case-lambda
    (() (%read-port (current-input-port)))
    ((p) (%read-port p))))

(define (file->page file) #| string -> <page>
  Convert FILE to an sxml tree after transforming it.
  This procedure returns both the sxml of the transformed content, but that
  page's metadata, too. |#
  (let* ((source (with-input-from-file file read-port))
         (dest (html->sxml (transform-content source))))
    (make-page (pathname-directory (transform-path file (site-base-url)))
               (extract-meta dest)
               source
               dest
               file
               (transform-path file (build-directory)))))

(define (extract-meta tree) #| sxml -> alist
  Extract metadata from TREE's comments.
  Returns an alist of (key . value) pairs where keys and values are strings. |#
  (let loop ((tree tree)
             (acc '()))
    (cond
     ((or (atom? tree)
          (null? tree))
      (reverse acc))
     ((and (list? (car tree))
           (eq? (caar tree) '*COMMENT*))
      (loop (cdr tree)
            (let* ((comment (string-trim-both (cadar tree)))
                   (lines (string-split comment "\n")))
              (map (lambda (l)
                     (let ((kv (string-split l ":")))
                       (cons (string-trim-both (car kv))
                             (string-trim
                              (string-intersperse (cdr kv) ":")))))
                   lines))))
     ((list? (car tree))
      (loop (cdr tree)
            (let ((subtree (loop (car tree) '())))
              (if (null? subtree)
                  acc
                  (cons subtree acc)))))
     (else (loop (cdr tree) acc)))))

(define (meta-ref meta key default) #| alist string string -> <?>
  Get KEY's value from META, or DEFAULT if it doesn't exist.
  DEFAULT is required because I think it's a good idea to require it. |#
  (let ((x (assoc key meta)))
    (if x (cdr x) default)))

(define (page-meta-ref page key default) #| <page> string string -> <?>
  Get KEY's value from PAGE's meta, or DEFAULT.
  |#
  (let ((meta (page-meta page)))
    (meta-ref meta key default)))

;;; Time
;; Time really only matters in feeds ... but it really does matter.  So I need a
;; few helper functions.

(define publish-time      ; this is a parameter so it's consistent across a run.
  (make-parameter
   (time->string (seconds->utc-time) "%FT%TZ")))

(define (page-mtime page) #| <page> -> time-string
  Grab the mtime field from PAGE's source file. |#
  (let ((file (page-source-path page)))
    (and file
         (file-exists? file)
         (time->string (seconds->utc-time
                        (file-modification-time
                         file))))))

(define (page-guess-updated page) #| <page> -> time-string
  Guess the "updated" property of PAGE. |#
  (let ((meta-date (page-meta-ref page "date" #f)))
    (if meta-date
        ;; Attempt to parse the date metadata field.
        (time->string (seconds->utc-time ; This double-conversion is /great/
                       (local-time->seconds
                        (or (string->time meta-date "%Y-%m-%d")
                            (string->time meta-date "%Y-%m-%d%n%H:%M")
                            (string->time meta-date "%Y-%m-%d%n%I:%M%n%p")
                            ;; ... more ?
                            (or (page-mtime page)
                                (publish-time))))))
        (or (page-mtime page)
            (publish-time)))))

;;; Templating
;; Templating uses sxml to define a layout for pages and indeces (index.html,
;; feed.xml).  Sxml's "stylesheets" can be used to extract metadata out of html
;; comments and to further process the document.

;; Each template has a default, but the user can override by defining templates
;; in .config.scm (see below).  All templates are function parameters that take
;; a page's sxml tree (argument PAGE) and return a string.

(define page-template
  (make-parameter
   (lambda (page)
     `(html (@ (lang "en"))
            (head (title ,(page-meta-ref page "title" "[untitled]"))
                  (link (@ (href "../style.css") ; relative
                           (rel "stylesheet")))
                  (meta (@ (name "viewport")
                           (content "initial-scale=1.0"))))
            (body ,(let ((title (page-meta-ref page "title" #f)))
                     (if title `(h1 ,title) ""))
                  ,@(cdr (page-dest page)))))))

(define index-template
  (make-parameter
   (lambda pages
     `(html (@ (lang "en"))
            (head (title ,(site-name))
                  (link (@ (href "./style.css") ; relative
                           (rel "stylesheet")))
                  (meta (@ (name "viewport")
                           (content "initial-scale=1.0"))))
            (body (h1 ,(site-name))
                  (ul
                   ,@(map (lambda (pg)
                            `(li (a (@ (href ,(page-url pg)))
                                    ,(page-meta-ref pg
                                                    "title"
                                                    (pathname-file
                                                     (page-source-path pg))))))
                          pages)))))))

(define feed-template
  (make-parameter
   (lambda pages
     (make-atom-doc
      (make-feed
       title: (make-title (site-name))
       id: (site-base-url)
       updated: (publish-time)       ; I don't like these semantics ..
       authors: (list (make-author name: (site-author)
                                   uri: (site-base-url)))
       links: (list (make-link type: 'html
                               uri-language: "en"
                               uri: (site-base-url))
                    (make-link relation: "self"
                               type: "application/atom+xml"
                               uri: (make-pathname
                                     (site-base-url) "feed" "xml")))
       rights: (make-rights (site-rights))
       generator: (make-generator "Boudin"
                                  uri: "https://git.acdw.net/boudin"
                                  version: "0.1.0")
       entries: (map (lambda (pg)
                       (make-entry
                        title: (make-title
                                (page-meta-ref pg "title" "[untitled]"))
                        links: (list (make-link type: 'html
                                                uri: (page-url pg)))
                        id: (page-url pg)
                        updated: (page-guess-updated pg)
                        ;;published:
                        content:
                        `(atom:content (@ (type "html"))
                                       ,(cdr (page-dest pg)))))
                     pages))))))

;;; Publishing

(define (write-style)
  (print-log "writing style")
  (with-output-to-file (make-pathname (build-directory) "style" "css")
    (lambda () (write-css (site-style)))))

(define (write-page page)
  (print-log "writing " (or (page-meta-ref page "title" #f)
                            (page-source-path page)))
  (create-directory (pathname-directory (page-dest-path page)) 'parents)
  (with-output-to-file (page-dest-path page)
    (lambda () (sxml-display-as-html ((page-template) page)))))

(define (write-index pages)
  (print-log "writing index")
  (with-output-to-file (make-pathname (build-directory) "index" "html")
    (lambda () (sxml-display-as-html (apply (index-template) pages)))))

(define (write-feed pages)
  (print-log "writing feed")
  (with-output-to-file (make-pathname (build-directory) "feed" "xml")
    (lambda () (write-atom-doc (apply (feed-template) pages)))))

;;; Configuration

;; Build configuration

(define build-directory
  (make-parameter "out/"))

(define build-config
  (make-parameter "config.scm"))

;; Site configuration

(define site-name
  (make-parameter "[A boudin web site]"))

(define site-base-url
  (make-parameter "https://example.com/"))

(define site-author
  (make-parameter "nobody"))

(define site-rights
  (make-parameter (string-append "(C) " (site-author))))

(define site-style
  (make-parameter
   `(css+ (body (font "20px/1.4 sans-serif")
                (max-width "48em")
                (padding "1em")
                (margin auto)))))

;;; Options & Operands (SRFI 37)

(define (print-log . xs)
  (with-output-to-port (current-error-port)
    (lambda () (apply print xs))))

(define (die error-code . xs)
  (apply print-log xs)
  (exit error-code))

(define usage
  (string-append
   "Usage: boudin [OPTIONS] FILE ...\n"
   "Options:\n"
   "-h, --help            show this help and exit\n"
   "-o DIR, --output DIR  build site to DIR (default: out/)\n"
   "Operands:\n"
   "FILE ...              files to build\n"))

(define opt/help
  (option '(#\h "help")                 ; Names
          #f                            ; Required arg?
          #f                            ; Optional arg?
          (lambda _                          ; Option proc (opt name arg seeds ...)
            (die 0 usage))))

(define opt/build-directory
  (option '(#\o "output") #t #f
          (lambda (opt name arg seeds)
            (build-directory arg)
            seeds)))

(define opt/build-config
  (option '(#\c "config") #t #f
          (lambda (opt name arg seeds)
            (build-config arg)
            seeds)))

(define (process-args args)
  (let ((pages '()))
    (args-fold args
               ;; Options
               (list opt/help
                     opt/build-directory
                     opt/build-config)
               ;; Unrecognized option proc (option name arg seeds ...)
               (lambda (_ name _ _)
                 (die 1 "Unrecognized option: -" name "\n" usage))
               ;; Operand proc (operand seeds ...)
               (lambda (name seeds)
                 (if (file-exists? name)
                     (set! pages (cons (file->page name) pages))
                     (die 2 "Page not found: " name))
                 seeds)
               ;; Seeds
               '())
    pages))

;;; Main entry point

(define pages (make-parameter #f))

(define (main args)
  (parameterize ((pages (process-args args)))
    (unless (pages)
      (die 0 "No pages to process.  Abort."))
    ;; Check for files, create directories
    (if (file-exists? (build-config))
        (load (build-config))
        (print-log "No config.scm found; using default config"))
    (create-directory (build-directory) 'parents)
    ;; Build the site
    (write-style)                       ; TODO: copy static assets (?)
    (for-each write-page (pages))
    (write-index (pages))
    (write-feed (pages))))

(cond-expand
  ((or chicken-script compiling)
   (main (command-line-arguments)))
  (else))