summary refs log tree commit diff stats
path: root/boudin.scm
blob: 082542b8e246b78706dccc396b7283ed4fb32f44 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
#!/bin/sh
#| -*- scheme -*-
exec csi -R r7rs -s "$0" "$@"
boudin --- a little static site generator
|#

(import (chicken file)
        (chicken file posix)
        (chicken irregex)
        (chicken pathname)
        (chicken port)
        (chicken process-context)
        (chicken random)
        (chicken string)
        (chicken time posix)
        (chicanery)
        (atom)
        (html-parser)
        (srfi 37)
        (srfi 152)
        (sxpath))

;;; Transformations
;; A static site generator can be thought of largely as two sets of
;; transformations: one transforming given input content to output content, and
;; another transforming source paths to destination paths.  Since both, for my
;; purposes, are strings, I have a generic function that can perform both
;; transformations.

(define (transform str . procs) #| string (string ->string) ... -> string
  Apply PROCS to STR, left-to-right, and return the result.
  Each PROC will be called with its predecessor's output, and should take a
  string as input and return a string. |#
  (if (null? procs)
      str
      (apply transform ((car procs) str) (cdr procs))))

;;; Path transformations

(define (indexify path) #| path -> path
  Replace the PATH's extension with "/index.html".
  |#
  (make-pathname (pathname-strip-extension path)
                 "index"
                 "html"))

(define (transform-path path outdir) #| path => path
  Transform PATH according to boudin's needs. |#
  (transform path
             normalize-pathname
             (lambda (p) (pathname-replace-directory p outdir))
             indexify))

;;; Content transformations

(define (split-paragraphs str) #| string -> (list string ...)
  Split STR into paragraphs.
  A paragraph is a contiguous series of text lines separated from other
  paragraphs by at least 2 newline \n characters.  This procedure collapses
  inter-paragraph space. |#
  (let loop ((lines (string-split str "\n"))
             (par '())
             (acc '()))
    (cond
     ((and (null? lines)                ; base case: no more lines
           (null? par))                 ; ... or pending paragraph
      (reverse acc))
     ((null? lines)                     ; add the final paragraph
      (loop '() '() (cons (apply string-append (reverse par)) acc)))
     ((equal? (car lines) "")           ; paragraph break
      (loop (cdr lines)
            '()
            (cons (apply string-append (reverse par)) acc)))
     (else                              ; line break
      (loop (cdr lines)
            (cons (string-append (car lines) "\n") par)
            acc)))))

(define (wrap-paragraphs str) #| string -> string
  Wrap naked paragraphs of STR in <p> tags.
  A 'naked' paragraph is one that doesn't begin with '<' (after optional
  beginning whitespace). |#
  (let loop ((pars (map string-trim (split-paragraphs str)))
             (acc '()))
    (cond
     ((null? pars)
      (apply string-append (reverse acc)))
     ((zero? (string-length (car pars)))
      (loop (cdr pars)
            acc))
     ((eq? #\< (string-ref (car pars) 0))
      (loop (cdr pars)
            (cons (car pars)
                  acc)))
     (else
      (loop (cdr pars)
            (cons (string-append "<p>" (car pars) "</p>\n")
                  acc))))))

(define (expand-string str) #| string -> string
  Expand STR by passing it in a port to CHICKEN's #<# string interpolation.
  Yes, this is as cursed as it sounds.

  To make it slightly less so, all # are duplicated to escape them, except for
  those before ( and {.  To escape /those/, double them. |#
  (let* ((escaped (irregex-replace/all
                   '(or (: #\# #\# (look-ahead (or #\{ #\()))
                        (: #\# (look-ahead (~ #\{ #\()))
                        (: #\# eos))
                   str
                   "##"))
         (delim (let loop ((attempt (number->string (pseudo-random-real))))
                  (if (irregex-search attempt str)
                      (loop (number->string (pseudo-random-real)))
                      attempt)))
         (template (make-concatenated-port
                    (open-input-string (string-append "#<#" delim "\n"))
                    (open-input-string escaped)
                    (open-input-string (string-append "\n" delim "\n"))))
         (expanded (let ((x (open-output-string)))
                     (display (eval (read template)
                                    (interaction-environment))
                              x)
                     (get-output-string x))))
    (irregex-replace/all '(: "#<unspecified>"
                             (* whitespace))
                         expanded
                         "")))

(define (transform-content content) #| string -> string
  Transform CONTENT according to boudin's needs.
  This is the raw html, and will still need to be processed to extract metadata
  and to be further wrapped in a template. |#
  (transform content
             expand-string
             wrap-paragraphs))

;;; Pages
;; A <page> is a record type that wraps the two transformations outlined above.
;; It also includes the extracted metadata from the page for processing.

(define-record-type <page>
  (make-page url meta source dest source-path dest-path)
  page?
  (url page-url (setter page-url))
  (meta page-meta (setter page-meta))
  (source page-source)
  (dest page-dest (setter page-dest))
  (source-path page-source-path)
  (dest-path page-dest-path (setter page-dest-path)))

(define (%read-port port)
  (let ((chunk-size 512))
    (let loop ((next (read-string chunk-size port))
               (blank? #f)
               (acc '()))
      (cond
       ((or (eof-object? next)
            (and blank? (equal? next "")))
        (close-input-port port)
        (apply string-append (reverse acc)))
       ((equal? next "")
        (loop (read-string chunk-size port)
              #t
              (cons next acc)))
       (else
        (loop (read-string chunk-size port)
              blank?
              (cons next acc)))))))

(define read-port
  (case-lambda
    (() (%read-port (current-input-port)))
    ((p) (%read-port p))))

(define (file->page file) #| string -> <page>
  Convert FILE to an sxml tree after transforming it.
  This procedure returns both the sxml of the transformed content, but that
  page's metadata, too. |#
  (let* ((source (with-input-from-file file read-port))
         (dest (html->sxml (transform-content source))))
    (make-page (pathname-directory (transform-path file (site-base-url)))
               (extract-meta dest)
               source
               dest
               file
               (transform-path file (output-directory)))))

(define (extract-meta tree) #| sxml -> alist
  Extract metadata from TREE's comments.
  Returns an alist of (key . value) pairs where keys and values are strings. |#
  (let loop ((tree tree)
             (acc '()))
    (cond
     ((or (atom? tree)
          (null? tree))
      (reverse acc))
     ((and (list? (car tree))
           (eq? (caar tree) '*COMMENT*))
      (loop (cdr tree)
            (let* ((comment (string-trim-both (cadar tree)))
                   (lines (string-split comment "\n")))
              (map (lambda (l)
                     (let ((kv (string-split l ":")))
                       (cons (string-trim-both (car kv))
                             (string-trim
                              (string-intersperse (cdr kv) ":")))))
                   lines))))
     ((list? (car tree))
      (loop (cdr tree)
            (let ((subtree (loop (car tree) '())))
              (if (null? subtree)
                  acc
                  (cons subtree acc)))))
     (else (loop (cdr tree) acc)))))

(define (meta-ref meta key default) #| alist string string -> <?>
  Get KEY's value from META, or DEFAULT if it doesn't exist.
  DEFAULT is required because I think it's a good idea to require it. |#
  (let ((x (assoc key meta)))
    (if x (cdr x) default)))

(define (page-meta-ref page key default) #| <page> string string -> <?>
  Get KEY's value from PAGE's meta, or DEFAULT.
  |#
  (let ((meta (page-meta page)))
    (meta-ref meta key default)))

;;; Time
;; Time really only matters in feeds ... but it really does matter.  So I need a
;; few helper functions.

(define publish-time      ; this is a parameter so it's consistent across a run.
  (make-parameter
   (time->string (seconds->utc-time) "%FT%TZ")))

(define (page-mtime page) #| <page> -> time-string
  Grab the mtime field from PAGE's source file. |#
  (let ((file (page-source-path page)))
    (and file
         (file-exists? file)
         (time->string (seconds->utc-time
                        (file-modification-time
                         file))))))

(define (page-guess-updated page) #| <page> -> time-string
  Guess the "updated" property of PAGE. |#
  (let ((meta-date (page-meta-ref page "date" #f)))
    (if meta-date
        ;; Attempt to parse the date metadata field.
        (time->string (seconds->utc-time ; This double-conversion is /great/
                       (local-time->seconds
                        (or (string->time meta-date "%Y-%m-%d")
                            (string->time meta-date "%Y-%m-%d%n%H:%M")
                            (string->time meta-date "%Y-%m-%d%n%I:%M%n%p")
                            ;; ... more ?
                            (or (page-mtime page)
                                (publish-time))))))
        (or (page-mtime page)
            (publish-time)))))

;;; Templating
;; Templating uses sxml to define a layout for pages and indeces (index.html,
;; feed.xml).  Sxml's "stylesheets" can be used to extract metadata out of html
;; comments and to further process the document.

;; Each template has a default, but the user can override by defining templates
;; in .config.scm (see below).  All templates are function parameters that take
;; a page's sxml tree (argument PAGE) and return a string.

(define page-template
  (make-parameter
   (lambda (page)
     (sxml->html
      `(html (@ (lang "en"))
             (head (title ,(page-meta-ref page "title" "[untitled]")))
             (body ,(let ((title (page-meta-ref page "title" #f)))
                      (if title `(h1 ,title) ""))
                   ,@(cdr (page-dest page))))))))

(define index-template
  (make-parameter
   (lambda pages
     (sxml->html
      `(html (@ (lang "en"))
             (head (title ,(site-name)))
             (body (h1 ,(site-name))
                   (ul
                    ,@(map (lambda (pg)
                             `(li (a (@ (href ,(page-url pg)))
                                     ,(page-meta-ref pg
                                                     "title"
                                                     (pathname-file
                                                      (page-source-path pg))))))
                           pages))))))))

(define feed-template
  (make-parameter
   (lambda pages
     (with-output-to-string
       (lambda ()
         (write-atom-doc
          (make-atom-doc
           (make-feed
            title: (make-title (site-name))
            id: (site-base-url)
            updated: (publish-time)       ; I don't like these semantics ..
            authors: (list (make-author name: (site-author)
                                        uri: (site-base-url)))
            links: (list (make-link type: 'html
                                    uri-language: "en"
                                    uri: (site-base-url))
                         (make-link relation: "self"
                                    type: "application/atom+xml"
                                    uri: (make-pathname
                                          (site-base-url) "feed" "xml")))
            rights: (make-rights (site-rights))
            generator: (make-generator "Boudin"
                                       uri: "https://git.acdw.net/boudin"
                                       version: "0.1.0")
            entries: (map (lambda (pg)
                            (make-entry
                             title: (make-title
                                     (page-meta-ref pg "title" "[untitled]"))
                             links: (list (make-link type: 'html
                                                     uri: (page-url pg)))
                             id: (page-url pg)
                             updated: (page-guess-updated pg)
                             ;;published:
                             content:
                             `(atom:content (@ (type "html"))
                                            ,(cdr (page-dest pg)))))
                          pages)))))))))

;;; Collecting pages from a directory

(define (collect-pages dir ext)
  (map file->page
       (glob (make-pathname dir "*" ext))))

;;; Publishing

(define (apply-template template pages)
  (apply template (if (list? pages)
                      pages
                      (list pages))))

;;; Configuration

(define output-directory
  (make-parameter "out/"))

(define site-name
  (make-parameter "[A boudin web site]"))

(define site-base-url
  (make-parameter "https://example.com/"))

(define site-author
  (make-parameter "nobody"))

(define site-rights
  (make-parameter (string-append "(C) " (site-author))))

;;; Options & Operands (SRFI 37)

(define (die error-code message . args)
  (with-output-to-port (current-error-port)
    (apply print message args)
    (exit error-code)))

(define usage #<<END
Usage: boudin [OPTIONS]
Options:
-h, --help               show this help and exit
-C dir, --directory dir  build site in DIR instead of current directory
END
)

(define opt/help
  (option '(#\h "help")                 ; Names
          #f                            ; Required arg?
          #f                            ; Optional arg?
          (lambda _                          ; Option proc (opt name arg seeds ...)
            (with-output-to-port (current-error-port)
              (lambda () (print usage)))
            (exit))))

(define opt/directory
  (option '(#\C "directory") #t #f
          (lambda (opt name arg seeds)
            (if (directory-exists? arg)
                (change-directory arg)
                (error "Directory doesn't exist" arg))
            seeds)))

(define (process-args args)
  (args-fold args
             ;; Options
             (list opt/help
                   opt/directory)
             ;; Unrecognized option proc (option name arg seeds ...)
             (lambda (_ name _ _)
               (die 1 "Unrecognized option: -" name "\n" usage))
             ;; Operand proc (operand seeds ...)
             (lambda (name _)
               (die 1 "Unrecognized operand: " name "\n" usage))
             ;; Seeds
             '()))

;;; Main entry point

(define (main args)
  (process-args args)
  ;; TODO ...
  #f)

(cond-expand
  ((or chicken-script compiling)
   (main (command-line-arguments)))
  (else))