#!/bin/sh
#| -*- scheme -*-
exec csi -R r7rs -s "$0" "$@"
boudin --- a little static site generator
|#
(import (chicken file)
(chicken file posix)
(chicken irregex)
(chicken pathname)
(chicken port)
(chicken process-context)
(chicken random)
(chicken string)
(chicken time posix)
(chicanery)
(atom)
(html-parser)
(srfi 37)
(srfi 152)
(sxpath))
;;; Transformations
;; A static site generator can be thought of largely as two sets of
;; transformations: one transforming given input content to output content, and
;; another transforming source paths to destination paths. Since both, for my
;; purposes, are strings, I have a generic function that can perform both
;; transformations.
(define (transform str . procs) #| string (string ->string) ... -> string
Apply PROCS to STR, left-to-right, and return the result.
Each PROC will be called with its predecessor's output, and should take a
string as input and return a string. |#
(if (null? procs)
str
(apply transform ((car procs) str) (cdr procs))))
;;; Path transformations
(define (indexify path) #| path -> path
Replace the PATH's extension with "/index.html".
|#
(make-pathname (pathname-strip-extension path)
"index"
"html"))
(define (transform-path path outdir) #| path => path
Transform PATH according to boudin's needs. |#
(transform path
normalize-pathname
(lambda (p) (pathname-replace-directory p outdir))
indexify))
;;; Content transformations
(define (split-paragraphs str) #| string -> (list string ...)
Split STR into paragraphs.
A paragraph is a contiguous series of text lines separated from other
paragraphs by at least 2 newline \n characters. This procedure collapses
inter-paragraph space. |#
(let loop ((lines (string-split str "\n"))
(par '())
(acc '()))
(cond
((and (null? lines) ; base case: no more lines
(null? par)) ; ... or pending paragraph
(reverse acc))
((null? lines) ; add the final paragraph
(loop '() '() (cons (apply string-append (reverse par)) acc)))
((equal? (car lines) "") ; paragraph break
(loop (cdr lines)
'()
(cons (apply string-append (reverse par)) acc)))
(else ; line break
(loop (cdr lines)
(cons (string-append (car lines) "\n") par)
acc)))))
(define (wrap-paragraphs str) #| string -> string
Wrap naked paragraphs of STR in
tags.
A 'naked' paragraph is one that doesn't begin with '<' (after optional
beginning whitespace). |#
(let loop ((pars (map string-trim (split-paragraphs str)))
(acc '()))
(cond
((null? pars)
(apply string-append (reverse acc)))
((zero? (string-length (car pars)))
(loop (cdr pars)
acc))
((eq? #\< (string-ref (car pars) 0))
(loop (cdr pars)
(cons (car pars)
acc)))
(else
(loop (cdr pars)
(cons (string-append "
" (car pars) "
\n")
acc))))))
(define (expand-string str) #| string -> string
Expand STR by passing it in a port to CHICKEN's #<# string interpolation.
Yes, this is as cursed as it sounds.
To make it slightly less so, all # are duplicated to escape them, except for
those before ( and {. To escape /those/, double them. |#
(let* ((escaped (irregex-replace/all
'(or (: #\# #\# (look-ahead (or #\{ #\()))
(: #\# (look-ahead (~ #\{ #\()))
(: #\# eos))
str
"##"))
(delim (let loop ((attempt (number->string (pseudo-random-real))))
(if (irregex-search attempt str)
(loop (number->string (pseudo-random-real)))
attempt)))
(template (make-concatenated-port
(open-input-string (string-append "#<#" delim "\n"))
(open-input-string escaped)
(open-input-string (string-append "\n" delim "\n"))))
(expanded (let ((x (open-output-string)))
(display (eval (read template)
(interaction-environment))
x)
(get-output-string x))))
(irregex-replace/all '(: "#"
(* whitespace))
expanded
"")))
(define (transform-content content) #| string -> string
Transform CONTENT according to boudin's needs.
This is the raw html, and will still need to be processed to extract metadata
and to be further wrapped in a template. |#
(transform content
expand-string
wrap-paragraphs))
;;; Pages
;; A is a record type that wraps the two transformations outlined above.
;; It also includes the extracted metadata from the page for processing.
(define-record-type
(make-page url meta source dest source-path dest-path)
page?
(url page-url (setter page-url))
(meta page-meta (setter page-meta))
(source page-source)
(dest page-dest (setter page-dest))
(source-path page-source-path)
(dest-path page-dest-path (setter page-dest-path)))
(define (%read-port port)
(let ((chunk-size 512))
(let loop ((next (read-string chunk-size port))
(blank? #f)
(acc '()))
(cond
((or (eof-object? next)
(and blank? (equal? next "")))
(close-input-port port)
(apply string-append (reverse acc)))
((equal? next "")
(loop (read-string chunk-size port)
#t
(cons next acc)))
(else
(loop (read-string chunk-size port)
blank?
(cons next acc)))))))
(define read-port
(case-lambda
(() (%read-port (current-input-port)))
((p) (%read-port p))))
(define (file->page file) #| string ->
Convert FILE to an sxml tree after transforming it.
This procedure returns both the sxml of the transformed content, but that
page's metadata, too. |#
(let* ((source (with-input-from-file file read-port))
(dest (html->sxml (transform-content source))))
(make-page (pathname-directory (transform-path file (site-base-url)))
(extract-meta dest)
source
dest
file
(transform-path file (output-directory)))))
(define (extract-meta tree) #| sxml -> alist
Extract metadata from TREE's comments.
Returns an alist of (key . value) pairs where keys and values are strings. |#
(let loop ((tree tree)
(acc '()))
(cond
((or (atom? tree)
(null? tree))
(reverse acc))
((and (list? (car tree))
(eq? (caar tree) '*COMMENT*))
(loop (cdr tree)
(let* ((comment (string-trim-both (cadar tree)))
(lines (string-split comment "\n")))
(map (lambda (l)
(let ((kv (string-split l ":")))
(cons (string-trim-both (car kv))
(string-trim
(string-intersperse (cdr kv) ":")))))
lines))))
((list? (car tree))
(loop (cdr tree)
(let ((subtree (loop (car tree) '())))
(if (null? subtree)
acc
(cons subtree acc)))))
(else (loop (cdr tree) acc)))))
(define (meta-ref meta key default) #| alist string string -> >
Get KEY's value from META, or DEFAULT if it doesn't exist.
DEFAULT is required because I think it's a good idea to require it. |#
(let ((x (assoc key meta)))
(if x (cdr x) default)))
(define (page-meta-ref page key default) #| string string -> >
Get KEY's value from PAGE's meta, or DEFAULT.
|#
(let ((meta (page-meta page)))
(meta-ref meta key default)))
;;; Time
;; Time really only matters in feeds ... but it really does matter. So I need a
;; few helper functions.
(define publish-time ; this is a parameter so it's consistent across a run.
(make-parameter
(time->string (seconds->utc-time) "%FT%TZ")))
(define (page-mtime page) #| -> time-string
Grab the mtime field from PAGE's source file. |#
(let ((file (page-source-path page)))
(and file
(file-exists? file)
(time->string (seconds->utc-time
(file-modification-time
file))))))
(define (page-guess-updated page) #| -> time-string
Guess the "updated" property of PAGE. |#
(let ((meta-date (page-meta-ref page "date" #f)))
(if meta-date
;; Attempt to parse the date metadata field.
(time->string (seconds->utc-time ; This double-conversion is /great/
(local-time->seconds
(or (string->time meta-date "%Y-%m-%d")
(string->time meta-date "%Y-%m-%d%n%H:%M")
(string->time meta-date "%Y-%m-%d%n%I:%M%n%p")
;; ... more ?
(or (page-mtime page)
(publish-time))))))
(or (page-mtime page)
(publish-time)))))
;;; Templating
;; Templating uses sxml to define a layout for pages and indeces (index.html,
;; feed.xml). Sxml's "stylesheets" can be used to extract metadata out of html
;; comments and to further process the document.
;; Each template has a default, but the user can override by defining templates
;; in .config.scm (see below). All templates are function parameters that take
;; a page's sxml tree (argument PAGE) and return a string.
(define page-template
(make-parameter
(lambda (page)
(sxml->html
`(html (@ (lang "en"))
(head (title ,(page-meta-ref page "title" "[untitled]")))
(body ,(let ((title (page-meta-ref page "title" #f)))
(if title `(h1 ,title) ""))
,@(cdr (page-dest page))))))))
(define index-template
(make-parameter
(lambda pages
(sxml->html
`(html (@ (lang "en"))
(head (title ,(site-name)))
(body (h1 ,(site-name))
(ul
,@(map (lambda (pg)
`(li (a (@ (href ,(page-url pg)))
,(page-meta-ref pg
"title"
(pathname-file
(page-source-path pg))))))
pages))))))))
(define feed-template
(make-parameter
(lambda pages
(with-output-to-string
(lambda ()
(write-atom-doc
(make-atom-doc
(make-feed
title: (make-title (site-name))
id: (site-base-url)
updated: (publish-time) ; I don't like these semantics ..
authors: (list (make-author name: (site-author)
uri: (site-base-url)))
links: (list (make-link type: 'html
uri-language: "en"
uri: (site-base-url))
(make-link relation: "self"
type: "application/atom+xml"
uri: (make-pathname
(site-base-url) "feed" "xml")))
rights: (make-rights (site-rights))
generator: (make-generator "Boudin"
uri: "https://git.acdw.net/boudin"
version: "0.1.0")
entries: (map (lambda (pg)
(make-entry
title: (make-title
(page-meta-ref pg "title" "[untitled]"))
links: (list (make-link type: 'html
uri: (page-url pg)))
id: (page-url pg)
updated: (page-guess-updated pg)
;;published:
content:
`(atom:content (@ (type "html"))
,(cdr (page-dest pg)))))
pages)))))))))
;;; Collecting pages from a directory
(define (collect-pages dir ext)
(map file->page
(glob (make-pathname dir "*" ext))))
;;; Publishing
(define (apply-template template pages)
(apply template (if (list? pages)
pages
(list pages))))
;;; Configuration
(define output-directory
(make-parameter "out/"))
(define site-name
(make-parameter "[A boudin web site]"))
(define site-base-url
(make-parameter "https://example.com/"))
(define site-author
(make-parameter "nobody"))
(define site-rights
(make-parameter (string-append "(C) " (site-author))))
;;; Options & Operands (SRFI 37)
(define opt/help
(option '(#\h "help") ; Names
#f ; Required arg?
#f ; Optional arg?
(lambda _ ; Option proc (opt name arg seeds ...)
(with-output-to-port (current-error-port)
(lambda ()
(print "Usage: boudin [OPTIONS]\n"
"Options:\n"
"-h, --help show this help and exit\n"
"-C dir, --directory dir\n"
" build site in DIR instead of current directory"
)))
(exit))))
(define opt/directory
(option '(#\C "directory") #t #f
(lambda (opt name arg seeds)
(if (directory-exists? arg)
(change-directory arg)
(error "Directory doesn't exist" arg))
seeds)))
(define (process-args args)
(args-fold args
;; Options
(list opt/help
opt/directory)
;; Unrecognized option proc (option name arg seeds ...)
(lambda (_ name _ _)
(error "Unrecognized option" name))
;; Operand proc (operand seeds ...)
(lambda (name _)
(error "Bad operand" name))
;; Seeds
'()))
;;; Main entry point
(define (main args)
(process-args args)
#f)
(cond-expand
((or chicken-script compiling)
(main (command-line-arguments)))
(else))