#!/bin/sh
#| -*- scheme -*-
exec csi -R r7rs -s "$0" "$@"
boudin --- a little static site generator
|#
(import (chicken file)
(chicken file posix)
(chicken irregex)
(chicken pathname)
(chicken port)
(chicken process-context)
(chicken random)
(chicken string)
(chicken time posix)
(chicanery)
(atom)
(html-parser)
(scss)
(srfi 37)
(srfi 152)
(sxml-serializer)
(sxpath))
;;; Utilities
(define el sxml->html)
;;; Transformations
;; A static site generator can be thought of largely as two sets of
;; transformations: one transforming given input content to output content, and
;; another transforming source paths to destination paths. Since both, for my
;; purposes, are strings, I have a generic function that can perform both
;; transformations.
(define (transform str . procs) #| string (string ->string) ... -> string
Apply PROCS to STR, left-to-right, and return the result.
Each PROC will be called with its predecessor's output, and should take a
string as input and return a string. |#
(if (null? procs)
str
(apply transform ((car procs) str) (cdr procs))))
;;; Path transformations
(define (indexify path) #| path -> path
Replace the PATH's extension with "/index.html".
|#
(make-pathname (pathname-strip-extension path)
"index"
"html"))
(define (slugify path)
(transform path
normalize-pathname
;; XXX: this should be much more robust
(lambda (p) (pathname-replace-directory p "/"))
pathname-strip-extension
(lambda (p) (string-append p "/"))))
(define (transform-path path outdir) #| path => path
Transform PATH according to boudin's needs. |#
(transform path
normalize-pathname
(lambda (p) (pathname-replace-directory p outdir))
indexify))
;;; Content transformations
(define (split-paragraphs str) #| string -> (list string ...)
Split STR into paragraphs.
A paragraph is a contiguous series of text lines separated from other
paragraphs by at least 2 newline \n characters. This procedure collapses
inter-paragraph space. |#
(let loop ((lines (string-split str "\n"))
(par '())
(acc '()))
(cond
((and (null? lines) ; base case: no more lines
(null? par)) ; ... or pending paragraph
(reverse acc))
((null? lines) ; add the final paragraph
(loop '() '() (cons (apply string-append (reverse par)) acc)))
((equal? (car lines) "") ; paragraph break
(loop (cdr lines)
'()
(cons (apply string-append (reverse par)) acc)))
(else ; line break
(loop (cdr lines)
(cons (string-append (car lines) "\n") par)
acc)))))
(define (wrap-paragraphs str) #| string -> string
Wrap naked paragraphs of STR in
tags.
A 'naked' paragraph is one that doesn't begin with '<' (after optional
beginning whitespace). |#
(let loop ((pars (map string-trim (split-paragraphs str)))
(acc '()))
(cond
((null? pars)
(apply string-append (reverse acc)))
((zero? (string-length (car pars)))
(loop (cdr pars)
acc))
((eq? #\< (string-ref (car pars) 0))
(loop (cdr pars)
(cons (car pars)
acc)))
(else
(loop (cdr pars)
(cons (string-append "
" (car pars) "
\n")
acc))))))
(define (preprocess-text str) #| string -> string
Preprocess STR before passing it to `expand-string'.
For example, replace all '#' with '##' unless followed by a '{' or '('.
|#
(set! str (irregex-replace/all
'(or (: #\# #\# (look-ahead (or #\{ #\()))
(: #\# (look-ahead (~ #\{ #\()))
(: #\# eos))
str
"##"))
#; (set! str (irregex-replace/all ; XXX: missing end paren
'(: #\@ #\()
str
"#(sxml->html `("))
;; Return transformed string
str)
(define (expand-string str) #| string -> string
Expand STR by passing it in a port to CHICKEN's #<# string interpolation.
Yes, this is as cursed as it sounds.
To make it slightly less so, all # are duplicated to escape them, except for
those before ( and {. To escape /those/, double them. |#
(let* ((delim (let loop ((attempt (number->string (pseudo-random-real))))
(if (irregex-search attempt str)
(loop (number->string (pseudo-random-real)))
attempt)))
(template (make-concatenated-port
(open-input-string (string-append "#<#" delim "\n"))
(open-input-string (preprocess-text str))
(open-input-string (string-append "\n" delim "\n"))))
(expanded (with-output-to-string
(lambda ()
(display (eval (read template)
(interaction-environment)))))))
(irregex-replace/all '(: "#"
(* whitespace))
expanded
"")))
(define (transform-content content) #| string -> string
Transform CONTENT according to boudin's needs.
This is the raw html, and will still need to be processed to extract metadata
and to be further wrapped in a template. |#
(transform content
expand-string
wrap-paragraphs))
;;; Pages
;; A is a record type that wraps the two transformations outlined above.
;; It also includes the extracted metadata from the page for processing.
(define-record-type
(make-page slug meta source dest source-path dest-path)
page?
(slug page-slug (setter page-slug))
(meta page-meta (setter page-meta))
(source page-source)
(dest page-dest (setter page-dest))
(source-path page-source-path)
(dest-path page-dest-path (setter page-dest-path)))
(define (page-url page)
(normalize-pathname
(make-pathname (list (site-base-url)
(page-slug page))
#f)))
(define (%read-port port) #| port -> string
Read PORT until it hits eof and return the results as a string.
|#
(let ((chunk-size 512))
(let loop ((next (read-string chunk-size port))
(blank? #f)
(acc '()))
(cond
((or (eof-object? next)
(and blank? (equal? next "")))
(close-input-port port)
(apply string-append (reverse acc)))
((equal? next "")
(loop (read-string chunk-size port)
#t
(cons next acc)))
(else
(loop (read-string chunk-size port)
blank?
(cons next acc)))))))
(define read-port #| (optional port) -> string
Read PORT completely, returning the results as a string.
PORT defaults to `current-input-port'.
|#
(case-lambda
(() (%read-port (current-input-port)))
((p) (%read-port p))))
(define (file->page file) #| string ->
Convert FILE to an sxml tree after transforming it.
This procedure returns both the sxml of the transformed content, but that
page's metadata, too. |#
(let* ((source (with-input-from-file file read-port))
(dest (html->sxml (transform-content source))))
(make-page (slugify file)
(extract-meta dest)
source
dest
file
(transform-path file (build-directory)))))
(define (extract-meta tree) #| sxml -> alist
Extract metadata from TREE's comments.
Returns an alist of (key . value) pairs where keys and values are strings. |#
(let loop ((tree tree)
(acc '()))
(cond
((or (atom? tree)
(null? tree))
(reverse acc))
((and (list? (car tree))
(eq? (caar tree) '*COMMENT*))
(loop (cdr tree)
(let* ((comment (string-trim-both (cadar tree)))
(lines (string-split comment "\n")))
(map (lambda (l)
(let ((kv (string-split l ":")))
(cons (string-trim-both (car kv))
(string-trim
(string-intersperse (cdr kv) ":")))))
lines))))
((list? (car tree))
(loop (cdr tree)
(let ((subtree (loop (car tree) '())))
(if (null? subtree)
acc
(cons subtree acc)))))
(else (loop (cdr tree) acc)))))
(define (meta-ref meta key default) #| alist string string -> >
Get KEY's value from META, or DEFAULT if it doesn't exist.
DEFAULT is required because I think it's a good idea to require it. |#
(let ((x (assoc key meta)))
(if x (cdr x) default)))
(define (page-meta-ref page key default) #| string string -> >
Get KEY's value from PAGE's meta, or DEFAULT.
|#
(let ((meta (page-meta page)))
(meta-ref meta key default)))
;;; Time
;; Time really only matters in feeds ... but it really does matter. So I need a
;; few helper functions.
(define publish-time ; this is a parameter so it's consistent across a run.
(make-parameter
(time->string (seconds->utc-time) "%FT%TZ")))
(define (page-mtime page) #| -> time-string
Grab the mtime field from PAGE's source file. |#
(let ((file (page-source-path page)))
(and file
(file-exists? file)
(time->string (seconds->utc-time
(file-modification-time
file))))))
(define (page-guess-updated page) #| -> time-string
Guess the "updated" property of PAGE. |#
(let ((meta-date (page-meta-ref page "date" #f)))
(if meta-date
;; Attempt to parse the date metadata field.
(time->string (seconds->utc-time ; This double-conversion is /great/
(local-time->seconds
(or (string->time meta-date "%Y-%m-%d")
(string->time meta-date "%Y-%m-%d%n%H:%M")
(string->time meta-date "%Y-%m-%d%n%I:%M%n%p")
;; ... more ?
(or (page-mtime page)
(publish-time))))))
(or (page-mtime page)
(publish-time)))))
;;; Templating
;; Templating uses sxml to define a layout for pages and indeces (index.html,
;; feed.xml). Sxml's "stylesheets" can be used to extract metadata out of html
;; comments and to further process the document.
;; Each template has a default, but the user can override by defining templates
;; in .config.scm (see below). All templates are function parameters that take
;; a page's sxml tree (argument PAGE) and return a string.
(define page-template
(make-parameter
(lambda (page)
`(html (@ (lang "en"))
(head (title ,(page-meta-ref page "title" "[untitled]"))
(link (@ (href "../style.css") ; relative
(rel "stylesheet")))
(meta (@ (name "viewport")
(content "initial-scale=1.0"))))
(body ,(let ((title (page-meta-ref page "title" #f)))
(if title `(h1 ,title) ""))
,@(cdr (page-dest page)))))))
(define index-template
(make-parameter
(lambda pages
`(html (@ (lang "en"))
(head (title ,(site-name))
(link (@ (href "./style.css") ; relative
(rel "stylesheet")))
(meta (@ (name "viewport")
(content "initial-scale=1.0"))))
(body (h1 ,(site-name))
(ul
,@(map (lambda (pg)
`(li (a (@ (href ,(page-slug pg)))
,(page-meta-ref pg
"title"
(pathname-file
(page-source-path pg))))))
;; TODO : sort pages
pages)))))))
(define feed-template
(make-parameter
(lambda pages
(make-atom-doc
(make-feed
title: (make-title (site-name))
id: (site-base-url)
updated: (publish-time) ; I don't like these semantics ..
authors: (list (make-author name: (site-author)
uri: (site-base-url)))
links: (list (make-link type: 'html
uri-language: "en"
uri: (site-base-url))
(make-link relation: "self"
type: "application/atom+xml"
uri: (make-pathname
(site-base-url) "feed" "xml")))
rights: (make-rights (site-rights))
generator: (make-generator "Boudin"
uri: "https://git.acdw.net/boudin"
version: "0.1.0")
entries: (map (lambda (pg)
(make-entry
title: (make-title
(page-meta-ref pg "title" "[untitled]"))
links: (list (make-link type: 'html
uri: (page-url pg)))
id: (page-url pg)
updated: (page-guess-updated pg)
;;published:
content: (make-content
(sxml->html (page-dest pg))
type: html)
#;`(atom:content "foo")))
pages))))))
;;; Publishing
(define (write-style)
(print-log "writing style")
(with-output-to-file (make-pathname (build-directory) "style" "css")
(lambda () (write-css (site-style)))))
(define (write-page page)
(print-log "writing " (or (page-meta-ref page "title" #f)
(page-source-path page)))
(create-directory (pathname-directory (page-dest-path page)) 'parents)
(with-output-to-file (page-dest-path page)
(lambda () (sxml-display-as-html ((page-template) page)))))
(define (write-index pages)
(print-log "writing index")
(with-output-to-file (make-pathname (build-directory) "index" "html")
(lambda () (sxml-display-as-html (apply (index-template) pages)))))
(define (write-feed pages)
(print-log "writing feed")
(serialize-sxml (apply (feed-template) pages)
output: (make-pathname (build-directory) "feed" "xml")
cdata-section-elements: '(atom:content)
ns-prefixes: `((*default* . "http://www.w3.org/2005/Atom")
(*default* . "http://www.w3.org/1999/xhtml")
. ,(atom-ns-prefixes))
allow-prefix-redeclarations: #t
))
;;; Configuration
;; Build configuration
(define build-directory
(make-parameter "out/"))
(define build-config
(make-parameter "config.scm"))
;; Site configuration
(define site-name
(make-parameter "[A boudin web site]"))
(define site-base-url
(make-parameter "https://example.com/"))
(define site-author
(make-parameter "nobody"))
(define site-rights
(make-parameter (string-append "(C) " (site-author))))
(define site-style
(make-parameter
`(css+ (body (font "20px/1.4 sans-serif")
(max-width "48em")
(padding "1em")
(margin auto)))))
;;; Options & Operands (SRFI 37)
(define (print-log . xs)
(with-output-to-port (current-error-port)
(lambda () (apply print xs))))
(define (die error-code . xs)
(apply print-log xs)
(exit error-code))
(define usage
(string-append
"Usage: boudin [OPTIONS] FILE ...\n"
"Options:\n"
"-h, --help show this help and exit\n"
"-o DIR, --output DIR build site to DIR (default: out/)\n"
"Operands:\n"
"FILE ... files to build\n"))
(define opt/help
(option '(#\h "help") ; Names
#f ; Required arg?
#f ; Optional arg?
(lambda _ ; Option proc (opt name arg seeds ...)
(die 0 usage))))
(define opt/build-directory
(option '(#\o "output") #t #f
(lambda (opt name arg seeds)
(build-directory arg)
seeds)))
(define opt/build-config
(option '(#\c "config") #t #f
(lambda (opt name arg seeds)
(build-config arg)
seeds)))
(define (process-args args)
(let ((pages '()))
(args-fold args
;; Options
(list opt/help
opt/build-directory
opt/build-config)
;; Unrecognized option proc (option name arg seeds ...)
(lambda (_ name _ _)
(die 1 "Unrecognized option: -" name "\n" usage))
;; Operand proc (operand seeds ...)
(lambda (name seeds)
(if (file-exists? name)
(set! pages (cons name pages))
(die 2 "File not found: " name))
seeds)
;; Seeds
'())
pages))
;;; Main entry point
(define pages (make-parameter #f))
(define (main args)
(parameterize ((pages (process-args args)))
(unless (pages)
(die 0 "No pages to process. Abort."))
;; Check for files, create directories
(if (file-exists? (build-config))
(load (build-config))
(print-log "No config.scm found; using default config"))
(create-directory (build-directory) 'parents)
;; Convert pages to sxml. This needs to be done here because config.scm
;; might define functions used by the pages.
(pages (map file->page (pages)))
;; Build the site
(write-style) ; TODO: copy static assets (?)
(for-each write-page (pages))
(write-index (pages))
(write-feed (pages))))
(cond-expand
((or chicken-script compiling)
(main (command-line-arguments)))
(else))