#!/bin/sh #| -*- scheme -*- exec csi -R r7rs -s "$0" "$@" boudin --- a little static site generator |# (import (chicken file) (chicken file posix) (chicken irregex) (chicken pathname) (chicken port) (chicken process-context) (chicken random) (chicken string) (chicken time posix) (chicanery) (atom) (html-parser) (srfi 37) (srfi 152) (sxpath)) ;;; Transformations ;; A static site generator can be thought of largely as two sets of ;; transformations: one transforming given input content to output content, and ;; another transforming source paths to destination paths. Since both, for my ;; purposes, are strings, I have a generic function that can perform both ;; transformations. (define (transform str . procs) #| string (string ->string) ... -> string Apply PROCS to STR, left-to-right, and return the result. Each PROC will be called with its predecessor's output, and should take a string as input and return a string. |# (if (null? procs) str (apply transform ((car procs) str) (cdr procs)))) ;;; Path transformations (define (indexify path) #| path -> path Replace the PATH's extension with "/index.html". |# (make-pathname (pathname-strip-extension path) "index" "html")) (define (transform-path path outdir) #| path => path Transform PATH according to boudin's needs. |# (transform path normalize-pathname (lambda (p) (pathname-replace-directory p outdir)) indexify)) ;;; Content transformations (define (split-paragraphs str) #| string -> (list string ...) Split STR into paragraphs. A paragraph is a contiguous series of text lines separated from other paragraphs by at least 2 newline \n characters. This procedure collapses inter-paragraph space. |# (let loop ((lines (string-split str "\n")) (par '()) (acc '())) (cond ((and (null? lines) ; base case: no more lines (null? par)) ; ... or pending paragraph (reverse acc)) ((null? lines) ; add the final paragraph (loop '() '() (cons (apply string-append (reverse par)) acc))) ((equal? (car lines) "") ; paragraph break (loop (cdr lines) '() (cons (apply string-append (reverse par)) acc))) (else ; line break (loop (cdr lines) (cons (string-append (car lines) "\n") par) acc))))) (define (wrap-paragraphs str) #| string -> string Wrap naked paragraphs of STR in

tags. A 'naked' paragraph is one that doesn't begin with '<' (after optional beginning whitespace). |# (let loop ((pars (map string-trim (split-paragraphs str))) (acc '())) (cond ((null? pars) (apply string-append (reverse acc))) ((zero? (string-length (car pars))) (loop (cdr pars) acc)) ((eq? #\< (string-ref (car pars) 0)) (loop (cdr pars) (cons (car pars) acc))) (else (loop (cdr pars) (cons (string-append "

" (car pars) "

\n") acc)))))) (define (expand-string str) #| string -> string Expand STR by passing it in a port to CHICKEN's #<# string interpolation. Yes, this is as cursed as it sounds. To make it slightly less so, all # are duplicated to escape them, except for those before ( and {. To escape /those/, double them. |# (let* ((escaped (irregex-replace/all '(or (: #\# #\# (look-ahead (or #\{ #\())) (: #\# (look-ahead (~ #\{ #\())) (: #\# eos)) str "##")) (delim (let loop ((attempt (number->string (pseudo-random-real)))) (if (irregex-search attempt str) (loop (number->string (pseudo-random-real))) attempt))) (template (make-concatenated-port (open-input-string (string-append "#<#" delim "\n")) (open-input-string escaped) (open-input-string (string-append "\n" delim "\n")))) (expanded (let ((x (open-output-string))) (display (eval (read template) (interaction-environment)) x) (get-output-string x)))) (irregex-replace/all '(: "#" (* whitespace)) expanded ""))) (define (transform-content content) #| string -> string Transform CONTENT according to boudin's needs. This is the raw html, and will still need to be processed to extract metadata and to be further wrapped in a template. |# (transform content expand-string wrap-paragraphs)) ;;; Pages ;; A is a record type that wraps the two transformations outlined above. ;; It also includes the extracted metadata from the page for processing. (define-record-type (make-page url meta source dest source-path dest-path) page? (url page-url (setter page-url)) (meta page-meta (setter page-meta)) (source page-source) (dest page-dest (setter page-dest)) (source-path page-source-path) (dest-path page-dest-path (setter page-dest-path))) (define (%read-port port) (let ((chunk-size 512)) (let loop ((next (read-string chunk-size port)) (blank? #f) (acc '())) (cond ((or (eof-object? next) (and blank? (equal? next ""))) (close-input-port port) (apply string-append (reverse acc))) ((equal? next "") (loop (read-string chunk-size port) #t (cons next acc))) (else (loop (read-string chunk-size port) blank? (cons next acc))))))) (define read-port (case-lambda (() (%read-port (current-input-port))) ((p) (%read-port p)))) (define (file->page file) #| string -> Convert FILE to an sxml tree after transforming it. This procedure returns both the sxml of the transformed content, but that page's metadata, too. |# (let* ((source (with-input-from-file file read-port)) (dest (html->sxml (transform-content source)))) (make-page (pathname-directory (transform-path file (site-base-url))) (extract-meta dest) source dest file (transform-path file (output-directory))))) (define (extract-meta tree) #| sxml -> alist Extract metadata from TREE's comments. Returns an alist of (key . value) pairs where keys and values are strings. |# (let loop ((tree tree) (acc '())) (cond ((or (atom? tree) (null? tree)) (reverse acc)) ((and (list? (car tree)) (eq? (caar tree) '*COMMENT*)) (loop (cdr tree) (let* ((comment (string-trim-both (cadar tree))) (lines (string-split comment "\n"))) (map (lambda (l) (let ((kv (string-split l ":"))) (cons (string-trim-both (car kv)) (string-trim (string-intersperse (cdr kv) ":"))))) lines)))) ((list? (car tree)) (loop (cdr tree) (let ((subtree (loop (car tree) '()))) (if (null? subtree) acc (cons subtree acc))))) (else (loop (cdr tree) acc))))) (define (meta-ref meta key default) #| alist string string -> Get KEY's value from META, or DEFAULT if it doesn't exist. DEFAULT is required because I think it's a good idea to require it. |# (let ((x (assoc key meta))) (if x (cdr x) default))) (define (page-meta-ref page key default) #| string string -> Get KEY's value from PAGE's meta, or DEFAULT. |# (let ((meta (page-meta page))) (meta-ref meta key default))) ;;; Time ;; Time really only matters in feeds ... but it really does matter. So I need a ;; few helper functions. (define publish-time ; this is a parameter so it's consistent across a run. (make-parameter (time->string (seconds->utc-time) "%FT%TZ"))) (define (page-mtime page) #| -> time-string Grab the mtime field from PAGE's source file. |# (let ((file (page-source-path page))) (and file (file-exists? file) (time->string (seconds->utc-time (file-modification-time file)))))) (define (page-guess-updated page) #| -> time-string Guess the "updated" property of PAGE. |# (let ((meta-date (page-meta-ref page "date" #f))) (if meta-date ;; Attempt to parse the date metadata field. (time->string (seconds->utc-time ; This double-conversion is /great/ (local-time->seconds (or (string->time meta-date "%Y-%m-%d") (string->time meta-date "%Y-%m-%d%n%H:%M") (string->time meta-date "%Y-%m-%d%n%I:%M%n%p") ;; ... more ? (or (page-mtime page) (publish-time)))))) (or (page-mtime page) (publish-time))))) ;;; Templating ;; Templating uses sxml to define a layout for pages and indeces (index.html, ;; feed.xml). Sxml's "stylesheets" can be used to extract metadata out of html ;; comments and to further process the document. ;; Each template has a default, but the user can override by defining templates ;; in .config.scm (see below). All templates are function parameters that take ;; a page's sxml tree (argument PAGE) and return a string. (define page-template (make-parameter (lambda (page) (sxml->html `(html (@ (lang "en")) (head (title ,(page-meta-ref page "title" "[untitled]"))) (body ,(let ((title (page-meta-ref page "title" #f))) (if title `(h1 ,title) "")) ,@(cdr (page-dest page)))))))) (define index-template (make-parameter (lambda pages (sxml->html `(html (@ (lang "en")) (head (title ,(site-name))) (body (h1 ,(site-name)) (ul ,@(map (lambda (pg) `(li (a (@ (href ,(page-url pg))) ,(page-meta-ref pg "title" (pathname-file (page-source-path pg)))))) pages)))))))) (define feed-template (make-parameter (lambda pages (with-output-to-string (lambda () (write-atom-doc (make-atom-doc (make-feed title: (make-title (site-name)) id: (site-base-url) updated: (publish-time) ; I don't like these semantics .. authors: (list (make-author name: (site-author) uri: (site-base-url))) links: (list (make-link type: 'html uri-language: "en" uri: (site-base-url)) (make-link relation: "self" type: "application/atom+xml" uri: (make-pathname (site-base-url) "feed" "xml"))) rights: (make-rights (site-rights)) generator: (make-generator "Boudin" uri: "https://git.acdw.net/boudin" version: "0.1.0") entries: (map (lambda (pg) (make-entry title: (make-title (page-meta-ref pg "title" "[untitled]")) links: (list (make-link type: 'html uri: (page-url pg))) id: (page-url pg) updated: (page-guess-updated pg) ;;published: content: `(atom:content (@ (type "html")) ,(cdr (page-dest pg))))) pages))))))))) ;;; Collecting pages from a directory (define (collect-pages dir ext) (map file->page (glob (make-pathname dir "*" ext)))) ;;; Publishing (define (apply-template template pages) (apply template (if (list? pages) pages (list pages)))) ;;; Configuration (define output-directory (make-parameter "out/")) (define site-name (make-parameter "[A boudin web site]")) (define site-base-url (make-parameter "https://example.com/")) (define site-author (make-parameter "nobody")) (define site-rights (make-parameter (string-append "(C) " (site-author)))) ;;; Options & Operands (SRFI 37) (define (die error-code message . args) (with-output-to-port (current-error-port) (apply print message args) (exit error-code))) (define usage #<