#!/bin/sh #| -*- scheme -*- exec csi -R r7rs -s "$0" "$@" boudin --- a little static site generator |# (import (chicken file) (chicken file posix) (chicken irregex) (chicken pathname) (chicken port) (chicken process-context) (chicken random) (chicken string) (chicken time posix) (chicanery) (atom) (html-parser) (scss) (srfi 37) (srfi 152) (sxml-serializer) (sxpath)) ;;; Transformations ;; A static site generator can be thought of largely as two sets of ;; transformations: one transforming given input content to output content, and ;; another transforming source paths to destination paths. Since both, for my ;; purposes, are strings, I have a generic function that can perform both ;; transformations. (define (transform str . procs) #| string (string ->string) ... -> string Apply PROCS to STR, left-to-right, and return the result. Each PROC will be called with its predecessor's output, and should take a string as input and return a string. |# (if (null? procs) str (apply transform ((car procs) str) (cdr procs)))) ;;; Path transformations (define (indexify path) #| path -> path Replace the PATH's extension with "/index.html". |# (make-pathname (pathname-strip-extension path) "index" "html")) (define (transform-path path outdir) #| path => path Transform PATH according to boudin's needs. |# (transform path normalize-pathname (lambda (p) (pathname-replace-directory p outdir)) indexify)) ;;; Content transformations (define (split-paragraphs str) #| string -> (list string ...) Split STR into paragraphs. A paragraph is a contiguous series of text lines separated from other paragraphs by at least 2 newline \n characters. This procedure collapses inter-paragraph space. |# (let loop ((lines (string-split str "\n")) (par '()) (acc '())) (cond ((and (null? lines) ; base case: no more lines (null? par)) ; ... or pending paragraph (reverse acc)) ((null? lines) ; add the final paragraph (loop '() '() (cons (apply string-append (reverse par)) acc))) ((equal? (car lines) "") ; paragraph break (loop (cdr lines) '() (cons (apply string-append (reverse par)) acc))) (else ; line break (loop (cdr lines) (cons (string-append (car lines) "\n") par) acc))))) (define (wrap-paragraphs str) #| string -> string Wrap naked paragraphs of STR in

tags. A 'naked' paragraph is one that doesn't begin with '<' (after optional beginning whitespace). |# (let loop ((pars (map string-trim (split-paragraphs str))) (acc '())) (cond ((null? pars) (apply string-append (reverse acc))) ((zero? (string-length (car pars))) (loop (cdr pars) acc)) ((eq? #\< (string-ref (car pars) 0)) (loop (cdr pars) (cons (car pars) acc))) (else (loop (cdr pars) (cons (string-append "

" (car pars) "

\n") acc)))))) (define (preprocess-text str) #| string -> string Preprocess STR before passing it to `expand-string'. For example, replace all '#' with '##' unless followed by a '{' or '('. |# (set! str (irregex-replace/all '(or (: #\# #\# (look-ahead (or #\{ #\())) (: #\# (look-ahead (~ #\{ #\())) (: #\# eos)) str "##")) #; (set! str (irregex-replace/all ; XXX: missing end paren '(: #\@ #\() str "#(sxml->html `(")) ;; Return transformed string str) (define (expand-string str) #| string -> string Expand STR by passing it in a port to CHICKEN's #<# string interpolation. Yes, this is as cursed as it sounds. To make it slightly less so, all # are duplicated to escape them, except for those before ( and {. To escape /those/, double them. |# (let* ((delim (let loop ((attempt (number->string (pseudo-random-real)))) (if (irregex-search attempt str) (loop (number->string (pseudo-random-real))) attempt))) (template (make-concatenated-port (open-input-string (string-append "#<#" delim "\n")) (open-input-string (preprocess-text str)) (open-input-string (string-append "\n" delim "\n")))) (expanded (with-output-to-string (lambda () (display (eval (read template) (interaction-environment))))))) (irregex-replace/all '(: "#" (* whitespace)) expanded ""))) (define (transform-content content) #| string -> string Transform CONTENT according to boudin's needs. This is the raw html, and will still need to be processed to extract metadata and to be further wrapped in a template. |# (transform content expand-string wrap-paragraphs)) ;;; Pages ;; A is a record type that wraps the two transformations outlined above. ;; It also includes the extracted metadata from the page for processing. (define-record-type (make-page url meta source dest source-path dest-path) page? (url page-url (setter page-url)) (meta page-meta (setter page-meta)) (source page-source) (dest page-dest (setter page-dest)) (source-path page-source-path) (dest-path page-dest-path (setter page-dest-path))) (define (%read-port port) #| port -> string Read PORT until it hits eof and return the results as a string. |# (let ((chunk-size 512)) (let loop ((next (read-string chunk-size port)) (blank? #f) (acc '())) (cond ((or (eof-object? next) (and blank? (equal? next ""))) (close-input-port port) (apply string-append (reverse acc))) ((equal? next "") (loop (read-string chunk-size port) #t (cons next acc))) (else (loop (read-string chunk-size port) blank? (cons next acc))))))) (define read-port #| (optional port) -> string Read PORT completely, returning the results as a string. PORT defaults to `current-input-port'. |# (case-lambda (() (%read-port (current-input-port))) ((p) (%read-port p)))) (define (file->page file) #| string -> Convert FILE to an sxml tree after transforming it. This procedure returns both the sxml of the transformed content, but that page's metadata, too. |# (let* ((source (with-input-from-file file read-port)) (dest (html->sxml (transform-content source)))) (make-page (pathname-directory (transform-path file (site-base-url))) (extract-meta dest) source dest file (transform-path file (build-directory))))) (define (extract-meta tree) #| sxml -> alist Extract metadata from TREE's comments. Returns an alist of (key . value) pairs where keys and values are strings. |# (let loop ((tree tree) (acc '())) (cond ((or (atom? tree) (null? tree)) (reverse acc)) ((and (list? (car tree)) (eq? (caar tree) '*COMMENT*)) (loop (cdr tree) (let* ((comment (string-trim-both (cadar tree))) (lines (string-split comment "\n"))) (map (lambda (l) (let ((kv (string-split l ":"))) (cons (string-trim-both (car kv)) (string-trim (string-intersperse (cdr kv) ":"))))) lines)))) ((list? (car tree)) (loop (cdr tree) (let ((subtree (loop (car tree) '()))) (if (null? subtree) acc (cons subtree acc))))) (else (loop (cdr tree) acc))))) (define (meta-ref meta key default) #| alist string string -> Get KEY's value from META, or DEFAULT if it doesn't exist. DEFAULT is required because I think it's a good idea to require it. |# (let ((x (assoc key meta))) (if x (cdr x) default))) (define (page-meta-ref page key default) #| string string -> Get KEY's value from PAGE's meta, or DEFAULT. |# (let ((meta (page-meta page))) (meta-ref meta key default))) ;;; Time ;; Time really only matters in feeds ... but it really does matter. So I need a ;; few helper functions. (define publish-time ; this is a parameter so it's consistent across a run. (make-parameter (time->string (seconds->utc-time) "%FT%TZ"))) (define (page-mtime page) #| -> time-string Grab the mtime field from PAGE's source file. |# (let ((file (page-source-path page))) (and file (file-exists? file) (time->string (seconds->utc-time (file-modification-time file)))))) (define (page-guess-updated page) #| -> time-string Guess the "updated" property of PAGE. |# (let ((meta-date (page-meta-ref page "date" #f))) (if meta-date ;; Attempt to parse the date metadata field. (time->string (seconds->utc-time ; This double-conversion is /great/ (local-time->seconds (or (string->time meta-date "%Y-%m-%d") (string->time meta-date "%Y-%m-%d%n%H:%M") (string->time meta-date "%Y-%m-%d%n%I:%M%n%p") ;; ... more ? (or (page-mtime page) (publish-time)))))) (or (page-mtime page) (publish-time))))) ;;; Templating ;; Templating uses sxml to define a layout for pages and indeces (index.html, ;; feed.xml). Sxml's "stylesheets" can be used to extract metadata out of html ;; comments and to further process the document. ;; Each template has a default, but the user can override by defining templates ;; in .config.scm (see below). All templates are function parameters that take ;; a page's sxml tree (argument PAGE) and return a string. (define page-template (make-parameter (lambda (page) `(html (@ (lang "en")) (head (title ,(page-meta-ref page "title" "[untitled]")) (link (@ (href "../style.css") ; relative (rel "stylesheet"))) (meta (@ (name "viewport") (content "initial-scale=1.0")))) (body ,(let ((title (page-meta-ref page "title" #f))) (if title `(h1 ,title) "")) ,@(cdr (page-dest page))))))) (define index-template (make-parameter (lambda pages `(html (@ (lang "en")) (head (title ,(site-name)) (link (@ (href "./style.css") ; relative (rel "stylesheet"))) (meta (@ (name "viewport") (content "initial-scale=1.0")))) (body (h1 ,(site-name)) (ul ,@(map (lambda (pg) `(li (a (@ (href ,(page-url pg))) ,(page-meta-ref pg "title" (pathname-file (page-source-path pg)))))) pages))))))) (define feed-template (make-parameter (lambda pages (make-atom-doc (make-feed title: (make-title (site-name)) id: (site-base-url) updated: (publish-time) ; I don't like these semantics .. authors: (list (make-author name: (site-author) uri: (site-base-url))) links: (list (make-link type: 'html uri-language: "en" uri: (site-base-url)) (make-link relation: "self" type: "application/atom+xml" uri: (make-pathname (site-base-url) "feed" "xml"))) rights: (make-rights (site-rights)) generator: (make-generator "Boudin" uri: "https://git.acdw.net/boudin" version: "0.1.0") entries: (map (lambda (pg) (make-entry title: (make-title (page-meta-ref pg "title" "[untitled]")) links: (list (make-link type: 'html uri: (page-url pg))) id: (page-url pg) updated: (page-guess-updated pg) ;;published: content: `(atom:content (@ (type "html")) ,(cdr (page-dest pg))))) pages)))))) ;;; Publishing (define (write-style) (print-log "writing style") (with-output-to-file (make-pathname (build-directory) "style" "css") (lambda () (write-css (site-style))))) (define (write-page page) (print-log "writing " (or (page-meta-ref page "title" #f) (page-source-path page))) (create-directory (pathname-directory (page-dest-path page)) 'parents) (with-output-to-file (page-dest-path page) (lambda () (sxml-display-as-html ((page-template) page))))) (define (write-index pages) (print-log "writing index") (with-output-to-file (make-pathname (build-directory) "index" "html") (lambda () (sxml-display-as-html (apply (index-template) pages))))) (define (write-feed pages) (print-log "writing feed") (serialize-sxml (apply (feed-template) pages) output: (make-pathname (build-directory) "feed" "xml") cdata-section-elements: '(atom:content) ns-prefixes: `((*default* . "http://www.w3.org/2005/Atom") (*default* . "http://www.w3.org/1999/xhtml") . ,(atom-ns-prefixes)) allow-prefix-redeclarations: #t )) ;;; Configuration ;; Build configuration (define build-directory (make-parameter "out/")) (define build-config (make-parameter "config.scm")) ;; Site configuration (define site-name (make-parameter "[A boudin web site]")) (define site-base-url (make-parameter "https://example.com/")) (define site-author (make-parameter "nobody")) (define site-rights (make-parameter (string-append "(C) " (site-author)))) (define site-style (make-parameter `(css+ (body (font "20px/1.4 sans-serif") (max-width "48em") (padding "1em") (margin auto))))) ;;; Options & Operands (SRFI 37) (define (print-log . xs) (with-output-to-port (current-error-port) (lambda () (apply print xs)))) (define (die error-code . xs) (apply print-log xs) (exit error-code)) (define usage (string-append "Usage: boudin [OPTIONS] FILE ...\n" "Options:\n" "-h, --help show this help and exit\n" "-o DIR, --output DIR build site to DIR (default: out/)\n" "Operands:\n" "FILE ... files to build\n")) (define opt/help (option '(#\h "help") ; Names #f ; Required arg? #f ; Optional arg? (lambda _ ; Option proc (opt name arg seeds ...) (die 0 usage)))) (define opt/build-directory (option '(#\o "output") #t #f (lambda (opt name arg seeds) (build-directory arg) seeds))) (define opt/build-config (option '(#\c "config") #t #f (lambda (opt name arg seeds) (build-config arg) seeds))) (define (process-args args) (let ((pages '())) (args-fold args ;; Options (list opt/help opt/build-directory opt/build-config) ;; Unrecognized option proc (option name arg seeds ...) (lambda (_ name _ _) (die 1 "Unrecognized option: -" name "\n" usage)) ;; Operand proc (operand seeds ...) (lambda (name seeds) (if (file-exists? name) (set! pages (cons (file->page name) pages)) (die 2 "Page not found: " name)) seeds) ;; Seeds '()) pages)) ;;; Main entry point (define pages (make-parameter #f)) (define (main args) (parameterize ((pages (process-args args))) (unless (pages) (die 0 "No pages to process. Abort.")) ;; Check for files, create directories (if (file-exists? (build-config)) (load (build-config)) (print-log "No config.scm found; using default config")) (create-directory (build-directory) 'parents) ;; Build the site (write-style) ; TODO: copy static assets (?) (for-each write-page (pages)) (write-index (pages)) (write-feed (pages)))) (cond-expand ((or chicken-script compiling) (main (command-line-arguments))) (else))