From f1cf20ac8a05a8571deca7fcd1a5118f3fcd77fb Mon Sep 17 00:00:00 2001
From: Case Duckworth
Date: Fri, 7 Jul 2023 23:59:04 -0500
Subject: Initial commit
---
boudin.scm | 414 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
test.html | 19 +++
2 files changed, 433 insertions(+)
create mode 100755 boudin.scm
create mode 100644 test.html
diff --git a/boudin.scm b/boudin.scm
new file mode 100755
index 0000000..737cbf1
--- /dev/null
+++ b/boudin.scm
@@ -0,0 +1,414 @@
+#!/bin/sh
+#| -*- scheme -*-
+exec csi -R r7rs -s "$0" "$@"
+boudin --- a little static site generator
+|#
+
+(import (chicken file)
+ (chicken file posix)
+ (chicken irregex)
+ (chicken pathname)
+ (chicken port)
+ (chicken process-context)
+ (chicken random)
+ (chicken string)
+ (chicken time posix)
+ (chicanery)
+ (atom)
+ (html-parser)
+ (srfi 37)
+ (srfi 152)
+ (sxpath))
+
+;;; Transformations
+;; A static site generator can be thought of largely as two sets of
+;; transformations: one transforming given input content to output content, and
+;; another transforming source paths to destination paths. Since both, for my
+;; purposes, are strings, I have a generic function that can perform both
+;; transformations.
+
+(define (transform str . procs) #| string (string -> string) ... -> string
+  Thread STR through PROCS from left to right and return the final result.
+  Each PROC receives its predecessor's output, so each should take a string
+  and return a string.  With no PROCS, STR is returned unchanged. |#
+  (let loop ((result str) (remaining procs))
+    (if (null? remaining) result
+        (loop ((car remaining) result) (cdr remaining)))))
+
+;;; Path transformations
+
+(define (indexify path) #| path -> path
+  Turn PATH into the "index.html" file of a directory named after PATH minus
+  its extension, e.g. "out/foo.html" becomes "out/foo/index.html". |#
+  (let ((directory (pathname-strip-extension path))
+        (file "index") (extension "html"))
+    (make-pathname directory file extension)))
+
+(define (transform-path path outdir) #| path path -> path
+  Map a source PATH to its destination under OUTDIR: normalize it, swap its
+  directory for OUTDIR, then point it at an index.html (see `indexify'). |#
+  (let* ((normalized (normalize-pathname path))
+         (relocated (pathname-replace-directory normalized outdir)))
+    (indexify relocated)))
+
+;;; Content transformations
+
+(define (split-paragraphs str) #| string -> (list string ...)
+  Split STR into paragraphs.
+  A paragraph is a run of contiguous text lines, separated from its neighbours
+  by one or more blank lines.  Each line of a returned paragraph keeps a
+  trailing "\n".  Runs of blank lines are collapsed: an empty paragraph is
+  never returned, whether the blank lines lead, trail or sit in between. |#
+  (define (flush par acc)              ; close the pending paragraph, if any
+    (if (null? par)
+        acc
+        (cons (apply string-append (reverse par)) acc)))
+  (let loop ((lines (string-split str "\n"))
+             (par '())                 ; lines of the current paragraph, reversed
+             (acc '()))                ; finished paragraphs, reversed
+    (cond
+      ((null? lines)                   ; no more input: emit what's pending
+       (reverse (flush par acc)))
+      ((equal? (car lines) "")         ; blank line: paragraph break
+       (loop (cdr lines) '() (flush par acc)))
+      (else                            ; text line: extend the paragraph
+       (loop (cdr lines)
+             (cons (string-append (car lines) "\n") par)
+             acc)))))
+
+(define (wrap-paragraphs str) #| string -> string
+  Wrap naked paragraphs of STR in <p> tags and return the joined result.
+  A 'naked' paragraph is one that doesn't begin with '<' (after optional
+  beginning whitespace); paragraphs that do are passed through untouched. |#
+  (let loop ((pars (map string-trim (split-paragraphs str)))
+             (acc '()))                ; output pieces, in reverse order
+    (cond
+      ((null? pars)
+       (apply string-append (reverse acc)))
+      ((zero? (string-length (car pars))) ; nothing left after trimming: drop
+       (loop (cdr pars)
+             acc))
+      ((char=? #\< (string-ref (car pars) 0)) ; already markup: keep verbatim
+       (loop (cdr pars)
+             (cons (car pars)
+                   acc)))
+      (else                            ; plain text: wrap it
+       (loop (cdr pars)
+             (cons (string-append "<p>" (car pars) "</p>\n")
+                   acc))))))
+
+(define (expand-string str) #| string -> string
+  Expand STR by passing it in a port to CHICKEN's #<# string interpolation.
+  Yes, this is as cursed as it sounds.
+
+  To make it slightly less so, all # are duplicated to escape them, except for
+  those before ( and {.  To escape /those/, double them.
+  NOTE(review): STR ends up in `eval'; only feed it content you trust. |#
+  (let* ((escaped (irregex-replace/all
+                   '(or (: #\# #\# (look-ahead (or #\{ #\()))
+                        (: #\# (look-ahead (~ #\{ #\()))
+                        (: #\# eos))
+                   str
+                   "##"))
+         ;; The here-doc terminator must not occur in STR; match it literally.
+         (delim (let loop ((attempt (number->string (pseudo-random-real))))
+                  (if (irregex-search (irregex-quote attempt) str)
+                      (loop (number->string (pseudo-random-real)))
+                      attempt)))
+         (template (make-concatenated-port
+                    (open-input-string (string-append "#<#" delim "\n"))
+                    (open-input-string escaped)
+                    (open-input-string (string-append "\n" delim "\n"))))
+         (expanded (let ((x (open-output-string)))
+                     (display (eval (read template) (interaction-environment)) x)
+                     (get-output-string x))))
+    ;; Forms evaluated only for effect print as #<unspecified>; drop those.
+    (irregex-replace/all '(: "#<unspecified>" (* whitespace))
+                         expanded
+                         "")))
+
+(define (transform-content content) #| string -> string
+  Run CONTENT through boudin's content pipeline: first expand embedded
+  Scheme expressions, then wrap naked paragraphs.
+  The result is raw html; it still has to be processed to extract metadata
+  and to be wrapped in a template. |#
+  (let ((expanded (expand-string content)))
+    (wrap-paragraphs expanded)))
+
+;;; Pages
+;; A <page> is a record type that wraps the two transformations outlined above.
+;; It also includes the extracted metadata from the page for processing.
+
+(define-record-type <page>             ; one site page: its source and output
+  (make-page url meta source dest source-path dest-path)
+  page?
+  (url page-url (setter page-url))     ; public address; settable
+  (meta page-meta (setter page-meta))  ; alist of extracted metadata; settable
+  (source page-source)                 ; raw source text, as read from the file
+  (dest page-dest (setter page-dest))  ; transformed content as sxml; settable
+  (source-path page-source-path)       ; path the source was read from
+  (dest-path page-dest-path (setter page-dest-path))) ; output path; settable
+
+(define (%read-port port) #| input-port -> string
+  Drain PORT in fixed-size chunks, close it, and return everything read.
+  Reading stops at end-of-file, or at the second empty-string chunk
+  (presumably a guard for ports that yield "" instead of EOF when drained). |#
+  (define chunk-size 512)
+  (define (next-chunk) (read-string chunk-size port))
+  (define (finish chunks)              ; CHUNKS holds the newest chunk first
+    (close-input-port port)
+    (apply string-append (reverse chunks)))
+  (let loop ((chunk (next-chunk))
+             (seen-empty? #f)
+             (chunks '()))
+    (if (or (eof-object? chunk)
+            (and seen-empty? (equal? chunk "")))
+        (finish chunks)
+        (loop (next-chunk)
+              (or seen-empty? (equal? chunk ""))
+              (cons chunk chunks)))))
+
+(define read-port #| [input-port] -> string; reads the current input port
+  when called without an argument. |#
+  (case-lambda ((port) (%read-port port))
+               (() (%read-port (current-input-port)))))
+
+(define (file->page file) #| string -> <page>
+  Read FILE, transform its content, and bundle everything up as a <page>.
+  The page holds the raw source, the transformed content parsed into sxml,
+  the metadata extracted from that sxml, and the source and output paths.
+  The page's url is the directory part of FILE's destination path when that
+  path is computed against the site's base url instead of the output dir. |#
+  (let* ((source (with-input-from-file file read-port))
+         (dest (html->sxml (transform-content source)))
+         (meta (extract-meta dest))
+         (url (pathname-directory (transform-path file (site-base-url))))
+         (dest-path (transform-path file (output-directory))))
+    (make-page url meta source dest file dest-path)))
+
+(define (extract-meta tree) #| sxml -> alist
+  Extract metadata from TREE's comments, however deeply they are nested.
+  Every non-empty comment line contributes one pair, split at its first colon.
+  Returns a flat alist of (key . value) pairs, in document order, where keys
+  and values are strings; a value may itself contain colons. |#
+  (define (parse-comment text)         ; string -> alist
+    (let next ((lines (string-split (string-trim-both text) "\n"))
+               (pairs '()))
+      (if (null? lines)
+          (reverse pairs)
+          (let ((kv (string-split (car lines) ":")))
+            (next (cdr lines)
+                  (if (null? kv) pairs ; nothing on this line to record
+                      (cons (cons (string-trim-both (car kv))
+                                  (string-trim
+                                   (string-intersperse (cdr kv) ":")))
+                            pairs)))))))
+  (let loop ((tree tree)
+             (acc '()))                ; pairs found so far, newest first
+    (cond
+      ((or (atom? tree) (null? tree)) (reverse acc))
+      ((and (pair? (car tree)) (eq? (caar tree) '*COMMENT*))
+       ;; Add to ACC instead of replacing it, so earlier comments survive.
+       (loop (cdr tree) (append (reverse (parse-comment (cadar tree))) acc)))
+      ((list? (car tree))              ; descend, splicing nested pairs in
+       (loop (cdr tree) (append (reverse (loop (car tree) '())) acc)))
+      (else (loop (cdr tree) acc)))))
+
+(define (meta-ref meta key default) #| alist string any -> any
+  Look KEY up in the alist META and return the associated value.
+  When KEY is absent, return DEFAULT instead; the caller always has to
+  supply one. |#
+  (cond ((assoc key meta) => cdr) (else default)))
+
+(define (page-meta-ref page key default) #| <page> string any -> any
+  Look KEY up in PAGE's metadata.
+  Returns the associated value, or DEFAULT when PAGE's metadata has no
+  entry for KEY. |#
+  (meta-ref (page-meta page) key default))
+
+;;; Time
+;; Time really only matters in feeds ... but it really does matter. So I need a
+;; few helper functions.
+
+(define publish-time ; a parameter, so one timestamp is shared by a whole run
+  (let ((started (seconds->utc-time))) ; evaluated once, at definition time
+    (make-parameter (time->string started "%FT%TZ"))))
+
+(define (page-mtime page) #| <page> -> (or time-string #f)
+  Return the modification time of PAGE's source file, formatted like
+  `publish-time' ("%FT%TZ", UTC), so the two can be used interchangeably.
+  Returns #f when PAGE has no source path or the file doesn't exist. |#
+  (let ((file (page-source-path page)))
+    (and file (file-exists? file)
+         (time->string (seconds->utc-time (file-modification-time file))
+                       "%FT%TZ"))))
+
+(define (page-guess-updated page) #| <page> -> time-string
+  Guess the "updated" property of PAGE.
+  Uses PAGE's "date" metadata when it parses, else the source file's mtime,
+  else the run's publish time.  A parsed date is formatted as "%FT%TZ". |#
+  (let* ((meta-date (page-meta-ref page "date" #f))
+         ;; Most specific format first, in case a shorter one matches a prefix.
+         (parsed (and meta-date
+                      (or (string->time meta-date "%Y-%m-%d%n%I:%M%n%p")
+                          (string->time meta-date "%Y-%m-%d%n%H:%M")
+                          (string->time meta-date "%Y-%m-%d")))))
+    (if parsed
+        (time->string (seconds->utc-time (local-time->seconds parsed))
+                      "%FT%TZ")       ; the date is read as local time
+        ;; These fallbacks are strings already; return them untouched.
+        (or (page-mtime page) (publish-time)))))
+
+;;; Templating
+;; Templating uses sxml to define a layout for pages and indices (index.html,
+;; feed.xml). Sxml's "stylesheets" can be used to extract metadata out of html
+;; comments and to further process the document.
+
+;; Each template has a default, but the user can override by defining templates
+;; in .config.scm (see below).  All templates are parameters holding a function
+;; that takes one or more pages (<page> records) and returns a string.
+
+(define page-template ; parameter: <page> -> string, one page's html document
+  (make-parameter
+   (lambda (page)
+     (let* ((title (page-meta-ref page "title" #f))
+            (heading (if title `(h1 ,title) ""))) ; no title, no heading
+       (sxml->html
+        `(html (@ (lang "en"))
+               (head (title ,(page-meta-ref page "title" "[untitled]")))
+               (body ,heading ,@(cdr (page-dest page)))))))))
+
+(define index-template ; parameter: <page> ... -> string, an html index page
+  (make-parameter
+   (lambda pages
+     ;; One list item per page, labelled with the page's title or, lacking
+     ;; one, with the file-name part of its source path.
+     (define (page->item pg)
+       (let ((label (page-meta-ref pg "title"
+                                   (pathname-file (page-source-path pg)))))
+         `(li (a (@ (href ,(page-url pg))) ,label))))
+     (let ((name (site-name)))
+       (sxml->html
+        `(html (@ (lang "en"))
+               (head (title ,name))
+               (body (h1 ,name)
+                     (ul ,@(map page->item pages)))))))))
+
+(define feed-template ; parameter: <page> ... -> string, the site's feed document
+ (make-parameter
+ (lambda pages
+ (with-output-to-string ; capture whatever write-atom-doc prints
+ (lambda ()
+ (write-atom-doc
+ (make-atom-doc
+ (make-feed
+ title: (make-title (site-name))
+ id: (site-base-url)
+ updated: (publish-time) ; I don't like these semantics ..
+ authors: (list (make-author name: (site-author)
+ uri: (site-base-url)))
+ links: (list (make-link type: 'html
+ uri-language: "en"
+ uri: (site-base-url))
+ (make-link relation: "self"
+ type: "application/atom+xml"
+ uri: (make-pathname ; the feed's own address: base url + feed.xml
+ (site-base-url) "feed" "xml")))
+ rights: (make-rights (site-rights))
+ generator: (make-generator "Boudin"
+ uri: "https://git.acdw.net/boudin"
+ version: "0.1.0")
+ entries: (map (lambda (pg) ; one entry per page
+ (make-entry
+ title: (make-title
+ (page-meta-ref pg "title" "[untitled]"))
+ links: (list (make-link type: 'html
+ uri: (page-url pg)))
+ id: (page-url pg) ; the page's url doubles as the entry id
+ updated: (page-guess-updated pg)
+ ;;published:
+ content:
+ `(atom:content (@ (type "html"))
+ ,(cdr (page-dest pg))))) ; NOTE(review): unquote, not ,@ -- children stay one nested list; confirm intended
+ pages)))))))))
+
+;;; Collecting pages from a directory
+
+(define (collect-pages dir ext) #| path string -> (list <page> ...) |#
+  (let ((pattern (make-pathname dir "*" ext))) ; every DIR/*.EXT file
+    (map file->page (glob pattern))))
+
+;;; Publishing
+
+(define (apply-template template pages) ; a lone page is passed as one argument
+  (if (list? pages)
+      (apply template pages)             ; a list is spread into arguments
+      (template pages)))
+
+;;; Configuration
+
+(define output-directory ; directory that destination paths are placed under
+ (make-parameter "out/"))
+
+(define site-name ; title used by the default index and feed templates
+ (make-parameter "[A boudin web site]"))
+
+(define site-base-url ; prefix of page urls; also the feed's id and link target
+ (make-parameter "https://example.com/"))
+
+(define site-author ; author name used by the default feed template
+ (make-parameter "nobody"))
+
+(define site-rights ; NOTE: built once from site-author's value at this point; later changes to site-author don't reach it
+ (make-parameter (string-append "(C) " (site-author))))
+
+;;; Options & Operands (SRFI 37)
+
+(define opt/help ; -h / --help: print usage on the error port, then exit
+  (let ((show-usage
+         (lambda ()
+           (print "Usage: boudin [OPTIONS]\n"
+                  "Options:\n"
+                  "-h, --help show this help and exit\n"
+                  "-C dir, --directory dir\n"
+                  " build site in DIR instead of current directory"))))
+    (option '(#\h "help")                ; names
+            #f                           ; required arg?
+            #f                           ; optional arg?
+            (lambda _                    ; called as (opt name arg seeds ...)
+              (with-output-to-port (current-error-port) show-usage)
+              (exit)))))
+
+(define opt/directory ; -C dir / --directory dir: switch into DIR, which must exist
+  (option '(#\C "directory") #t #f
+          (lambda (opt name arg seeds)
+            (unless (directory-exists? arg)
+              (error "Directory doesn't exist" arg))
+            (change-directory arg)
+            seeds)))
+
+(define (process-args args) #| (list string ...) -> seeds
+  Fold ARGS through boudin's options; unknown options and operands error. |#
+  (args-fold args
+             ;; Options
+             (list opt/help opt/directory)
+             ;; Unrecognized option proc (option name arg seeds ...); every
+             ;; formal needs a distinct name, R7RS forbids repeating one.
+             (lambda (opt name arg seeds)
+               (error "Unrecognized option" name))
+             ;; Operand proc (operand seeds ...)
+             (lambda (name seeds)
+               (error "Bad operand" name))
+             '()))                       ; initial seed: the empty list
+
+;;; Main entry point
+
+(define (main args) ; ARGS: the command-line arguments, as a list of strings
+ (process-args args)
+ ;; Only option processing happens so far; always returns #f.
+ #f)
+
+(cond-expand ; call `main' only when the chicken-script or compiling feature is set
+ ((or chicken-script compiling)
+ (main (command-line-arguments)))
+ (else)) ; otherwise do nothing
diff --git a/test.html b/test.html
new file mode 100644
index 0000000..6348f0a
--- /dev/null
+++ b/test.html
@@ -0,0 +1,19 @@
+
+
+some html
+(without p tags)
+
+Here is a test paragraph. example link.
+
+Here's another. I wonder if it'll just do the thing .. or whatever. Maybe I
+should try to make it multiple lines, as well.
+
+
+ -
+ one plus two is #(+ 1 2).
+
+ - two
+
--
cgit 1.4.1-21-gabe81