From f1cf20ac8a05a8571deca7fcd1a5118f3fcd77fb Mon Sep 17 00:00:00 2001
From: Case Duckworth
Date: Fri, 7 Jul 2023 23:59:04 -0500
Subject: Initial commit

---
 boudin.scm | 414 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 414 insertions(+)
 create mode 100755 boudin.scm

(limited to 'boudin.scm')
diff --git a/boudin.scm b/boudin.scm
new file mode 100755
index 0000000..737cbf1
--- /dev/null
+++ b/boudin.scm
@@ -0,0 +1,414 @@
+#!/bin/sh
+#| -*- scheme -*-
+exec csi -R r7rs -s "$0" "$@"
+boudin --- a little static site generator
+|#
+
+(import (chicken file)
+        (chicken file posix)
+        (chicken irregex)
+        (chicken pathname)
+        (chicken port)
+        (chicken process-context)
+        (chicken random)
+        (chicken string)
+        (chicken time posix)
+        (chicanery)
+        (atom)
+        (html-parser)
+        (srfi 37)
+        (srfi 152)
+        (sxpath))
+
+;;; Transformations
+;; A static site generator can be thought of largely as two sets of
+;; transformations: one transforming given input content to output content, and
+;; another transforming source paths to destination paths.  Since both, for my
+;; purposes, are strings, I have a generic function that can perform both
+;; transformations.
+
+(define (transform str . procs) #| string (string ->string) ... -> string
+  Apply PROCS to STR, left-to-right, and return the result.
+  Each PROC will be called with its predecessor's output, and should take a
+  string as input and return a string. |#
+  (if (null? procs)
+      str
+      (apply transform ((car procs) str) (cdr procs))))
+
+;;; Path transformations
+
+(define (indexify path) #| path -> path
+  Replace the PATH's extension with "/index.html".
+  |#
+  (make-pathname (pathname-strip-extension path)
+                 "index"
+                 "html"))
+
+(define (transform-path path outdir) #| path => path
+  Transform PATH according to boudin's needs. |#
+  (transform path
+             normalize-pathname
+             (lambda (p) (pathname-replace-directory p outdir))
+             indexify))
+
+;;; Content transformations
+
+(define (split-paragraphs str) #| string -> (list string ...)
+  Split STR into paragraphs.
+  A paragraph is a contiguous series of text lines separated from other
+  paragraphs by at least 2 newline \n characters.  This procedure collapses
+  inter-paragraph space. |#
+  (let loop ((lines (string-split str "\n"))
+             (par '())
+             (acc '()))
+    (cond
+     ((and (null? lines)                ; base case: no more lines
+           (null? par))                 ; ... or pending paragraph
+      (reverse acc))
+     ((null? lines)                     ; add the final paragraph
+      (loop '() '() (cons (apply string-append (reverse par)) acc)))
+     ((equal? (car lines) "")           ; paragraph break
+      (loop (cdr lines)
+            '()
+            (cons (apply string-append (reverse par)) acc)))
+     (else                              ; line break
+      (loop (cdr lines)
+            (cons (string-append (car lines) "\n") par)
+            acc)))))
+
+(define (wrap-paragraphs str) #| string -> string
+  Wrap naked paragraphs of STR in <p> tags.
+  A 'naked' paragraph is one that doesn't begin with '<' (after optional
+  beginning whitespace). |#
+  (let loop ((pars (map string-trim (split-paragraphs str)))
+             (acc '()))
+    (cond
+     ((null? pars)
+      (apply string-append (reverse acc)))
+     ((zero? (string-length (car pars)))
+      (loop (cdr pars)
+            acc))
+     ((eq? #\< (string-ref (car pars) 0))
+      (loop (cdr pars)
+            (cons (car pars)
+                  acc)))
+     (else
+      (loop (cdr pars)
+            (cons (string-append "<p>" (car pars) "</p>\n")
+                  acc))))))
+
+(define (expand-string str) #| string -> string
+  Expand STR by passing it in a port to CHICKEN's #<# string interpolation.
+  Yes, this is as cursed as it sounds.
+
+  To make it slightly less so, all # are duplicated to escape them, except for
+  those before ( and {.  To escape /those/, double them. |#
+  (let* ((escaped (irregex-replace/all
+                   '(or (: #\# #\# (look-ahead (or #\{ #\()))
+                        (: #\# (look-ahead (~ #\{ #\()))
+                        (: #\# eos))
+                   str
+                   "##"))
+         (delim (let loop ((attempt (number->string (pseudo-random-real))))
+                  (if (irregex-search attempt str)
+                      (loop (number->string (pseudo-random-real)))
+                      attempt)))
+         (template (make-concatenated-port
+                    (open-input-string (string-append "#<#" delim "\n"))
+                    (open-input-string escaped)
+                    (open-input-string (string-append "\n" delim "\n"))))
+         (expanded (let ((x (open-output-string)))
+                     (display (eval (read template)
+                                    (interaction-environment))
+                              x)
+                     (get-output-string x))))
+    (irregex-replace/all '(: "#<unspecified>"
+                             (* whitespace))
+                         expanded
+                         "")))
+
+(define (transform-content content) #| string -> string
+  Transform CONTENT according to boudin's needs.
+  This is the raw html, and will still need to be processed to extract metadata
+  and to be further wrapped in a template. |#
+  (transform content
+             expand-string
+             wrap-paragraphs))
+
+;;; Pages
+;; A <page> is a record type that wraps the two transformations outlined above.
+;; It also includes the extracted metadata from the page for processing.
+
+(define-record-type <page>
+  (make-page url meta source dest source-path dest-path)
+  page?
+  (url page-url (setter page-url))
+  (meta page-meta (setter page-meta))
+  (source page-source)
+  (dest page-dest (setter page-dest))
+  (source-path page-source-path)
+  (dest-path page-dest-path (setter page-dest-path)))
+
+(define (%read-port port)
+  (let ((chunk-size 512))
+    (let loop ((next (read-string chunk-size port))
+               (blank? #f)
+               (acc '()))
+      (cond
+       ((or (eof-object? next)
+            (and blank? (equal? next "")))
+        (close-input-port port)
+        (apply string-append (reverse acc)))
+       ((equal? next "")
+        (loop (read-string chunk-size port)
+              #t
+              (cons next acc)))
+       (else
+        (loop (read-string chunk-size port)
+              blank?
+              (cons next acc)))))))
+
+(define read-port
+  (case-lambda
+    (() (%read-port (current-input-port)))
+    ((p) (%read-port p))))
+
+(define (file->page file) #| string -> <page>
+  Convert FILE to an sxml tree after transforming it.
+  This procedure returns both the sxml of the transformed content, but that
+  page's metadata, too. |#
+  (let* ((source (with-input-from-file file read-port))
+         (dest (html->sxml (transform-content source))))
+    (make-page (pathname-directory (transform-path file (site-base-url)))
+               (extract-meta dest)
+               source
+               dest
+               file
+               (transform-path file (output-directory)))))
+
+(define (extract-meta tree) #| sxml -> alist
+  Extract metadata from TREE's comments.
+  Returns an alist of (key . value) pairs where keys and values are strings. |#
+  (let loop ((tree tree)
+             (acc '()))
+    (cond
+     ((or (atom? tree)
+          (null? tree))
+      (reverse acc))
+     ((and (list? (car tree))
+           (eq? (caar tree) '*COMMENT*))
+      (loop (cdr tree)
+            (let* ((comment (string-trim-both (cadar tree)))
+                   (lines (string-split comment "\n")))
+              (map (lambda (l)
+                     (let ((kv (string-split l ":")))
+                       (cons (string-trim-both (car kv))
+                             (string-trim
+                              (string-intersperse (cdr kv) ":")))))
+                   lines))))
+     ((list? (car tree))
+      (loop (cdr tree)
+            (let ((subtree (loop (car tree) '())))
+              (if (null? subtree)
+                  acc
+                  (cons subtree acc)))))
+     (else (loop (cdr tree) acc)))))
+
+(define (meta-ref meta key default) #| alist string string -> <?>
+  Get KEY's value from META, or DEFAULT if it doesn't exist.
+  DEFAULT is required because I think it's a good idea to require it. |#
+  (let ((x (assoc key meta)))
+    (if x (cdr x) default)))
+
+(define (page-meta-ref page key default) #| <page> string string -> <?>
+  Get KEY's value from PAGE's meta, or DEFAULT.
+  |#
+  (let ((meta (page-meta page)))
+    (meta-ref meta key default)))
+
+;;; Time
+;; Time really only matters in feeds ... but it really does matter.  So I need a
+;; few helper functions.
+
+(define publish-time      ; this is a parameter so it's consistent across a run.
+  (make-parameter
+   (time->string (seconds->utc-time) "%FT%TZ")))
+
+(define (page-mtime page) #| <page> -> time-string
+  Grab the mtime field from PAGE's source file. |#
+  (let ((file (page-source-path page)))
+    (and file
+         (file-exists? file)
+         (time->string (seconds->utc-time
+                        (file-modification-time
+                         file))))))
+
+(define (page-guess-updated page) #| <page> -> time-string
+  Guess the "updated" property of PAGE. |#
+  (let ((meta-date (page-meta-ref page "date" #f)))
+    (if meta-date
+        ;; Attempt to parse the date metadata field.
+        (time->string (seconds->utc-time ; This double-conversion is /great/
+                       (local-time->seconds
+                        (or (string->time meta-date "%Y-%m-%d")
+                            (string->time meta-date "%Y-%m-%d%n%H:%M")
+                            (string->time meta-date "%Y-%m-%d%n%I:%M%n%p")
+                            ;; ... more ?
+                            (or (page-mtime page)
+                                (publish-time))))))
+        (or (page-mtime page)
+            (publish-time)))))
+
+;;; Templating
+;; Templating uses sxml to define a layout for pages and indeces (index.html,
+;; feed.xml).  Sxml's "stylesheets" can be used to extract metadata out of html
+;; comments and to further process the document.
+
+;; Each template has a default, but the user can override by defining templates
+;; in .config.scm (see below).  All templates are function parameters that take
+;; a page's sxml tree (argument PAGE) and return a string.
+
+(define page-template
+  (make-parameter
+   (lambda (page)
+     (sxml->html
+      `(html (@ (lang "en"))
+             (head (title ,(page-meta-ref page "title" "[untitled]")))
+             (body ,(let ((title (page-meta-ref page "title" #f)))
+                      (if title `(h1 ,title) ""))
+                   ,@(cdr (page-dest page))))))))
+
+(define index-template
+  (make-parameter
+   (lambda pages
+     (sxml->html
+      `(html (@ (lang "en"))
+             (head (title ,(site-name)))
+             (body (h1 ,(site-name))
+                   (ul
+                    ,@(map (lambda (pg)
+                             `(li (a (@ (href ,(page-url pg)))
+                                     ,(page-meta-ref pg
+                                                     "title"
+                                                     (pathname-file
+                                                      (page-source-path pg))))))
+                           pages))))))))
+
+(define feed-template
+  (make-parameter
+   (lambda pages
+     (with-output-to-string
+       (lambda ()
+         (write-atom-doc
+          (make-atom-doc
+           (make-feed
+            title: (make-title (site-name))
+            id: (site-base-url)
+            updated: (publish-time)       ; I don't like these semantics ..
+            authors: (list (make-author name: (site-author)
+                                        uri: (site-base-url)))
+            links: (list (make-link type: 'html
+                                    uri-language: "en"
+                                    uri: (site-base-url))
+                         (make-link relation: "self"
+                                    type: "application/atom+xml"
+                                    uri: (make-pathname
+                                          (site-base-url) "feed" "xml")))
+            rights: (make-rights (site-rights))
+            generator: (make-generator "Boudin"
+                                       uri: "https://git.acdw.net/boudin"
+                                       version: "0.1.0")
+            entries: (map (lambda (pg)
+                            (make-entry
+                             title: (make-title
+                                     (page-meta-ref pg "title" "[untitled]"))
+                             links: (list (make-link type: 'html
+                                                     uri: (page-url pg)))
+                             id: (page-url pg)
+                             updated: (page-guess-updated pg)
+                             ;;published:
+                             content:
+                             `(atom:content (@ (type "html"))
+                                            ,(cdr (page-dest pg)))))
+                          pages)))))))))
+
+;;; Collecting pages from a directory
+
+(define (collect-pages dir ext)
+  (map file->page
+       (glob (make-pathname dir "*" ext))))
+
+;;; Publishing
+
+(define (apply-template template pages)
+  (apply template (if (list? pages)
+                      pages
+                      (list pages))))
+
+;;; Configuration
+
+(define output-directory
+  (make-parameter "out/"))
+
+(define site-name
+  (make-parameter "[A boudin web site]"))
+
+(define site-base-url
+  (make-parameter "https://example.com/"))
+
+(define site-author
+  (make-parameter "nobody"))
+
+(define site-rights
+  (make-parameter (string-append "(C) " (site-author))))
+
+;;; Options & Operands (SRFI 37)
+
+(define opt/help
+  (option '(#\h "help")                 ; Names
+          #f                            ; Required arg?
+          #f                            ; Optional arg?
+          (lambda _                          ; Option proc (opt name arg seeds ...)
+            (with-output-to-port (current-error-port)
+              (lambda ()
+                (print "Usage: boudin [OPTIONS]\n"
+                       "Options:\n"
+                       "-h, --help     show this help and exit\n"
+                       "-C dir, --directory dir\n"
+                       "     build site in DIR instead of current directory"
+                       )))
+            (exit))))
+
+(define opt/directory
+  (option '(#\C "directory") #t #f
+          (lambda (opt name arg seeds)
+            (if (directory-exists? arg)
+                (change-directory arg)
+                (error "Directory doesn't exist" arg))
+            seeds)))
+
+(define (process-args args)
+  (args-fold args
+             ;; Options
+             (list opt/help
+                   opt/directory)
+             ;; Unrecognized option proc (option name arg seeds ...)
+             (lambda (_ name _ _)
+               (error "Unrecognized option" name))
+             ;; Operand proc (operand seeds ...)
+             (lambda (name _)
+               (error "Bad operand" name))
+             ;; Seeds
+             '()))
+
+;;; Main entry point
+
+(define (main args)
+  (process-args args)
+
+  #f)
+
+(cond-expand
+  ((or chicken-script compiling)
+   (main (command-line-arguments)))
+  (else))
-- 
cgit 1.4.1-21-gabe81