From f1cf20ac8a05a8571deca7fcd1a5118f3fcd77fb Mon Sep 17 00:00:00 2001 From: Case Duckworth Date: Fri, 7 Jul 2023 23:59:04 -0500 Subject: Initial commit --- boudin.scm | 414 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ test.html | 19 +++ 2 files changed, 433 insertions(+) create mode 100755 boudin.scm create mode 100644 test.html diff --git a/boudin.scm b/boudin.scm new file mode 100755 index 0000000..737cbf1 --- /dev/null +++ b/boudin.scm @@ -0,0 +1,414 @@ +#!/bin/sh +#| -*- scheme -*- +exec csi -R r7rs -s "$0" "$@" +boudin --- a little static site generator +|# + +(import (chicken file) + (chicken file posix) + (chicken irregex) + (chicken pathname) + (chicken port) + (chicken process-context) + (chicken random) + (chicken string) + (chicken time posix) + (chicanery) + (atom) + (html-parser) + (srfi 37) + (srfi 152) + (sxpath)) + +;;; Transformations +;; A static site generator can be thought of largely as two sets of +;; transformations: one transforming given input content to output content, and +;; another transforming source paths to destination paths. Since both, for my +;; purposes, are strings, I have a generic function that can perform both +;; transformations. + +(define (transform str . procs) #| string (string ->string) ... -> string + Apply PROCS to STR, left-to-right, and return the result. + Each PROC will be called with its predecessor's output, and should take a + string as input and return a string. |# + (if (null? procs) + str + (apply transform ((car procs) str) (cdr procs)))) + +;;; Path transformations + +(define (indexify path) #| path -> path + Replace the PATH's extension with "/index.html". + |# + (make-pathname (pathname-strip-extension path) + "index" + "html")) + +(define (transform-path path outdir) #| path => path + Transform PATH according to boudin's needs. |# + (transform path + normalize-pathname + (lambda (p) (pathname-replace-directory p outdir)) + indexify)) + +;;; Content transformations + +(define (split-paragraphs str) #| string -> (list string ...) + Split STR into paragraphs. + A paragraph is a contiguous series of text lines separated from other + paragraphs by at least 2 newline \n characters. This procedure collapses + inter-paragraph space. |# + (let loop ((lines (string-split str "\n")) + (par '()) + (acc '())) + (cond + ((and (null? lines) ; base case: no more lines + (null? par)) ; ... or pending paragraph + (reverse acc)) + ((null? lines) ; add the final paragraph + (loop '() '() (cons (apply string-append (reverse par)) acc))) + ((equal? (car lines) "") ; paragraph break + (loop (cdr lines) + '() + (cons (apply string-append (reverse par)) acc))) + (else ; line break + (loop (cdr lines) + (cons (string-append (car lines) "\n") par) + acc))))) + +(define (wrap-paragraphs str) #| string -> string + Wrap naked paragraphs of STR in

tags. + A 'naked' paragraph is one that doesn't begin with '<' (after optional + beginning whitespace). |# + (let loop ((pars (map string-trim (split-paragraphs str))) + (acc '())) + (cond + ((null? pars) + (apply string-append (reverse acc))) + ((zero? (string-length (car pars))) + (loop (cdr pars) + acc)) + ((eq? #\< (string-ref (car pars) 0)) + (loop (cdr pars) + (cons (car pars) + acc))) + (else + (loop (cdr pars) + (cons (string-append "

" (car pars) "

\n") + acc)))))) + +(define (expand-string str) #| string -> string + Expand STR by passing it in a port to CHICKEN's #<# string interpolation. + Yes, this is as cursed as it sounds. + + To make it slightly less so, all # are duplicated to escape them, except for + those before ( and {. To escape /those/, double them. |# + (let* ((escaped (irregex-replace/all + '(or (: #\# #\# (look-ahead (or #\{ #\())) + (: #\# (look-ahead (~ #\{ #\())) + (: #\# eos)) + str + "##")) + (delim (let loop ((attempt (number->string (pseudo-random-real)))) + (if (irregex-search attempt str) + (loop (number->string (pseudo-random-real))) + attempt))) + (template (make-concatenated-port + (open-input-string (string-append "#<#" delim "\n")) + (open-input-string escaped) + (open-input-string (string-append "\n" delim "\n")))) + (expanded (let ((x (open-output-string))) + (display (eval (read template) + (interaction-environment)) + x) + (get-output-string x)))) + (irregex-replace/all '(: "#" + (* whitespace)) + expanded + ""))) + +(define (transform-content content) #| string -> string + Transform CONTENT according to boudin's needs. + This is the raw html, and will still need to be processed to extract metadata + and to be further wrapped in a template. |# + (transform content + expand-string + wrap-paragraphs)) + +;;; Pages +;; A is a record type that wraps the two transformations outlined above. +;; It also includes the extracted metadata from the page for processing. + +(define-record-type + (make-page url meta source dest source-path dest-path) + page? + (url page-url (setter page-url)) + (meta page-meta (setter page-meta)) + (source page-source) + (dest page-dest (setter page-dest)) + (source-path page-source-path) + (dest-path page-dest-path (setter page-dest-path))) + +(define (%read-port port) + (let ((chunk-size 512)) + (let loop ((next (read-string chunk-size port)) + (blank? #f) + (acc '())) + (cond + ((or (eof-object? next) + (and blank? (equal? next ""))) + (close-input-port port) + (apply string-append (reverse acc))) + ((equal? next "") + (loop (read-string chunk-size port) + #t + (cons next acc))) + (else + (loop (read-string chunk-size port) + blank? + (cons next acc))))))) + +(define read-port + (case-lambda + (() (%read-port (current-input-port))) + ((p) (%read-port p)))) + +(define (file->page file) #| string -> + Convert FILE to an sxml tree after transforming it. + This procedure returns both the sxml of the transformed content, but that + page's metadata, too. |# + (let* ((source (with-input-from-file file read-port)) + (dest (html->sxml (transform-content source)))) + (make-page (pathname-directory (transform-path file (site-base-url))) + (extract-meta dest) + source + dest + file + (transform-path file (output-directory))))) + +(define (extract-meta tree) #| sxml -> alist + Extract metadata from TREE's comments. + Returns an alist of (key . value) pairs where keys and values are strings. |# + (let loop ((tree tree) + (acc '())) + (cond + ((or (atom? tree) + (null? tree)) + (reverse acc)) + ((and (list? (car tree)) + (eq? (caar tree) '*COMMENT*)) + (loop (cdr tree) + (let* ((comment (string-trim-both (cadar tree))) + (lines (string-split comment "\n"))) + (map (lambda (l) + (let ((kv (string-split l ":"))) + (cons (string-trim-both (car kv)) + (string-trim + (string-intersperse (cdr kv) ":"))))) + lines)))) + ((list? (car tree)) + (loop (cdr tree) + (let ((subtree (loop (car tree) '()))) + (if (null? subtree) + acc + (cons subtree acc))))) + (else (loop (cdr tree) acc))))) + +(define (meta-ref meta key default) #| alist string string -> + Get KEY's value from META, or DEFAULT if it doesn't exist. + DEFAULT is required because I think it's a good idea to require it. |# + (let ((x (assoc key meta))) + (if x (cdr x) default))) + +(define (page-meta-ref page key default) #| string string -> + Get KEY's value from PAGE's meta, or DEFAULT. + |# + (let ((meta (page-meta page))) + (meta-ref meta key default))) + +;;; Time +;; Time really only matters in feeds ... but it really does matter. So I need a +;; few helper functions. + +(define publish-time ; this is a parameter so it's consistent across a run. + (make-parameter + (time->string (seconds->utc-time) "%FT%TZ"))) + +(define (page-mtime page) #| -> time-string + Grab the mtime field from PAGE's source file. |# + (let ((file (page-source-path page))) + (and file + (file-exists? file) + (time->string (seconds->utc-time + (file-modification-time + file)))))) + +(define (page-guess-updated page) #| -> time-string + Guess the "updated" property of PAGE. |# + (let ((meta-date (page-meta-ref page "date" #f))) + (if meta-date + ;; Attempt to parse the date metadata field. + (time->string (seconds->utc-time ; This double-conversion is /great/ + (local-time->seconds + (or (string->time meta-date "%Y-%m-%d") + (string->time meta-date "%Y-%m-%d%n%H:%M") + (string->time meta-date "%Y-%m-%d%n%I:%M%n%p") + ;; ... more ? + (or (page-mtime page) + (publish-time)))))) + (or (page-mtime page) + (publish-time))))) + +;;; Templating +;; Templating uses sxml to define a layout for pages and indeces (index.html, +;; feed.xml). Sxml's "stylesheets" can be used to extract metadata out of html +;; comments and to further process the document. + +;; Each template has a default, but the user can override by defining templates +;; in .config.scm (see below). All templates are function parameters that take +;; a page's sxml tree (argument PAGE) and return a string. + +(define page-template + (make-parameter + (lambda (page) + (sxml->html + `(html (@ (lang "en")) + (head (title ,(page-meta-ref page "title" "[untitled]"))) + (body ,(let ((title (page-meta-ref page "title" #f))) + (if title `(h1 ,title) "")) + ,@(cdr (page-dest page)))))))) + +(define index-template + (make-parameter + (lambda pages + (sxml->html + `(html (@ (lang "en")) + (head (title ,(site-name))) + (body (h1 ,(site-name)) + (ul + ,@(map (lambda (pg) + `(li (a (@ (href ,(page-url pg))) + ,(page-meta-ref pg + "title" + (pathname-file + (page-source-path pg)))))) + pages)))))))) + +(define feed-template + (make-parameter + (lambda pages + (with-output-to-string + (lambda () + (write-atom-doc + (make-atom-doc + (make-feed + title: (make-title (site-name)) + id: (site-base-url) + updated: (publish-time) ; I don't like these semantics .. + authors: (list (make-author name: (site-author) + uri: (site-base-url))) + links: (list (make-link type: 'html + uri-language: "en" + uri: (site-base-url)) + (make-link relation: "self" + type: "application/atom+xml" + uri: (make-pathname + (site-base-url) "feed" "xml"))) + rights: (make-rights (site-rights)) + generator: (make-generator "Boudin" + uri: "https://git.acdw.net/boudin" + version: "0.1.0") + entries: (map (lambda (pg) + (make-entry + title: (make-title + (page-meta-ref pg "title" "[untitled]")) + links: (list (make-link type: 'html + uri: (page-url pg))) + id: (page-url pg) + updated: (page-guess-updated pg) + ;;published: + content: + `(atom:content (@ (type "html")) + ,(cdr (page-dest pg))))) + pages))))))))) + +;;; Collecting pages from a directory + +(define (collect-pages dir ext) + (map file->page + (glob (make-pathname dir "*" ext)))) + +;;; Publishing + +(define (apply-template template pages) + (apply template (if (list? pages) + pages + (list pages)))) + +;;; Configuration + +(define output-directory + (make-parameter "out/")) + +(define site-name + (make-parameter "[A boudin web site]")) + +(define site-base-url + (make-parameter "https://example.com/")) + +(define site-author + (make-parameter "nobody")) + +(define site-rights + (make-parameter (string-append "(C) " (site-author)))) + +;;; Options & Operands (SRFI 37) + +(define opt/help + (option '(#\h "help") ; Names + #f ; Required arg? + #f ; Optional arg? + (lambda _ ; Option proc (opt name arg seeds ...) + (with-output-to-port (current-error-port) + (lambda () + (print "Usage: boudin [OPTIONS]\n" + "Options:\n" + "-h, --help show this help and exit\n" + "-C dir, --directory dir\n" + " build site in DIR instead of current directory" + ))) + (exit)))) + +(define opt/directory + (option '(#\C "directory") #t #f + (lambda (opt name arg seeds) + (if (directory-exists? arg) + (change-directory arg) + (error "Directory doesn't exist" arg)) + seeds))) + +(define (process-args args) + (args-fold args + ;; Options + (list opt/help + opt/directory) + ;; Unrecognized option proc (option name arg seeds ...) + (lambda (_ name _ _) + (error "Unrecognized option" name)) + ;; Operand proc (operand seeds ...) + (lambda (name _) + (error "Bad operand" name)) + ;; Seeds + '())) + +;;; Main entry point + +(define (main args) + (process-args args) + + #f) + +(cond-expand + ((or chicken-script compiling) + (main (command-line-arguments))) + (else)) diff --git a/test.html b/test.html new file mode 100644 index 0000000..6348f0a --- /dev/null +++ b/test.html @@ -0,0 +1,19 @@ + + +

some html

+

(without p tags)

+ +Here is a test paragraph. example link. + +Here's another. I wonder if it'll just do the thing .. or whatever. Maybe I +should try to make it multiple lines, as well. + +
    +
  • + one plus two is #(+ 1 2). +
  • +
  • two
  • +
-- cgit 1.4.1-21-gabe81