From 3d10340f69651608362bacbe91cc10a90bdf4de3 Mon Sep 17 00:00:00 2001 From: Case Duckworth Date: Wed, 1 Sep 2021 08:37:17 -0500 Subject: Add titlecase --- init.el | 12 +++-- lisp/titlecase.el | 157 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 166 insertions(+), 3 deletions(-) create mode 100644 lisp/titlecase.el diff --git a/init.el b/init.el index e3fee58..0eb32db 100644 --- a/init.el +++ b/init.el @@ -976,11 +976,16 @@ like a dumbass." (setq read-extended-command-predicate #'command-completion-default-include-p)) + (defvar case-map (make-sparse-keymap) + "A keymap for setting case in various ways.") + (global-set-key (kbd "C-c c") case-map) + (:global "M-=" count-words "C-w" kill-region-or-backward-word - "C-c c" capitalize-dwim - "C-c u" upcase-dwim - "C-c l" downcase-dwim + "C-c c c" capitalize-dwim + "C-c c t" titlecase-dwim + "C-c c u" upcase-dwim + "C-c c l" downcase-dwim "C-c d" acdw/insert-iso-date "M-`" nil) @@ -999,6 +1004,7 @@ like a dumbass." (defalias 'backward-word-with-case 'backward-word "Alias for `backward-word for use in `case-repeat-map'.") + ;; XXX: this isn't repeating correctly ... (defvar case-repeat-map (let ((map (make-sparse-keymap))) (define-key map "c" #'capitalize-word) diff --git a/lisp/titlecase.el b/lisp/titlecase.el new file mode 100644 index 0000000..64da5b4 --- /dev/null +++ b/lisp/titlecase.el @@ -0,0 +1,157 @@ +;;; titlecase.el -*- lexical-binding: t; -*- + +;; https://hungyi.net/posts/programmers-way-to-title-case/ + +(require 'cl-lib) +(require 'subr-x) + +;;;###autoload +(defun titlecase-string (str) + "Convert string STR to title case and return the resulting string." + (let* ((case-fold-search nil) + (str-length (length str)) + ;; A list of markers that indicate start of a new phrase within the + ;; title, e.g. "The Lonely Reindeer: A Christmas Story" + ;; must be followed by one of word-boundary-chars + (new-phrase-chars '(?: ?. ?? ?\; ?\n ?\r)) + ;; immediately triggers new phrase behavior without waiting for word + ;; boundary + (immediate-new-phrase-chars '(?\n ?\r)) + ;; A list of characters that indicate "word boundaries"; used to split + ;; the title into processable segments + (word-boundary-chars (append '(? ?– ?— ?- ?‑ ?/) + immediate-new-phrase-chars)) + ;; A list of small words that should not be capitalized (in the right + ;; conditions) + (small-words '("a" "an" "and" "as" "at" "but" "by" "en" "for" "if" + "in" "of" "on" "or" "the" "to" "v" "v." "vs" "vs." + "via")) + ;; Fix if str is ALL CAPS + (str (if (string-match-p "[a-z]" str) str (downcase str))) + ;; Reduce over a state machine to do title casing + (final-state + (cl-reduce + (lambda (state char) + (let* ((result (aref state 0)) + (last-segment (aref state 1)) + (first-word-p (aref state 2)) + (was-in-path-p (aref state 3)) + (last-char (car last-segment)) + (in-path-p (or (and (eq char ?/) + (or (not last-segment) + (member last-char '(?. ?~)))) + (and was-in-path-p + (not + (or (eq char ? ) + (member + char + immediate-new-phrase-chars)))))) + (end-p + ;; are we at the end of the input string? + (eq (+ (length result) (length last-segment) 1) + str-length)) + (pop-p + ;; do we need to pop a segment onto the output result? + (or end-p (and (not in-path-p) + (member char word-boundary-chars)))) + (segment + ;; add the current char to the current segment + (cons char last-segment)) + (segment-string + ;; the readable version of the segment + (apply #'string (reverse segment))) + (small-word-p + ;; was the last segment a small word? + (member (downcase (substring segment-string 0 -1)) + small-words)) + (capitalize-p + ;; do we need to capitalized this segment or lowercase it? + (or end-p first-word-p (not small-word-p))) + (ignore-segment-p + ;; ignore explicitly capitalized segments + (or (string-match-p "[a-zA-Z].*[A-Z]" segment-string) + ;; ignore URLs + (string-match-p "^https?:" segment-string) + ;; ignore hostnames and namespaces.like.this + (string-match-p "\\w\\.\\w" segment-string) + ;; ignore windows filesystem paths + (string-match-p "^[A-Za-z]:\\\\" segment-string) + ;; ignore unix filesystem paths + was-in-path-p + ;; ignore email addresses and user handles with @ symbol + (member ?@ segment))) + (next-result + (if pop-p + (concat result + (if ignore-segment-p + ;; pop segment onto the result without + ;; processing + segment-string + ;; titlecase the segment before popping onto + ;; result + (titlecase--segment + segment-string capitalize-p))) + result)) + (next-segment + (unless pop-p segment)) + (will-be-first-word-p + (if pop-p + (or (not last-segment) + (member last-char new-phrase-chars) + (member char immediate-new-phrase-chars)) + first-word-p))) + (vector + next-result next-segment will-be-first-word-p in-path-p))) + str + :initial-value + (vector nil ; result stack + nil ; current working segment + t ; is it the first word of a phrase? + nil)))) ; are we inside of a filesystem path? + (aref final-state 0))) + +(defun titlecase--segment (segment capitalize-p) + "Convert a title's inner SEGMENT to capitalized or lower case +depending on CAPITALIZE-P, then return the result." + (let* ((case-fold-search nil) + (ignore-chars '(?' ?\" ?\( ?\[ ?‘ ?“ ?’ ?” ?_)) + (final-state + (cl-reduce + (lambda (state char) + (let ((result (aref state 0)) + (downcase-p (aref state 1))) + (cond + (downcase-p + ;; already upcased start of segment, so lowercase the rest + (vector (cons (downcase char) result) t)) + ((member char ignore-chars) + ;; check if start char of segment needs to be ignored + (vector (cons char result) downcase-p)) + (t + ;; haven't upcased yet, and we can, so do it + (vector (cons (upcase char) result) t))))) + segment + :initial-value (vector nil (not capitalize-p))))) + (thread-last (aref final-state 0) + (reverse) + (apply #'string)))) + +;;;###autoload +(defun titlecase-region (begin end) + "Convert text in region from BEGIN to END to title case." + (interactive "*r") + (let ((pt (point))) + (insert (titlecase-string (delete-and-extract-region begin end))) + (goto-char pt))) + +;;;###autoload +(defun titlecase-dwim () + "Convert the region or current line to title case. +If Transient Mark Mode is on and there is an active region, convert +the region to title case. Otherwise, work on the current line." + (interactive) + (if (and transient-mark-mode mark-active) + (titlecase-region (region-beginning) (region-end)) + (titlecase-region (point-at-bol) (point-at-eol)))) + +(provide 'titlecase) -- cgit 1.4.1-21-gabe81