summary refs log tree commit diff stats
path: root/lisp
diff options
context:
space:
mode:
authorCase Duckworth2021-09-01 08:37:17 -0500
committerCase Duckworth2021-09-01 08:37:17 -0500
commit3d10340f69651608362bacbe91cc10a90bdf4de3 (patch)
tree622d3d6d2d827bacadf0bb74c1d498e8050e7b16 /lisp
parentRename safe-define to safely (diff)
downloademacs-3d10340f69651608362bacbe91cc10a90bdf4de3.tar.gz
emacs-3d10340f69651608362bacbe91cc10a90bdf4de3.zip
Add titlecase
Diffstat (limited to 'lisp')
-rw-r--r--lisp/titlecase.el157
1 files changed, 157 insertions, 0 deletions
diff --git a/lisp/titlecase.el b/lisp/titlecase.el new file mode 100644 index 0000000..64da5b4 --- /dev/null +++ b/lisp/titlecase.el
@@ -0,0 +1,157 @@
1;;; titlecase.el -*- lexical-binding: t; -*-
2
3;; https://hungyi.net/posts/programmers-way-to-title-case/
4
5(require 'cl-lib)
6(require 'subr-x)
7
8;;;###autoload
9(defun titlecase-string (str)
10 "Convert string STR to title case and return the resulting string."
11 (let* ((case-fold-search nil)
12 (str-length (length str))
13 ;; A list of markers that indicate start of a new phrase within the
14 ;; title, e.g. "The Lonely Reindeer: A Christmas Story"
15 ;; must be followed by one of word-boundary-chars
16 (new-phrase-chars '(?: ?. ?? ?\; ?\n ?\r))
17 ;; immediately triggers new phrase behavior without waiting for word
18 ;; boundary
19 (immediate-new-phrase-chars '(?\n ?\r))
20 ;; A list of characters that indicate "word boundaries"; used to split
21 ;; the title into processable segments
22 (word-boundary-chars (append '(? ?– ?— ?- ?‑ ?/)
23 immediate-new-phrase-chars))
24 ;; A list of small words that should not be capitalized (in the right
25 ;; conditions)
26 (small-words '("a" "an" "and" "as" "at" "but" "by" "en" "for" "if"
27 "in" "of" "on" "or" "the" "to" "v" "v." "vs" "vs."
28 "via"))
29 ;; Fix if str is ALL CAPS
30 (str (if (string-match-p "[a-z]" str) str (downcase str)))
31 ;; Reduce over a state machine to do title casing
32 (final-state
33 (cl-reduce
34 (lambda (state char)
35 (let* ((result (aref state 0))
36 (last-segment (aref state 1))
37 (first-word-p (aref state 2))
38 (was-in-path-p (aref state 3))
39 (last-char (car last-segment))
40 (in-path-p (or (and (eq char ?/)
41 (or (not last-segment)
42 (member last-char '(?. ?~))))
43 (and was-in-path-p
44 (not
45 (or (eq char ? )
46 (member
47 char
48 immediate-new-phrase-chars))))))
49 (end-p
50 ;; are we at the end of the input string?
51 (eq (+ (length result) (length last-segment) 1)
52 str-length))
53 (pop-p
54 ;; do we need to pop a segment onto the output result?
55 (or end-p (and (not in-path-p)
56 (member char word-boundary-chars))))
57 (segment
58 ;; add the current char to the current segment
59 (cons char last-segment))
60 (segment-string
61 ;; the readable version of the segment
62 (apply #'string (reverse segment)))
63 (small-word-p
64 ;; was the last segment a small word?
65 (member (downcase (substring segment-string 0 -1))
66 small-words))
67 (capitalize-p
68 ;; do we need to capitalized this segment or lowercase it?
69 (or end-p first-word-p (not small-word-p)))
70 (ignore-segment-p
71 ;; ignore explicitly capitalized segments
72 (or (string-match-p "[a-zA-Z].*[A-Z]" segment-string)
73 ;; ignore URLs
74 (string-match-p "^https?:" segment-string)
75 ;; ignore hostnames and namespaces.like.this
76 (string-match-p "\\w\\.\\w" segment-string)
77 ;; ignore windows filesystem paths
78 (string-match-p "^[A-Za-z]:\\\\" segment-string)
79 ;; ignore unix filesystem paths
80 was-in-path-p
81 ;; ignore email addresses and user handles with @ symbol
82 (member ?@ segment)))
83 (next-result
84 (if pop-p
85 (concat result
86 (if ignore-segment-p
87 ;; pop segment onto the result without
88 ;; processing
89 segment-string
90 ;; titlecase the segment before popping onto
91 ;; result
92 (titlecase--segment
93 segment-string capitalize-p)))
94 result))
95 (next-segment
96 (unless pop-p segment))
97 (will-be-first-word-p
98 (if pop-p
99 (or (not last-segment)
100 (member last-char new-phrase-chars)
101 (member char immediate-new-phrase-chars))
102 first-word-p)))
103 (vector
104 next-result next-segment will-be-first-word-p in-path-p)))
105 str
106 :initial-value
107 (vector nil ; result stack
108 nil ; current working segment
109 t ; is it the first word of a phrase?
110 nil)))) ; are we inside of a filesystem path?
111 (aref final-state 0)))
112
113(defun titlecase--segment (segment capitalize-p)
114 "Convert a title's inner SEGMENT to capitalized or lower case
115depending on CAPITALIZE-P, then return the result."
116 (let* ((case-fold-search nil)
117 (ignore-chars '(?' ?\" ?\( ?\[ ?‘ ?“ ?’ ?” ?_))
118 (final-state
119 (cl-reduce
120 (lambda (state char)
121 (let ((result (aref state 0))
122 (downcase-p (aref state 1)))
123 (cond
124 (downcase-p
125 ;; already upcased start of segment, so lowercase the rest
126 (vector (cons (downcase char) result) t))
127 ((member char ignore-chars)
128 ;; check if start char of segment needs to be ignored
129 (vector (cons char result) downcase-p))
130 (t
131 ;; haven't upcased yet, and we can, so do it
132 (vector (cons (upcase char) result) t)))))
133 segment
134 :initial-value (vector nil (not capitalize-p)))))
135 (thread-last (aref final-state 0)
136 (reverse)
137 (apply #'string))))
138
139;;;###autoload
140(defun titlecase-region (begin end)
141 "Convert text in region from BEGIN to END to title case."
142 (interactive "*r")
143 (let ((pt (point)))
144 (insert (titlecase-string (delete-and-extract-region begin end)))
145 (goto-char pt)))
146
147;;;###autoload
148(defun titlecase-dwim ()
149 "Convert the region or current line to title case.
150If Transient Mark Mode is on and there is an active region, convert
151the region to title case. Otherwise, work on the current line."
152 (interactive)
153 (if (and transient-mark-mode mark-active)
154 (titlecase-region (region-beginning) (region-end))
155 (titlecase-region (point-at-bol) (point-at-eol))))
156
157(provide 'titlecase)