diff options
Diffstat (limited to 'lisp')
-rw-r--r-- | lisp/titlecase.el | 157 |
1 files changed, 157 insertions, 0 deletions
diff --git a/lisp/titlecase.el b/lisp/titlecase.el new file mode 100644 index 0000000..64da5b4 --- /dev/null +++ b/lisp/titlecase.el | |||
@@ -0,0 +1,157 @@ | |||
1 | ;;; titlecase.el -*- lexical-binding: t; -*- | ||
2 | |||
3 | ;; https://hungyi.net/posts/programmers-way-to-title-case/ | ||
4 | |||
5 | (require 'cl-lib) | ||
6 | (require 'subr-x) | ||
7 | |||
8 | ;;;###autoload | ||
9 | (defun titlecase-string (str) | ||
10 | "Convert string STR to title case and return the resulting string." | ||
11 | (let* ((case-fold-search nil) | ||
12 | (str-length (length str)) | ||
13 | ;; A list of markers that indicate start of a new phrase within the | ||
14 | ;; title, e.g. "The Lonely Reindeer: A Christmas Story" | ||
15 | ;; must be followed by one of word-boundary-chars | ||
16 | (new-phrase-chars '(?: ?. ?? ?\; ?\n ?\r)) | ||
17 | ;; immediately triggers new phrase behavior without waiting for word | ||
18 | ;; boundary | ||
19 | (immediate-new-phrase-chars '(?\n ?\r)) | ||
20 | ;; A list of characters that indicate "word boundaries"; used to split | ||
21 | ;; the title into processable segments | ||
22 | (word-boundary-chars (append '(? ?– ?— ?- ?‑ ?/) | ||
23 | immediate-new-phrase-chars)) | ||
24 | ;; A list of small words that should not be capitalized (in the right | ||
25 | ;; conditions) | ||
26 | (small-words '("a" "an" "and" "as" "at" "but" "by" "en" "for" "if" | ||
27 | "in" "of" "on" "or" "the" "to" "v" "v." "vs" "vs." | ||
28 | "via")) | ||
29 | ;; Fix if str is ALL CAPS | ||
30 | (str (if (string-match-p "[a-z]" str) str (downcase str))) | ||
31 | ;; Reduce over a state machine to do title casing | ||
32 | (final-state | ||
33 | (cl-reduce | ||
34 | (lambda (state char) | ||
35 | (let* ((result (aref state 0)) | ||
36 | (last-segment (aref state 1)) | ||
37 | (first-word-p (aref state 2)) | ||
38 | (was-in-path-p (aref state 3)) | ||
39 | (last-char (car last-segment)) | ||
40 | (in-path-p (or (and (eq char ?/) | ||
41 | (or (not last-segment) | ||
42 | (member last-char '(?. ?~)))) | ||
43 | (and was-in-path-p | ||
44 | (not | ||
45 | (or (eq char ? ) | ||
46 | (member | ||
47 | char | ||
48 | immediate-new-phrase-chars)))))) | ||
49 | (end-p | ||
50 | ;; are we at the end of the input string? | ||
51 | (eq (+ (length result) (length last-segment) 1) | ||
52 | str-length)) | ||
53 | (pop-p | ||
54 | ;; do we need to pop a segment onto the output result? | ||
55 | (or end-p (and (not in-path-p) | ||
56 | (member char word-boundary-chars)))) | ||
57 | (segment | ||
58 | ;; add the current char to the current segment | ||
59 | (cons char last-segment)) | ||
60 | (segment-string | ||
61 | ;; the readable version of the segment | ||
62 | (apply #'string (reverse segment))) | ||
63 | (small-word-p | ||
64 | ;; was the last segment a small word? | ||
65 | (member (downcase (substring segment-string 0 -1)) | ||
66 | small-words)) | ||
67 | (capitalize-p | ||
68 | ;; do we need to capitalized this segment or lowercase it? | ||
69 | (or end-p first-word-p (not small-word-p))) | ||
70 | (ignore-segment-p | ||
71 | ;; ignore explicitly capitalized segments | ||
72 | (or (string-match-p "[a-zA-Z].*[A-Z]" segment-string) | ||
73 | ;; ignore URLs | ||
74 | (string-match-p "^https?:" segment-string) | ||
75 | ;; ignore hostnames and namespaces.like.this | ||
76 | (string-match-p "\\w\\.\\w" segment-string) | ||
77 | ;; ignore windows filesystem paths | ||
78 | (string-match-p "^[A-Za-z]:\\\\" segment-string) | ||
79 | ;; ignore unix filesystem paths | ||
80 | was-in-path-p | ||
81 | ;; ignore email addresses and user handles with @ symbol | ||
82 | (member ?@ segment))) | ||
83 | (next-result | ||
84 | (if pop-p | ||
85 | (concat result | ||
86 | (if ignore-segment-p | ||
87 | ;; pop segment onto the result without | ||
88 | ;; processing | ||
89 | segment-string | ||
90 | ;; titlecase the segment before popping onto | ||
91 | ;; result | ||
92 | (titlecase--segment | ||
93 | segment-string capitalize-p))) | ||
94 | result)) | ||
95 | (next-segment | ||
96 | (unless pop-p segment)) | ||
97 | (will-be-first-word-p | ||
98 | (if pop-p | ||
99 | (or (not last-segment) | ||
100 | (member last-char new-phrase-chars) | ||
101 | (member char immediate-new-phrase-chars)) | ||
102 | first-word-p))) | ||
103 | (vector | ||
104 | next-result next-segment will-be-first-word-p in-path-p))) | ||
105 | str | ||
106 | :initial-value | ||
107 | (vector nil ; result stack | ||
108 | nil ; current working segment | ||
109 | t ; is it the first word of a phrase? | ||
110 | nil)))) ; are we inside of a filesystem path? | ||
111 | (aref final-state 0))) | ||
112 | |||
113 | (defun titlecase--segment (segment capitalize-p) | ||
114 | "Convert a title's inner SEGMENT to capitalized or lower case | ||
115 | depending on CAPITALIZE-P, then return the result." | ||
116 | (let* ((case-fold-search nil) | ||
117 | (ignore-chars '(?' ?\" ?\( ?\[ ?‘ ?“ ?’ ?” ?_)) | ||
118 | (final-state | ||
119 | (cl-reduce | ||
120 | (lambda (state char) | ||
121 | (let ((result (aref state 0)) | ||
122 | (downcase-p (aref state 1))) | ||
123 | (cond | ||
124 | (downcase-p | ||
125 | ;; already upcased start of segment, so lowercase the rest | ||
126 | (vector (cons (downcase char) result) t)) | ||
127 | ((member char ignore-chars) | ||
128 | ;; check if start char of segment needs to be ignored | ||
129 | (vector (cons char result) downcase-p)) | ||
130 | (t | ||
131 | ;; haven't upcased yet, and we can, so do it | ||
132 | (vector (cons (upcase char) result) t))))) | ||
133 | segment | ||
134 | :initial-value (vector nil (not capitalize-p))))) | ||
135 | (thread-last (aref final-state 0) | ||
136 | (reverse) | ||
137 | (apply #'string)))) | ||
138 | |||
139 | ;;;###autoload | ||
140 | (defun titlecase-region (begin end) | ||
141 | "Convert text in region from BEGIN to END to title case." | ||
142 | (interactive "*r") | ||
143 | (let ((pt (point))) | ||
144 | (insert (titlecase-string (delete-and-extract-region begin end))) | ||
145 | (goto-char pt))) | ||
146 | |||
147 | ;;;###autoload | ||
148 | (defun titlecase-dwim () | ||
149 | "Convert the region or current line to title case. | ||
150 | If Transient Mark Mode is on and there is an active region, convert | ||
151 | the region to title case. Otherwise, work on the current line." | ||
152 | (interactive) | ||
153 | (if (and transient-mark-mode mark-active) | ||
154 | (titlecase-region (region-beginning) (region-end)) | ||
155 | (titlecase-region (point-at-bol) (point-at-eol)))) | ||
156 | |||
157 | (provide 'titlecase) | ||