about summary refs log tree commit diff stats
path: root/lisp/titlecase.el
blob: 64da5b46bdb5fbfd8d3a5a41d6b05ce5e6426526 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
;;; titlecase.el -*- lexical-binding: t; -*-

;; https://hungyi.net/posts/programmers-way-to-title-case/

(require 'cl-lib)
(require 'subr-x)

;;;###autoload
(defun titlecase-string (str)
  "Convert string STR to title case and return the resulting string."
  (let* ((case-fold-search nil)
         (str-length (length str))
         ;; A list of markers that indicate start of a new phrase within the
         ;; title, e.g. "The Lonely Reindeer: A Christmas Story"
         ;; must be followed by one of  word-boundary-chars
         (new-phrase-chars '(?: ?. ?? ?\; ?\n ?\r))
         ;; immediately triggers new phrase behavior without waiting for word
         ;; boundary
         (immediate-new-phrase-chars '(?\n ?\r))
         ;; A list of characters that indicate "word boundaries"; used to split
         ;; the title into processable segments
         (word-boundary-chars (append '(?  ?– ?— ?- ?‑ ?/)
                                      immediate-new-phrase-chars))
         ;; A list of small words that should not be capitalized (in the right
         ;; conditions)
         (small-words '("a" "an" "and" "as" "at" "but" "by" "en" "for" "if"
                        "in" "of" "on" "or" "the" "to" "v" "v." "vs" "vs."
                        "via"))
         ;; Fix if str is ALL CAPS
         (str (if (string-match-p "[a-z]" str) str (downcase str)))
         ;; Reduce over a state machine to do title casing
         (final-state
          (cl-reduce
           (lambda (state char)
             (let* ((result (aref state 0))
                    (last-segment (aref state 1))
                    (first-word-p (aref state 2))
                    (was-in-path-p (aref state 3))
                    (last-char (car last-segment))
                    (in-path-p (or (and (eq char ?/)
                                        (or (not last-segment)
                                            (member last-char '(?. ?~))))
                                   (and was-in-path-p
                                        (not
                                         (or (eq char ? )
                                             (member
                                              char
                                              immediate-new-phrase-chars))))))
                    (end-p
                     ;; are we at the end of the input string?
                     (eq (+ (length result) (length last-segment) 1)
                         str-length))
                    (pop-p
                     ;; do we need to pop a segment onto the output result?
                     (or end-p (and (not in-path-p)
                                    (member char word-boundary-chars))))       
                    (segment
                     ;; add the current char to the current segment
                     (cons char last-segment))
                    (segment-string
                     ;; the readable version of the segment
                     (apply #'string (reverse segment)))
                    (small-word-p
                     ;; was the last segment a small word?
                     (member (downcase (substring segment-string 0 -1))
                             small-words))
                    (capitalize-p
                     ;; do we need to capitalized this segment or lowercase it?
                     (or end-p first-word-p (not small-word-p)))
                    (ignore-segment-p
                     ;; ignore explicitly capitalized segments
                     (or (string-match-p "[a-zA-Z].*[A-Z]" segment-string)
                         ;; ignore URLs
                         (string-match-p "^https?:" segment-string)
                         ;; ignore hostnames and namespaces.like.this
                         (string-match-p "\\w\\.\\w" segment-string)
                         ;; ignore windows filesystem paths
                         (string-match-p "^[A-Za-z]:\\\\" segment-string)
                         ;; ignore unix filesystem paths
                         was-in-path-p
                         ;; ignore email addresses and user handles with @ symbol
                         (member ?@ segment)))                     
                    (next-result
                     (if pop-p
                         (concat result
                                 (if ignore-segment-p
                                     ;; pop segment onto the result without
                                     ;; processing
                                     segment-string
                                   ;; titlecase the segment before popping onto
                                   ;; result
                                   (titlecase--segment
                                    segment-string capitalize-p)))
                       result))
                    (next-segment
                     (unless pop-p segment))
                    (will-be-first-word-p
                     (if pop-p
                         (or (not last-segment)
                             (member last-char new-phrase-chars)
                             (member char immediate-new-phrase-chars))
                       first-word-p)))
               (vector
                next-result next-segment will-be-first-word-p in-path-p)))
           str
           :initial-value
           (vector nil      ; result stack
                   nil      ; current working segment
                   t        ; is it the first word of a phrase?
                   nil))))  ; are we inside of a filesystem path?
    (aref final-state 0)))

(defun titlecase--segment (segment capitalize-p)
  "Convert a title's inner SEGMENT to capitalized or lower case
depending on CAPITALIZE-P, then return the result."
  (let* ((case-fold-search nil)
         (ignore-chars '(?' ?\" ?\( ?\[ ?‘ ?“ ?’ ?” ?_))
         (final-state
          (cl-reduce
           (lambda (state char)
             (let ((result (aref state 0))
                   (downcase-p (aref state 1)))
               (cond
                (downcase-p
                 ;; already upcased start of segment, so lowercase the rest
                 (vector (cons (downcase char) result) t))
                ((member char ignore-chars)
                 ;; check if start char of segment needs to be ignored
                 (vector (cons char result) downcase-p))
                (t
                 ;; haven't upcased yet, and we can, so do it
                 (vector (cons (upcase char) result) t)))))
           segment
           :initial-value (vector nil (not capitalize-p)))))
    (thread-last (aref final-state 0)
      (reverse)
      (apply #'string))))

;;;###autoload
(defun titlecase-region (begin end)
  "Convert text in region from BEGIN to END to title case."
  (interactive "*r")
  (let ((pt (point)))
    (insert (titlecase-string (delete-and-extract-region begin end)))
    (goto-char pt)))

;;;###autoload
(defun titlecase-dwim ()
  "Convert the region or current line to title case.
If Transient Mark Mode is on and there is an active region, convert
the region to title case.  Otherwise, work on the current line."
  (interactive)
  (if (and transient-mark-mode mark-active)
      (titlecase-region (region-beginning) (region-end))
    (titlecase-region (point-at-bol) (point-at-eol))))

(provide 'titlecase)