From 176e795a32729410873f66bdf2ee42ff4aaa88c9 Mon Sep 17 00:00:00 2001
From: Case Duckworth
Date: Thu, 13 May 2021 18:00:16 -0500
Subject: Add `acdw-org/word-count'

---
 lisp/acdw-org.el | 110 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 110 insertions(+)

diff --git a/lisp/acdw-org.el b/lisp/acdw-org.el
index bb72eb8..3f0c4ea 100644
--- a/lisp/acdw-org.el
+++ b/lisp/acdw-org.el
@@ -243,4 +243,114 @@ the deletion might narrow the column."
       (org-table-copy-down n)
     (acdw-org/return-dwim n)))
 
+(defun acdw-org/word-count (beg end
+                            &optional count-latex-macro-args?
+                            count-footnotes?)
+  "Report the number of words in the Org mode buffer or selected region.
+When the mark is inactive, BEG and END are ignored and the whole
+accessible portion of the buffer is counted.  The result is shown in
+the echo area and the message text is returned.
+
+Ignores:
+- comments
+- tables
+- source code blocks (#+BEGIN_SRC ... #+END_SRC, and inline blocks)
+- hyperlinks (but does count words in hyperlink descriptions)
+- tags, priorities, and TODO keywords in headers
+- sections tagged as 'not for export'.
+
+The text of footnote definitions is ignored, unless the optional argument
+COUNT-FOOTNOTES? is non-nil.
+
+If the optional argument COUNT-LATEX-MACRO-ARGS? is non-nil, the word count
+includes LaTeX macro arguments (the material between {curly braces}).
+Otherwise, and by default, every LaTeX macro counts as 1 word regardless
+of its arguments."
+  (interactive "r")
+  (unless mark-active
+    (setq beg (point-min)
+          end (point-max)))
+  (let ((wc 0)
+        (latex-macro-regexp "\\\\[A-Za-z]+\\(\\[[^]]*\\]\\|\\){\\([^}]*\\)}"))
+    ;; NOTE(review): some Org helpers used below (`org-in-commented-line',
+    ;; `org-bracket-link-analytic-regexp', `org-in-regexps-block-p',
+    ;; `org-get-tags-at') may be missing from newer Org releases -- confirm
+    ;; against the Org version this configuration runs.
+    (save-excursion
+      (goto-char beg)
+      ;; Each pass classifies the text at point; the search at the bottom
+      ;; of the loop then steps over one word plus trailing non-word text.
+      (while (< (point) end)
+        (cond
+         ;; Ignore comments and tables.
+         ((or (org-in-commented-line) (org-at-table-p))
+          nil)
+         ;; Ignore hyperlinks.  But if link has a description, count
+         ;; the words within the description.
+         ((looking-at org-bracket-link-analytic-regexp)
+          (let ((desc (match-string-no-properties 5)))
+            (when desc
+              (save-match-data
+                (setq wc (+ wc (length (remove "" (org-split-string
+                                                    desc "\\W"))))))))
+          (goto-char (match-end 0)))
+         ((looking-at org-any-link-re)
+          (goto-char (match-end 0)))
+         ;; Ignore source code blocks.
+         ((org-in-regexps-block-p "^#\\+BEGIN_SRC\\W" "^#\\+END_SRC\\W")
+          nil)
+         ;; Ignore inline source blocks.  The tally grows by 2 here:
+         ;; presumably 1 for the block plus 1 for the word that the search
+         ;; at the bottom of the loop steps over unexamined -- TODO confirm.
+         ;; `backward-char' signals at the start of the buffer, so the
+         ;; look-behind is skipped there.
+         ((and (not (bobp))
+               (save-excursion
+                 (backward-char)
+                 (looking-at org-babel-inline-src-block-regexp)))
+          (goto-char (match-end 0))
+          (setq wc (+ 2 wc)))
+         ;; LaTeX macros: skip the whole macro, or only up to its
+         ;; {argument} when COUNT-LATEX-MACRO-ARGS? is non-nil.  Same +2
+         ;; adjustment and start-of-buffer guard as above.
+         ((and (not (bobp))
+               (save-excursion
+                 (backward-char)
+                 (looking-at latex-macro-regexp)))
+          (goto-char (if count-latex-macro-args?
+                         (match-beginning 2)
+                       (match-end 0)))
+          (setq wc (+ 2 wc)))
+         ;; Ignore footnotes.
+         ((and (not count-footnotes?)
+               (or (org-footnote-at-definition-p)
+                   (org-footnote-at-reference-p)))
+          nil)
+         (t
+          (let ((contexts (org-context)))
+            (cond
+             ;; Ignore tags and TODO keywords, etc.
+             ((or (assoc :todo-keyword contexts)
+                  (assoc :priority contexts)
+                  (assoc :keyword contexts)
+                  (assoc :checkbox contexts))
+              nil)
+             ;; Ignore sections marked with tags that are
+             ;; excluded from export.
+             ((assoc :tags contexts)
+              (let ((excluded nil))
+                (dolist (tag (org-get-tags-at))
+                  (when (member tag org-export-exclude-tags)
+                    (setq excluded t)))
+                (when excluded
+                  (org-forward-same-level 1))))
+             (t
+              (setq wc (1+ wc)))))))
+        ;; Advance past the next word.  When no word is left, jump to END
+        ;; so the loop stops instead of signalling "Search failed".
+        (unless (re-search-forward "\\w+\\W*" nil t)
+          (goto-char end))))
+    (message "%d words in %s." wc
+             (if mark-active "region" "buffer"))))
+
 (provide 'acdw-org)
-- 
cgit 1.4.1-21-gabe81