From f8cf097a1a6b612411688f776222d93166d8dccb Mon Sep 17 00:00:00 2001
From: Andrew Heiss <andrew@andrewheiss.com>
Date: Mon, 3 Jun 2024 16:31:46 -0400
Subject: [PATCH] Allow code blocks to be optionally counted

---
 README.md                                    | 76 +++++++++++++++++++-
 README.qmd                                   | 69 ++++++++++++++++++
 _extensions/wordcount/wordcount.lua          | 52 +++++++++++---
 tests/testthat/test-code-blocks-asis.qmd     | 16 +++++
 tests/testthat/test-code-blocks-disabled.qmd | 31 ++++++++
 tests/testthat/test-code-blocks-enabled.qmd  | 31 ++++++++
 tests/testthat/test-code-blocks.R            | 47 ++++++++++++
 7 files changed, 311 insertions(+), 11 deletions(-)
 create mode 100644 tests/testthat/test-code-blocks-asis.qmd
 create mode 100644 tests/testthat/test-code-blocks-disabled.qmd
 create mode 100644 tests/testthat/test-code-blocks-enabled.qmd
 create mode 100644 tests/testthat/test-code-blocks.R

diff --git a/README.md b/README.md
index dd1ace8..4842ed8 100644
--- a/README.md
+++ b/README.md
@@ -10,6 +10,7 @@
   - [Terminal output](#terminal-output)
   - [Shortcodes](#shortcodes)
   - [No counting](#no-counting)
+  - [Code blocks](#code-blocks)
   - [Appendices](#appendices)
 - [Example](#example)
 - [Credits](#credits)
@@ -205,10 +206,10 @@ counts directly in the document:
   appendix, which must be wrapped in a div with the `#appendix-count` id
   ([see below for more details](#appendices))
 
-- Use `{{< words-note >}}` to include a count of the words in the notes:
+- Use `{{< words-note >}}` to include a count of the words in the notes
 
 - Use `{{< words-sum ARG >}}` where `ARG` is some concatenation of the
-  four countable areas: `body`, `ref`, `append`, and `note`.
+  four countable areas: `body`, `ref`, `append`, and `note`
 
   For example, `{{< words-sum body-note >}}` includes a count of the
   words in the body and notes; `{{< words-sum ref-append >}}` includes a
@@ -236,6 +237,75 @@ These words don't count.
 :::
 ```
 
+### Code blocks
+
+By default, text inside code blocks ***is*** counted. For example, this:
+
+```` markdown
+---
+title: "Code counting"
+format: wordcount-html
+---
+
+This sentence has seven words in it.
+
+```{r}
+# Here is some code
+
+numbers <- 1:10
+mean(numbers)
+```
+````
+
+…will result in these counts:
+
+``` text
+Overall totals:
+-----------------------------
+- 16 total words
+- 16 words in body and notes
+
+Section totals:
+-----------------------------
+- 16 words in text body
+```
+
+…with 7 words from the sentence and 9 from the code.
+
+Code block counting can be disabled with the YAML option
+`count-code-blocks`:
+
+```` markdown
+---
+title: "Code counting"
+format: 
+  wordcount-html:
+    count-code-blocks: false
+---
+
+This sentence has seven words in it.
+
+```{r}
+# Here is some code
+
+numbers <- 1:10
+mean(numbers)
+```
+````
+
+…which makes these counts:
+
+``` text
+Overall totals:
+----------------------------
+- 7 total words
+- 7 words in body and notes
+
+Section totals:
+----------------------------
+- 7 words in text body
+```
+
 ### Appendices
 
 In academic writing, it’s often helpful to have a separate word count
@@ -311,7 +381,7 @@ that look something like this:
 pandoc whatever.md --output whatever.html --lua-filter wordcount.lua --citeproc
 ```
 
-The order of these arguments matter, so having
+The order of these arguments matters, so having
 `--lua-filter wordcount.lua` come before `--citeproc` makes it so the
 words will be counted before the bibliography is generated, which isn’t
 great.
diff --git a/README.qmd b/README.qmd
index ba8c1d0..0ed7f5e 100644
--- a/README.qmd
+++ b/README.qmd
@@ -163,6 +163,75 @@ These words don't count.
 :::
 ````
 
+### Code blocks
+
+By default, text inside code blocks ***is*** counted. For example, this:
+
+````qmd
+---
+title: "Code counting"
+format: wordcount-html
+---
+
+This sentence has seven words in it.
+
+```{{r}}
+# Here is some code
+
+numbers <- 1:10
+mean(numbers)
+```
+````
+
+…will result in these counts:
+
+```text
+Overall totals:
+-----------------------------
+- 16 total words
+- 16 words in body and notes
+
+Section totals:
+-----------------------------
+- 16 words in text body
+```
+
+…with 7 words from the sentence and 9 from the code.
+
+Code block counting can be disabled with the YAML option `count-code-blocks`:
+
+````qmd
+---
+title: "Code counting"
+format: 
+  wordcount-html:
+    count-code-blocks: false
+---
+
+This sentence has seven words in it.
+
+```{{r}}
+# Here is some code
+
+numbers <- 1:10
+mean(numbers)
+```
+````
+
+…which makes these counts:
+
+```text
+Overall totals:
+----------------------------
+- 7 total words
+- 7 words in body and notes
+
+Section totals:
+----------------------------
+- 7 words in text body
+```
+
+
 ### Appendices
 
 In academic writing, it's often helpful to have a separate word count for content in the appendices, since things there don't typically count against journal word limits. [Quarto has a neat feature for automatically creating an appendix section](https://quarto.org/docs/authoring/appendices.html) and moving content there automatically as needed. It does this (I think) with a fancy Lua filter.
diff --git a/_extensions/wordcount/wordcount.lua b/_extensions/wordcount/wordcount.lua
index 08ac4e6..b5caeff 100644
--- a/_extensions/wordcount/wordcount.lua
+++ b/_extensions/wordcount/wordcount.lua
@@ -231,19 +231,20 @@ body_count = {
       body_words = body_words + 1
     end
   end,
-  
+
   Code = function(el)
-    _,n = el.text:gsub("%S+","")
-    body_words = body_words + n
-  end,
-  
-  CodeBlock = function(el)
-    _,n = el.text:gsub("%S+","")
+    local _, n = el.text:gsub("%S+", "") -- n: whitespace-separated tokens in inline code
     body_words = body_words + n
   end
-  
 }
 
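+-- NOTE: CodeBlock handlers are deliberately not defined in these tables.
+-- Pandoc() attaches them to body_count, appendix_count, and note_count at
+-- run time, and only when the `count-code-blocks` metadata option is true
+-- (it defaults to true when the option is absent).
+-- Inline Code handlers, by contrast, are defined unconditionally in the
+-- body, appendix, and note tables.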
+
 ref_count = {
   Str = function(el)
     -- we don't count a word if it's entirely punctuation:
@@ -259,6 +260,11 @@ appendix_count = {
     if is_word(el.text) then
       appendix_words = appendix_words + 1
     end
+  end,
+  
+  Code = function(el) -- inline code inside the appendix
+    local _, n = el.text:gsub("%S+", "") -- n: whitespace-separated tokens
+    appendix_words = appendix_words + n
   end
 }
 
@@ -267,6 +273,11 @@ note_count = {
     if is_word(el.text) then
       note_words = note_words + 1
     end
+  end,
+  
+  Code = function(el) -- inline code inside notes
+    local _, n = el.text:gsub("%S+", "") -- n: whitespace-separated tokens
+    note_words = note_words + n
   end
 }
 
@@ -278,6 +289,31 @@ function Pandoc(el)
     return el
   end
   
+  -- `count-code-blocks` metadata toggles code block counting; absent means true
+  local count_code_blocks = el.meta["count-code-blocks"]
+  if count_code_blocks == nil then
+    count_code_blocks = true
+  end
+
+  -- When enabled, give each section counter a CodeBlock handler that adds the
+  -- block's whitespace-separated token count (gsub's second return value)
+  if count_code_blocks then
+    body_count.CodeBlock = function(block)
+      local _, n = block.text:gsub("%S+", "")
+      body_words = body_words + n
+    end
+
+    appendix_count.CodeBlock = function(block)
+      local _, n = block.text:gsub("%S+", "")
+      appendix_words = appendix_words + n
+    end
+
+    note_count.CodeBlock = function(block)
+      local _, n = block.text:gsub("%S+", "")
+      note_words = note_words + n
+    end
+  end
+
   -- Get all notes
   local all_notes = get_all_notes(el.blocks)
   -- Count words in notes
diff --git a/tests/testthat/test-code-blocks-asis.qmd b/tests/testthat/test-code-blocks-asis.qmd
new file mode 100644
index 0000000..f823ecd
--- /dev/null
+++ b/tests/testthat/test-code-blocks-asis.qmd
@@ -0,0 +1,16 @@
+---
+title: "as-is results work"
+format: 
+  wordcount-markdown:
+    count-code-blocks: true
+---
+
+This sentence has seven words in it.
+
+```{r}
+#| echo: false
+#| results: asis
+
+output <- "Two words"
+cat(output)
+```
diff --git a/tests/testthat/test-code-blocks-disabled.qmd b/tests/testthat/test-code-blocks-disabled.qmd
new file mode 100644
index 0000000..811bcf9
--- /dev/null
+++ b/tests/testthat/test-code-blocks-disabled.qmd
@@ -0,0 +1,31 @@
+---
+title: "Code blocks disabled"
+format: 
+  wordcount-markdown:
+    count-code-blocks: false
+---
+
+This sentence has seven words in it.[^note]
+
+```{r}
+asdf <- 1:10
+mean(asdf)
+```
+
+[^note]: Here's some code:
+
+```{r indent="    "}
+zxcv <- 21:30
+mean(zxcv)
+```
+
+::: {#appendix-count}
+
+There are five words here.
+
+```{r}
+qwer <- 11:20
+mean(qwer)
+```
+
+:::
diff --git a/tests/testthat/test-code-blocks-enabled.qmd b/tests/testthat/test-code-blocks-enabled.qmd
new file mode 100644
index 0000000..87ac05d
--- /dev/null
+++ b/tests/testthat/test-code-blocks-enabled.qmd
@@ -0,0 +1,31 @@
+---
+title: "Code blocks enabled"
+format: 
+  wordcount-markdown:
+    count-code-blocks: true
+---
+
+This sentence has seven words in it.[^note]
+
+```{r}
+asdf <- 1:10
+mean(asdf)
+```
+
+[^note]: Here's some code:
+
+```{r indent="    "}
+zxcv <- 21:30
+mean(zxcv)
+```
+
+::: {#appendix-count}
+
+There are five words here.
+
+```{r}
+qwer <- 11:20
+mean(qwer)
+```
+
+:::
diff --git a/tests/testthat/test-code-blocks.R b/tests/testthat/test-code-blocks.R
new file mode 100644
index 0000000..9dc66c2
--- /dev/null
+++ b/tests/testthat/test-code-blocks.R
@@ -0,0 +1,47 @@
+test_that("disabling code block counting works", {
+  test_file <- test_file_parts(here::here("tests/testthat/test-code-blocks-disabled.qmd"))
+  # Fixture sets `count-code-blocks: false`; it has chunks in the body, a note, and the appendix
+  create_local_quarto_project(test_file = test_file)
+  
+  quarto::quarto_render(input = test_file$qmd, quiet = TRUE)
+  
+  counts <- get_wordcounts(test_file$md)
+  # Expected values cover the fixture's prose only, with no chunk contents
+  expect_equal(counts$wordcount_appendix_words, 5) # "There are five words here."
+  expect_equal(counts$wordcount_body_words, 7) # "This sentence has seven words in it."
+  expect_equal(counts$wordcount_note_words, 3) # "Here's some code:"
+  expect_equal(counts$wordcount_ref_words, 0)
+  expect_equal(counts$wordcount_total_words, 15) # 5 + 7 + 3
+})
+
+test_that("enabling code block counting works", {
+  test_file <- test_file_parts(here::here("tests/testthat/test-code-blocks-enabled.qmd"))
+  # Same fixture text as the disabled case, but with `count-code-blocks: true`
+  create_local_quarto_project(test_file = test_file)
+  
+  quarto::quarto_render(input = test_file$qmd, quiet = TRUE)
+  
+  counts <- get_wordcounts(test_file$md)
+  # Each section is prose + 6: 4 chunk-source tokens, presumably + 2 of printed output (TODO confirm)
+  expect_equal(counts$wordcount_appendix_words, 11) # 5 prose + 6
+  expect_equal(counts$wordcount_body_words, 13) # 7 prose + 6
+  expect_equal(counts$wordcount_note_words, 9) # 3 prose + 6
+  expect_equal(counts$wordcount_ref_words, 0)
+  expect_equal(counts$wordcount_total_words, 33) # 11 + 13 + 9
+})
+
+test_that("as-is output from echo=false chunks gets counted", {
+  test_file <- test_file_parts(here::here("tests/testthat/test-code-blocks-asis.qmd"))
+  # Fixture chunk uses `echo: false` and `results: asis`, and cat()s the string "Two words"
+  create_local_quarto_project(test_file = test_file)
+  
+  quarto::quarto_render(input = test_file$qmd, quiet = TRUE)
+  
+  counts <- get_wordcounts(test_file$md)
+  # Fixture has no notes or appendix; the body holds one sentence plus the chunk's output
+  expect_equal(counts$wordcount_appendix_words, 0)
+  expect_equal(counts$wordcount_body_words, 9) # 7 prose + presumably 2 from "Two words"
+  expect_equal(counts$wordcount_note_words, 0)
+  expect_equal(counts$wordcount_ref_words, 0)
+  expect_equal(counts$wordcount_total_words, 9)
+})