From f8cf097a1a6b612411688f776222d93166d8dccb Mon Sep 17 00:00:00 2001 From: Andrew Heiss Date: Mon, 3 Jun 2024 16:31:46 -0400 Subject: [PATCH] Allow code blocks to be optionally counted --- README.md | 76 +++++++++++++++++++- README.qmd | 69 ++++++++++++++++++ _extensions/wordcount/wordcount.lua | 52 +++++++++++--- tests/testthat/test-code-blocks-asis.qmd | 16 +++++ tests/testthat/test-code-blocks-disabled.qmd | 31 ++++++++ tests/testthat/test-code-blocks-enabled.qmd | 31 ++++++++ tests/testthat/test-code-blocks.R | 47 ++++++++++++ 7 files changed, 311 insertions(+), 11 deletions(-) create mode 100644 tests/testthat/test-code-blocks-asis.qmd create mode 100644 tests/testthat/test-code-blocks-disabled.qmd create mode 100644 tests/testthat/test-code-blocks-enabled.qmd create mode 100644 tests/testthat/test-code-blocks.R diff --git a/README.md b/README.md index dd1ace8..4842ed8 100644 --- a/README.md +++ b/README.md @@ -10,6 +10,7 @@ - [Terminal output](#terminal-output) - [Shortcodes](#shortcodes) - [No counting](#no-counting) + - [Code blocks](#code-blocks) - [Appendices](#appendices) - [Example](#example) - [Credits](#credits) @@ -205,10 +206,10 @@ counts directly in the document: appendix, which must be wrapped in a div with the `#appendix-count` id ([see below for more details](#appendices)) -- Use `{{< words-note >}}` to include a count of the words in the notes: +- Use `{{< words-note >}}` to include a count of the words in the notes - Use `{{< words-sum ARG >}}` where `ARG` is some concatenation of the - four countable areas: `body`, `ref`, `append`, and `note`. + four countable areas: `body`, `ref`, `append`, and `note` For example, `{{< words-sum body-note >}}` includes a count of the words in the body and notes; `{{< words-sum ref-append >}}` includes a @@ -236,6 +237,75 @@ These words don't count. ::: ``` +### Code blocks + +By default, text inside code blocks ***is*** counted. For example, this: + +```` markdown +--- +title: "Code counting" +format: wordcount-html +--- + +This sentence has seven words in it. + +```{r} +# Here is some code + +numbers <- 1:10 +mean(numbers) +``` +```` + +…will result in these counts: + +``` text +Overall totals: +----------------------------- +- 16 total words +- 16 words in body and notes + +Section totals: +----------------------------- +- 16 words in text body +``` + +…with 7 words from the sentence and 9 from the code. + +Code block counting can be disabled with the YAML option +`count-code-blocks`: + +```` markdown +--- +title: "Code counting" +format: + wordcount-html: + count-code-blocks: false +--- + +This sentence has seven words in it. + +```{r} +# Here is some code + +numbers <- 1:10 +mean(numbers) +``` +```` + +…which makes these counts: + +``` text +Overall totals: +---------------------------- +- 7 total words +- 7 words in body and notes + +Section totals: +---------------------------- +- 7 words in text body +``` + ### Appendices In academic writing, it’s often helpful to have a separate word count @@ -311,7 +381,7 @@ that look something like this: pandoc whatever.md --output whatever.html --lua-filter wordcount.lua --citeproc ``` -The order of these arguments matter, so having +The order of these arguments matters, so having `--lua-filter wordcount.lua` come before `--citeproc` makes it so the words will be counted before the bibliography is generated, which isn’t great. diff --git a/README.qmd b/README.qmd index ba8c1d0..0ed7f5e 100644 --- a/README.qmd +++ b/README.qmd @@ -163,6 +163,75 @@ These words don't count. ::: ```` +### Code blocks + +By default, text inside code blocks ***is*** counted. For example, this: + +````qmd +--- +title: "Code counting" +format: wordcount-html +--- + +This sentence has seven words in it. + +```{{r}} +# Here is some code + +numbers <- 1:10 +mean(numbers) +``` +```` + +…will result in these counts: + +```text +Overall totals: +----------------------------- +- 16 total words +- 16 words in body and notes + +Section totals: +----------------------------- +- 16 words in text body +``` + +…with 7 words from the sentence and 9 from the code. + +Code block counting can be disabled with the YAML option `count-code-blocks`: + +````qmd +--- +title: "Code counting" +format: + wordcount-html: + count-code-blocks: false +--- + +This sentence has seven words in it. + +```{{r}} +# Here is some code + +numbers <- 1:10 +mean(numbers) +``` +```` + +…which makes these counts: + +```text +Overall totals: +---------------------------- +- 7 total words +- 7 words in body and notes + +Section totals: +---------------------------- +- 7 words in text body +``` + + ### Appendices In academic writing, it's often helpful to have a separate word count for content in the appendices, since things there don't typically count against journal word limits. [Quarto has a neat feature for automatically creating an appendix section](https://quarto.org/docs/authoring/appendices.html) and moving content there automatically as needed. It does this (I think) with a fancy Lua filter. diff --git a/_extensions/wordcount/wordcount.lua b/_extensions/wordcount/wordcount.lua index 08ac4e6..b5caeff 100644 --- a/_extensions/wordcount/wordcount.lua +++ b/_extensions/wordcount/wordcount.lua @@ -231,19 +231,20 @@ body_count = { body_words = body_words + 1 end end, - + Code = function(el) - _,n = el.text:gsub("%S+","") - body_words = body_words + n - end, - - CodeBlock = function(el) - _,n = el.text:gsub("%S+","") + _, n = el.text:gsub("%S+", "") body_words = body_words + n end - } +-- if count_code_blocks then +-- body_count.CodeBlock = function(el) +-- _,n = el.text:gsub("%S+","") +-- body_words = body_words + n +-- end +-- end + ref_count = { Str = function(el) -- we don't count a word if it's entirely punctuation: @@ -259,6 +260,11 @@ appendix_count = { if is_word(el.text) then appendix_words = appendix_words + 1 end + end, + + Code = function(el) + _, n = el.text:gsub("%S+", "") + appendix_words = appendix_words + n end } @@ -267,6 +273,11 @@ note_count = { if is_word(el.text) then note_words = note_words + 1 end + end, + + Code = function(el) + _, n = el.text:gsub("%S+", "") + note_words = note_words + n end } @@ -278,6 +289,31 @@ function Pandoc(el) return el end + -- Count code blocks in body, notes, and appendix if needed + if el.meta["count-code-blocks"] ~= nil then + count_code_blocks = el.meta["count-code-blocks"] + else + count_code_blocks = true + end + + -- Add these functions to the respective section counting functions + if count_code_blocks then + body_count.CodeBlock = function(el) + _, n = el.text:gsub("%S+", "") + body_words = body_words + n + end + + appendix_count.CodeBlock = function(el) + _, n = el.text:gsub("%S+", "") + appendix_words = appendix_words + n + end + + note_count.CodeBlock = function(el) + _, n = el.text:gsub("%S+", "") + note_words = note_words + n + end + end + -- Get all notes local all_notes = get_all_notes(el.blocks) -- Count words in notes diff --git a/tests/testthat/test-code-blocks-asis.qmd b/tests/testthat/test-code-blocks-asis.qmd new file mode 100644 index 0000000..f823ecd --- /dev/null +++ b/tests/testthat/test-code-blocks-asis.qmd @@ -0,0 +1,16 @@ +--- +title: "as-is results work" +format: + wordcount-markdown: + count-code-blocks: true +--- + +This sentence has seven words in it. + +```{r} +#| echo: false +#| results: asis + +output <- "Two words" +cat(output) +``` diff --git a/tests/testthat/test-code-blocks-disabled.qmd b/tests/testthat/test-code-blocks-disabled.qmd new file mode 100644 index 0000000..811bcf9 --- /dev/null +++ b/tests/testthat/test-code-blocks-disabled.qmd @@ -0,0 +1,31 @@ +--- +title: "Code blocks disabled" +format: + wordcount-markdown: + count-code-blocks: false +--- + +This sentence has seven words in it.[^note] + +```{r} +asdf <- 1:10 +mean(asdf) +``` + +[^note]: Here's some code: + +```{r indent=" "} +zxcv <- 21:30 +mean(zxcv) +``` + +::: {#appendix-count} + +There are five words here. + +```{r} +qwer <- 11:20 +mean(qwer) +``` + +::: diff --git a/tests/testthat/test-code-blocks-enabled.qmd b/tests/testthat/test-code-blocks-enabled.qmd new file mode 100644 index 0000000..87ac05d --- /dev/null +++ b/tests/testthat/test-code-blocks-enabled.qmd @@ -0,0 +1,31 @@ +--- +title: "Code blocks enabled" +format: + wordcount-markdown: + count-code-blocks: true +--- + +This sentence has seven words in it.[^note] + +```{r} +asdf <- 1:10 +mean(asdf) +``` + +[^note]: Here's some code: + +```{r indent=" "} +zxcv <- 21:30 +mean(zxcv) +``` + +::: {#appendix-count} + +There are five words here. + +```{r} +qwer <- 11:20 +mean(qwer) +``` + +::: diff --git a/tests/testthat/test-code-blocks.R b/tests/testthat/test-code-blocks.R new file mode 100644 index 0000000..9dc66c2 --- /dev/null +++ b/tests/testthat/test-code-blocks.R @@ -0,0 +1,47 @@ +test_that("disabling code block counting works", { + test_file <- test_file_parts(here::here("tests/testthat/test-code-blocks-disabled.qmd")) + + create_local_quarto_project(test_file = test_file) + + quarto::quarto_render(input = test_file$qmd, quiet = TRUE) + + counts <- get_wordcounts(test_file$md) + + expect_equal(counts$wordcount_appendix_words, 5) + expect_equal(counts$wordcount_body_words, 7) + expect_equal(counts$wordcount_note_words, 3) + expect_equal(counts$wordcount_ref_words, 0) + expect_equal(counts$wordcount_total_words, 15) +}) + +test_that("enabling code block counting works", { + test_file <- test_file_parts(here::here("tests/testthat/test-code-blocks-enabled.qmd")) + + create_local_quarto_project(test_file = test_file) + + quarto::quarto_render(input = test_file$qmd, quiet = TRUE) + + counts <- get_wordcounts(test_file$md) + + expect_equal(counts$wordcount_appendix_words, 11) + expect_equal(counts$wordcount_body_words, 13) + expect_equal(counts$wordcount_note_words, 9) + expect_equal(counts$wordcount_ref_words, 0) + expect_equal(counts$wordcount_total_words, 33) +}) + +test_that("as-is output from echo=false chunks gets counted", { + test_file <- test_file_parts(here::here("tests/testthat/test-code-blocks-asis.qmd")) + + create_local_quarto_project(test_file = test_file) + + quarto::quarto_render(input = test_file$qmd, quiet = TRUE) + + counts <- get_wordcounts(test_file$md) + + expect_equal(counts$wordcount_appendix_words, 0) + expect_equal(counts$wordcount_body_words, 9) + expect_equal(counts$wordcount_note_words, 0) + expect_equal(counts$wordcount_ref_words, 0) + expect_equal(counts$wordcount_total_words, 9) +})