From c6b7b39da89a1fbfcf9c26c8e1e806465efb2e84 Mon Sep 17 00:00:00 2001 From: crsh Date: Sat, 12 Oct 2024 20:15:22 +0200 Subject: [PATCH] Polishes DOI replacement post-processor. - Renames `replace_doi_citations()` to `post_process_doi_citations()`. - Adds new function `replace_resolved_doi_citations()` that can be used inside an R Markdown document instead of defining a custom format. - Adds a vignette that demonstrates how to use the new function. --- .gitignore | 2 + NAMESPACE | 3 +- R/replace_doi.R | 25 +++--- ...tions.Rd => post_process_doi_citations.Rd} | 20 ++--- vignettes/doi2cite.Rmd | 77 +++++++++++++++++++ 5 files changed, 104 insertions(+), 23 deletions(-) rename man/{replace_doi_citations.Rd => post_process_doi_citations.Rd} (52%) create mode 100644 vignettes/doi2cite.Rmd diff --git a/.gitignore b/.gitignore index 26bb2f8..2d07904 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,5 @@ inst/doc .Rhistory .RData .Ruserdata + +/.luarc.json diff --git a/NAMESPACE b/NAMESPACE index 0f2008d..d297a39 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -7,4 +7,5 @@ export(add_doi2cite_filter) export(add_lua_filter) export(add_replace_ampersands_filter) export(add_wordcount_filter) -export(replace_doi_citations) +export(post_process_doi_citations) +export(replace_resolved_doi_citations) diff --git a/R/replace_doi.R b/R/replace_doi.R index e360e6c..f48d67a 100644 --- a/R/replace_doi.R +++ b/R/replace_doi.R @@ -4,18 +4,12 @@ #' with the corresponding entries from a BibTeX file. Requires the package #' `bibtex` to be installed. #' -#' @param rmd A character vector specifying the path to the R Markdown file -#' (UTF-8 encoding expected). -#' @param bib A character vector specifying the path to the BibTeX file -#' (UTF-8 encoding expected). +#' @param input_file Character. Path to the input file provided to the post-processor. +#' @param bib Character. A (vector of) path(s) to the BibTeX file(s). #' @return Returns `TRUE` invisibly. 
-#' @examples -#' dontrun({ -#' replace_doi_citations("myreport.Rmd") -#' }) #' @export -replace_doi_citations <- function(rmd, bib = NULL) { +post_process_doi_citations <- function(input_file, bib) { if(!require("bibtex", quietly = TRUE)) { stop("The package `bibtex` is not avialable but required to replace DOI citations in a source document. Please install the package and try again.") } @@ -49,7 +43,7 @@ replace_doi_citations <- function(rmd, bib = NULL) { # Process bib files entries <- lapply(bib[existant_bib & !empty_bib], bibtex::read.bib) |> do.call("c", args = _) |> - (\(x) x$doi)() + (\(x) setNames(x$doi, names(x)))() entries <- entries[!is.na(entries) & !duplicated(entries)] @@ -71,6 +65,17 @@ replace_doi_citations <- function(rmd, bib = NULL) { invisible(TRUE) } +#' @rdname post_process_doi_citations +#' @export + +replace_resolved_doi_citations <- function() { + rmd <- knitr::current_input() + bib <- rmarkdown::metadata$bibliography + if(length(bib) > 0 && any(file.exists(bib))) { # `bibliography` may be unset (NULL) or list several files + rmdfiltr::post_process_doi_citations(rmd, bib) + } +} + #' @keywords internal readLines_utf8 <- function(con) { diff --git a/man/replace_doi_citations.Rd b/man/post_process_doi_citations.Rd similarity index 52% rename from man/replace_doi_citations.Rd rename to man/post_process_doi_citations.Rd index 753f116..9729398 100644 --- a/man/replace_doi_citations.Rd +++ b/man/post_process_doi_citations.Rd @@ -1,17 +1,18 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/replace_doi.R -\name{replace_doi_citations} -\alias{replace_doi_citations} +\name{post_process_doi_citations} +\alias{post_process_doi_citations} +\alias{replace_resolved_doi_citations} \title{Replace DOI citations in R Markdown document} \usage{ -replace_doi_citations(rmd, bib = NULL) +post_process_doi_citations(input_file, bib) + +replace_resolved_doi_citations() } \arguments{ -\item{rmd}{A character vector specifying the path to the R Markdown file -(UTF-8 encoding expected).} +\item{input_file}{Character. 
Path to the input file provided to the post-processor.} -\item{bib}{A character vector specifying the path to the BibTeX file -(UTF-8 encoding expected).} +\item{bib}{Character. A (vector of) path(s) to the BibTeX file(s).} } \value{ Returns `TRUE` invisibly. @@ -21,8 +22,3 @@ This function reads an R Markdown document and replaces all DOI citations with the corresponding entries from a BibTeX file. Requires the package `bibtex` to be installed. } -\examples{ -dontrun({ -replace_doi_citations("myreport.Rmd") -}) -} diff --git a/vignettes/doi2cite.Rmd b/vignettes/doi2cite.Rmd new file mode 100644 index 0000000..60feaa5 --- /dev/null +++ b/vignettes/doi2cite.Rmd @@ -0,0 +1,77 @@ +--- +title : "Cite references using only the DOI" +author : "Frederik Aust" +date : "`r Sys.Date()`" + +output : rmarkdown::html_vignette + +vignette : > + %\VignetteIndexEntry{Cite references using only the DOI} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r setup, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +# Using the doi2cite filter + +[`doi2cite`](https://github.com/korintje/pandoc-doi2cite?tab=readme-ov-file) is a fantastic filter by [@korintje](https://github.com/korintje) that extends `citeproc` and allows you to add citations using only the work's DOI. + +In essence, `doi2cite` searches the Markdown document for citations that start with `doi:`, `DOI:`, `doi.org/` or `https://doi.org/`, extracts the DOI, queries CrossRef for the bibliographic information, writes it to a local BibTeX file and replaces the citation key with the proper BibTeX key. +Now `citeproc` can process the citation and will do the rest. +I have adapted the filter to work with multiple bibliography files and have provided additional post-processing functions to streamline the use with R Markdown. 
+The key issue to solve here is that `doi2cite` replaces DOIs with BibTeX handles in the intermediate Markdown document, but not in the R Markdown source file. +Doing this requires an additional post-processing step that is done by `rmdfiltr::post_process_doi_citations()`. + +To use the `doi2cite` filter, we need to do two things: + +1. Use `rmdfiltr::add_doi2cite_filter()` to add an argument to the call to pandoc +2. Add the designated file "__from_DOI.bib" (it currently has to be this file name!) to the `bibliography` field of the YAML front matter + +When adding the filters to `pandoc_args` the R code needs to be preceded by `!expr` to declare it as a to-be-interpreted expression. + +~~~yaml +bibliography: "__from_DOI.bib" +output: + html_document: + pandoc_args: !expr rmdfiltr::add_doi2cite_filter(args = NULL) +~~~ + +In the resulting HTML file, the citation tags `@doi:10.1037/xlm0001360` will be rendered as `Marsh et al. (2024)`. +However, the DOI-based citation tag remains in the source R Markdown file. +Replacing it with the BibTeX citation handle requires an additional post-processing step. + +A makeshift solution to this is to call `rmdfiltr::replace_resolved_doi_citations()` in the R Markdown document. +The function will check the bibliography files in the YAML front matter for matching DOIs and replace the DOI in the R Markdown document with the corresponding reference handles. +Because `doi2cite` is run *after* `rmdfiltr::replace_resolved_doi_citations()`, this will only work for DOI citations that were resolved in a previous knitting process. + +To resolve this remaining issue, it is necessary to create a custom **rmarkdown** format. +Now, we can add the `doi2cite` filter to the pandoc arguments and add `rmdfiltr::post_process_doi_citations()` to the post-processor. +The following is a sketch of the essential parts of the custom format: + +```{r} +#| eval: false +#| echo: true + +my_format <- rmarkdown::output_format( + pre_processor = \(...) 
{ + rmdfiltr::add_doi2cite_filter(args = NULL) + } + , post_processor = \(input_file, metadata, ...) { + rmdfiltr::post_process_doi_citations(input_file, metadata$bibliography) + } + , ... +) +``` + +With these pre- and post-processors, the DOI-based citations will be replaced by the BibTeX citation handles in the R Markdown source file. +That is, the citation tag `@doi:10.1037/xlm0001360` will be replaced by `@Marsh_2024` in the R Markdown source file and rendered to `Marsh et al. (2024)` in the output. + +# References + +Marsh, John E., Mark J. Hurlstone, Alexandre Marois, Linden J. Ball, Stuart B. Moore, François Vachon, Sabine J. Schlittmeier, et al. (2024). Changing-State Irrelevant Speech Disrupts Visual–Verbal but Not Visual–Spatial Serial Recall. *Journal of Experimental Psychology: Learning, Memory, and Cognition*. https://doi.org/10.1037/xlm0001360.