Skip to content

Commit

Permalink
perf: let colformat_md call pandoc only once (#46)
Browse files Browse the repository at this point in the history
* support Div

* refactor finding lua filters

* run pandoc only once from colformat_md

* handle errors for Pandoc <  2.2.3

* update NEWS

* pkgdown::build_news()
  • Loading branch information
atusy authored Mar 10, 2021
1 parent 35518a5 commit dbd35a0
Show file tree
Hide file tree
Showing 5 changed files with 78 additions and 45 deletions.
2 changes: 1 addition & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
## Internal changes

* Drop **jpeg** package from suggests, which was formerly used in a vignette (#42).

* `colformat_md` converts cells to Pandoc's AST by a single call of Pandoc. Previously it called pandoc for each cell. This change makes it around 15X faster (#46).

# ftExtra 0.1.1
* Fix math not rendered on Windows with Pandoc < 2.10 (#33)
Expand Down
96 changes: 61 additions & 35 deletions R/as-paragraph-md.R
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,29 @@ lua <- function(...) {
c("--lua-filter", system.file("lua", ..., package = "ftExtra"))
}

#' Assemble the Pandoc arguments that enable the bundled Lua filters
#'
#' Filters and their metadata are appended step by step, depending on which
#' Pandoc version `rmarkdown::pandoc_available()` reports.
#'
#' @param .sep Value appended to the `sep_blocks` metadata argument that
#'   accompanies `blocks-to-inlines.lua`.
#' @return `NULL` when Pandoc 2 is not available, otherwise the combined
#'   result of the `lua()` calls and `--metadata=...` strings.
#' @noRd
lua_filters <- function(.sep) {
  has_pandoc <- function(version) rmarkdown::pandoc_available(version)

  if (!has_pandoc("2")) {
    return(NULL)
  }

  # Filters that are enabled for every Pandoc 2.x release.
  args <- c(lua("smart.lua"), lua("inline-code.lua"))

  if (has_pandoc("2.7.3")) {
    # math.lua is passed together with the path of the pandoc executable.
    args <- c(
      args,
      lua("math.lua"),
      paste0("--metadata=pandoc-path:", rmarkdown::pandoc_exec())
    )
    # Only Pandoc older than 2.10 additionally receives a temporary directory.
    if (!has_pandoc("2.10")) {
      args <- c(args, paste0("--metadata=temporary-directory:", tempdir()))
    }
  }

  if (has_pandoc("2.2.3")) {
    args <- c(
      args,
      lua("blocks-to-inlines.lua"),
      paste0("--metadata=sep_blocks:", .sep)
    )
  }

  args
}



parse_md <- function(x,
auto_color_link = "blue",
pandoc_args = NULL,
Expand All @@ -44,43 +67,37 @@ parse_md <- function(x,
stop("`auto_color_link` must be a string")
}

filters <- if (rmarkdown::pandoc_available("2")) {
c(
lua("smart.lua"),
lua("inline-code.lua"),
if (rmarkdown::pandoc_available("2.7.3")) {
c(
lua("math.lua"),
paste0("--metadata=pandoc-path:", rmarkdown::pandoc_exec()),
if (!rmarkdown::pandoc_available("2.10")) {
paste0("--metadata=temporary-directory:", tempdir())
}
)
},
if (rmarkdown::pandoc_available("2.2.3")) {
c(lua("blocks-to-inlines.lua"), paste0("--metadata=sep_blocks:", .sep))
}
)
}
md_df <- md2df(
x,
pandoc_args = c(lua_filters(.sep = .sep), pandoc_args),
.from = .from,
.check = TRUE
)

md_df <- md2df(x, pandoc_args = c(filters, pandoc_args), .from = .from)

if (is.null(.footnote_options) || (all(names(md_df) != "Note"))) {
y <- md_df
} else {
.footnote_options$n <- .footnote_options$n + 1L
ref <- data.frame(txt = .footnote_options$ref[[.footnote_options$n]],
Superscript = TRUE,
stringsAsFactors = FALSE)
.footnote_options$value <- c(
.footnote_options$value,
list(construct_chunk(as.list(dplyr::bind_rows(ref, md_df[md_df$Note, ])),
auto_color_link))
)
y <- dplyr::bind_rows(md_df[!md_df$Note, ], ref)
id <- pandoc_attrs(md_df$Div, "id")
cells <- unname(split(dplyr::select(md_df, !"Div"), factor(id, levels = unique(id))))

lapply(cells, function(cell) {
y <- solve_footnote(cell, .footnote_options, auto_color_link)
construct_chunk(as.list(y), auto_color_link)
})
}

#' Separate footnote rows from a cell and leave a reference mark in their place
#'
#' @param md_df Data frame for a single cell. Rows whose `Note` column is
#'   `TRUE` are treated as footnote content.
#' @param .footnote_options `NULL` to skip footnote handling, or an object
#'   with the elements `n` (counter), `ref` (reference symbols indexed by `n`)
#'   and `value` (collected footnote chunks).
#'   NOTE(review): the updates to `n` and `value` only reach the caller if
#'   this object has reference semantics (e.g. an environment) -- confirm.
#' @param auto_color_link Forwarded to `construct_chunk()`.
#' @return `md_df` unchanged when footnotes are disabled or the cell has no
#'   footnote rows; otherwise the non-footnote rows followed by one
#'   superscript reference row.
#' @noRd
solve_footnote <- function(md_df, .footnote_options, auto_color_link) {
  # `md_df[["Note"]]` is NULL when the column is absent, and any(NULL) is
  # FALSE, so cells without a Note column also take this early exit.
  if (is.null(.footnote_options) || !any(md_df[["Note"]])) {
    return(md_df)
  }

  # Advance the counter and pick the matching reference symbol.
  .footnote_options$n <- .footnote_options$n + 1L
  ref <- data.frame(
    txt = .footnote_options$ref[[.footnote_options$n]],
    Superscript = TRUE,
    stringsAsFactors = FALSE
  )

  # Store the footnote text, prefixed by its reference symbol.
  .footnote_options$value <- c(
    .footnote_options$value,
    list(construct_chunk(
      as.list(dplyr::bind_rows(ref, md_df[md_df$Note, ])),
      auto_color_link
    ))
  )

  # The cell itself keeps its ordinary rows plus the reference symbol.
  dplyr::bind_rows(md_df[!md_df$Note, ], ref)
}

construct_chunk <- function(x, auto_color_link = "blue") {
Expand Down Expand Up @@ -147,7 +164,16 @@ as_paragraph_md <- function(x,
pandoc_args = NULL,
.from = "markdown+autolink_bare_uris",
...) {
structure(lapply(x, parse_md,
x <- paste(
purrr::map2_chr(
x,
paste0('cell', seq_along(x)),
function(x, id) sprintf('<div id="%s">%s</div>', id, x)
),
collapse = ''
)

structure(parse_md(x,
auto_color_link = auto_color_link,
pandoc_args = pandoc_args,
.from = paste0(.from, paste(md_extensions, collapse="")),
Expand Down
15 changes: 9 additions & 6 deletions R/md2df.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ has_attr <- function(x) {
}

add_type <- function(x, t) {
parents <- if (is.list(t)) t else stats::setNames(list(TRUE), t)
child <- stats::setNames(
list(structure(
if (isTRUE(x$t %in% c("Image", "Link"))) x$c[[3]][[1]] else TRUE,
Expand All @@ -35,7 +34,7 @@ add_type <- function(x, t) {
x$t
)

x$t <- c(child, parents)
x$t <- c(child, if (is.list(t)) t else stats::setNames(list(TRUE), t))

if (has_attr(x$c)) x$c <- x$c[[2]]

Expand All @@ -53,6 +52,9 @@ resolve_type <- function(x) {
if (identical(names(x$c), c("t", "c"))) {
return(add_type(x$c, x$t))
}
if (identical(x$t, "Div")) {
return(resolve_type(add_type(x, list())))
}
return(lapply(x$c, add_type, x$t))
}

Expand Down Expand Up @@ -127,7 +129,8 @@ ast2df <- function(x) {
x$blocks %>%
flatten_ast() %>%
lapply(branch2list) %>%
lapply(purrr::map_at, "Image", list) %>%
# Div is not for users, but for processing multiple cells at once
lapply(purrr::map_at, c("Div", "Image"), list) %>%
lapply(format_by_attr) %>%
lapply(drop_Para) %>%
dplyr::bind_rows() %>%
Expand All @@ -137,16 +140,16 @@ ast2df <- function(x) {

#' Convert Pandoc's Markdown to data frame
#' @noRd
md2df <- function(x, .from = "markdown", pandoc_args = NULL) {
md2df <- function(x, .from = "markdown", pandoc_args = NULL, .check = FALSE) {
ast <- md2ast(x, .from = .from, pandoc_args = pandoc_args)

ast$blocks <- ast$blocks[
!vapply(ast$blocks,
function(x) identical(c(x$t, x$c[[1]][[1]]), c("Div", "refs")),
function(x) identical(c(x$t, x$c[[1L]][[1L]]), c("Div", "refs")),
NA)
]

if ((ast$blocks[[1]]$t != "Para") || (length(ast$blocks) > 1)) {
if (.check && any(vapply(ast$blocks, function(x) length(x$c[[2L]]), 0L) > 1L)) {
stop("With Pandoc < 2.2.3, markdown text must be a single paragraph")
}

Expand Down
2 changes: 2 additions & 0 deletions docs/news/index.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 5 additions & 3 deletions inst/lua/blocks-to-inlines.lua
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@ function Meta(meta)
) or {pandoc.LineBreak(), pandoc.LineBreak()}
end

function Pandoc(doc)
doc.blocks = {pandoc.Para(pandoc.utils.blocks_to_inlines(doc.blocks, sep))}
return doc
-- Replace the content of a Div with a single paragraph built from its blocks.
-- `sep` is a global defined outside this function; it is passed to
-- pandoc.utils.blocks_to_inlines as the separator between blocks.
function Div(div)
  local inlines = pandoc.utils.blocks_to_inlines(div.content, sep)
  div.content = {pandoc.Para(inlines)}
  return div
end

return {{Meta = Meta}, {Div = Div}}

0 comments on commit dbd35a0

Please sign in to comment.