diff --git a/NEWS.md b/NEWS.md index ceeedf5..998c933 100644 --- a/NEWS.md +++ b/NEWS.md @@ -11,7 +11,7 @@ ## Internal changes * Drop **jpeg** package from suggests, which is formerly used in a vignette (#42). - +* `colformat_md` now converts cells to Pandoc's AST with a single call to Pandoc. Previously, it called Pandoc once for each cell. This change improves performance by around 15x (#46). # ftExtra 0.1.1 * Fix math not rendered on Windows with Pandoc < 2.10 (#33) diff --git a/R/as-paragraph-md.R b/R/as-paragraph-md.R index bc42c59..771dd96 100644 --- a/R/as-paragraph-md.R +++ b/R/as-paragraph-md.R @@ -34,6 +34,29 @@ lua <- function(...) { c("--lua-filter", system.file("lua", ..., package = "ftExtra")) } +lua_filters <- function(.sep) { + if (!rmarkdown::pandoc_available("2")) return(NULL) + + c( + lua("smart.lua"), + lua("inline-code.lua"), + if (rmarkdown::pandoc_available("2.7.3")) { + c( + lua("math.lua"), + paste0("--metadata=pandoc-path:", rmarkdown::pandoc_exec()), + if (!rmarkdown::pandoc_available("2.10")) { + paste0("--metadata=temporary-directory:", tempdir()) + } + ) + }, + if (rmarkdown::pandoc_available("2.2.3")) { + c(lua("blocks-to-inlines.lua"), paste0("--metadata=sep_blocks:", .sep)) + } + ) +} + + + parse_md <- function(x, auto_color_link = "blue", pandoc_args = NULL, @@ -44,43 +67,37 @@ parse_md <- function(x, stop("`auto_color_link` must be a string") } - filters <- if (rmarkdown::pandoc_available("2")) { - c( - lua("smart.lua"), - lua("inline-code.lua"), - if (rmarkdown::pandoc_available("2.7.3")) { - c( - lua("math.lua"), - paste0("--metadata=pandoc-path:", rmarkdown::pandoc_exec()), - if (!rmarkdown::pandoc_available("2.10")) { - paste0("--metadata=temporary-directory:", tempdir()) - } - ) - }, - if (rmarkdown::pandoc_available("2.2.3")) { - c(lua("blocks-to-inlines.lua"), paste0("--metadata=sep_blocks:", .sep)) - } - ) - } + md_df <- md2df( + x, + pandoc_args = c(lua_filters(.sep = .sep), pandoc_args), + .from = .from, + .check = TRUE + ) - 
md_df <- md2df(x, pandoc_args = c(filters, pandoc_args), .from = .from) - - if (is.null(.footnote_options) || (all(names(md_df) != "Note"))) { - y <- md_df - } else { - .footnote_options$n <- .footnote_options$n + 1L - ref <- data.frame(txt = .footnote_options$ref[[.footnote_options$n]], - Superscript = TRUE, - stringsAsFactors = FALSE) - .footnote_options$value <- c( - .footnote_options$value, - list(construct_chunk(as.list(dplyr::bind_rows(ref, md_df[md_df$Note, ])), - auto_color_link)) - ) - y <- dplyr::bind_rows(md_df[!md_df$Note, ], ref) + id <- pandoc_attrs(md_df$Div, "id") + cells <- unname(split(dplyr::select(md_df, !"Div"), factor(id, levels = unique(id)))) + + lapply(cells, function(cell) { + y <- solve_footnote(cell, .footnote_options, auto_color_link) + construct_chunk(as.list(y), auto_color_link) + }) +} + +solve_footnote <- function(md_df, .footnote_options, auto_color_link) { + if (is.null(.footnote_options) || !any(md_df[["Note"]])) { + return(md_df) } - construct_chunk(as.list(y), auto_color_link) + .footnote_options$n <- .footnote_options$n + 1L + ref <- data.frame(txt = .footnote_options$ref[[.footnote_options$n]], + Superscript = TRUE, + stringsAsFactors = FALSE) + .footnote_options$value <- c( + .footnote_options$value, + list(construct_chunk(as.list(dplyr::bind_rows(ref, md_df[md_df$Note, ])), + auto_color_link)) + ) + dplyr::bind_rows(md_df[!md_df$Note, ], ref) } construct_chunk <- function(x, auto_color_link = "blue") { @@ -147,7 +164,16 @@ as_paragraph_md <- function(x, pandoc_args = NULL, .from = "markdown+autolink_bare_uris", ...) { - structure(lapply(x, parse_md, + x <- paste( + purrr::map2_chr( + x, + paste0('cell', seq_along(x)), + function(x, id) sprintf('
%s
', id, x) + ), + collapse = '' + ) + + structure(parse_md(x, auto_color_link = auto_color_link, pandoc_args = pandoc_args, .from = paste0(.from, paste(md_extensions, collapse="")), diff --git a/R/md2df.R b/R/md2df.R index 17940c1..797369f 100644 --- a/R/md2df.R +++ b/R/md2df.R @@ -22,7 +22,6 @@ has_attr <- function(x) { } add_type <- function(x, t) { - parents <- if (is.list(t)) t else stats::setNames(list(TRUE), t) child <- stats::setNames( list(structure( if (isTRUE(x$t %in% c("Image", "Link"))) x$c[[3]][[1]] else TRUE, @@ -35,7 +34,7 @@ add_type <- function(x, t) { x$t ) - x$t <- c(child, parents) + x$t <- c(child, if (is.list(t)) t else stats::setNames(list(TRUE), t)) if (has_attr(x$c)) x$c <- x$c[[2]] @@ -53,6 +52,9 @@ resolve_type <- function(x) { if (identical(names(x$c), c("t", "c"))) { return(add_type(x$c, x$t)) } + if (identical(x$t, "Div")) { + return(resolve_type(add_type(x, list()))) + } return(lapply(x$c, add_type, x$t)) } @@ -127,7 +129,8 @@ ast2df <- function(x) { x$blocks %>% flatten_ast() %>% lapply(branch2list) %>% - lapply(purrr::map_at, "Image", list) %>% + # Div is not for users, but for processing multiple cells at once + lapply(purrr::map_at, c("Div", "Image"), list) %>% lapply(format_by_attr) %>% lapply(drop_Para) %>% dplyr::bind_rows() %>% @@ -137,16 +140,16 @@ ast2df <- function(x) { #' Convert Pandoc's Markdown to data frame #' @noRd -md2df <- function(x, .from = "markdown", pandoc_args = NULL) { +md2df <- function(x, .from = "markdown", pandoc_args = NULL, .check = FALSE) { ast <- md2ast(x, .from = .from, pandoc_args = pandoc_args) ast$blocks <- ast$blocks[ !vapply(ast$blocks, - function(x) identical(c(x$t, x$c[[1]][[1]]), c("Div", "refs")), + function(x) identical(c(x$t, x$c[[1L]][[1L]]), c("Div", "refs")), NA) ] - if ((ast$blocks[[1]]$t != "Para") || (length(ast$blocks) > 1)) { + if (.check && any(vapply(ast$blocks, function(x) length(x$c[[2L]]), 0L) > 1L)) { stop("With Pandoc < 2.2.3, markdown text must be a single paragraph") } diff 
--git a/docs/news/index.html b/docs/news/index.html index 45bbda5..d282709 100644 --- a/docs/news/index.html +++ b/docs/news/index.html @@ -156,6 +156,8 @@

Internal changes

diff --git a/inst/lua/blocks-to-inlines.lua b/inst/lua/blocks-to-inlines.lua index 0f22eaa..497ab3d 100644 --- a/inst/lua/blocks-to-inlines.lua +++ b/inst/lua/blocks-to-inlines.lua @@ -4,7 +4,9 @@ function Meta(meta) ) or {pandoc.LineBreak(), pandoc.LineBreak()} end -function Pandoc(doc) - doc.blocks = {pandoc.Para(pandoc.utils.blocks_to_inlines(doc.blocks, sep))} - return doc +function Div(div) + div.content = {pandoc.Para(pandoc.utils.blocks_to_inlines(div.content, sep))} + return div end + +return {{Meta = Meta}, {Div = Div}}