From 3d587d2f3241263a75cf1642be51dc77153e560a Mon Sep 17 00:00:00 2001 From: Jan Marvin Garbuszus Date: Sat, 30 Nov 2024 12:33:52 +0100 Subject: [PATCH] [load] read file with d3p1 namespace --- R/wb_load.R | 23 ++++++++++++++++++++++- tests/testthat/test-read_sources.R | 15 +++++++++++++++ 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/R/wb_load.R b/R/wb_load.R index 8998f0dba..4b2343232 100644 --- a/R/wb_load.R +++ b/R/wb_load.R @@ -106,8 +106,13 @@ wb_load <- function( wb <- wb_workbook() wb$path <- file - grep_xml <- function(pattern, perl = TRUE, value = TRUE, ...) { + # There is one known file in #1194. This file has lower case folders, while + # the references in the file are the usual camel case. + needs_lower <- ifelse(any(grepl("\\[content_types\\].xml$", xmlFiles)), TRUE, FALSE) + + grep_xml <- function(pattern, perl = TRUE, value = TRUE, to_lower = needs_lower, ...) { # targets xmlFiles; has presents + if (to_lower) pattern <- tolower(pattern) grep(pattern, xmlFiles, perl = perl, value = value, ...) } @@ -423,6 +428,22 @@ wb_load <- function( sheets <- xml_attr(workbook_xml, "workbook", "sheets", "sheet") sheets <- rbindlist(sheets) + if (any(grepl("d3p1", nams <- names(sheets)))) { + msg <- paste0( + "The `{%s}` namespace(s) has been removed from the xml files, for example:\n", + "\t<%s:ID> changed to:\n", + "\t<ID>\n", + "See 'Details' in ?openxlsx2::wb_load() for more information." + ) + warning(sprintf(msg, "d3p1", "d3p1")) + nams <- stringi::stri_replace_all_fixed( + str = nams, + pattern = c("d3p1:", ":d3p1"), + replacement = c("r:", "") + ) + names(sheets) <- nams + } + ## Some veryHidden sheets do not have a sheet content and their rId is empty. 
## Such sheets need to be filtered out because otherwise their sheet names ## occur in the list of all sheet names, leading to a wrong association diff --git a/tests/testthat/test-read_sources.R b/tests/testthat/test-read_sources.R index f631d5214..e6350d29c 100644 --- a/tests/testthat/test-read_sources.R +++ b/tests/testthat/test-read_sources.R @@ -470,3 +470,18 @@ test_that("file extension handling works", { expect_silent(wb_save(wb, file = tempfile(fileext = ".XLSM"))) }) + +test_that("loading d3p1 file works", { + + fl <- testfile_path("gh_issue_1194.xlsx") + + expect_warning( + df <- wb_to_df(fl), + "d3p1" + ) + + exp <- c(1347, 31) + got <- dim(df) + expect_equal(exp, got) + +})