From 0d52ba8f63a128012a5e5b956b14bf35dccb1e39 Mon Sep 17 00:00:00 2001
From: Jan Marvin Garbuszus
Date: Sat, 30 Nov 2024 12:33:52 +0100
Subject: [PATCH] [load] read file with d3p1 namespace

---
 R/wb_load.R                        | 15 ++++++++++++---
 tests/testthat/test-read_sources.R | 12 ++++++++++++
 2 files changed, 24 insertions(+), 3 deletions(-)

diff --git a/R/wb_load.R b/R/wb_load.R
index 8998f0dba..c36d208f8 100644
--- a/R/wb_load.R
+++ b/R/wb_load.R
@@ -106,8 +106,13 @@ wb_load <- function(
   wb <- wb_workbook()
   wb$path <- file
 
-  grep_xml <- function(pattern, perl = TRUE, value = TRUE, ...) {
+  # There is one known file in #1194. This file has lower case folders, while
+  # the references in the file are the usual camel case.
+  needs_lower <- any(grepl("\\[content_types\\]\\.xml$", xmlFiles))
+
+  grep_xml <- function(pattern, perl = TRUE, value = TRUE, to_lower = needs_lower, ...) {
     # targets xmlFiles; has presents
+    if (to_lower) pattern <- tolower(pattern)
     grep(pattern, xmlFiles, perl = perl, value = value, ...)
   }
 
@@ -423,11 +428,15 @@ wb_load <- function(
   sheets <- xml_attr(workbook_xml, "workbook", "sheets", "sheet")
   sheets <- rbindlist(sheets)
 
+  # Usually the id variable is called `r:id`, but there is one known sheet
+  # that has `d3p1:id`, so look the column up by its `:id` suffix
+  r_id <- grep(":id$", names(sheets), value = TRUE)
+
   ## Some veryHidden sheets do not have a sheet content and their rId is empty.
   ## Such sheets need to be filtered out because otherwise their sheet names
   ## occur in the list of all sheet names, leading to a wrong association
   ## of sheet names with sheet indeces.
-  sheets <- sheets[sheets$`r:id` != "", ]
+  sheets <- sheets[sheets[[r_id]] != "", ]
 
   # if wb_relsxml is not available, the workbook has no relationships, not
   # sure if this is possible
@@ -439,7 +448,7 @@ wb_load <- function(
 
   sheets <- merge(
     sheets, wb_rels_xml,
-    by.x = "r:id", by.y = "Id",
+    by.x = r_id, by.y = "Id",
     all.x = TRUE,
     all.y = FALSE,
     sort = FALSE
diff --git a/tests/testthat/test-read_sources.R b/tests/testthat/test-read_sources.R
index f631d5214..26f03bc68 100644
--- a/tests/testthat/test-read_sources.R
+++ b/tests/testthat/test-read_sources.R
@@ -470,3 +470,15 @@ test_that("file extension handling works", {
   expect_silent(wb_save(wb, file = tempfile(fileext = ".XLSM")))
 
 })
+
+test_that("loading d3p1 file works", {
+
+  fl <- testfile_path("gh_issue_1194.xlsx")
+
+  df <- wb_to_df(fl)
+
+  exp <- c(1347, 31)
+  got <- dim(df)
+  expect_equal(got, exp)
+
+})