Skip to content

Commit

Permalink
[load] read file with d3p1 namespace
Browse files Browse the repository at this point in the history
  • Loading branch information
JanMarvin committed Dec 11, 2024
1 parent e949006 commit 0d52ba8
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 3 deletions.
15 changes: 12 additions & 3 deletions R/wb_load.R
Original file line number Diff line number Diff line change
Expand Up @@ -106,8 +106,13 @@ wb_load <- function(
wb <- wb_workbook()
wb$path <- file

grep_xml <- function(pattern, perl = TRUE, value = TRUE, ...) {
# There is one known file in #1194. This file has lower case folders, while
# the references in the file are the usual camel case. A lower-case
# `[content_types].xml` entry in `xmlFiles` is used as the marker for such a
# file (the match is case sensitive, so the usual `[Content_Types].xml` does
# not trigger it).
# `any()` already returns a single TRUE/FALSE, so no `ifelse()` wrapper is
# needed; the dot is escaped so that it only matches a literal ".".
needs_lower <- any(grepl("\\[content_types\\]\\.xml$", xmlFiles))

grep_xml <- function(pattern, perl = TRUE, value = TRUE, to_lower = needs_lower, ...) {
  # Search `xmlFiles` for entries matching `pattern`. `perl` and `value` are
  # preset to TRUE and are handed on to grep() together with `...`.
  # With `to_lower = TRUE` the pattern is lower-cased before matching, so that
  # camel case references can find lower case paths.
  needle <- if (to_lower) tolower(pattern) else pattern
  grep(needle, xmlFiles, perl = perl, value = value, ...)
}

Expand Down Expand Up @@ -423,11 +428,15 @@ wb_load <- function(
sheets <- xml_attr(workbook_xml, "workbook", "sheets", "sheet")
sheets <- rbindlist(sheets)

# Usually the id variable is called `r:id`, but there is one known sheet
# that has `d3p1:id`
r_id <- names(sheets)[grepl(":id", names(sheets))]

## Some veryHidden sheets do not have a sheet content and their rId is empty.
## Such sheets need to be filtered out because otherwise their sheet names
## occur in the list of all sheet names, leading to a wrong association
## of sheet names with sheet indices.
sheets <- sheets[sheets$`r:id` != "", ]
sheets <- sheets[sheets[r_id] != "", ]

# if wb_relsxml is not available, the workbook has no relationships, not
# sure if this is possible
Expand All @@ -439,7 +448,7 @@ wb_load <- function(

sheets <- merge(
sheets, wb_rels_xml,
by.x = "r:id", by.y = "Id",
by.x = r_id, by.y = "Id",
all.x = TRUE,
all.y = FALSE,
sort = FALSE
Expand Down
12 changes: 12 additions & 0 deletions tests/testthat/test-read_sources.R
Original file line number Diff line number Diff line change
Expand Up @@ -470,3 +470,15 @@ test_that("file extension handling works", {
expect_silent(wb_save(wb, file = tempfile(fileext = ".XLSM")))

})

test_that("loading d3p1 file works", {

  # Test file from GitHub issue #1194; loading it must yield a data frame
  # with the expected dimensions.
  fl <- testfile_path("gh_issue_1194.xlsx")

  df <- wb_to_df(fl)

  exp <- c(1347, 31)
  got <- dim(df)
  # expect_equal() takes the observed value first and the reference second,
  # so that a failure report labels actual and expected correctly.
  expect_equal(got, exp)

})

0 comments on commit 0d52ba8

Please sign in to comment.