From c79b109193a83c3cbdca99b5f5622883414001b0 Mon Sep 17 00:00:00 2001 From: shrektan Date: Wed, 18 Nov 2020 17:56:43 +0800 Subject: [PATCH 01/14] read comments as UTF-8 encoded --- R/loadWorkbook.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/loadWorkbook.R b/R/loadWorkbook.R index d36c6665..51b905d2 100644 --- a/R/loadWorkbook.R +++ b/R/loadWorkbook.R @@ -803,7 +803,7 @@ loadWorkbook <- function(file, xlsxFile = NULL, isUnzipped = FALSE) { target <- unlist(lapply(commentXMLrelationship[[i]], function(x) regmatches(x, gregexpr('(?<=Target=").*?"', x, perl = TRUE))[[1]])) target <- basename(gsub('"$', "", target)) - txt <- paste(readLines(commentsXML[grepl(target, commentsXML)], warn = FALSE), collapse = "\n") + txt <- paste(readLines(commentsXML[grepl(target, commentsXML)], warn = FALSE, encoding = "UTF-8"), collapse = "\n") txt <- removeHeadTag(txt) authors <- getNodes(xml = txt, tagIn = "") From 5dfb27485fa4bbbde4022347b54ed96572a8c9d3 Mon Sep 17 00:00:00 2001 From: shrektan Date: Wed, 18 Nov 2020 18:32:07 +0800 Subject: [PATCH 02/14] add a helper function markUTF8() --- R/RcppExports.R | 4 ++++ src/RcppExports.cpp | 12 ++++++++++++ src/helper_functions.cpp | 10 ++++++++++ src/openxlsx.h | 3 +-- 4 files changed, 27 insertions(+), 2 deletions(-) diff --git a/R/RcppExports.R b/R/RcppExports.R index 3feb7939..bde88f3a 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -37,6 +37,10 @@ get_letters <- function() { .Call(`_openxlsx_get_letters`) } +markUTF8 <- function(x) { + .Call(`_openxlsx_markUTF8`, x) +} + loadworksheets <- function(wb, styleObjects, xmlFiles, is_chart_sheet) { .Call(`_openxlsx_loadworksheets`, wb, styleObjects, xmlFiles, is_chart_sheet) } diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index c5585f66..b5a1a37c 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -115,6 +115,17 @@ BEGIN_RCPP return rcpp_result_gen; END_RCPP } +// markUTF8 +CharacterVector markUTF8(CharacterVector x); +RcppExport SEXP _openxlsx_markUTF8(SEXP xSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< CharacterVector >::type x(xSEXP); + rcpp_result_gen = Rcpp::wrap(markUTF8(x)); + return rcpp_result_gen; +END_RCPP +} // loadworksheets SEXP loadworksheets(Reference wb, List styleObjects, std::vector xmlFiles, LogicalVector is_chart_sheet); RcppExport SEXP _openxlsx_loadworksheets(SEXP wbSEXP, SEXP styleObjectsSEXP, SEXP xmlFilesSEXP, SEXP is_chart_sheetSEXP) { @@ -456,6 +467,7 @@ static const R_CallMethodDef CallEntries[] = { {"_openxlsx_cppReadFile", (DL_FUNC) &_openxlsx_cppReadFile, 1}, {"_openxlsx_read_file_newline", (DL_FUNC) &_openxlsx_read_file_newline, 1}, {"_openxlsx_get_letters", (DL_FUNC) &_openxlsx_get_letters, 0}, + {"_openxlsx_markUTF8", (DL_FUNC) &_openxlsx_markUTF8, 1}, {"_openxlsx_loadworksheets", (DL_FUNC) &_openxlsx_loadworksheets, 4}, {"_openxlsx_getNodes", (DL_FUNC) &_openxlsx_getNodes, 2}, {"_openxlsx_getOpenClosedNode", (DL_FUNC) &_openxlsx_getOpenClosedNode, 3}, diff --git a/src/helper_functions.cpp b/src/helper_functions.cpp index b24de6da..2555411f 100644 --- a/src/helper_functions.cpp +++ b/src/helper_functions.cpp @@ -312,3 +312,13 @@ std::vector get_letters(){ return(LETTERS); } + + +// [[Rcpp::export]] +CharacterVector markUTF8(CharacterVector x) { + const size_t n = x.size(); + for (size_t i = 0; i < n; ++i) { + x[i] = Rf_mkCharCE(x[i], CE_UTF8); + } + return x; +} diff --git a/src/openxlsx.h b/src/openxlsx.h index 063a6de1..b28d8b34 100644 --- a/src/openxlsx.h +++ b/src/openxlsx.h @@ -81,5 +81,4 @@ LogicalVector isInternalHyperlink(CharacterVector x); // helper functions string itos(int i); SEXP write_file(std::string parent, std::string xmlText, std::string parentEnd, std::string R_fileName); - - \ No newline at end of file +CharacterVector markUTF8(CharacterVector x); From 333abb726761c51768cace6cc93767db367faa76 Mon Sep 17 00:00:00 2001 From: shrektan Date: Wed, 18 Nov 2020 18:45:35 +0800 Subject: [PATCH 03/14] add a clone argument --- src/helper_functions.cpp | 12 +++++++++--- src/openxlsx.h | 2 +- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/helper_functions.cpp b/src/helper_functions.cpp index 2555411f..6c20f835 100644 --- a/src/helper_functions.cpp +++ b/src/helper_functions.cpp @@ -315,10 +315,16 @@ std::vector get_letters(){ // [[Rcpp::export]] -CharacterVector markUTF8(CharacterVector x) { +CharacterVector markUTF8(CharacterVector x, bool clone) { + CharacterVector out; + if (clone) { + out = Rcpp::clone(x); + } else { + out = x; + } const size_t n = x.size(); for (size_t i = 0; i < n; ++i) { - x[i] = Rf_mkCharCE(x[i], CE_UTF8); + out[i] = Rf_mkCharCE(x[i], CE_UTF8); } - return x; + return out; } diff --git a/src/openxlsx.h b/src/openxlsx.h index b28d8b34..7d3901d7 100644 --- a/src/openxlsx.h +++ b/src/openxlsx.h @@ -81,4 +81,4 @@ LogicalVector isInternalHyperlink(CharacterVector x); // helper functions string itos(int i); SEXP write_file(std::string parent, std::string xmlText, std::string parentEnd, std::string R_fileName); -CharacterVector markUTF8(CharacterVector x); +CharacterVector markUTF8(CharacterVector x, bool clone = false); From d11f35d99e36a4e30717a7aaedd49e32e5395d18 Mon Sep 17 00:00:00 2001 From: shrektan Date: Wed, 18 Nov 2020 18:47:29 +0800 Subject: [PATCH 04/14] looks like a bug to me, should be && --- src/write_file.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/write_file.cpp b/src/write_file.cpp index addad85e..fc3f44ef 100644 --- a/src/write_file.cpp +++ b/src/write_file.cpp @@ -216,7 +216,7 @@ SEXP buildMatrixMixed(CharacterVector v, // If column is date class and no strings exist in column - if( (std::find(dateCols.begin(), dateCols.end(), i) != dateCols.end()) & + if( (std::find(dateCols.begin(), dateCols.end(), i) != dateCols.end()) && (std::find(charCols.begin(), charCols.end(), i) == charCols.end()) ){ // these are all dates and no characters --> safe to convert numerics From e9ccad7a4fcf641e249f6512939fdd8f23e88a0e Mon Sep 17 00:00:00 2001 From: shrektan Date: Wed, 18 Nov 2020 18:49:48 +0800 Subject: [PATCH 05/14] mark UTF8 when cpp functions return CharacterVector and nonASCII characters are possible involved --- src/load_workbook.cpp | 17 +++++++++-------- src/write_file.cpp | 3 ++- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/src/load_workbook.cpp b/src/load_workbook.cpp index 2b82edcd..090e6c00 100644 --- a/src/load_workbook.cpp +++ b/src/load_workbook.cpp @@ -778,8 +778,8 @@ SEXP getNodes(std::string xml, std::string tagIn){ } - return wrap(r) ; - + CharacterVector out = wrap(r); + return markUTF8(out); } @@ -814,9 +814,8 @@ SEXP getOpenClosedNode(std::string xml, std::string open_tag, std::string close_ } - return wrap(r) ; - - + CharacterVector out = wrap(r); + return markUTF8(out); } @@ -852,7 +851,7 @@ SEXP getAttr(CharacterVector x, std::string tag){ } } - return wrap(r) ; + return markUTF8(r); // no need to wrap as r is already a CharacterVector } @@ -912,7 +911,8 @@ CharacterVector getChildlessNode(std::string xml, std::string tag){ } - return wrap(r) ; + CharacterVector out = wrap(r); + return markUTF8(out); } @@ -960,7 +960,8 @@ CharacterVector get_extLst_Major(std::string xml){ } - return wrap(r) ; + CharacterVector out = wrap(r); + return markUTF8(out); } diff --git a/src/write_file.cpp b/src/write_file.cpp index fc3f44ef..9e3fc7b9 100644 --- a/src/write_file.cpp +++ b/src/write_file.cpp @@ -342,7 +342,8 @@ CharacterVector build_table_xml(std::string table, std::string tableStyleXML, st table = table + tableCols + tableStyleXML + ""; - return wrap(table); + CharacterVector out = wrap(table); + return markUTF8(out); } From 7897200c4c2e8bc341bbf9d7325820be381e1ccf Mon Sep 17 00:00:00 2001 From: shrektan Date: Wed, 18 Nov 2020 18:55:50 +0800 Subject: [PATCH 06/14] define a helper function readUTF8() --- R/utils.R | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 R/utils.R diff --git a/R/utils.R b/R/utils.R new file mode 100644 index 00000000..b303583f --- /dev/null +++ b/R/utils.R @@ -0,0 +1,4 @@ + +readUTF8 <- function(x) { + readLines(x, warn = FALSE, encoding = "UTF-8") +} From 6355f966c3069a62d440c70e43797f9fd630ddbd Mon Sep 17 00:00:00 2001 From: shrektan Date: Wed, 18 Nov 2020 18:56:12 +0800 Subject: [PATCH 07/14] replace all the readLines() with readUTF8() --- R/WorkbookClass.R | 5 ++--- R/loadWorkbook.R | 26 ++++++++++++------------ R/readWorkbook.R | 6 +++--- R/wrappers.R | 6 +++--- tests/testthat/test-writing_sheet_data.R | 4 ++-- 5 files changed, 23 insertions(+), 24 deletions(-) diff --git a/R/WorkbookClass.R b/R/WorkbookClass.R index ef2711dc..6d67de55 100644 --- a/R/WorkbookClass.R +++ b/R/WorkbookClass.R @@ -313,7 +313,7 @@ Workbook$methods( # The result is saved to a new chart xml file newfl <- file.path(dirname(fl), newname) charts[newname] <<- newfl - chart <- readLines(fl, warn = FALSE, encoding = "UTF-8") + chart <- readUTF8(fl) chart <- gsub( stri_join("(?<=')", sheet_names[[clonedSheet]], "(?='!)"), @@ -17825,8 +17825,7 @@ Workbook$methods( Workbook$methods( loadStyles = function(stylesXML) { ## Build style objects from the styles XML - stylesTxt <- - readLines(stylesXML, warn = FALSE, encoding = "UTF-8") + stylesTxt <- readUTF8(stylesXML) stylesTxt <- removeHeadTag(stylesTxt) ## Indexed colours diff --git a/R/loadWorkbook.R b/R/loadWorkbook.R index 51b905d2..fe6a4c40 100644 --- a/R/loadWorkbook.R +++ b/R/loadWorkbook.R @@ -105,7 +105,7 @@ loadWorkbook <- function(file, xlsxFile = NULL, isUnzipped = FALSE) { ## core if (length(coreXML) == 1) { - coreXML <- paste(readLines(con = coreXML, encoding = "UTF-8", warn = FALSE), collapse = "") + coreXML <- paste(readUTF8(coreXML), collapse = "") wb$core <- removeHeadTag(x = coreXML) } @@ -115,7 +115,7 @@ loadWorkbook <- function(file, xlsxFile = NULL, isUnzipped = FALSE) { worksheet_rId_mapping <- NULL workbookRelsXML <- xmlFiles[grepl("workbook.xml.rels$", xmlFiles, perl = TRUE)] if (length(workbookRelsXML) > 0) { - workbookRelsXML <- paste(readLines(con = workbookRelsXML, encoding = "UTF-8", warn = FALSE), collapse = "") + workbookRelsXML <- paste(readUTF8(workbookRelsXML), collapse = "") workbookRelsXML <- getChildlessNode(xml = workbookRelsXML, tag = " 0) { - workbook <- readLines(workbookXML, warn = FALSE, encoding = "UTF-8") + workbook <- readUTF8(workbookXML) workbook <- removeHeadTag(workbook) sheets <- unlist(regmatches(workbook, gregexpr("(?<=).*(?=)", workbook, perl = TRUE))) @@ -176,7 +176,7 @@ loadWorkbook <- function(file, xlsxFile = NULL, isUnzipped = FALSE) { for (i in 1:length(sheetrId)) { if (is_chart_sheet[i]) { count <- 0 - txt <- paste(readLines(chartSheetsXML[j], warn = FALSE, encoding = "UTF-8"), collapse = "") + txt <- paste(readUTF8(chartSheetsXML[j]), collapse = "") zoom <- regmatches(txt, regexpr('(?<=zoomScale=")[0-9]+', txt, perl = TRUE)) if (length(zoom) == 0) { @@ -235,7 +235,7 @@ loadWorkbook <- function(file, xlsxFile = NULL, isUnzipped = FALSE) { ## xl\sharedStrings if (length(sharedStringsXML) > 0) { - sharedStrings <- readLines(sharedStringsXML, warn = FALSE, encoding = "UTF-8") + sharedStrings <- readUTF8(sharedStringsXML) sharedStrings <- paste(sharedStrings, collapse = "\n") sharedStrings <- removeHeadTag(sharedStrings) @@ -419,7 +419,7 @@ loadWorkbook <- function(file, xlsxFile = NULL, isUnzipped = FALSE) { ## xl\theme if (length(themeXML) > 0) { - wb$theme <- removeHeadTag(paste(unlist(lapply(sort(themeXML)[[1]], function(x) readLines(x, warn = FALSE, encoding = "UTF-8"))), collapse = "")) + wb$theme <- removeHeadTag(paste(unlist(lapply(sort(themeXML)[[1]], readUTF8)), collapse = "")) } @@ -521,7 +521,7 @@ loadWorkbook <- function(file, xlsxFile = NULL, isUnzipped = FALSE) { xml <- lapply(1:length(allRels), function(i) { if (haveRels[i]) { - xml <- readLines(allRels[[i]], warn = FALSE, encoding = "UTF-8") + xml <- readUTF8(allRels[[i]]) xml <- removeHeadTag(xml) xml <- gsub("", "", xml) xml <- gsub("", "", xml) @@ -622,7 +622,7 @@ loadWorkbook <- function(file, xlsxFile = NULL, isUnzipped = FALSE) { tablesXML <- tablesXML[sprintf("table%s.xml", unlist(tables))] ## tables are now in correct order so we can read them in as they are - wb$tables <- sapply(tablesXML, function(x) removeHeadTag(paste(readLines(x, warn = FALSE), collapse = ""))) + wb$tables <- sapply(tablesXML, function(x) removeHeadTag(paste(readUTF8(x), collapse = ""))) ## pull out refs and attach names refs <- regmatches(wb$tables, regexpr('(?<=ref=")[0-9A-Z:]+', wb$tables, perl = TRUE)) @@ -690,14 +690,14 @@ loadWorkbook <- function(file, xlsxFile = NULL, isUnzipped = FALSE) { hasDrawing <- sapply(drawXMLrelationship, length) > 0 ## which sheets have a drawing if (length(drawingRelsXML) > 0) { - dRels <- lapply(drawingRelsXML, readLines, warn = FALSE) + dRels <- lapply(drawingRelsXML, readUTF8) dRels <- unlist(lapply(dRels, removeHeadTag)) dRels <- gsub("", "", dRels) dRels <- gsub("", "", dRels) } if (length(drawingsXML) > 0) { - dXML <- lapply(drawingsXML, readLines, warn = FALSE, encoding = "UTF-8") + dXML <- lapply(drawingsXML, readUTF8) dXML <- unlist(lapply(dXML, removeHeadTag)) dXML <- gsub("", "", dXML) dXML <- gsub("", "", dXML) @@ -753,7 +753,7 @@ loadWorkbook <- function(file, xlsxFile = NULL, isUnzipped = FALSE) { ind <- grepl(target, vmlDrawingXML) if (any(ind)) { - txt <- paste(readLines(vmlDrawingXML[ind], warn = FALSE), collapse = "\n") + txt <- paste(readUTF8(vmlDrawingXML[ind]), collapse = "\n") txt <- removeHeadTag(txt) i1 <- regexpr(").*(?=)", workbook, perl = TRUE))) sheets <- unlist(regmatches(sheets, gregexpr("]*>", sheets, perl = TRUE))) diff --git a/tests/testthat/test-writing_sheet_data.R b/tests/testthat/test-writing_sheet_data.R index 35fb58ef..2b445d04 100644 --- a/tests/testthat/test-writing_sheet_data.R +++ b/tests/testthat/test-writing_sheet_data.R @@ -11,7 +11,7 @@ test_that("Writing sheetData rows XML - iris", { openxlsx::write.xlsx(iris, temp_file) unzip(temp_file, exdir = tempdir()) - x <- readLines(file.path(tempdir(), "xl", "worksheets", "sheet1.xml"), warn = FALSE, encoding = "UTF-8") + x <- readUTF8(file.path(tempdir(), "xl", "worksheets", "sheet1.xml")) rows <- unlist(regmatches(x = x, gregexpr("", x))) expected_rows <- c( @@ -187,7 +187,7 @@ test_that("Writing sheetData rows XML - mtcars", { openxlsx::write.xlsx(mtcars, temp_file, row.names = TRUE) unzip(temp_file, exdir = tempdir()) - x <- readLines(file.path(tempdir(), "xl", "worksheets", "sheet1.xml"), warn = FALSE, encoding = "UTF-8") + x <- readUTF8(file.path(tempdir(), "xl", "worksheets", "sheet1.xml")) rows <- unlist(regmatches(x = x, gregexpr("", x))) expected_rows <- c( From d626832f8c8d8d6ee97c48f98d19122d33596f8a Mon Sep 17 00:00:00 2001 From: shrektan Date: Wed, 18 Nov 2020 19:05:14 +0800 Subject: [PATCH 08/14] roxygen modifies --- DESCRIPTION | 1 + R/RcppExports.R | 4 ++-- src/RcppExports.cpp | 9 +++++---- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 364afddd..64772399 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -69,6 +69,7 @@ Collate: 'openxlsxCoerce.R' 'readWorkbook.R' 'sheet_data_class.R' + 'utils.R' 'workbook_column_widths.R' 'workbook_read_workbook.R' 'workbook_write_data.R' diff --git a/R/RcppExports.R b/R/RcppExports.R index bde88f3a..dbe8c6c5 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -37,8 +37,8 @@ get_letters <- function() { .Call(`_openxlsx_get_letters`) } -markUTF8 <- function(x) { - .Call(`_openxlsx_markUTF8`, x) +markUTF8 <- function(x, clone) { + .Call(`_openxlsx_markUTF8`, x, clone) } loadworksheets <- function(wb, styleObjects, xmlFiles, is_chart_sheet) { diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index b5a1a37c..06382175 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -116,13 +116,14 @@ BEGIN_RCPP END_RCPP } // markUTF8 -CharacterVector markUTF8(CharacterVector x); -RcppExport SEXP _openxlsx_markUTF8(SEXP xSEXP) { +CharacterVector markUTF8(CharacterVector x, bool clone); +RcppExport SEXP _openxlsx_markUTF8(SEXP xSEXP, SEXP cloneSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< CharacterVector >::type x(xSEXP); - rcpp_result_gen = Rcpp::wrap(markUTF8(x)); + Rcpp::traits::input_parameter< bool >::type clone(cloneSEXP); + rcpp_result_gen = Rcpp::wrap(markUTF8(x, clone)); return rcpp_result_gen; END_RCPP } @@ -467,7 +468,7 @@ static const R_CallMethodDef CallEntries[] = { {"_openxlsx_cppReadFile", (DL_FUNC) &_openxlsx_cppReadFile, 1}, {"_openxlsx_read_file_newline", (DL_FUNC) &_openxlsx_read_file_newline, 1}, {"_openxlsx_get_letters", (DL_FUNC) &_openxlsx_get_letters, 0}, - {"_openxlsx_markUTF8", (DL_FUNC) &_openxlsx_markUTF8, 1}, + {"_openxlsx_markUTF8", (DL_FUNC) &_openxlsx_markUTF8, 2}, {"_openxlsx_loadworksheets", (DL_FUNC) &_openxlsx_loadworksheets, 4}, {"_openxlsx_getNodes", (DL_FUNC) &_openxlsx_getNodes, 2}, {"_openxlsx_getOpenClosedNode", (DL_FUNC) &_openxlsx_getOpenClosedNode, 3}, From 18348b7a81cd28aa29c5c5488734006056bf3205 Mon Sep 17 00:00:00 2001 From: shrektan Date: Wed, 18 Nov 2020 19:52:07 +0800 Subject: [PATCH 09/14] allows non-ASCII named regione being created --- R/wrappers.R | 2 -- 1 file changed, 2 deletions(-) diff --git a/R/wrappers.R b/R/wrappers.R index a8a0eb0b..485722d1 100644 --- a/R/wrappers.R +++ b/R/wrappers.R @@ -2716,8 +2716,6 @@ createNamedRegion <- function(wb, sheet, cols, rows, name) { if (tolower(name) %in% ex_names) { stop(sprintf("Named region with name '%s' already exists!", name)) - } else if (grepl("[^A-Z0-9_\\.]", name[1], ignore.case = TRUE)) { - stop("Invalid characters in name") } else if (grepl("^[A-Z]{1,3}[0-9]+$", name)) { stop("name cannot look like a cell reference.") } From 61cc79317332ecaf73cb2be7259d62634dbce8a2 Mon Sep 17 00:00:00 2001 From: shrektan Date: Sat, 21 Nov 2020 20:28:00 +0800 Subject: [PATCH 10/14] move readUTF8() to helperFunctions.R --- DESCRIPTION | 1 - R/helperFunctions.R | 4 ++++ R/utils.R | 4 ---- 3 files changed, 4 insertions(+), 5 deletions(-) delete mode 100644 R/utils.R diff --git a/DESCRIPTION b/DESCRIPTION index 64772399..364afddd 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -69,7 +69,6 @@ Collate: 'openxlsxCoerce.R' 'readWorkbook.R' 'sheet_data_class.R' - 'utils.R' 'workbook_column_widths.R' 'workbook_read_workbook.R' 'workbook_write_data.R' diff --git a/R/helperFunctions.R b/R/helperFunctions.R index 0145950c..86304b95 100644 --- a/R/helperFunctions.R +++ b/R/helperFunctions.R @@ -917,3 +917,7 @@ hashPassword <- function(password) { hash <- bitwXor(bitwXor(hash, length(chars)), 0xCE4B) format(as.hexmode(hash), upper.case = TRUE) } + +readUTF8 <- function(x) { + readLines(x, warn = FALSE, encoding = "UTF-8") +} diff --git a/R/utils.R b/R/utils.R deleted file mode 100644 index b303583f..00000000 --- a/R/utils.R +++ /dev/null @@ -1,4 +0,0 @@ - -readUTF8 <- function(x) { - readLines(x, warn = FALSE, encoding = "UTF-8") -} From c212d837d4b3b9c42e9e26caa62295471d2502f5 Mon Sep 17 00:00:00 2001 From: shrektan Date: Sat, 21 Nov 2020 21:03:11 +0800 Subject: [PATCH 11/14] allow non-ASCII name in writeData() --- R/writeData.R | 2 -- 1 file changed, 2 deletions(-) diff --git a/R/writeData.R b/R/writeData.R index 3304434e..106906cd 100644 --- a/R/writeData.R +++ b/R/writeData.R @@ -240,8 +240,6 @@ writeData <- function(wb, if (name %in% ex_names) { stop(sprintf("Named region with name '%s' already exists!", name)) - } else if (grepl("[^A-Z0-9_\\.]", name[1], ignore.case = TRUE)) { - stop("Invalid characters in name") } else if (grepl("^[A-Z]{1,3}[0-9]+$", name)) { stop("name cannot look like a cell reference.") } From 8402aee31aa5fcb40655f04bb588aea4c5b0ffb7 Mon Sep 17 00:00:00 2001 From: shrektan Date: Sat, 21 Nov 2020 21:19:06 +0800 Subject: [PATCH 12/14] writeDataTable() support non-ASCII table names --- R/WorkbookClass.R | 3 +++ 1 file changed, 3 insertions(+) diff --git a/R/WorkbookClass.R b/R/WorkbookClass.R index 6d67de55..3dbb815d 100644 --- a/R/WorkbookClass.R +++ b/R/WorkbookClass.R @@ -1150,6 +1150,9 @@ Workbook$methods( ref, as.integer(totalsRowCount) ) + # because tableName might be native encoded non-ASCII strings, we need to ensure + # it's UTF-8 encoded + table <- enc2utf8(table) nms <- names(tables) tSheets <- attr(tables, "sheet") From 7f5f77858a9bf3cc580e5c1a834ae7145043c89d Mon Sep 17 00:00:00 2001 From: shrektan Date: Sat, 21 Nov 2020 21:49:19 +0800 Subject: [PATCH 13/14] add non-ASCII tests --- tests/testthat/test-encoding.R | 52 ++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/tests/testthat/test-encoding.R b/tests/testthat/test-encoding.R index 696677cb..28c183a1 100644 --- a/tests/testthat/test-encoding.R +++ b/tests/testthat/test-encoding.R @@ -37,3 +37,55 @@ test_that("Write read encoding equality", { unlink(tempFile, recursive = TRUE, force = TRUE) rm(wb) }) + + +test_that("Support non-ASCII strings not in UTF-8 encodings", { + non_ascii <- c("\u4f60\u597d", "\u4e2d\u6587", "\u6c49\u5b57", + "\u5de5\u4f5c\u7c3f1", "\u6d4b\u8bd5\u540d\u5b571", + "\u6d4b2", "\u5de52", "\u5de5\u4f5c3") + # Ideally, we should test agains native encodings. However, the testing machine's + # locale encoding may not be able to represent the non-ascii letters, when + # it's the case, we use the UTF-8 encoding as it is. + if (identical( enc2utf8(enc2native(non_ascii)), non_ascii )) { + non_ascii <- enc2native(non_ascii) + } + non_ascii_df <- data.frame( + X = non_ascii, Y = seq_along(non_ascii), stringsAsFactors = FALSE + ) + colnames(non_ascii_df) <- non_ascii[3:4] + file <- tempfile(fileext = ".xlsx") + wb <- createWorkbook(creator = non_ascii[1]) + ws <- addWorksheet(wb, non_ascii[2]) + writeDataTable(wb, ws, non_ascii_df, tableName = non_ascii[3]) + writeData(wb, ws, non_ascii_df, xy = list("D", 1), name = non_ascii[4]) + writeComment(wb, ws, 1, 1, comment = createComment(non_ascii[5], non_ascii[6])) + writeFormula(wb, ws, x = sprintf('"%s"&"%s"', non_ascii[1], non_ascii[2]), xy = list("G", 1)) + createNamedRegion(wb, ws, 7, 1, name = non_ascii[7]) + saveWorkbook(wb, file) + + wb2 <- loadWorkbook(file) + expect_equal( + getCreators(wb2), non_ascii[1] + ) + expect_equal( + getSheetNames(file), non_ascii[2] + ) + expect_equivalent( + getTables(wb2, ws), non_ascii[3] + ) + expect_equivalent( + getNamedRegions(wb2), non_ascii[c(4, 7)] + ) + expect_equal( + wb2$comments[[1]][[1]][c("comment", "author")], + setNames(as.list(non_ascii[5:6]), c("comment", "author")) + ) + expect_equal( + read.xlsx(file, ws, cols = 1:2), + non_ascii_df + ) + expect_equal( + read.xlsx(file, ws, cols = 4:5), + non_ascii_df + ) +}) From 2b6b5ba6d6ba814c07826448a5f54911cdb5c5dd Mon Sep 17 00:00:00 2001 From: shrektan Date: Sat, 21 Nov 2020 21:58:33 +0800 Subject: [PATCH 14/14] add NEWS entry --- NEWS.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/NEWS.md b/NEWS.md index 4c607ca0..b72baa02 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,9 @@ # openxlsx 4.2.3 +## New Features + +* Most of functions in openxlsx now support non-ASCII arguments better. More specifically, we can use non-ASCII strings as names or contents for `createNamedRegion()` ([#103](https://github.com/ycphs/openxlsx/issues/103)), `writeComment()`, `writeData()`, `writeDataTable()` and `writeFormula()`. In addition, openxlsx now reads comments and region names that contain non-ASCII strings correctly on Windows. Thanks to @shrektan for the PR [#118](https://github.com/ycphs/openxlsx/pull/118). + ## Fixes for Check issues * Fix to pass the tests for link-time optimization type mismatches