refactor df simplify when reading. Fixes #13

coolbutuseless · Sep 13, 2023 · b7973fc · b7973fc
1 parent 9b99579
commit b7973fc
Show file tree

Hide file tree

Showing 8 changed files with 76 additions and 14 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: yyjsonr
 Type: Package
 Title: Fast JSON, GeoJSON and NDJSON Parsing and Serialisation
-Version: 0.1.9
+Version: 0.1.10
 Authors@R: c(
     person("Mike", "FC", role = c("aut", "cre"), email = "[email protected]"),
     person("Yao", "Yuan", role = "cph", email = "[email protected]", 

diff --git a/NEWS.md b/NEWS.md
@@ -1,4 +1,10 @@
 
+# yyjsonr 0.1.10 2023-09-14
+
+* Refactored options for simplification to data.frame
+    * removed `vectors_to_df`
+    * replaced with `obj_of_arrs_to_df`
+    * added `arr_of_objs_to_df`
 
 # yyjsonr 0.1.9 2023-09-13
 

diff --git a/R/json-opts.R b/R/json-opts.R
@@ -169,9 +169,12 @@ write_flag <- list(
 #'        be stored in this type.
 #' @param missing_list_elem how to handle missing elements in list columns in 
 #'        data.frames. Options: 'na' or 'null'.  Default: 'null'
-#' @param vectors_to_df logical. Should a named list of equal-length
+#' @param obj_of_arrs_to_df logical. Should a named list of equal-length
 #'        vectors be promoted to a data.frame?  Default: TRUE.  If FALSE, then
 #'        result will be left as a list.
+#' @param arr_of_objs_to_df logical. Should an array of objects be promoted to
+#'        a data.frame? Default: TRUE. If FALSE, then results will be read as a
+#'        list-of-lists.
 #' @param yyjson_read_flag integer vector of internal \code{yyjson}
 #'        options.  See \code{read_flag} in this package, and read
 #'        the yyjson API documentation for more information.  This is considered
@@ -196,7 +199,8 @@ write_flag <- list(
 opts_read_json <- function(
     int64                 = c('string', 'bit64'),
     missing_list_elem     = c('null', 'na'),
-    vectors_to_df         = TRUE,
+    obj_of_arrs_to_df     = TRUE,
+    arr_of_objs_to_df     = TRUE,
     str_specials          = c('string', 'special'),
     num_specials          = c('special', 'string'),
     promote_num_to_string = FALSE,
@@ -207,7 +211,8 @@ opts_read_json <- function(
     list(
       int64             = match.arg(int64),
       missing_list_elem = match.arg(missing_list_elem),
-      vectors_to_df     = isTRUE(vectors_to_df),
+      obj_of_arrs_to_df = isTRUE(obj_of_arrs_to_df),
+      arr_of_objs_to_df = isTRUE(arr_of_objs_to_df),
       str_specials      = match.arg(str_specials),
       num_specials      = match.arg(num_specials),
       yyjson_read_flag  = as.integer(yyjson_read_flag)

diff --git a/man/opts_read_json.Rd b/man/opts_read_json.Rd
diff --git a/src/R-yyjson-parse.c b/src/R-yyjson-parse.c
@@ -22,7 +22,8 @@ parse_options create_parse_options(SEXP parse_opts_) {
   parse_options opt = {
     .int64                 = INT64_AS_STR,
     .missing_list_elem     = MISSING_AS_NULL,
-    .vectors_to_df         = true,
+    .obj_of_arrs_to_df     = true,
+    .arr_of_objs_to_df     = true,
     .str_specials          = STR_SPECIALS_AS_STRING,
     .num_specials          = NUM_SPECIALS_AS_SPECIAL,
     .promote_num_to_string = false,
@@ -56,8 +57,10 @@ parse_options create_parse_options(SEXP parse_opts_) {
       for (unsigned int idx = 0; idx < length(val_); idx++) {
         opt.yyjson_read_flag |= INTEGER(val_)[idx];
       }
-    } else if (strcmp(opt_name, "vectors_to_df") == 0) {
-      opt.vectors_to_df = asLogical(val_);
+    } else if (strcmp(opt_name, "obj_of_arrs_to_df") == 0) {
+      opt.obj_of_arrs_to_df = asLogical(val_);
+    } else if (strcmp(opt_name, "arr_of_objs_to_df") == 0) {
+      opt.arr_of_objs_to_df = asLogical(val_);
     } else if (strcmp(opt_name, "str_specials") == 0) {
       const char *val = CHAR(STRING_ELT(val_, 0));
       opt.str_specials = strcmp(val, "string") == 0 ? STR_SPECIALS_AS_STRING : STR_SPECIALS_AS_SPECIAL;
@@ -1241,7 +1244,7 @@ SEXP json_array_as_robj(yyjson_val *arr, parse_options *opt) {
       }
 
     }    
-  } else if (ctn_bitset == CTN_OBJ) {
+  } else if (ctn_bitset == CTN_OBJ && opt->arr_of_objs_to_df) {
     //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     // []-array ONLY contains {}-objects!
     // Parse as a data.frame
@@ -1600,7 +1603,7 @@ SEXP json_object_as_list(yyjson_val *obj, parse_options *opt) {
   // * All elements are atomic arrays or vecsxp
   // * All these elements are the same length
   //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-  if (opt->vectors_to_df) {
+  if (opt->obj_of_arrs_to_df) {
     R_xlen_t nrow = 0;
     bool possible_data_frame = true;
     for (unsigned int col = 0; col < idx; col++) {

diff --git a/src/R-yyjson-parse.h b/src/R-yyjson-parse.h
@@ -96,7 +96,8 @@
 typedef struct {
   unsigned int int64;
   unsigned int missing_list_elem;
-  bool vectors_to_df;
+  bool obj_of_arrs_to_df;
+  bool arr_of_objs_to_df;
   unsigned int str_specials;
   unsigned int num_specials;
   bool promote_num_to_string;

diff --git a/tests/testthat/test-simplify-df.R b/tests/testthat/test-simplify-df.R
@@ -0,0 +1,42 @@
+
+
+
+test_that("data.frame simplification options are respected when reading", {
+  # Reference data: the factor column is converted to character up front
+  ref <- head(iris, 5)
+  ref$Species <- as.character(ref$Species)
+
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  # Parse array of objects to a data.frame
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  arr_of_objs <- write_json_str(ref, auto_unbox = TRUE, pretty = TRUE)
+  x <- read_json_str(arr_of_objs, arr_of_objs_to_df = TRUE)
+  expect_identical(x, ref)
+
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  # Leave array-of-objects as an unnamed list with one named element per row
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  x <- read_json_str(arr_of_objs, arr_of_objs_to_df = FALSE)
+  expect_true(is.list(x))
+  expect_false(is.data.frame(x))
+  expect_length(x, 5)
+  expect_null(names(x))
+  expect_identical(names(x[[1]]), colnames(ref))
+
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  # Object of arrays
+  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  obj_of_arrs <- write_json_str(as.list(ref), auto_unbox = TRUE, pretty = TRUE)
+
+  # Read as data.frame
+  x <- read_json_str(obj_of_arrs, obj_of_arrs_to_df = TRUE)
+  expect_identical(x, ref)
+
+  # Read as a plain named list (one element per column)
+  x <- read_json_str(obj_of_arrs, obj_of_arrs_to_df = FALSE)
+  expect_false(is.data.frame(x))
+  expect_true(is.list(x))
+  expect_identical(names(x), colnames(ref))
+
+
+})
diff --git a/vignettes/from_json_options.Rmd b/vignettes/from_json_options.Rmd
@@ -82,9 +82,9 @@ Vectors to data.frame
 ```{r}
 str <- '{"a":[1,2],"b":["apple","banana"]}'
 
-read_json_str(str, vectors_to_df = FALSE)
+read_json_str(str, obj_of_arrs_to_df = FALSE)
 
-read_json_str(str, vectors_to_df =  TRUE)
+read_json_str(str, obj_of_arrs_to_df =  TRUE)
 ```