Skip to content

Commit

Permalink
refactor df simplify when reading. Fixes #13
Browse files Browse the repository at this point in the history
  • Loading branch information
coolbutuseless committed Sep 13, 2023
1 parent 9b99579 commit b7973fc
Show file tree
Hide file tree
Showing 8 changed files with 76 additions and 14 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: yyjsonr
Type: Package
Title: Fast JSON, GeoJSON and NDJSON Parsing and Serialisation
Version: 0.1.9
Version: 0.1.10
Authors@R: c(
person("Mike", "FC", role = c("aut", "cre"), email = "[email protected]"),
person("Yao", "Yuan", role = "cph", email = "[email protected]",
Expand Down
6 changes: 6 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,10 @@

# yyjsonr 0.1.10 2023-09-14

* Refactored options for simplification to data.frame
* removed `vectors_to_df`
* replaced with `obj_of_arrs_to_df`
* added `arr_of_objs_to_df`

# yyjsonr 0.1.9 2023-09-13

Expand Down
11 changes: 8 additions & 3 deletions R/json-opts.R
Original file line number Diff line number Diff line change
Expand Up @@ -169,9 +169,12 @@ write_flag <- list(
#' be stored in this type.
#' @param missing_list_elem how to handle missing elements in list columns in
#' data.frames. Options: 'na' or 'null'. Default: 'null'
#' @param vectors_to_df logical. Should a named list of equal-length
#' @param obj_of_arrs_to_df logical. Should a named list of equal-length
#' vectors be promoted to a data.frame? Default: TRUE. If FALSE, then
#' result will be left as a list.
#' @param arr_of_objs_to_df logical. Should an array of objects be promoted to
#' a data.frame? Default: TRUE. If FALSE, then results will be read as a
#' list-of-lists.
#' @param yyjson_read_flag integer vector of internal \code{yyjson}
#' options. See \code{read_flag} in this package, and read
#' the yyjson API documentation for more information. This is considered
Expand All @@ -196,7 +199,8 @@ write_flag <- list(
opts_read_json <- function(
int64 = c('string', 'bit64'),
missing_list_elem = c('null', 'na'),
vectors_to_df = TRUE,
obj_of_arrs_to_df = TRUE,
arr_of_objs_to_df = TRUE,
str_specials = c('string', 'special'),
num_specials = c('special', 'string'),
promote_num_to_string = FALSE,
Expand All @@ -207,7 +211,8 @@ opts_read_json <- function(
list(
int64 = match.arg(int64),
missing_list_elem = match.arg(missing_list_elem),
vectors_to_df = isTRUE(vectors_to_df),
obj_of_arrs_to_df = isTRUE(obj_of_arrs_to_df),
arr_of_objs_to_df = isTRUE(arr_of_objs_to_df),
str_specials = match.arg(str_specials),
num_specials = match.arg(num_specials),
yyjson_read_flag = as.integer(yyjson_read_flag)
Expand Down
9 changes: 7 additions & 2 deletions man/opts_read_json.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

13 changes: 8 additions & 5 deletions src/R-yyjson-parse.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@ parse_options create_parse_options(SEXP parse_opts_) {
parse_options opt = {
.int64 = INT64_AS_STR,
.missing_list_elem = MISSING_AS_NULL,
.vectors_to_df = true,
.obj_of_arrs_to_df = true,
.arr_of_objs_to_df = true,
.str_specials = STR_SPECIALS_AS_STRING,
.num_specials = NUM_SPECIALS_AS_SPECIAL,
.promote_num_to_string = false,
Expand Down Expand Up @@ -56,8 +57,10 @@ parse_options create_parse_options(SEXP parse_opts_) {
for (unsigned int idx = 0; idx < length(val_); idx++) {
opt.yyjson_read_flag |= INTEGER(val_)[idx];
}
} else if (strcmp(opt_name, "vectors_to_df") == 0) {
opt.vectors_to_df = asLogical(val_);
} else if (strcmp(opt_name, "obj_of_arrs_to_df") == 0) {
opt.obj_of_arrs_to_df = asLogical(val_);
} else if (strcmp(opt_name, "arr_of_objs_to_df") == 0) {
opt.arr_of_objs_to_df = asLogical(val_);
} else if (strcmp(opt_name, "str_specials") == 0) {
const char *val = CHAR(STRING_ELT(val_, 0));
opt.str_specials = strcmp(val, "string") == 0 ? STR_SPECIALS_AS_STRING : STR_SPECIALS_AS_SPECIAL;
Expand Down Expand Up @@ -1241,7 +1244,7 @@ SEXP json_array_as_robj(yyjson_val *arr, parse_options *opt) {
}

}
} else if (ctn_bitset == CTN_OBJ) {
} else if (ctn_bitset == CTN_OBJ && opt->arr_of_objs_to_df) {
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
// []-array ONLY contains {}-objects!
// Parse as a data.frame
Expand Down Expand Up @@ -1600,7 +1603,7 @@ SEXP json_object_as_list(yyjson_val *obj, parse_options *opt) {
// * All elements are atomic arrays or vecsxp
// * All these elements are the same length
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
if (opt->vectors_to_df) {
if (opt->obj_of_arrs_to_df) {
R_xlen_t nrow = 0;
bool possible_data_frame = true;
for (unsigned int col = 0; col < idx; col++) {
Expand Down
3 changes: 2 additions & 1 deletion src/R-yyjson-parse.h
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,8 @@
typedef struct {
unsigned int int64;
unsigned int missing_list_elem;
bool vectors_to_df;
bool obj_of_arrs_to_df;
bool arr_of_objs_to_df;
unsigned int str_specials;
unsigned int num_specials;
bool promote_num_to_string;
Expand Down
42 changes: 42 additions & 0 deletions tests/testthat/test-simplify-df.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@



test_that("data.frame simplification options are respected when reading", {

  # Reference data: first rows of iris with Species as character, because a
  # JSON round trip yields strings rather than factors.
  ref <- head(iris, 5)
  ref$Species <- as.character(ref$Species)

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # Parse array of objects to a data.frame
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  arr_of_objs <- write_json_str(ref, auto_unbox = TRUE, pretty = TRUE)
  x <- read_json_str(arr_of_objs, arr_of_objs_to_df = TRUE)
  expect_identical(x, ref)

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # Leave array-of-objects as a list: one unnamed element per row, each
  # element named by the original column names
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  x <- read_json_str(arr_of_objs, arr_of_objs_to_df = FALSE)
  expect_true(is.list(x))
  expect_false(is.data.frame(x))
  expect_length(x, 5)
  expect_null(names(x))
  expect_identical(names(x[[1]]), colnames(ref))

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # Object of arrays
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  obj_of_arrs <- write_json_str(as.list(ref), auto_unbox = TRUE, pretty = TRUE)

  # Read as data.frame
  x <- read_json_str(obj_of_arrs, obj_of_arrs_to_df = TRUE)
  expect_identical(x, ref)

  # Read as a named list of columns
  x <- read_json_str(obj_of_arrs, obj_of_arrs_to_df = FALSE)
  expect_false(is.data.frame(x))
  expect_true(is.list(x))
  expect_identical(names(x), colnames(ref))

})
4 changes: 2 additions & 2 deletions vignettes/from_json_options.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -82,9 +82,9 @@ Vectors to data.frame
```{r}
str <- '{"a":[1,2],"b":["apple","banana"]}'
read_json_str(str, vectors_to_df = FALSE)
read_json_str(str, obj_of_arrs_to_df = FALSE)
read_json_str(str, vectors_to_df = TRUE)
read_json_str(str, obj_of_arrs_to_df = TRUE)
```


Expand Down

0 comments on commit b7973fc

Please sign in to comment.