Skip to content

Commit

Permalink
#5 implement json_complexity
Browse files Browse the repository at this point in the history
  • Loading branch information
Jeremy committed Aug 26, 2016
1 parent 73d4336 commit 9b50ac2
Show file tree
Hide file tree
Showing 5 changed files with 128 additions and 1 deletion.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ export(gather_keys)
export(is.tbl_json)
export(jlogical)
export(jnumber)
export(json_complexity)
export(json_lengths)
export(json_structure)
export(json_types)
Expand Down
33 changes: 33 additions & 0 deletions R/json_complexity.r
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#' Add a column that contains the complexity (recursively unlisted length) of
#' the JSON data
#'
#' When investigating complex JSON data it can be helpful to identify the
#' complexity of deeply nested documents. The json_complexity() function adds a
#' column (default name "complexity") that contains the 'complexity' of the JSON
#' associated with each row. Essentially, every non-null scalar value is found
#' in the object by recursively stripping away all objects or arrays, and the
#' complexity is the count of these scalar values. Note that 'null', '[]' and
#' '{}' all have complexity 0.
#'
#' @param x a tbl_json object (anything else is first passed through
#'   as.tbl_json)
#' @param column.name the name to specify for the complexity column
#' @return a tbl_json object with an integer column.name column that gives the
#'   complexity of the JSON associated with each row
#' @export
#' @examples
#' library(magrittr)  # for %>%
#' c('[1, 2, [3, 4]]', '{"k1": 1, "k2": [2, [3, 4]]}', '1', '{}') %>%
#'   json_lengths %>% json_complexity
json_complexity <- function(x, column.name = "complexity") {

  if (!is.tbl_json(x)) x <- as.tbl_json(x)

  # Extract json
  json <- attr(x, "JSON")

  # Flatten each document down to its scalar leaves and count them. The result
  # is deliberately not called `lengths`, which would shadow base::lengths().
  complexity <- json %>% map(unlist, recursive = TRUE) %>% map_int(length)

  # Add as a column to x
  x[column.name] <- complexity

  tbl_json(x, json)

}
30 changes: 30 additions & 0 deletions man/json_complexity.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

63 changes: 63 additions & 0 deletions tests/testthat/test-json_complexity.r
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
context("json_complexity")

test_that("works for arrays", {

  # Flat arrays of scalars, from empty up to two elements
  json <- c('[]', '[1]', '[1, 2]')
  result <- json_complexity(json)

  expect_identical(result$complexity, c(0L, 1L, 2L))

})

test_that("works for objects", {

  # Flat objects with zero, one and two scalar values
  json <- c('{}', '{"k":"v"}', '{"k1":"v1", "k2":"v2"}')
  result <- json_complexity(json)

  expect_identical(result$complexity, c(0L, 1L, 2L))

})

test_that("works for scalars", {

  # Split the array so each row holds a single scalar (number, string, logical)
  json <- '[1, "a", true]'
  elements <- gather_array(json)
  result <- json_complexity(elements)

  expect_identical(result$complexity, rep(1L, 3))

})

test_that("works for empty objects", {

  # No documents at all: the complexity column exists but has zero rows
  json <- character(0)
  expect_identical(
    json %>% json_complexity %>% `$`(complexity),
    integer(0)
  )

  # null, an empty array and an empty object each contain no scalar values
  json <- c('[null, [], {}]')
  expect_identical(
    json %>% gather_array %>% json_complexity %>% `$`(complexity),
    rep(0L, 3)
  )

})

test_that("works for nested JSON", {

  # Scalars are counted through every level of nesting: 2, 1 + 2, and 2 + 1 + 1
  json <- c('{"key": [1, 2]}', '{"key1": [1], "key2": [1, 2]}',
    '{"key1": [1, 2], "key2": true, "key3": false}')
  expect_identical(
    json %>% json_complexity %>% `$`(complexity),
    c(2L, 3L, 4L)
  )

})
2 changes: 1 addition & 1 deletion vignettes/visualizing-json.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ We can then find out how complex each record is by recursively unlisting it
and computing the length:

```{r}
co_length <- co_list %>% map(unlist, recursive = TRUE) %>% map_int(length)
co_length <- companies %>% json_complexity %>% extract2("complexity")
```

Then we can visualize the distribution of lengths on a log-scale:
Expand Down

0 comments on commit 9b50ac2

Please sign in to comment.