Skip to content

Commit

Permalink
Merge pull request #13 from jeremystan/json_struct
Browse files Browse the repository at this point in the history
Json struct
  • Loading branch information
Jeremy Stanley authored Aug 26, 2016
2 parents 093b2dd + ad41449 commit 73d4336
Show file tree
Hide file tree
Showing 5 changed files with 67 additions and 28 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import(assertthat)
import(dplyr)
import(jsonlite)
importFrom(purrr,map)
importFrom(purrr,map2)
importFrom(purrr,map_chr)
importFrom(purrr,map_int)
importFrom(purrr,map_lgl)
43 changes: 29 additions & 14 deletions R/json_structure.r
Original file line number Diff line number Diff line change
Expand Up @@ -13,24 +13,29 @@
#' \code{document.id} 1L if \code{x} is a single JSON string, otherwise the
#' index of \code{x}.
#'
#' \code{parent.id} the string identifier of the parent node for this child
#' \code{parent.id} the string identifier of the parent node for this child.
#'
#' \code{level} what level of the hierarchy this child resides at, starting
#' at 1L and incrementing for each level of nesting.
#' at \code{0L} for the root and incrementing for each level of nested
#' array or object.
#'
#' \code{index} what index of the parent object / array this child resides
#' at (from \code{gather_array} for arrays)
#' at (from \code{gather_array} for arrays).
#'
#' \code{child.id} a unique ID for this leaf in this document, represented
#' as <parent>.<index> where <parent> is the ID for the parent and <index>
#' is this index
#' is this index.
#'
#' \code{seq} the sequence of keys / indices that led to this child
#' (parents that are arrays are excluded) as a list, where character strings
#' denote objects and integers denote array positions.
#'
#' \code{key} if this is the value of an object, what was the key that it
#' is listed under (from \code{gather_keys})
#' is listed under (from \code{gather_keys}).
#'
#' \code{type} the type of this object (from \code{json_types})
#' \code{type} the type of this object (from \code{json_types}).
#'
#' \code{length} the length of this object (from \code{json_lengths})
#' \code{length} the length of this object (from \code{json_lengths}).
#'
#' @export
#' @examples
Expand All @@ -51,7 +56,7 @@ json_structure <- function(x) {
# Create initial structure for top level
structure <- json_structure_init(x)

this_level <- 1L
this_level <- 0L
while(structure %>% should_json_structure_expand_more(this_level)) {

structure <- rbind_tbl_json(
Expand All @@ -72,9 +77,10 @@ json_structure_init <- function(x) {
x %>%
mutate(
parent.id = NA_character_,
level = 1L,
level = 0L,
index = 1L,
child.id = "1",
seq = replicate(n(), list()),
key = NA_character_
) %>%
json_types %>%
Expand All @@ -101,6 +107,7 @@ json_structure_empty <- function() {
level = integer(0),
index = integer(0),
child.id = character(0),
seq = list(),
key = character(0),
type = factor(character(0), levels = allowed_json_types),
length = integer(0)
Expand Down Expand Up @@ -141,6 +148,7 @@ json_structure_objects <- function(s) {
transmute(
document.id,
parent.id = child.id,
seq,
level = level + 1L
) %>%
gather_keys %>%
Expand All @@ -152,9 +160,12 @@ json_structure_objects <- function(s) {
group_by(parent.id) %>%
mutate(index = 1L:n()) %>%
ungroup %>%
mutate(child.id = paste(parent.id, index, sep = ".")) %>%
mutate(
child.id = paste(parent.id, index, sep = "."),
seq = map2(seq, key, c)
) %>%
select(
document.id, parent.id, level, index, child.id, key, type, length
document.id, parent.id, level, index, child.id, seq, key, type, length
)

# Reconstruct tbl_json object
Expand All @@ -169,14 +180,19 @@ json_structure_arrays <- function(s) {
transmute(
document.id,
parent.id = child.id,
seq,
level = level + 1L
) %>%
gather_array("index") %>%
json_types %>%
json_lengths %>%
mutate(child.id = paste(parent.id, index, sep = ".")) %>%
mutate(
child.id = paste(parent.id, index, sep = "."),
seq = map2(seq, index, c)
) %>%
transmute(
document.id, parent.id, level, index, child.id, key = NA_character_, type, length
document.id, parent.id, level, index, child.id,
seq, key = NA_character_, type, length
)

}
Expand All @@ -190,4 +206,3 @@ rbind_tbl_json <- function(x, y) {
)

}

2 changes: 1 addition & 1 deletion R/tidyjson-package.r
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,6 @@
#' @docType package
#' @import assertthat
#' @import jsonlite
#' @importFrom purrr map map_lgl map_int map_chr
#' @importFrom purrr map map_lgl map_int map_chr map2
#' @import dplyr
NULL
19 changes: 12 additions & 7 deletions man/json_structure.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

30 changes: 24 additions & 6 deletions tests/testthat/test-json_structure.r
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,10 @@ test_that("simple string works", {
data_frame(
document.id = 1L,
parent.id = NA_character_,
level = 1L,
level = 0L,
index = 1L,
child.id = "1",
seq = list(list()),
key = NA_character_,
type = "string" %>% factor(levels = allowed_json_types),
length = 1L
Expand All @@ -29,9 +30,10 @@ test_that("simple object works", {
data_frame(
document.id = c(1L, 1L),
parent.id = c(NA_character_, "1"),
level = c(1L, 2L),
level = c(0L, 1L),
index = c(1L, 1L),
child.id = c("1", "1.1"),
seq = list(list(), list("key")),
key = c(NA_character_, "key"),
type = c("object", "string") %>% factor(levels = allowed_json_types),
length = c(1L, 1L)
Expand All @@ -50,9 +52,10 @@ test_that("simple array works", {
data_frame(
document.id = c(1L, 1L, 1L),
parent.id = c(NA_character_, "1", "1"),
level = c(1L, 2L, 2L),
level = c(0L, 1L, 1L),
index = c(1L, 1L, 2L),
child.id = c("1", "1.1", "1.2"),
seq = list(list(), list(1L), list(2L)),
key = rep(NA_character_, 3),
type = c("array", "number", "number") %>% factor(levels = allowed_json_types),
length = c(2L, 1L, 1L)
Expand All @@ -71,9 +74,10 @@ test_that("nested object works", {
data_frame(
document.id = c(1L, 1L, 1L),
parent.id = c(NA_character_, "1", "1.1"),
level = c(1L, 2L, 3L),
level = c(0L, 1L, 2L),
index = c(1L, 1L, 1L),
child.id = c("1", "1.1", "1.1.1"),
seq = list(list(), list("k1"), list("k1", "k2")),
key = c(NA_character_, "k1", "k2"),
type = c("object", "object", "string") %>% factor(levels = allowed_json_types),
length = c(1L, 1L, 1L)
Expand All @@ -94,9 +98,10 @@ test_that("works with empty values appropriately", {
data_frame(
document.id = 1L,
parent.id = NA_character_,
level = 1L,
level = 0L,
index = 1L,
child.id = "1",
seq = list(list()),
key = NA_character_,
type = "null" %>% factor(levels = allowed_json_types),
length = 0L
Expand All @@ -115,9 +120,10 @@ test_that("works with tbl_json already", {
data_frame(
document.id = c(1L, 2L),
parent.id = rep(NA_character_, 2),
level = rep(1L, 2),
level = rep(0L, 2),
index = rep(1L, 2),
child.id = rep("1", 2),
seq = list(list(), list()),
key = rep(NA_character_, 2),
type = rep("string", 2) %>% factor(levels = allowed_json_types),
length = rep(1L, 2)
Expand All @@ -127,3 +133,15 @@ test_that("works with tbl_json already", {
)

})

test_that("seq works for a deeply nested sequence", {

  # Document nests object -> object -> array -> object -> string.
  json <- '{"a": {"2": [1, {"3": "value"}] } }'

  # Take the sixth entry of the seq list-column from the structure table.
  seq_column <- json_structure(json)[["seq"]]
  sixth_seq <- seq_column[[6]]

  # Keys stay character strings; the array position stays an integer.
  expected_seq <- list("a", "2", 2L, "3")

  expect_identical(sixth_seq, expected_seq)

})

0 comments on commit 73d4336

Please sign in to comment.