From 3f1adff5520828ad876161a6ca14bac1f10a13a1 Mon Sep 17 00:00:00 2001
From: Jeremy
Date: Thu, 25 Aug 2016 09:19:49 -0700
Subject: [PATCH] first version of new vignette #4

---
 vignettes/making-tidyjson-purrr.Rmd | 120 +++++++++++++++++++++++++++-
 1 file changed, 119 insertions(+), 1 deletion(-)

diff --git a/vignettes/making-tidyjson-purrr.Rmd b/vignettes/making-tidyjson-purrr.Rmd
index 94bc56a..1b9252f 100644
--- a/vignettes/making-tidyjson-purrr.Rmd
+++ b/vignettes/making-tidyjson-purrr.Rmd
@@ -10,13 +10,15 @@ vignette: >
 ---
 
 ```{r, echo = FALSE}
-knitr::opts_chunk$set(collapse = TRUE, comment = "#>")
+knitr::opts_chunk$set(collapse = TRUE, comment = "#>", fig.width = 7, fig.height = 5)
 options(dplyr.print_min = 4L, dplyr.print_max = 4L)
 ```
 
 ## Load required libraries
 
 ```{r, message = FALSE}
+library(igraph)
+library(RColorBrewer)
 library(tidyjson) # this library
 library(dplyr) # for %>% and other dplyr functions
 library(purrr) # to compare against purrr
@@ -26,6 +28,7 @@ library(tibble) # for tibble creation
 library(magrittr) # for `%$%`
 library(ggplot2) # for plotting
 library(forcats) # easier factor manipulation
+library(stringr) # for truncating values
 ```
 
 ## Companies Data
@@ -43,6 +46,121 @@ We can see the structure of a sample record with `str`
 str(fromJSON(samp_co[[1]]))
 ```
 
+Alternatively, we can compute the structure using `tidyjson::json_structure`
+
+```{r}
+structure <- samp_co[[1]] %>% json_structure
+
+max_length <- 20 # longest value shown, including the "..." marker
+structure %>%
+  mutate(json = structure %@% "JSON") %>%
+  transmute(child.id, key, type, length,
+            json = ifelse(!(type %in% c("object", "array")), as.character(json), NA_character_)) %>%
+  mutate(json = ifelse(str_length(json) > max_length, # NA (object/array rows) stays NA
+                       paste0(str_sub(json, end = max_length - 3), "..."), json))
+```
+
+Hard to understand this using ggplot
+
+```{r}
+structure %>%
+  replace_na(list(key = "NA")) %>%
+  mutate(level = factor(level)) %>%
+  ggplot(aes(type, key)) +
+  geom_tile() +
+  facet_grid(level ~ ., scales = "free", space = "free")
+```
+
+Plot as a network graph
+
+```{r}
+plot_structure_graph <- function(json, legend = TRUE) {
+  # Plot json_structure(json) as a network graph: one vertex per structure row,
+  # one edge per parent.id -> child.id pair, vertices coloured by type. Only
+  # non-empty objects/arrays are labelled; legend = FALSE omits the type legend.
+  structure <- json %>% json_structure
+  type_colors <- brewer.pal(6, "Accent") # assumes type is a factor with <= 6 levels
+
+  g <- graph_from_data_frame(
+    structure %>%
+      filter(!is.na(parent.id)) %>%
+      select(parent.id, child.id),
+    directed = TRUE,
+    vertices = structure %>%
+      transmute(child.id,
+                vertex.color = type_colors[as.integer(type)],
+                vertex.label = ifelse(type %in% c("object", "array") & length > 0,
+                                      key, NA_character_)))
+
+  op <- par(mar = c(0, 0, 0, 0))
+  on.exit(par(op), add = TRUE) # restore the margins even if plotting fails
+  plot(g, edge.arrow.size = .1, vertex.color = V(g)$vertex.color, vertex.size = 4,
+       vertex.label = V(g)$vertex.label, layout = layout_with_kk,
+       edge.color = 'grey70', edge.width = 2)
+
+  if (legend) {
+    legend(x = -1.3, y = -.6, levels(structure$type), pch = 21,
+           col = "#777777", pt.bg = type_colors,
+           pt.cex = 2, cex = .8, bty = "n", ncol = 1)
+  }
+
+  invisible(NULL) # called for its plot; avoids printing NULL in the vignette
+}
+```
+
+Plot a single company
+
+```{r}
+samp_co[[1]] %>% plot_structure_graph
+```
+
+A lot of variety
+
+```{r}
+nrow <- 3
+ncol <- 4
+op <- par(mfrow = c(nrow, ncol))
+walk(samp_co[1:(nrow*ncol)], plot_structure_graph, legend = FALSE)
+par(op)
+```
+
+The most complex
+
+```{r}
+lengths <- companies %>%
+  map(fromJSON, simplifyVector = FALSE) %>%
+  map(unlist, recursive = TRUE) %>%
+  map_int(length)
+most_complex <- companies[which.max(lengths)] # exactly one document, even on ties
+
+most_complex %>% spread_values(name = jstring("name")) %>% extract2("name")
+```
+
+Let's try to plot it!
+
+```{r}
+plot_structure_graph(most_complex)
+```
+
+That is just too big, let's simplify things
+
+```{r, fig.height = 8}
+sub_objects <- most_complex %>%
+  gather_keys %>%
+  json_types %>%
+  json_lengths %>%
+  filter(length > 1)
+
+nrow <- 5 # panel grid rows; the call nrow(sub_objects) below still finds the function
+ncol <- 3 # panel grid columns
+op <- par(mfrow = c(nrow, ncol))
+for (i in seq_len(nrow(sub_objects))) { # seq_len(): no iterations when there are no rows
+  plot_structure_graph(sub_objects[i, ], legend = FALSE)
+  title(sub_objects$key[i], col.main = 'red')
+}
+par(op)
+```
+
 ## `tbl_json`
 
 We are working with a character array of JSON, which tidyjson coerces directly