From f293b65d6d83964921d7b8d63447a30c660ed3a5 Mon Sep 17 00:00:00 2001 From: Cole Arendt Date: Fri, 19 May 2017 07:40:10 -0400 Subject: [PATCH] Add bind_rows support Update multiple-apis.Rmd --- R/tbl_json.R | 14 ++++++++++++ R/utils.R | 44 +++++++++++++++++++++++++++++++++++++ packrat/packrat.lock | 6 +++++ vignettes/multiple-apis.Rmd | 11 ++++++---- 4 files changed, 71 insertions(+), 4 deletions(-) diff --git a/R/tbl_json.R b/R/tbl_json.R index f107c9a..c24e10e 100644 --- a/R/tbl_json.R +++ b/R/tbl_json.R @@ -206,6 +206,20 @@ slice_.tbl_json <- wrap_dplyr_verb(dplyr::slice_) #' @method slice tbl_json slice.tbl_json <- wrap_dplyr_verb(dplyr::slice) +#' @export +bind_rows <- function(...) { + r <- dplyr::bind_rows(...) + + d <- list_or_dots(...) + if (all(unlist(lapply(d,is.tbl_json)))) { + j <- unlist(lapply(d, attr, 'JSON'), recursive=FALSE) + return(tbl_json(r,j)) + } else { + warning('Some non-tbl_json objects. Reverting to dplyr::bind_rows') + return(r) + } +} + #' Convert the JSON in an tbl_json object back to a JSON string #' #' @param x a tbl_json object diff --git a/R/utils.R b/R/utils.R index a2b5606..b4d6e74 100644 --- a/R/utils.R +++ b/R/utils.R @@ -35,3 +35,47 @@ rbind_tbl_json <- function(x, y) { ) } + + +#' Handles dots or a list +list_or_dots <- function (...) +{ + dots <- list(...) 
+ data_lists <- vapply(dots, is_data_list, logical(1)) + dots[data_lists] <- lapply(dots[data_lists], list) + unlist(dots, recursive = FALSE) +} + +#' +#' Checks whether a list is being provided +#' +is_data_list <- function (x) +{ + if (is.data.frame(x) || is.null(x)) + return(TRUE) + if (!is.list(x)) + return(FALSE) + if (!is.null(names(x)) && length(x) == 0) + return(TRUE) + if (any(!has_names(x))) + return(FALSE) + is_1d <- vapply(x, is_1d, logical(1)) + if (any(!is_1d)) + return(FALSE) + n <- vapply(x, length, integer(1)) + if (any(n != n[1])) + return(FALSE) + TRUE +} + +#' Check for Names +has_names <- function (x) +{ + nms <- names(x) + if (is.null(nms)) { + rep(FALSE, length(x)) + } + else { + !is.na(nms) & nms != "" + } +} \ No newline at end of file diff --git a/packrat/packrat.lock b/packrat/packrat.lock index fd019f5..5102d12 100644 --- a/packrat/packrat.lock +++ b/packrat/packrat.lock @@ -338,6 +338,12 @@ Source: CRAN Version: 1.0.0 Hash: 77da8f1df873a4b91e5c4a68fe2fb1b6 +Package: pryr +Source: CRAN +Version: 0.1.2 +Hash: 4189249ad9cfa35bb1f70ce398fce673 +Requires: Rcpp, stringr + Package: purrr Source: CRAN Version: 0.2.2.2 diff --git a/vignettes/multiple-apis.Rmd b/vignettes/multiple-apis.Rmd index cf1e0e5..204c785 100644 --- a/vignettes/multiple-apis.Rmd +++ b/vignettes/multiple-apis.Rmd @@ -43,12 +43,13 @@ library(tidyjson) Tidyverse is used heavily for data cleansing. Let's explore some of the data through Github's APIs. We are going to grab the data directly and then explore the structure of the JSON with `json_schema`. ```{r, echo=TRUE} -dplyr_issues <- as.tbl_json('https://api.github.com/repos/tidyverse/dplyr/issues') +baseurl <- 'https://api.github.com/repos/tidyverse/dplyr/issues' +dplyr_issues <- as.tbl_json(baseurl) dplyr_issues %>% json_schema %>% prettify ``` -After exploring the structure of the data, we decide we want to look at a high level of what sort of issues we have. 
+After exploring the structure of the data, we decide we want to look at a high level of the issues we have. ```{r, echo=TRUE} @@ -77,9 +78,11 @@ highlevel %>% group_by(state) %>% summarize(nissues=n()) ``` -Let's aggregate a few more api calls. Documentation can be found at the [github API docs](https://developer.github.com/guides/traversing-with-pagination/). +Let's aggregate a few more api calls. Documentation can be found at the [github API docs](https://developer.github.com/guides/traversing-with-pagination/) and in particular [here](https://developer.github.com/v3/issues/#list-issues). ```{r, echo=TRUE} +manyissues <- lapply(c(1:7), function(x){as.tbl_json(paste0(baseurl,'?state=all&per_page=50&page=',x))}) - +## Collapse into one tbl_json +manyissues <- bind_rows(manyissues) ```