Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Apply styler to entire package #3

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
90 changes: 45 additions & 45 deletions R/arrange.R
Original file line number Diff line number Diff line change
@@ -1,95 +1,95 @@
#' Arrange rows by variables within a nested dataframe
#'
#'
#' Arrange rows by variables within a nested dataframe
#' @param df A nested dataframe
#' @param df A nested dataframe
#' @param ... Comma separated list of unquoted variable names
#'
#'
#' @importFrom dplyr progress_estimated
#' @importFrom emo ji
#' @importFrom purrr map
#' @export
arrange_nested <- function(df, ...){
arrange_nested <- function(df, ...) {
var_expr <- enquos(...)
colname_nested_data <- names(df[ , grepl("^data$", names(df))])
arrange_with_progress <- function(data){
colname_nested_data <- names(df[, grepl("^data$", names(df))])

arrange_with_progress <- function(data) {
pb$tick()$print()
data %>%
data %>%
arrange(!!!var_expr)
}
#create the progress bar

# create the progress bar
pb <- dplyr::progress_estimated(nrow(df))

start.time <- Sys.time()
message(paste(emo::ji("hammer_and_wrench"), "Start sorting..."))
output <- df %>%
dplyr::mutate({{colname_nested_data}} := purrr::map(df[[colname_nested_data]], ~arrange_with_progress(.)))
dplyr::mutate({{ colname_nested_data }} := purrr::map(df[[colname_nested_data]], ~ arrange_with_progress(.)))
end.time <- Sys.time()
time.taken <- difftime(end.time, start.time, units = "secs") %>% round(., 3)
time.taken <- difftime(end.time, start.time, units = "secs") %>% round(., 3)

message("\n")
message(paste(emo::ji("white_check_mark"), "Finish sorting!"))
if(time.taken > 60){
time.taken <- round(time.taken/60, 2)

if (time.taken > 60) {
time.taken <- round(time.taken / 60, 2)
message(paste(emo::ji("hourglass"), "Sorting time:", time.taken, "mins"))
}else{
} else {
message(paste(emo::ji("hourglass"), "Sorting time:", time.taken, "secs"))
}
message("\n")

return(output)
}


#' Arrange rows by variables within a double-nested dataframe
#'
#'
#' Arrange rows by variables within a double-nested dataframe
#' @param df A nested dataframe
#' @param df A nested dataframe
#' @param nest_cols Name of columns to nest in existing list-column
#' @param ... Comma separated list of unquoted variable names
#'
#'
#' @importFrom emo ji
#' @importFrom purrr map
#'
#'
#' @export
arrange_double_nested <- function(df, nest_cols, ...){
if(nrow(df) == 0){
arrange_double_nested <- function(df, nest_cols, ...) {
if (nrow(df) == 0) {
stop(paste(emo::ji("bomb"), "No user left, tune your threshold and try again."))
}

stopifnot(
is.list(df[ , grepl("^data$", names(df))])
is.list(df[, grepl("^data$", names(df))])
)

var_expr <- enquos(..., .named = TRUE)
colname_nested_data <- names(df[ , grepl("^data$", names(df))])
colname_nested_data <- names(df[, grepl("^data$", names(df))])

arrange_column <- . %>% arrange(!!!var_expr)
arrange_columns <- . %>%
dplyr::mutate(data = purrr::map(data, arrange_column))
# double nest
df[[colname_nested_data]] <- purrr::map(df[[colname_nested_data]], ~.x %>% nest(data = nest_cols))

arrange_columns <- . %>%
dplyr::mutate(data = purrr::map(data, arrange_column))

# double nest
df[[colname_nested_data]] <- purrr::map(df[[colname_nested_data]], ~ .x %>% nest(data = nest_cols))

start.time <- Sys.time()
message(paste(emo::ji("hammer_and_wrench"), "Start sorting..."))
output <- df %>%
dplyr::mutate({{colname_nested_data}} := purrr::map(df[[colname_nested_data]], arrange_columns))
output <- df %>%
dplyr::mutate({{ colname_nested_data }} := purrr::map(df[[colname_nested_data]], arrange_columns))
end.time <- Sys.time()
time.taken <- difftime(end.time, start.time, units = "secs") %>% round(., 3)
time.taken <- difftime(end.time, start.time, units = "secs") %>% round(., 3)

message(paste(emo::ji("white_check_mark"), "Finish sorting!"))
if(time.taken > 60){
time.taken <- round(time.taken/60, 2)

if (time.taken > 60) {
time.taken <- round(time.taken / 60, 2)
message(paste(emo::ji("hourglass"), "Sorting time:", time.taken, "mins"))
} else{
} else {
message(paste(emo::ji("hourglass"), "Sorting time:", time.taken, "secs"))
}

message("\n")
return(output)
}
}
14 changes: 7 additions & 7 deletions R/data.R
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
#' Tweets sent by 100 random users
#'
#' De-identified test data includes 100 random users,
#'
#' De-identified test data includes 100 random users,
#' and it can be used as an example to get started with homelocator package
#'
#'
#' @format A dataframe of 16,300 rows and 3 variables:
#' \describe{
#' \item{u_id}{unique identifier for each user}
Expand All @@ -13,11 +13,11 @@


#' Neighbours for locations
#'
#' Spatial neighbours for locations, where a neighbour has
#'
#' Spatial neighbours for locations, where a neighbour has
#' at least one line in common, but its interior does not intersect with the location
#'
#'
#'
#'
#' @format A nested dataframe of 1,942 rows and 2 variables:
#' \describe{
#' \item{grid_id}{unique identifier for each location}
Expand Down
86 changes: 38 additions & 48 deletions R/enrich_timestamp.R
Original file line number Diff line number Diff line change
@@ -1,76 +1,66 @@
#' Create new variables dervied from timestamp
#'
#' Create new variables from existing timestamp, which are often used/needed as intermediate variables in home location algorithms.
#' @param df A nested dataframe
#'
#' Create new variables from existing timestamp, which are often used/needed as intermediate variables in home location algorithms.
#' @param df A nested dataframe
#' @param timestamp Name of column that holds specific timestamp for each data point and it should be POSIXct
#'
#'
#' @importFrom emo ji
#' @importFrom rlang sym
#' @importFrom lubridate year
#' @importFrom lubridate month
#' @importFrom lubridate day
#' @importFrom lubridate wday
#' @importFrom lubridate hour
#' @importFrom lubridate year
#' @importFrom lubridate month
#' @importFrom lubridate day
#' @importFrom lubridate wday
#' @importFrom lubridate hour
#' @importFrom dplyr progress_estimated
#' @importFrom purrr map
#'
#' @importFrom purrr map
#'
#' @export
enrich_timestamp <- function(df, timestamp = "created_at"){
if(!is.list(df[ , grepl("^data$", names(df)), with=F])){
enrich_timestamp <- function(df, timestamp = "created_at") {
if (!is.list(df[, grepl("^data$", names(df)), with = F])) {
stop(paste(emo::ji("bomb"), "Input dataset is not nested!"))
}
colname_nested_data <- names(df[ , grepl("^data$", names(df)), with=F])

colname_nested_data <- names(df[, grepl("^data$", names(df)), with = F])
timestamp <- rlang::sym(timestamp)
if(!is(df[[colname_nested_data]][[1]] %>% pull({{timestamp}}), "POSIXct")){

if (!is(df[[colname_nested_data]][[1]] %>% pull({{ timestamp }}), "POSIXct")) {
stop(paste(emo::ji("bomb"), "Timestamp is not POSIXct!"))
}
#define reading function which includes the progress bar
enrich_with_progress <- function(data){

# define reading function which includes the progress bar
enrich_with_progress <- function(data) {
pb$tick()$print()
data %>%
dplyr::mutate(year = lubridate::year({{timestamp}}),
month = lubridate::month({{timestamp}}),
day = lubridate::day({{timestamp}}),
wday = lubridate::wday({{timestamp}}), # day of the week
hour = lubridate::hour({{timestamp}}), # hour of the day
ymd = format({{timestamp}}, "%Y-%m-%d"))
dplyr::mutate(
year = lubridate::year({{ timestamp }}),
month = lubridate::month({{ timestamp }}),
day = lubridate::day({{ timestamp }}),
wday = lubridate::wday({{ timestamp }}), # day of the week
hour = lubridate::hour({{ timestamp }}), # hour of the day
ymd = format({{ timestamp }}, "%Y-%m-%d")
)
}

#create the progress bar
# create the progress bar
pb <- dplyr::progress_estimated(nrow(df))

start.time <- Sys.time()
message(paste(emo::ji("hammer_and_wrench"), "Enriching variables from timestamp..."))
output <- df %>%
dplyr::mutate({{colname_nested_data}} := purrr::map(df[[colname_nested_data]], ~enrich_with_progress(.)))
dplyr::mutate({{ colname_nested_data }} := purrr::map(df[[colname_nested_data]], ~ enrich_with_progress(.)))
end.time <- Sys.time()
time.taken <- difftime(end.time, start.time, units = "secs") %>% round(., 3)
time.taken <- difftime(end.time, start.time, units = "secs") %>% round(., 3)

message("\n")
message(paste(emo::ji("white_check_mark"), "Finish enriching! New added variables: year, month, day, wday, hour, ymd."))
if(time.taken > 60){
time.taken <- round(time.taken/60, 2)

if (time.taken > 60) {
time.taken <- round(time.taken / 60, 2)
message(paste(emo::ji("hourglass"), "Enriching time:", time.taken, "mins"))
}else{
} else {
message(paste(emo::ji("hourglass"), "Enriching time:", time.taken, "secs"))
}
message("\n")

return(output)
}












74 changes: 38 additions & 36 deletions R/extract_location.R
Original file line number Diff line number Diff line change
@@ -1,72 +1,74 @@
#' Extract identified home locations for users
#'
#' Extract the most likely home location for each user based on choosed column value
#' @param df A nested dataframe by user
#' @param show_n_loc Number of homes to be shown
#' @param keep_score Choice to keep score or not
#' @param score_var Name of column that holds weighted score for each user
#' Extract identified home locations for users
#'
#' Extract the most likely home location for each user based on choosed column value
#' @param df A nested dataframe by user
#' @param show_n_loc Number of homes to be shown
#' @param keep_score Choice to keep score or not
#' @param score_var Name of column that holds weighted score for each user
#' @param user Name of column that holds unique identifier for each user
#' @param location Name of column that holds unique identifier for each location
#'
#'
#' @importFrom rlang has_name
#' @importFrom emo ji
#' @importFrom dplyr progress_estimated
#' @importFrom dplyr progress_estimated
#' @importFrom dplyr arrange
#' @importFrom dplyr select
#' @importFrom purrr map_chr
#'
#'
#'
#'
#' @export
extract_location <- function(df, user = "u_id", location = "loc_id", show_n_loc = 1, keep_score = F, ...){
extract_location <- function(df, user = "u_id", location = "loc_id", show_n_loc = 1, keep_score = F, ...) {
if (!rlang::has_name(df, user)) {
stop(paste(emo::ji("bomb"), "User column does not exist!"))
}
user <- rlang::sym(user)

user <- rlang::sym(user)
location <- rlang::sym(location)
var_expr <- enquos(..., .named = TRUE)
colname_nested_data <- names(df[ , grepl("^data$", names(df))])
get_loc_with_progress <- function(data){
colname_nested_data <- names(df[, grepl("^data$", names(df))])

get_loc_with_progress <- function(data) {
pb$tick()$print()
get_loc <- data %>%
dplyr::arrange(!!!var_expr) %>%
dplyr::arrange(!!!var_expr) %>%
slice(1:show_n_loc) %>%
pull({{location}})
if(show_n_loc == 1){
pull({{ location }})
if (show_n_loc == 1) {
get_loc
} else{
} else {
paste(get_loc, collapse = "; ")
}
}
# create the progress bar
pb <- dplyr::progress_estimated(nrow(df))

start.time <- Sys.time()
message(paste(emo::ji("hammer_and_wrench"), "Start extracting homes for users..."))
if(keep_score){

if (keep_score) {
output <- df %>%
dplyr::mutate(home = purrr::map_chr(df[[colname_nested_data]], ~get_loc_with_progress(.)))
} else{
dplyr::mutate(home = purrr::map_chr(df[[colname_nested_data]], ~ get_loc_with_progress(.)))
} else {
output <- df %>%
dplyr::mutate(home = purrr::map_chr(df[[colname_nested_data]], ~get_loc_with_progress(.))) %>%
dplyr::mutate(home = purrr::map_chr(df[[colname_nested_data]], ~ get_loc_with_progress(.))) %>%
dplyr::select(-colname_nested_data) %>%
dplyr::select({{user}}, home)
dplyr::select({{ user }}, home)
}
end.time <- Sys.time()
time.taken <- difftime(end.time, start.time, units = "secs") %>% round(., 3)

n_user <- output %>% pull(!!user) %>% n_distinct()
time.taken <- difftime(end.time, start.time, units = "secs") %>% round(., 3)

n_user <- output %>%
pull(!!user) %>%
n_distinct()
message("\n")
message(paste(emo::ji("tada"), "Congratulations!! Your have found", n_user, "users' potential home(s)."))
if(time.taken > 60){
time.taken <- round(time.taken/60, 2)

if (time.taken > 60) {
time.taken <- round(time.taken / 60, 2)
message(paste(emo::ji("hourglass"), "Extracting time:", time.taken, "mins"))
}else{
} else {
message(paste(emo::ji("hourglass"), "Extracting time:", time.taken, "secs"))
}
message("\n")
return(output)
}
}
Loading