diff --git a/R/arrange.R b/R/arrange.R
index 5348059..d344bd8 100644
--- a/R/arrange.R
+++ b/R/arrange.R
@@ -1,95 +1,95 @@
 #' Arrange rows by variables within a nested dataframe
-#' 
+#'
 #' Arrange rows by variables within a nested dataframe
-#' @param df A nested dataframe 
+#' @param df A nested dataframe
 #' @param ... Comma separated list of unquoted variable names
-#' 
+#'
 #' @importFrom dplyr progress_estimated
 #' @importFrom emo ji
 #' @importFrom purrr map
 #' @export
-arrange_nested <- function(df, ...){
+arrange_nested <- function(df, ...) {
   var_expr <- enquos(...)
-  colname_nested_data <- names(df[ , grepl("^data$", names(df))])
-  
-  arrange_with_progress <- function(data){
+  colname_nested_data <- names(df[, grepl("^data$", names(df))])
+
+  arrange_with_progress <- function(data) {
     pb$tick()$print()
-    data %>% 
+    data %>%
       arrange(!!!var_expr)
   }
-  
-  #create the progress bar
+
+  # create the progress bar
   pb <- dplyr::progress_estimated(nrow(df))
- 
+
   start.time <- Sys.time()
   message(paste(emo::ji("hammer_and_wrench"), "Start sorting..."))
   output <- df %>%
-    dplyr::mutate({{colname_nested_data}} := purrr::map(df[[colname_nested_data]], ~arrange_with_progress(.))) 
+    dplyr::mutate({{ colname_nested_data }} := purrr::map(df[[colname_nested_data]], ~ arrange_with_progress(.)))
   end.time <- Sys.time()
-  time.taken <-  difftime(end.time, start.time, units = "secs") %>% round(., 3)
+  time.taken <- difftime(end.time, start.time, units = "secs") %>% round(., 3)
 
   message("\n")
   message(paste(emo::ji("white_check_mark"), "Finish sorting!"))
-  
-  if(time.taken > 60){
-    time.taken <- round(time.taken/60, 2)
+
+  if (time.taken > 60) {
+    time.taken <- round(time.taken / 60, 2)
     message(paste(emo::ji("hourglass"), "Sorting time:", time.taken, "mins"))
-  }else{
+  } else {
     message(paste(emo::ji("hourglass"), "Sorting time:", time.taken, "secs"))
   }
   message("\n")
-  
+
   return(output)
 }
 
 
 #' Arrange rows by variables within a double-nested dataframe
-#' 
+#'
 #' Arrange rows by variables within a double-nested dataframe
-#' @param df A nested dataframe 
+#' @param df A nested dataframe
 #' @param nest_cols Name of columns to nest in existing list-column
 #' @param ... Comma separated list of unquoted variable names
-#' 
+#'
 #' @importFrom emo ji
 #' @importFrom purrr map
-#' 
+#'
 #' @export
-arrange_double_nested <- function(df, nest_cols, ...){
-  if(nrow(df) == 0){
+arrange_double_nested <- function(df, nest_cols, ...) {
+  if (nrow(df) == 0) {
     stop(paste(emo::ji("bomb"), "No user left, tune your threshold and try again."))
   }
-  
+
   stopifnot(
-    is.list(df[ , grepl("^data$", names(df))])
+    is.list(df[, grepl("^data$", names(df))])
   )
-  
+
   var_expr <- enquos(..., .named = TRUE)
-  colname_nested_data <- names(df[ , grepl("^data$", names(df))])
-  
+  colname_nested_data <- names(df[, grepl("^data$", names(df))])
+
   arrange_column <- . %>% arrange(!!!var_expr)
-  
-  arrange_columns <- . %>% 
-    dplyr::mutate(data = purrr::map(data, arrange_column)) 
-  
-  # double nest 
-  df[[colname_nested_data]] <- purrr::map(df[[colname_nested_data]], ~.x %>% nest(data = nest_cols))
-  
+
+  arrange_columns <- . %>%
+    dplyr::mutate(data = purrr::map(data, arrange_column))
+
+  # double nest
+  df[[colname_nested_data]] <- purrr::map(df[[colname_nested_data]], ~ .x %>% nest(data = nest_cols))
+
   start.time <- Sys.time()
   message(paste(emo::ji("hammer_and_wrench"), "Start sorting..."))
-  output <- df %>% 
-    dplyr::mutate({{colname_nested_data}} := purrr::map(df[[colname_nested_data]], arrange_columns))
+  output <- df %>%
+    dplyr::mutate({{ colname_nested_data }} := purrr::map(df[[colname_nested_data]], arrange_columns))
   end.time <- Sys.time()
-  time.taken <-  difftime(end.time, start.time, units = "secs") %>% round(., 3)
+  time.taken <- difftime(end.time, start.time, units = "secs") %>% round(., 3)
 
   message(paste(emo::ji("white_check_mark"), "Finish sorting!"))
-  
-  if(time.taken > 60){
-    time.taken <- round(time.taken/60, 2)
+
+  if (time.taken > 60) {
+    time.taken <- round(time.taken / 60, 2)
     message(paste(emo::ji("hourglass"), "Sorting time:", time.taken, "mins"))
-  } else{
+  } else {
     message(paste(emo::ji("hourglass"), "Sorting time:", time.taken, "secs"))
   }
-  
+
   message("\n")
   return(output)
-}
\ No newline at end of file
+}
diff --git a/R/data.R b/R/data.R
index 4497646..ab7cbd8 100644
--- a/R/data.R
+++ b/R/data.R
@@ -1,8 +1,8 @@
 #' Tweets sent by 100 random users
-#' 
-#' De-identified test data includes 100 random users, 
+#'
+#' De-identified test data includes 100 random users,
 #' and it can be used as an example to get started with homelocator package
-#' 
+#'
 #' @format A dataframe of 16,300 rows and 3 variables:
 #' \describe{
 #'   \item{u_id}{unique identifier for each user}
@@ -13,11 +13,11 @@
 
 
 #' Neighbours for locations
-#' 
-#' Spatial neighbours for locations, where a neighbour has 
+#'
+#' Spatial neighbours for locations, where a neighbour has
 #' at least one line in common, but its interior does not intersect with the location
-#' 
-#' 
+#'
+#'
 #' @format A nested dataframe of 1,942 rows and 2 variables:
 #' \describe{
 #'   \item{grid_id}{unique identifier for each location}
diff --git a/R/enrich_timestamp.R b/R/enrich_timestamp.R
index 04db0c0..fa45d36 100644
--- a/R/enrich_timestamp.R
+++ b/R/enrich_timestamp.R
@@ -1,76 +1,66 @@
 #' Create new variables dervied from timestamp
-#' 
-#' Create new variables from existing timestamp, which are often used/needed as intermediate variables in home location algorithms. 
-#' @param df A nested dataframe 
+#'
+#' Create new variables from existing timestamp, which are often used/needed as intermediate variables in home location algorithms.
+#' @param df A nested dataframe
 #' @param timestamp Name of column that holds specific timestamp for each data point and it should be POSIXct
-#' 
+#'
 #' @importFrom emo ji
 #' @importFrom rlang sym
-#' @importFrom lubridate year 
-#' @importFrom lubridate month 
-#' @importFrom lubridate day 
-#' @importFrom lubridate wday 
-#' @importFrom lubridate hour 
+#' @importFrom lubridate year
+#' @importFrom lubridate month
+#' @importFrom lubridate day
+#' @importFrom lubridate wday
+#' @importFrom lubridate hour
 #' @importFrom dplyr progress_estimated
-#' @importFrom purrr map 
-#' 
+#' @importFrom purrr map
+#'
 #' @export
-enrich_timestamp <- function(df, timestamp = "created_at"){
-  if(!is.list(df[ , grepl("^data$", names(df)), with=F])){
+enrich_timestamp <- function(df, timestamp = "created_at") {
+  if (!is.list(df[, grepl("^data$", names(df)), with = F])) {
     stop(paste(emo::ji("bomb"), "Input dataset is not nested!"))
   }
-  
-  colname_nested_data <- names(df[ , grepl("^data$", names(df)), with=F])
+
+  colname_nested_data <- names(df[, grepl("^data$", names(df)), with = F])
   timestamp <- rlang::sym(timestamp)
- 
-  if(!is(df[[colname_nested_data]][[1]] %>% pull({{timestamp}}), "POSIXct")){
+
+  if (!is(df[[colname_nested_data]][[1]] %>% pull({{ timestamp }}), "POSIXct")) {
     stop(paste(emo::ji("bomb"), "Timestamp is not POSIXct!"))
   }
-  
-  #define reading function which includes the progress bar
-  enrich_with_progress <- function(data){
+
+  # define reading function which includes the progress bar
+  enrich_with_progress <- function(data) {
     pb$tick()$print()
     data %>%
-      dplyr::mutate(year = lubridate::year({{timestamp}}),
-             month = lubridate::month({{timestamp}}),
-             day = lubridate::day({{timestamp}}),
-             wday = lubridate::wday({{timestamp}}), # day of the week
-             hour = lubridate::hour({{timestamp}}), # hour of the day
-             ymd = format({{timestamp}}, "%Y-%m-%d"))
+      dplyr::mutate(
+        year = lubridate::year({{ timestamp }}),
+        month = lubridate::month({{ timestamp }}),
+        day = lubridate::day({{ timestamp }}),
+        wday = lubridate::wday({{ timestamp }}), # day of the week
+        hour = lubridate::hour({{ timestamp }}), # hour of the day
+        ymd = format({{ timestamp }}, "%Y-%m-%d")
+      )
   }
 
-  #create the progress bar
+  # create the progress bar
   pb <- dplyr::progress_estimated(nrow(df))
-  
+
   start.time <- Sys.time()
   message(paste(emo::ji("hammer_and_wrench"), "Enriching variables from timestamp..."))
   output <- df %>%
-    dplyr::mutate({{colname_nested_data}} := purrr::map(df[[colname_nested_data]], ~enrich_with_progress(.)))
+    dplyr::mutate({{ colname_nested_data }} := purrr::map(df[[colname_nested_data]], ~ enrich_with_progress(.)))
   end.time <- Sys.time()
-  time.taken <-  difftime(end.time, start.time, units = "secs") %>% round(., 3)
-  
+  time.taken <- difftime(end.time, start.time, units = "secs") %>% round(., 3)
+
   message("\n")
   message(paste(emo::ji("white_check_mark"), "Finish enriching! New added variables: year, month, day, wday, hour, ymd."))
-  
-  if(time.taken > 60){
-    time.taken <- round(time.taken/60, 2)
+
+  if (time.taken > 60) {
+    time.taken <- round(time.taken / 60, 2)
     message(paste(emo::ji("hourglass"), "Enriching time:", time.taken, "mins"))
-  }else{
+  } else {
     message(paste(emo::ji("hourglass"), "Enriching time:", time.taken, "secs"))
   }
   message("\n")
-  
+
   return(output)
 }
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/R/extract_location.R b/R/extract_location.R
index 412ae11..e85e517 100644
--- a/R/extract_location.R
+++ b/R/extract_location.R
@@ -1,72 +1,74 @@
-#' Extract identified home locations for users  
-#' 
-#' Extract the most likely home location for each user based on choosed column value 
-#' @param df A nested dataframe by user 
-#' @param show_n_loc Number of homes to be shown 
-#' @param keep_score Choice to keep score or not 
-#' @param score_var Name of column that holds weighted score for each user 
+#' Extract identified home locations for users
+#'
+#' Extract the most likely home location for each user based on choosed column value
+#' @param df A nested dataframe by user
+#' @param show_n_loc Number of homes to be shown
+#' @param keep_score Choice to keep score or not
+#' @param score_var Name of column that holds weighted score for each user
 #' @param user Name of column that holds unique identifier for each user
 #' @param location Name of column that holds unique identifier for each location
-#' 
+#'
 #' @importFrom rlang has_name
 #' @importFrom emo ji
-#' @importFrom dplyr progress_estimated 
+#' @importFrom dplyr progress_estimated
 #' @importFrom dplyr arrange
 #' @importFrom dplyr select
 #' @importFrom purrr map_chr
-#' 
-#' 
+#'
+#'
 #' @export
-extract_location <- function(df, user = "u_id", location = "loc_id", show_n_loc = 1, keep_score = F, ...){
+extract_location <- function(df, user = "u_id", location = "loc_id", show_n_loc = 1, keep_score = F, ...) {
   if (!rlang::has_name(df, user)) {
     stop(paste(emo::ji("bomb"), "User column does not exist!"))
   }
-  
-  user <- rlang::sym(user) 
+
+  user <- rlang::sym(user)
   location <- rlang::sym(location)
   var_expr <- enquos(..., .named = TRUE)
-  colname_nested_data <- names(df[ , grepl("^data$", names(df))])
-  
-  get_loc_with_progress <- function(data){
+  colname_nested_data <- names(df[, grepl("^data$", names(df))])
+
+  get_loc_with_progress <- function(data) {
     pb$tick()$print()
     get_loc <- data %>%
-      dplyr::arrange(!!!var_expr) %>% 
+      dplyr::arrange(!!!var_expr) %>%
       slice(1:show_n_loc) %>%
-      pull({{location}}) 
-    if(show_n_loc == 1){
+      pull({{ location }})
+    if (show_n_loc == 1) {
       get_loc
-    } else{
+    } else {
       paste(get_loc, collapse = "; ")
     }
   }
   # create the progress bar
   pb <- dplyr::progress_estimated(nrow(df))
-  
+
   start.time <- Sys.time()
   message(paste(emo::ji("hammer_and_wrench"), "Start extracting homes for users..."))
-  
-  if(keep_score){
+
+  if (keep_score) {
     output <- df %>%
-      dplyr::mutate(home = purrr::map_chr(df[[colname_nested_data]], ~get_loc_with_progress(.))) 
-  } else{
+      dplyr::mutate(home = purrr::map_chr(df[[colname_nested_data]], ~ get_loc_with_progress(.)))
+  } else {
     output <- df %>%
-      dplyr::mutate(home = purrr::map_chr(df[[colname_nested_data]], ~get_loc_with_progress(.))) %>%
+      dplyr::mutate(home = purrr::map_chr(df[[colname_nested_data]], ~ get_loc_with_progress(.))) %>%
       dplyr::select(-colname_nested_data) %>%
-      dplyr::select({{user}}, home)
+      dplyr::select({{ user }}, home)
   }
   end.time <- Sys.time()
-  time.taken <-  difftime(end.time, start.time, units = "secs") %>% round(., 3)
-  
-  n_user <- output %>% pull(!!user) %>% n_distinct()
+  time.taken <- difftime(end.time, start.time, units = "secs") %>% round(., 3)
+
+  n_user <- output %>%
+    pull(!!user) %>%
+    n_distinct()
   message("\n")
   message(paste(emo::ji("tada"), "Congratulations!! Your have found", n_user, "users' potential home(s)."))
-  
-  if(time.taken > 60){
-    time.taken <- round(time.taken/60, 2)
+
+  if (time.taken > 60) {
+    time.taken <- round(time.taken / 60, 2)
     message(paste(emo::ji("hourglass"), "Extracting time:", time.taken, "mins"))
-  }else{
+  } else {
     message(paste(emo::ji("hourglass"), "Extracting time:", time.taken, "secs"))
   }
   message("\n")
   return(output)
-}
\ No newline at end of file
+}
diff --git a/R/filter.R b/R/filter.R
index 9183396..2cd2b48 100644
--- a/R/filter.R
+++ b/R/filter.R
@@ -1,58 +1,61 @@
-#' Return rows with matching conditions 
-#' 
-#' Filter finds rows where conditions are true. 
-#' @param df A dataframe 
+#' Return rows with matching conditions
+#'
+#' Filter finds rows where conditions are true.
+#' @param df A dataframe
 #' @param user Name of column that holds unique identifier for each user
 #' @param ... Logical predicates defined in terms of the variables in df. Only rows match conditions are kept.
-#' 
+#'
 #' @importFrom emo ji
 #' @importFrom rlang sym
 #' @importFrom rlang has_name
-#' 
+#'
 #' @export
-filter_verbose <- function(df, user = "u_id", ...){
-  
+filter_verbose <- function(df, user = "u_id", ...) {
   if (!rlang::has_name(df, user)) {
     stop(paste(emo::ji("bomb"), "User column does not exist!"))
   }
-  
-  user <- rlang::sym(user) 
+
+  user <- rlang::sym(user)
   var_expr <- enquos(...)
-  
-  n_original_users <- df %>% pull({{user}}) %>% dplyr::n_distinct()
-  
+
+  n_original_users <- df %>%
+    pull({{ user }}) %>%
+    dplyr::n_distinct()
+
   message(paste(emo::ji("bust_in_silhouette"), "There are", n_original_users, "users at this moment."))
   message(paste(emo::ji("hammer_and_wrench"), "Start filtering users..."))
   start.time <- Sys.time()
   output <- df %>% filter(!!!var_expr)
   end.time <- Sys.time()
-  time.taken <-  difftime(end.time, start.time, units = "secs") %>% round(., 3)
-  
-  n_new_users <- output %>% pull({{user}}) %>% n_distinct()
+  time.taken <- difftime(end.time, start.time, units = "secs") %>% round(., 3)
+
+  n_new_users <- output %>%
+    pull({{ user }}) %>%
+    n_distinct()
   n_removed_users <- n_original_users - n_new_users
 
   message(paste(emo::ji("white_check_mark"), "Finish filtering! Filterred", n_removed_users, "users!"))
   message(paste(emo::ji("bust_in_silhouette"), "There are", n_new_users, "users left."))
-  
-  if(time.taken > 60){
-    time.taken <- round(time.taken/60, 2)
+
+  if (time.taken > 60) {
+    time.taken <- round(time.taken / 60, 2)
     message(paste(emo::ji("hourglass"), "Filtering time:", time.taken, "mins"))
-  }else{
+  } else {
     message(paste(emo::ji("hourglass"), "Filtering time:", time.taken, "secs"))
   }
-  
+
   message("\n")
   return(output)
 }
 
-#' Return rows with matching condition within nested dataframe 
-#' 
-#'   
-#' Filter finds rows where conditions are true within nested dataframe 
-#' @param df A nested dataframe 
+#' Return rows with matching condition within nested dataframe
+#'
+#'
+#' Filter finds rows where conditions are true within nested dataframe
+#' @param df A nested dataframe
 #' @param user Name of column that holds unique identifier for each user
 #' @param ... Logical predicates defined in terms of the variables in df. Only rows match conditions are kept.
-#' 
+#'
 #' @importFrom emo ji
 #' @importFrom rlang sym
 #' @importFrom rlang has_name
@@ -61,69 +64,63 @@ filter_verbose <- function(df, user = "u_id", ...){
 #' @importFrom plyr empty
 #' @importFrom purrr map
 #' @importFrom purrr map_lgl
-#' 
+#'
 #' @export
-filter_nested <- function(df,  user = "u_id", ...){
-  
-  if(!is.list(df[ , grepl("^data$", names(df))])){
+filter_nested <- function(df, user = "u_id", ...) {
+  if (!is.list(df[, grepl("^data$", names(df))])) {
     stop(paste(emo::ji("bomb"), "Dataset is not nested!"))
   }
-  
+
   if (!rlang::has_name(df, user)) {
     stop(paste(emo::ji("bomb"), "User column does not exist!"))
   }
-  
+
   var_expr <- enquos(...)
-  user <- rlang::sym(user) 
-  colname_nested_data <- names(df[ , grepl("^data$", names(df))])
-  
-  #filter
-  filter_with_progress <- function(data){
+  user <- rlang::sym(user)
+  colname_nested_data <- names(df[, grepl("^data$", names(df))])
+
+  # filter
+  filter_with_progress <- function(data) {
     pb$tick()$print()
-    data %>% 
+    data %>%
       filter(!!!var_expr)
   }
-  
+
   start.time <- Sys.time()
-  n_original_users <- df %>% pull({{user}}) %>% dplyr::n_distinct()
+  n_original_users <- df %>%
+    pull({{ user }}) %>%
+    dplyr::n_distinct()
   message(paste(emo::ji("bust_in_silhouette"), "There are", n_original_users, "users at this moment."))
-  message(paste(emo::ji("hammer_and_wrench"), "Start filtering user...")) 
-  
+  message(paste(emo::ji("hammer_and_wrench"), "Start filtering user..."))
+
   # create the progress bar
   pb <- dplyr::progress_estimated(nrow(df))
-  
+
   start.time <- Sys.time()
-  output <- df %>% 
-    dplyr::mutate({{colname_nested_data}} := purrr::map(df[[colname_nested_data]], ~filter_with_progress(.))) 
+  output <- df %>%
+    dplyr::mutate({{ colname_nested_data }} := purrr::map(df[[colname_nested_data]], ~ filter_with_progress(.)))
   output_data <- output[[colname_nested_data]]
-  #check empty tibble 
-  output <- output %>% 
+  # check empty tibble
+  output <- output %>%
     filter(!(purrr::map_lgl(output_data, plyr::empty)))
   end.time <- Sys.time()
-  time.taken <-  difftime(end.time, start.time, units = "secs") %>% round(., 3)
-  
-  n_new_users <- output %>% pull({{user}}) %>% dplyr::n_distinct()
+  time.taken <- difftime(end.time, start.time, units = "secs") %>% round(., 3)
+
+  n_new_users <- output %>%
+    pull({{ user }}) %>%
+    dplyr::n_distinct()
   n_removed_users <- n_original_users - n_new_users
-  
+
   message("\n")
   message(paste(emo::ji("white_check_mark"), "Finish Filtering! Filterred", n_removed_users, "users!"))
   message(paste(emo::ji("bust_in_silhouette"), "There are", n_new_users, "users left!"))
-  
-  if(time.taken > 60){
-    time.taken <- round(time.taken/60, 2)
+
+  if (time.taken > 60) {
+    time.taken <- round(time.taken / 60, 2)
     message(paste(emo::ji("hourglass"), "Filtering time:", time.taken, "mins"))
-  }else{
+  } else {
     message(paste(emo::ji("hourglass"), "Filtering time:", time.taken, "secs"))
   }
   message("\n")
   return(output)
 }
-
-
-
-
-
-
-
-
-
diff --git a/R/homelocator.R b/R/homelocator.R
index a355b1b..b342d68 100644
--- a/R/homelocator.R
+++ b/R/homelocator.R
@@ -1,8 +1,8 @@
-.onAttach <- function(libname, pkgname){
+.onAttach <- function(libname, pkgname) {
   packageStartupMessage("Welcome to homelocator package!")
 }
 
-.onLoad <- function(libname, pkgname){
+.onLoad <- function(libname, pkgname) {
   op <- options()
   op.devtools <- list(
     devtools.path = "~/R-dev",
@@ -12,20 +12,6 @@
     devtools.desc = list()
   )
   toset <- !(names(op.devtools) %in% names(op))
-  if(any(toset)) options(op.devtools[toset])
+  if (any(toset)) options(op.devtools[toset])
   invisible()
 }
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/R/identify_location.R b/R/identify_location.R
index 06410f6..a82a9b8 100644
--- a/R/identify_location.R
+++ b/R/identify_location.R
@@ -1,66 +1,76 @@
 #' Identify home locations for users with built-in recipes
-#' 
-#' This function infers the most possible home locations for users with built-in recipes. 
-#' The "recipe" can be either embeded "recipe" - HMLC, OSNA, APDM, FREQ, or self created "recipe". 
-#' 
+#'
+#' This function infers the most possible home locations for users with built-in recipes.
+#' The "recipe" can be either embeded "recipe" - HMLC, OSNA, APDM, FREQ, or self created "recipe".
+#'
 #' @param df A dataframe with columns for the user id, location, timestamp
 #' @param user Name of column that holds unique identifier for each user
 #' @param timestamp Name of timestamp column. Should be POSIXct
 #' @param location Name of column that holds unique identifier for each location
-#' @param recipe  Embeded algorithms to identify the most possible home locations for users           
+#' @param recipe  Embeded algorithms to identify the most possible home locations for users
 #' @param show_n_loc Number of potential homes to extract
 #' @param keep_score Option to keep or remove calculated result/score per user per location
 #' @param use_default_threshold Option to use default threshold or customized threshold
 #' @param rm_topNpct_user Option to remove or keep the top N percent active users
-#' 
+#'
 #' @importFrom rlang sym
 #' @importFrom rlang has_name
 #' @importFrom emo ji
 #' @importFrom tictoc tic
 #' @importFrom tictoc toc
-#' 
+#'
 #' @export
-identify_location <- function(df, user = "u_id", timestamp = "created_at", location = "loc_id", recipe, 
-                              show_n_loc = 1, keep_score = F, use_default_threshold = T, rm_topNpct_user = F){
+identify_location <- function(df, user = "u_id", timestamp = "created_at", location = "loc_id", recipe,
+                              show_n_loc = 1, keep_score = F, use_default_threshold = T, rm_topNpct_user = F) {
   user_expr <- rlang::sym(user)
   timestamp_expr <- rlang::sym(timestamp)
   location_expr <- rlang::sym(location)
-  
+
   tictoc::tic("Location Identification")
-  ## Validate the input dataset 
+  ## Validate the input dataset
   df_valided <- validate_dataset(df, user = user, timestamp = timestamp, location = location)
-  
+
   # Nest the dataset to each user
-  df_nested <- nest_verbose(df_valided, c({{location_expr}}, {{timestamp_expr}}))
+  df_nested <- nest_verbose(df_valided, c({{ location_expr }}, {{ timestamp_expr }}))
 
   ## Derive new variables from timestamp
   df_enriched <- enrich_timestamp(df_nested, timestamp = timestamp)
-  
+
   ## recipe: HMLC
-  if(recipe == "HMLC"){
-    output <- recipe_HMLC(df_enriched, user = user, timestamp = timestamp, location = location, 
-                          show_n_loc, keep_original_vars = F, keep_score = keep_score, 
-                          use_default_threshold = use_default_threshold, rm_topNpct_user = rm_topNpct_user)
-  } 
-  
+  if (recipe == "HMLC") {
+    output <- recipe_HMLC(df_enriched,
+      user = user, timestamp = timestamp, location = location,
+      show_n_loc, keep_original_vars = F, keep_score = keep_score,
+      use_default_threshold = use_default_threshold, rm_topNpct_user = rm_topNpct_user
+    )
+  }
+
   ## recipe: FREQ
-  if(recipe == "FREQ"){
-    output <- recipe_FREQ(df_enriched, user = user, timestamp = timestamp, location = location, show_n_loc, 
-                          keep_score = keep_score, use_default_threshold = use_default_threshold, rm_topNpct_user = rm_topNpct_user)
+  if (recipe == "FREQ") {
+    output <- recipe_FREQ(df_enriched,
+      user = user, timestamp = timestamp, location = location, show_n_loc,
+      keep_score = keep_score, use_default_threshold = use_default_threshold, rm_topNpct_user = rm_topNpct_user
+    )
   }
-  
+
   ## recipe: OSNA
-  if(recipe == "OSNA"){
-    output <- recipe_OSNA(df_enriched, user = user, timestamp = timestamp, location = location, show_n_loc = show_n_loc, 
-                          keep_score = keep_score, use_default_threshold = use_default_threshold, rm_topNpct_user = rm_topNpct_user)
+  if (recipe == "OSNA") {
+    output <- recipe_OSNA(df_enriched,
+      user = user, timestamp = timestamp, location = location, show_n_loc = show_n_loc,
+      keep_score = keep_score, use_default_threshold = use_default_threshold, rm_topNpct_user = rm_topNpct_user
+    )
   }
-  
+
   ## recipe: APDM
-  if(recipe == "APDM"){
-    message(paste(emo::ji("exclamation"), "Please make sure you have loaded the neighbors table before you use APDM recipe.\nThe table should have one column named", location,
-          "and another column named neighbor.\n The neighbor column should be a list-column contains the neighboring locations for", location, "per row."))
-    output <- recipe_APDM(df_enriched, df_neighbors, user = user, timestamp = timestamp, location = location, 
-                          keep_score = keep_score, use_default_threshold = use_default_threshold)
+  if (recipe == "APDM") {
+    message(paste(
+      emo::ji("exclamation"), "Please make sure you have loaded the neighbors table before you use APDM recipe.\nThe table should have one column named", location,
+      "and another column named neighbor.\n The neighbor column should be a list-column contains the neighboring locations for", location, "per row."
+    ))
+    output <- recipe_APDM(df_enriched, df_neighbors,
+      user = user, timestamp = timestamp, location = location,
+      keep_score = keep_score, use_default_threshold = use_default_threshold
+    )
   }
   tictoc::toc()
   return(output)
@@ -76,20 +86,20 @@ identify_location <- function(df, user = "u_id", timestamp = "created_at", locat
 #' @param  keep_original_vars Option to keep or remove columns other than 'user, timestamp, and location'
 #' @param use_default_threshold Option to use default threshold or customized threshold
 #' @param rm_topNpct_user Option to remove or keep the top N percent active users
-#' 
+#'
 #' @importFrom rlang sym
 #' @importFrom lubridate hour
 #' @importFrom lubridate minute
 #' @importFrom lubridate second
-#' 
-recipe_HMLC <- function (df, user = "u_id", timestamp = "created_at", location = "loc_id", show_n_loc, 
-                         keep_original_vars = F, keep_score = F, use_default_threshold = T, rm_topNpct_user = F) {
+#'
+recipe_HMLC <- function(df, user = "u_id", timestamp = "created_at", location = "loc_id", show_n_loc,
+                        keep_original_vars = F, keep_score = F, use_default_threshold = T, rm_topNpct_user = F) {
   user_expr <- rlang::sym(user)
   timestamp_expr <- rlang::sym(timestamp)
   location_expr <- rlang::sym(location)
-  
+
   # use_default_threshold <- readline(prompt = "Do you want to use the default thresholds? (Yes/No): ")
-  if(use_default_threshold){
+  if (use_default_threshold) {
     topNpct <- 1
     threshold_n_points <- 10
     threshold_n_locs <- 10
@@ -106,15 +116,15 @@ recipe_HMLC <- function (df, user = "u_id", timestamp = "created_at", location =
     w_weekend <- 0.1
     w_rest <- 0.2
     w_weekend_am <- 0.1
-  } else{
-    topNpct <- readline(prompt="How many percentage of top active users to be removed (default = 1)? Your answer: ") %>% as.integer()
+  } else {
+    topNpct <- readline(prompt = "How many percentage of top active users to be removed (default = 1)? Your answer: ") %>% as.integer()
     threshold_n_points <- readline(prompt = "What is the minimum number of data points should a user sent in total(default = 10)? Your answer: ") %>% as.integer()
     threshold_n_locs <- readline(prompt = "What is the minimum number of unique places should data points be collected from (default = 10)? Your answer: ") %>% as.integer()
     threshold_n_points_loc <- readline(prompt = "What is the minimum number of data points should a user sent at each place (default = 10)? Your answer: ") %>% as.integer()
     threshold_n_hours_loc <- readline(prompt = "What is the minimum number of unique hours should data points be collected from (default = 10)? Your answer: ") %>% as.integer()
     threshold_n_days_loc <- readline(prompt = "What is the minimum number of days should data points be collected from (default = 10)? Your answer: ") %>% as.integer()
     threshold_period_loc <- readline(prompt = "What is the minimum period should a user being active on the digital platform (default = 10)? Your answer: ") %>% as.integer()
-    
+
     w_n_points_loc <- readline(prompt = "Give a weight to the number of data points at a place (default = 0.1): ") %>% as.integer()
     w_n_hours_loc <- readline(prompt = "Give a weight to the number of unique hours data points were sent a place (default = 0.1): ") %>% as.integer()
     w_n_days_loc <- readline(prompt = "Give a weight to the number of days data points were sent at a place (default = 0.1): ") %>% as.integer()
@@ -125,64 +135,75 @@ recipe_HMLC <- function (df, user = "u_id", timestamp = "created_at", location =
     w_rest <- readline(prompt = "Give a weight the proportion of data points sent during rest time at a place (default = 0.2): ") %>% as.integer()
     w_weekend_am <- readline(prompt = "Give a weight the proportion of data points sent during weekend morning at a place (default = 0.1): ") %>% as.integer()
   }
-  
-  # users level 
-  ## pre-condition set on users 
+
+  # users level
+  ## pre-condition set on users
   df_match_user_condition <- df %>%
-    summarise_nested(., n_points = n(),
-                     n_locs = n_distinct({{location_expr}})) %>%
+    summarise_nested(.,
+      n_points = n(),
+      n_locs = n_distinct({{ location_expr }})
+    ) %>%
     remove_top_users(., user = user, counts = "n_points", topNpct_user = topNpct, rm_topNpct_user = rm_topNpct_user) %>% # remove top N percent active users based on frequency
     filter_verbose(., user = user, n_points > threshold_n_points & n_locs > threshold_n_locs) # each use at least has more than threshold_n_points data points sent at threshold_n_locs different locations
-  
-  # location level 
-  # pre-condition set on locations     
+
+  # location level
+  # pre-condition set on locations
   colnames_nested_data <- df_match_user_condition$data[[1]] %>% names()
   colnmaes_to_nest <- colnames_nested_data[-which(colnames_nested_data == location)]
-  
+
   df_match_loc_condition <- df_match_user_condition %>%
-    summarise_double_nested(., nest_cols = colnmaes_to_nest, # summarise on location level 
-                            n_points_loc = n(), 
-                            n_hours_loc = n_distinct(hour), 
-                            n_days_loc = n_distinct(ymd), 
-                            period_loc = as.numeric(max({{timestamp_expr}}) - min({{timestamp_expr}}), "days")) %>% 
+    summarise_double_nested(.,
+      nest_cols = colnmaes_to_nest, # summarise on location level
+      n_points_loc = n(),
+      n_hours_loc = n_distinct(hour),
+      n_days_loc = n_distinct(ymd),
+      period_loc = as.numeric(max({{ timestamp_expr }}) - min({{ timestamp_expr }}), "days")
+    ) %>%
     unnest_verbose(data) %>% # one row shows data point of per user per location
-    filter_verbose(., user = user, 
-                      n_points_loc > threshold_n_points_loc & n_hours_loc > threshold_n_hours_loc & n_days_loc > threshold_n_days_loc & period_loc > threshold_period_loc) %>%  # each use at least has threshold_n_points_loc data points sent at threshold_n_hours_loc different hours for at least threshold_n_days_loc different days at each location, the whole time period is at least more than threshold_period_loc days 
-    summarise_nested(n_wdays_loc = n_distinct(wday), # add new variable after matching the conditions
-                     n_months_loc = n_distinct(month))                               
-                                                                                                       
-  # add new variables 
-  df_expanded <- df_match_loc_condition %>% 
-    mutate_nested(., 
-                  wd_or_wk = if_else(wday %in% c(1,7), "weekend", "weekday"), # 1 means Sunday and 7 means Saturday 
-                  time_numeric = lubridate::hour({{timestamp_expr}}) + lubridate::minute({{timestamp_expr}})/60 + lubridate::second({{timestamp_expr}})/3600, 
-                  rest_or_work = if_else(time_numeric >= 9 & time_numeric <= 18, "work", "rest"), 
-                  wk.am_or_wk.pm = if_else(time_numeric >= 6 & time_numeric <= 12 & wd_or_wk == "weekend", "weekend_am", "weekend_pm")) %>% 
+    filter_verbose(.,
+      user = user,
+      n_points_loc > threshold_n_points_loc & n_hours_loc > threshold_n_hours_loc & n_days_loc > threshold_n_days_loc & period_loc > threshold_period_loc
+    ) %>% # each use at least has threshold_n_points_loc data points sent at threshold_n_hours_loc different hours for at least threshold_n_days_loc different days at each location, the whole time period is at least more than threshold_period_loc days
+    summarise_nested(
+      n_wdays_loc = n_distinct(wday), # add new variable after matching the conditions
+      n_months_loc = n_distinct(month)
+    )
+
+  # add new variables
+  df_expanded <- df_match_loc_condition %>%
+    mutate_nested(.,
+      wd_or_wk = if_else(wday %in% c(1, 7), "weekend", "weekday"), # 1 means Sunday and 7 means Saturday
+      time_numeric = lubridate::hour({{ timestamp_expr }}) + lubridate::minute({{ timestamp_expr }}) / 60 + lubridate::second({{ timestamp_expr }}) / 3600,
+      rest_or_work = if_else(time_numeric >= 9 & time_numeric <= 18, "work", "rest"),
+      wk.am_or_wk.pm = if_else(time_numeric >= 6 & time_numeric <= 12 & wd_or_wk == "weekend", "weekend_am", "weekend_pm")
+    ) %>%
     prop_factor_nested(wd_or_wk, rest_or_work, wk.am_or_wk.pm)
-    
+
   ## assign weight to variables and sum variables
-  df_scored <- df_expanded  %>%
-    score_nested(., user = user, location = location, # score each variables with weight
-                 keep_original_vars = keep_original_vars,
-                 s_n_points_loc = w_n_points_loc * (n_points_loc/max(n_points_loc)),
-                 s_n_hours_loc = w_n_hours_loc * (n_hours_loc/24),
-                 s_n_days_loc = w_n_days_loc * (n_days_loc/max(n_days_loc)),
-                 s_n_wdays_loc = w_n_wdays_loc * (n_wdays_loc/7),
-                 s_n_months_loc = w_n_months_loc * (n_months_loc/12),
-                 s_period_loc = w_period_loc * (period_loc/max(period_loc)),
-                 s_weekend = w_weekend * (weekend),
-                 s_rest = w_rest * (rest),
-                 s_weekend_am = w_weekend_am * (weekend_am)) %>% 
+  df_scored <- df_expanded %>%
+    score_nested(.,
+      user = user, location = location, # score each variables with weight
+      keep_original_vars = keep_original_vars,
+      s_n_points_loc = w_n_points_loc * (n_points_loc / max(n_points_loc)),
+      s_n_hours_loc = w_n_hours_loc * (n_hours_loc / 24),
+      s_n_days_loc = w_n_days_loc * (n_days_loc / max(n_days_loc)),
+      s_n_wdays_loc = w_n_wdays_loc * (n_wdays_loc / 7),
+      s_n_months_loc = w_n_months_loc * (n_months_loc / 12),
+      s_period_loc = w_period_loc * (period_loc / max(period_loc)),
+      s_weekend = w_weekend * (weekend),
+      s_rest = w_rest * (rest),
+      s_weekend_am = w_weekend_am * (weekend_am)
+    ) %>%
     score_summary(., user = user, location = location, starts_with("s_")) # sum all scores for each location
-  
-  # extract locations based on score value 
-  df_scored %>% 
+
+  # extract locations based on score value
+  df_scored %>%
     extract_location(., user = user, location = location, show_n_loc = show_n_loc, keep_score = keep_score, desc(score))
 }
 
 
 #' recipe: frequency - FREQ
-#' 
+#'
 #' @param df An enriched dataframe
 #' @param user Name of column that holds unique identifier for each user
 #' @param timestamp Name of timestamp column. Should be POSIXct
@@ -191,55 +212,58 @@ recipe_HMLC <- function (df, user = "u_id", timestamp = "created_at", location =
 #' @param keep_score Option to keep or remove calculated result/score per user per location
 #' @param use_default_threshold Option to use default threshold or customized threshold
 #' @param rm_topNpct_user Option to remove or keep the top N percent active users
-#' 
+#'
 #' @importFrom rlang sym
-recipe_FREQ <- function(df, user = "u_id", timestamp = "created_at", location = "loc_id", show_n_loc, 
-                        keep_score = F, use_default_threshold = T, rm_topNpct_user = F){
+recipe_FREQ <- function(df, user = "u_id", timestamp = "created_at", location = "loc_id", show_n_loc,
+                        keep_score = F, use_default_threshold = T, rm_topNpct_user = F) {
   user_expr <- rlang::sym(user)
   timestamp_expr <- rlang::sym(timestamp)
   location_expr <- rlang::sym(location)
-  
+
   # use_default_threshold <- readline(prompt = "Do you want to use the default thresholds? (Yes/No): ")
-  if(use_default_threshold){
+  if (use_default_threshold) {
     topNpct <- 1
     threshold_n_points <- 10
     threshold_n_locs <- 10
     threshold_n_points_loc <- 10
-  } else{
-    topNpct <- readline(prompt="How many percentage of top active users to be removed (default = 1)? Your answer: ") %>% as.integer()
-    threshold_n_points <- readline(prompt="What is the minumn number of data points should a user sent in total(default = 10)? Your answer: ") %>% as.integer()
-    threshold_n_locs <- readline(prompt="What is the minumn number of unique places should data points be collected from (default = 10)? Your answer: ") %>% as.integer()
-    threshold_n_points_loc <- readline(prompt="What is the minumn number of data points should a user sent at each place (default = 10)? Your answer: ") %>% as.integer()
+  } else {
+    topNpct <- readline(prompt = "How many percentage of top active users to be removed (default = 1)? Your answer: ") %>% as.integer()
+    threshold_n_points <- readline(prompt = "What is the minumn number of data points should a user sent in total(default = 10)? Your answer: ") %>% as.integer()
+    threshold_n_locs <- readline(prompt = "What is the minumn number of unique places should data points be collected from (default = 10)? Your answer: ") %>% as.integer()
+    threshold_n_points_loc <- readline(prompt = "What is the minumn number of data points should a user sent at each place (default = 10)? Your answer: ") %>% as.integer()
   }
-  
-  # users level 
-  ## pre-condition set on users 
+
+  # users level
+  ## pre-condition set on users
   df_match_user_condition <- df %>%
-    summarise_nested(., 
-                     n_points = n(),
-                     n_locs = n_distinct({{location_expr}})) %>%
+    summarise_nested(.,
+      n_points = n(),
+      n_locs = n_distinct({{ location_expr }})
+    ) %>%
     remove_top_users(., user = user, counts = "n_points", topNpct_user = topNpct, rm_topNpct_user = rm_topNpct_user) %>% # remove top N percent active users based on frequency
     filter_verbose(., user = user, n_points > threshold_n_points & n_locs > threshold_n_locs) # each use at least has more than threshold_n_points data points sent at threshold_n_loc different locations
-  
-  # location level 
-  # pre-condition set on locations     
+
+  # location level
+  # pre-condition set on locations
   colnames_nested_data <- df_match_user_condition$data[[1]] %>% names()
   colnmaes_to_nest <- colnames_nested_data[-which(colnames_nested_data == location)]
-  
+
   df_match_loc_condition <- df_match_user_condition %>%
-    summarise_double_nested(., nest_cols = colnmaes_to_nest, # summarise on location level 
-                            n_points_loc = n()) %>% 
-    filter_nested(., user = user, n_points_loc > threshold_n_points_loc)  
-  
+    summarise_double_nested(.,
+      nest_cols = colnmaes_to_nest, # summarise on location level
+      n_points_loc = n()
+    ) %>%
+    filter_nested(., user = user, n_points_loc > threshold_n_points_loc)
+
   # extract locations based on frequency of data points sent on locations
-  df_match_loc_condition %>% 
+  df_match_loc_condition %>%
     extract_location(., user = user, location = location, show_n_loc = show_n_loc, keep_score = keep_score, desc(n_points_loc))
 }
 
 
 #' recipe: Online Social Networks Activity - OSNA
-#' 
-#' 
+#'
+#'
 #' @param df An enriched dataframe
 #' @param user Name of column that holds unique identifier for each user
 #' @param timestamp Name of timestamp column. Should be POSIXct
@@ -248,71 +272,76 @@ recipe_FREQ <- function(df, user = "u_id", timestamp = "created_at", location =
 #' @param keep_score Option to keep or remove calculated result/score per user per location
 #' @param use_default_threshold Option to use default threshold or customized threshold
 #' @param rm_topNpct_user Option to remove or keep the top N percent active users
-#' 
+#'
 #' @importFrom rlang sym
-recipe_OSNA <- function(df, user = "u_id", timestamp = "created_at", location = "loc_id", show_n_loc, 
-                        keep_score = F, use_default_threshold = T, rm_topNpct_user = F){
+recipe_OSNA <- function(df, user = "u_id", timestamp = "created_at", location = "loc_id", show_n_loc,
+                        keep_score = F, use_default_threshold = T, rm_topNpct_user = F) {
   user_expr <- rlang::sym(user)
   timestamp_expr <- rlang::sym(timestamp)
   location_expr <- rlang::sym(location)
-  
+
   # use_default_threshold <- readline(prompt = "Do you want to use the default thresholds? (Yes/No): ")
-  if(use_default_threshold){
+  if (use_default_threshold) {
     topNpct <- 1
     threshold_n_locs <- 3
-  } else{
-    topNpct <- readline(prompt="How many percentage of top active users to be removed (default = 1)? Your answer: ") %>% as.integer()
-    threshold_n_locs <- readline(prompt="What is the minumn number of unique places should data points be collected from (default = 3)? Your answer: ") %>% as.integer()
+  } else {
+    topNpct <- readline(prompt = "How many percentage of top active users to be removed (default = 1)? Your answer: ") %>% as.integer()
+    threshold_n_locs <- readline(prompt = "What is the minumn number of unique places should data points be collected from (default = 3)? Your answer: ") %>% as.integer()
   }
-  # users level 
-  ## pre-condition set on users 
+  # users level
+  ## pre-condition set on users
   df_match_user_condition <- df %>%
-    summarise_nested(., 
-                     n_points = n(),
-                     n_locs = n_distinct({{location_expr}})) %>% 
+    summarise_nested(.,
+      n_points = n(),
+      n_locs = n_distinct({{ location_expr }})
+    ) %>%
     remove_top_users(., user = user, counts = "n_points", topNpct_user = topNpct, rm_topNpct_user = rm_topNpct_user) %>% # remove top N percent active users based on frequency
-    filter_verbose(., user = user, n_locs > threshold_n_locs) %>% # remove users with data at less than N places 
-    filter_nested(., user = user, !wday %in% c(1, 7)) %>%  # remove data sent on weekend, 1 for Sunday and 7 for Saturday 
-    mutate_nested(timeframe = if_else(hour >= 2 & hour < 8, "Rest", if_else(hour >= 8 & hour < 19, "Active", "Leisure"))) %>% # add time frame column 
+    filter_verbose(., user = user, n_locs > threshold_n_locs) %>% # remove users with data at less than N places
+    filter_nested(., user = user, !wday %in% c(1, 7)) %>% # remove data sent on weekend, 1 for Sunday and 7 for Saturday
+    mutate_nested(timeframe = if_else(hour >= 2 & hour < 8, "Rest", if_else(hour >= 8 & hour < 19, "Active", "Leisure"))) %>% # add time frame column
     filter_nested(., user = user, timeframe != "Active") # home location is focused on Rest and Leisure time frame
-  
-  ## calculate weighted score of data points sent during Leisure and Rest time in different day at different places 
+
+  ## calculate weighted score of data points sent during Leisure and Rest time in different day at different places
   colnames_nested_data <- df_match_user_condition$data[[1]] %>% names()
-  colnmaes_to_nest <- colnames_nested_data[-which(colnames_nested_data %in% c(location, "ymd", "timeframe"))] # per location per day per timeframe 
-  
+  colnmaes_to_nest <- colnames_nested_data[-which(colnames_nested_data %in% c(location, "ymd", "timeframe"))] # per location per day per timeframe
+
   df_timeframe_nested <- df_match_user_condition %>%
-    summarise_double_nested(., nest_cols = colnmaes_to_nest, 
-                            n_points_timeframe = n()) %>%  # number of data points at the timeframe 
-    spread_nested(., key_var = "timeframe", value_var = "n_points_timeframe") %>% # spread the timeframe to columns 
+    summarise_double_nested(.,
+      nest_cols = colnmaes_to_nest,
+      n_points_timeframe = n()
+    ) %>% # number of data points at the timeframe
+    spread_nested(., key_var = "timeframe", value_var = "n_points_timeframe") %>% # spread the timeframe to columns
     unnest_verbose() %>% # unnest the result, missed column will be automatically added with NA value
-    replace(., is.na(.), 0)  # replace NA with 0
-  
-  # give the weigh to Rest and Leisure time 
+    replace(., is.na(.), 0) # replace NA with 0
+
+  # give the weigh to Rest and Leisure time
   weight_rest <- mean(0.744, 0.735, 0.737)
   weight_leisure <- mean(0.362, 0.357, 0.354)
-  
-  df_weighted <- df_timeframe_nested %>% 
-    mutate_verbose(score_ymd_loc = weight_rest * Rest + weight_leisure * Leisure) 
-  
-  # nest by user 
-  df_user_nested <- df_weighted %>% nest_verbose(-u_id) 
-  
-  # calculate the score per location 
+
+  df_weighted <- df_timeframe_nested %>%
+    mutate_verbose(score_ymd_loc = weight_rest * Rest + weight_leisure * Leisure)
+
+  # nest by user
+  df_user_nested <- df_weighted %>% nest_verbose(-u_id)
+
+  # calculate the score per location
   colnames_nested_data <- df_user_nested$data[[1]] %>% names()
-  colnmaes_to_nest <- colnames_nested_data[-which(colnames_nested_data == location)] # nest by location 
-  
-  df_scored <- df_user_nested %>% 
-    summarise_double_nested(., nest_cols = colnmaes_to_nest, 
-                            score = sum(score_ymd_loc))
-  
-  # extract location based on score value 
+  colnmaes_to_nest <- colnames_nested_data[-which(colnames_nested_data == location)] # nest by location
+
+  df_scored <- df_user_nested %>%
+    summarise_double_nested(.,
+      nest_cols = colnmaes_to_nest,
+      score = sum(score_ymd_loc)
+    )
+
+  # extract location based on score value
   extract_location(df_scored, user = user, location = location, show_n_loc = show_n_loc, keep_score = keep_score, desc(score))
 }
 
 
 
 #' recipe: Anchor Point Determining Model - APDM
-#' 
+#'
 #' @param df An enriched dataframe
 #' @param user Name of column that holds unique identifier for each user
 #' @param timestamp Name of timestamp column. Should be POSIXct
@@ -320,12 +349,12 @@ recipe_OSNA <- function(df, user = "u_id", timestamp = "created_at", location =
 #' @param show_n_loc Number of potential homes to extract
 #' @param keep_score Option to keep or remove calculated result/score per user per location
 #' @param use_default_threshold Option to use default threshold or customized threshold
-#' 
+#'
 #' @importFrom rlang sym
 #' @importFrom rlang has_name
 #' @importFrom emo ji
 #' @importFrom chron times
-recipe_APDM <- function(df, df_neighbors, user = "u_id", timestamp = "created_at", location = "loc_id", keep_score = F, use_default_threshold = T){
+recipe_APDM <- function(df, df_neighbors, user = "u_id", timestamp = "created_at", location = "loc_id", keep_score = F, use_default_threshold = T) {
   user_expr <- rlang::sym(user)
   timestamp_expr <- rlang::sym(timestamp)
   location_expr <- rlang::sym(location)
@@ -333,17 +362,17 @@ recipe_APDM <- function(df, df_neighbors, user = "u_id", timestamp = "created_at
   if (!rlang::has_name(df_neighbors, location)) {
     stop(paste(emo::ji("bomb"), "Location column does not exist!"))
   }
-  
+
   if (!rlang::has_name(df_neighbors, "neighbor")) {
     stop(paste(emo::ji("bomb"), "Neighbor column does not exist!"))
   }
   # use_default_threshold <- readline(prompt = "Do you want to use the default thresholds? (Yes/No): ")
-  if(use_default_threshold){
+  if (use_default_threshold) {
     threshold_n_days_per_month_loc <- 7
     threshold_n_points_per_month_loc <- 500
-  } else{
-    threshold_n_days_per_month_loc <- readline(prompt="What is the minium active days per month (default = 7)? Your answer: ") %>% as.integer()
-    threshold_n_points_per_month_loc <- readline(prompt="What is the maximum data points sent per month (default = 500)? Your answer: ") %>% as.integer()
+  } else {
+    threshold_n_days_per_month_loc <- readline(prompt = "What is the minium active days per month (default = 7)? Your answer: ") %>% as.integer()
+    threshold_n_points_per_month_loc <- readline(prompt = "What is the maximum data points sent per month (default = 500)? Your answer: ") %>% as.integer()
   }
   ## Removal of places with too high or too low a number of data points made
   ## respondents who have data points at their top n frequently visited places fewer than 7 days a month will be removed from the database
@@ -351,93 +380,107 @@ recipe_APDM <- function(df, df_neighbors, user = "u_id", timestamp = "created_at
   colnames_nested_data <- df$data[[1]] %>% names()
   colnmaes_to_nest <- colnames_nested_data[-which(colnames_nested_data %in% c(location, "year", "month"))]
   df_cleaned <- df %>%
-    summarise_double_nested(., nest_cols = colnmaes_to_nest,
-                            n_days_per_month_loc = n_distinct(day),
-                            n_points_per_month_loc = n()) %>%
-    filter_nested(., user = user,
-                  n_days_per_month_loc >= threshold_n_days_per_month_loc & n_points_per_month_loc <= threshold_n_points_per_month_loc)
-  
+    summarise_double_nested(.,
+      nest_cols = colnmaes_to_nest,
+      n_days_per_month_loc = n_distinct(day),
+      n_points_per_month_loc = n()
+    ) %>%
+    filter_nested(.,
+      user = user,
+      n_days_per_month_loc >= threshold_n_days_per_month_loc & n_points_per_month_loc <= threshold_n_points_per_month_loc
+    )
+
   # get the top 5 most frequently visited location
   df_user_nested <- df_cleaned %>% unnest_nested(., data)
   colnames_nested_data <- df_user_nested$data[[1]] %>% names()
   colnmaes_to_nest <- colnames_nested_data[-which(colnames_nested_data == location)]
   df_top5_locs <- df_user_nested %>%
-    summarise_double_nested(., nest_cols = colnmaes_to_nest,
-                            n_days_loc = n_distinct(ymd),
-                            n_points_loc = n()) %>%
+    summarise_double_nested(.,
+      nest_cols = colnmaes_to_nest,
+      n_days_loc = n_distinct(ymd),
+      n_points_loc = n()
+    ) %>%
     arrange_nested(., n_days_loc, n_points_loc) %>%
-    top_n_nested(., n = 5, wt = "n_points_loc") %>% 
+    top_n_nested(., n = 5, wt = "n_points_loc") %>%
     unnest_verbose()
-  
-  # extract the start data point per day for each location 
+
+  # extract the start data point per day for each location
   colnames_nested_data <- df_top5_locs$data[[1]] %>% names()
   colnmaes_to_nest <- colnames_nested_data[-which(colnames_nested_data %in% c("ymd"))] # nest to per user per location per day
   df_with_start_point <- df_top5_locs %>%
-    summarise_double_nested(., nest_cols = colnmaes_to_nest,
-                            start_point = min({{timestamp_expr}})) # get the start data point of a day 
-  
+    summarise_double_nested(.,
+      nest_cols = colnmaes_to_nest,
+      start_point = min({{ timestamp_expr }})
+    ) # get the start data point of a day
+
   # add home or work label to top 5 most frequently visit places: 1 means home and 0 means work
-  time_line <- chron::times("17:00:00") %>% as.numeric() 
-  df_with_home_type <- df_with_start_point %>% 
-    mutate_nested(start_time = format(start_point, format = "%H:%M:%S") %>% chron::times() %>% as.numeric()) %>% 
-    summarise_nested(mean_start_time = mean(start_time), 
-                     sd_start_time = sd(start_time)) %>% 
+  time_line <- chron::times("17:00:00") %>% as.numeric()
+  df_with_home_type <- df_with_start_point %>%
+    mutate_nested(start_time = format(start_point, format = "%H:%M:%S") %>% chron::times() %>% as.numeric()) %>%
+    summarise_nested(
+      mean_start_time = mean(start_time),
+      sd_start_time = sd(start_time)
+    ) %>%
     mutate_verbose(location_type = if_else(mean_start_time < time_line & sd_start_time <= 0.175, 0, 1)) %>% # 1 means home and 0 means work
     filter(location_type != 0) %>% # remove work locations
-    nest_verbose(-{{user_expr}}) %>% 
+    nest_verbose(-{{ user_expr }}) %>%
     summarise_nested(n_home = n()) # get number of returned home locations
-  
-  ## for one home users, directly extract the location as their homes 
+
+  ## for one home users, directly extract the location as their homes
   df_one_home <- df_with_home_type %>% filter(n_home == 1)
-  
-  output <- extract_location(df_one_home %>% dplyr::select(-n_home), user = user, 
-                             location = location, show_n_loc = 1, keep_score = keep_score, desc(n_days_loc), desc(n_points_loc))
-  
+
+  output <- extract_location(df_one_home %>% dplyr::select(-n_home),
+    user = user,
+    location = location, show_n_loc = 1, keep_score = keep_score, desc(n_days_loc), desc(n_points_loc)
+  )
+
   ## for multiple home users, take the top 2 most frequently visit places and do the comparision
-  df_multiple_homes <- df_with_home_type %>% 
-    filter(n_home > 1) %>% 
+  df_multiple_homes <- df_with_home_type %>%
+    filter(n_home > 1) %>%
     arrange_nested(., desc(n_days_loc), desc(n_points_loc)) # order the location by number of days and number of data points in descending order
-  
-  
-  decide_home <- function(data){
-    homes <- data %>% pull({{location_expr}})
+
+
+  decide_home <- function(data) {
+    homes <- data %>% pull({{ location_expr }})
     home1 <- homes[1]
-    home1_neighbors <- df_neighbors %>% filter({{location_expr}} == home1) %>% pull(neighbor) %>% unlist() # get the neighboring locations of first home 
+    home1_neighbors <- df_neighbors %>%
+      filter({{ location_expr }} == home1) %>%
+      pull(neighbor) %>%
+      unlist() # get the neighboring locations of first home
     home2 <- homes[2]
     # if home 1 and home 2 are neighboring location, extract home1 (one with the higher number of days, and the higher number of data points)
-    if(home2 %in% home1_neighbors){
+    if (home2 %in% home1_neighbors) {
       return(home1)
-    } else{
+    } else {
       n_days_homes <- data %>% pull(n_days_loc)
       n_days_home1 <- n_days_homes[1]
       n_days_home2 <- n_days_homes[2]
       # if the most frequently visited location covers more than 75 percent of the days a user stayed at the two most frequently visited locations, return the place as home
-      if((n_days_home1/(n_days_home1 + n_days_home2)) > 0.75){
+      if ((n_days_home1 / (n_days_home1 + n_days_home2)) > 0.75) {
         return(home1)
-      } else{
+      } else {
         sd_homes <- data %>% pull(sd_start_time)
         sd_home1 <- sd_homes[1]
         sd_home2 <- sd_homes[2]
-        # the location with the larger standard deviation is classified as the home 
-        if(sd_home1 > sd_home2){
+        # the location with the larger standard deviation is classified as the home
+        if (sd_home1 > sd_home2) {
           return(home1)
-        } else{
+        } else {
           return(home2)
         }
       }
     }
   }
-  
-  if(keep_score){
-    df_multiple_homes %>% 
-      dplyr::select(-n_home) %>% 
-      mutate_verbose(., home = map_chr(data, function(x) decide_home(x))) %>% 
+
+  if (keep_score) {
+    df_multiple_homes %>%
+      dplyr::select(-n_home) %>%
+      mutate_verbose(., home = map_chr(data, function(x) decide_home(x))) %>%
       bind_rows(., output)
-  } else{
-    df_multiple_homes %>% 
-      mutate_verbose(., home = map_chr(data, function(x) decide_home(x))) %>% 
-      dplyr::select(u_id, home) %>% 
+  } else {
+    df_multiple_homes %>%
+      mutate_verbose(., home = map_chr(data, function(x) decide_home(x))) %>%
+      dplyr::select(u_id, home) %>%
       bind_rows(., output)
   }
-  
 }
diff --git a/R/mutate.R b/R/mutate.R
index bc8fbde..66f4509 100644
--- a/R/mutate.R
+++ b/R/mutate.R
@@ -1,107 +1,106 @@
-#' Add new variable 
-#' 
-#' Add new variable and perserves existing to dataframe 
-#' 
-#' @param df A dataframe 
+#' Add new variable
+#'
+#' Add new variable and perserves existing to dataframe
+#'
+#' @param df A dataframe
 #' @param ... Name-value pairs of expressions
-#' 
+#'
 #' @importFrom emo ji
 #' @importFrom dplyr setdiff
-#' 
-#' 
+#'
+#'
 #' @export
-mutate_verbose <- function(df, ...){
+mutate_verbose <- function(df, ...) {
   if (!is.data.frame(df)) {
     stop(paste(emo::ji("bomb"), "Dataset is not a dataframe!"))
   }
-  
+
   var_expr <- enquos(..., .named = TRUE)
-  
+
   message(paste(emo::ji("hammer_and_wrench"), "Start adding..."))
   start.time <- Sys.time()
   output <- df %>% mutate(!!!var_expr)
   end.time <- Sys.time()
-  time.taken <-  difftime(end.time, start.time, units = "secs") %>% round(., 3)
-  
+  time.taken <- difftime(end.time, start.time, units = "secs") %>% round(., 3)
+
   colnames_original <- names(df)
   colnames_new <- names(output)
-  colnames_added <- dplyr::setdiff(colnames_new, colnames_original)   
-  
+  colnames_added <- dplyr::setdiff(colnames_new, colnames_original)
+
   message(paste(emo::ji("white_check_mark"), "Finish adding! There are", length(colnames_added), "new added variables:", paste(colnames_added, collapse = ", ")))
-  
-  if(time.taken > 60){
-    time.taken <- round(time.taken/60, 2)
+
+  if (time.taken > 60) {
+    time.taken <- round(time.taken / 60, 2)
     message(paste(emo::ji("hourglass"), "Adding time:", time.taken, "mins"))
-  }else{
+  } else {
     message(paste(emo::ji("hourglass"), "Adding time:", time.taken, "secs"))
   }
   message("\n")
-  
+
   return(output)
 }
 
 #' Add new variables within a nested dataframe
-#' 
+#'
 #' Add new variables and preserves existing within a nested dataframe
-#' @param df A nested dataframe 
+#' @param df A nested dataframe
 #' @param ... Name-value pairs of expressions
-#' 
+#'
 #' @importFrom emo ji
 #' @importFrom purrr map
 #' @importFrom dplyr setdiff
 
 #' @export
-mutate_nested <- function(df, ...){
-  
-  if(!is.list(df[ , grepl("^data$", names(df))])){
+mutate_nested <- function(df, ...) {
+  if (!is.list(df[, grepl("^data$", names(df))])) {
     stop(paste(emo::ji("bomb"), "Dataset is not nested!"))
   }
-  
+
   var_expr <- enquos(...)
-  colname_nested_data <- names(df[ , grepl("^data$", names(df))])
+  colname_nested_data <- names(df[, grepl("^data$", names(df))])
 
-  add_with_progress <- function(data){
+  add_with_progress <- function(data) {
     pb$tick()$print()
-    data %>% 
+    data %>%
       mutate(!!!var_expr)
   }
-  
+
   # create the progress bar
   pb <- dplyr::progress_estimated(nrow(df))
-  
-  
+
+
   message(paste(emo::ji("hammer_and_wrench"), "Start adding variable(s)..."))
   start.time <- Sys.time()
   output <- df %>%
-    mutate({{colname_nested_data}} := purrr::map(df[[colname_nested_data]], ~add_with_progress(.))) 
+    mutate({{ colname_nested_data }} := purrr::map(df[[colname_nested_data]], ~ add_with_progress(.)))
   end.time <- Sys.time()
-  time.taken <-  difftime(end.time, start.time, units = "secs") %>% round(., 3)
-  
+  time.taken <- difftime(end.time, start.time, units = "secs") %>% round(., 3)
+
   colnames_original <- df[[colname_nested_data]][[1]] %>% names()
   colnames_new <- output[[colname_nested_data]][[1]] %>% names()
-  colnames_added <- dplyr::setdiff(colnames_new, colnames_original)   
-  
+  colnames_added <- dplyr::setdiff(colnames_new, colnames_original)
+
   message("\n")
   message(paste(emo::ji("white_check_mark"), "Finish adding! There are", length(colnames_added), "new added variables:", paste(colnames_added, collapse = ", ")))
-  
-  if(time.taken > 60){
-    time.taken <- round(time.taken/60, 2)
+
+  if (time.taken > 60) {
+    time.taken <- round(time.taken / 60, 2)
     message(paste(emo::ji("hourglass"), "Adding time:", time.taken, "mins"))
-  }else{
+  } else {
     message(paste(emo::ji("hourglass"), "Adding time:", time.taken, "secs"))
   }
   message("\n")
-  
+
   return(output)
 }
 
 #' Calculate the proportion of categories for a variable within a nested dataframe
-#' 
+#'
 #' Calculate the proportion of categories for a variable within a nested dataframe and convert each categories to a new variable adding to the dataframe
-#' @param df A nested dataframe 
-#' @param var Name of column to calculate 
-#' 
-#' 
+#' @param df A nested dataframe
+#' @param var Name of column to calculate
+#'
+#'
 #' @importFrom emo ji
 #' @importFrom dplyr select
 #' @importFrom dplyr summarise
@@ -110,130 +109,116 @@ mutate_nested <- function(df, ...){
 #' @importFrom purrr map
 #' @importFrom dplyr setdiff
 
-#' @export 
-prop_factor_nested <- function(df, ...){
-  
-  if(!is.list(df[ , grepl("^data$", names(df))])){
+#' @export
+prop_factor_nested <- function(df, ...) {
+  if (!is.list(df[, grepl("^data$", names(df))])) {
     stop(paste(emo::ji("bomb"), "Dataset is not nested!"))
   }
-  
+
   var_expr <- enquos(...)
-  colname_nested_data <- names(df[ , grepl("^data$", names(df))])
+  colname_nested_data <- names(df[, grepl("^data$", names(df))])
 
-  add_with_progress <- function(data){
+  add_with_progress <- function(data) {
     pb$tick()$print()
-    data %>% 
-      dplyr::select(!!!var_expr) %>% 
-      rownames_to_column(var = "id") %>% 
-      gather(key = "key", value = "value", -id) %>% 
-      group_by(key, value) %>% 
-      dplyr::summarise(n = n()) %>% 
-      group_by(key) %>% 
-      mutate(total = sum(n), 
-             prop = n/total) %>% 
-      ungroup() %>% 
-      dplyr::select(value, prop) %>% 
+    data %>%
+      dplyr::select(!!!var_expr) %>%
+      rownames_to_column(var = "id") %>%
+      gather(key = "key", value = "value", -id) %>%
+      group_by(key, value) %>%
+      dplyr::summarise(n = n()) %>%
+      group_by(key) %>%
+      mutate(
+        total = sum(n),
+        prop = n / total
+      ) %>%
+      ungroup() %>%
+      dplyr::select(value, prop) %>%
       spread(value, prop)
   }
 
-  #create the progress bar
+  # create the progress bar
   pb <- dplyr::progress_estimated(nrow(df))
-  
+
   message(paste(emo::ji("hammer_and_wrench"), "Start calculating proportion..."))
   start.time <- Sys.time()
-  output <- df %>% 
-    dplyr::bind_cols(do.call(dplyr::bind_rows, purrr::map(df[[colname_nested_data]], ~add_with_progress(.)))) %>% 
+  output <- df %>%
+    dplyr::bind_cols(do.call(dplyr::bind_rows, purrr::map(df[[colname_nested_data]], ~ add_with_progress(.)))) %>%
     replace(., is.na(.), 0)
   end.time <- Sys.time()
-  time.taken <-  difftime(end.time, start.time, units = "secs") %>% round(., 3)
-  
+  time.taken <- difftime(end.time, start.time, units = "secs") %>% round(., 3)
+
   colnames_original <- names(df)
   colnames_new <- names(output)
   colnames_added <- dplyr::setdiff(colnames_new, colnames_original)
-  
-  
+
+
   message("\n")
   message(paste(emo::ji("white_check_mark"), "Finish calculating! There are", length(colnames_added), "new calculated variables:", paste(colnames_added, collapse = ", ")))
-  
-  if(time.taken > 60){
-    time.taken <- round(time.taken/60, 2)
+
+  if (time.taken > 60) {
+    time.taken <- round(time.taken / 60, 2)
     message(paste(emo::ji("hourglass"), "Calculating time:", time.taken, "mins"))
-  }else{
+  } else {
     message(paste(emo::ji("hourglass"), "Calculating time:", time.taken, "secs"))
   }
   message("\n")
-  
+
   return(output)
 }
 
 
 #' Add new variable within a double-nested dataframe
-#' 
+#'
 #' Add new variable within a double-nested dataframe
-#' @param df A dataframe 
+#' @param df A dataframe
 #' @param nest_cols A selection of columns to nest in existing list-column
 #' @param ... Name-value pairs of functions
-#' 
+#'
 #' @importFrom emo ji
 #' @importFrom purrr map
-#' 
+#'
 #' @export
-mutate_double_nested <- function(df, nest_cols, ...){
-
-  if(nrow(df) == 0){
+mutate_double_nested <- function(df, nest_cols, ...) {
+  if (nrow(df) == 0) {
     stop(paste(emo::ji("bomb"), "No user left, tune your threshold and try again."))
   }
-  
+
   stopifnot(
-    is.list(df[ , grepl("^data$", names(df))])
+    is.list(df[, grepl("^data$", names(df))])
   )
-  
+
   var_expr <- enquos(..., .named = TRUE)
-  colname_nested_data <- names(df[ , grepl("^data$", names(df))])
+  colname_nested_data <- names(df[, grepl("^data$", names(df))])
 
   add_variable <- . %>% mutate(!!!var_expr)
-  add_column <- . %>% 
-    mutate(data = purrr::map(data, add_variable)) 
-  
-  
-  # double nest 
-  df[[colname_nested_data]] <- purrr::map(df[[colname_nested_data]], ~.x %>% nest(data = nest_cols))
-  
+  add_column <- . %>%
+    mutate(data = purrr::map(data, add_variable))
+
+
+  # double nest
+  df[[colname_nested_data]] <- purrr::map(df[[colname_nested_data]], ~ .x %>% nest(data = nest_cols))
+
   message(paste(emo::ji("hammer_and_wrench"), "Start adding values..."))
   start.time <- Sys.time()
-  output <- df %>% 
-    mutate({{colname_nested_data}} := purrr::map(df[[colname_nested_data]], add_column))
+  output <- df %>%
+    mutate({{ colname_nested_data }} := purrr::map(df[[colname_nested_data]], add_column))
   end.time <- Sys.time()
-  time.taken <-  difftime(end.time, start.time, units = "secs") %>% round(., 3)
-  
+  time.taken <- difftime(end.time, start.time, units = "secs") %>% round(., 3)
+
   colnames_original <- df[[colname_nested_data]][[1]] %>% names()
   colnames_new <- output[[colname_nested_data]][[1]] %>% names()
   colnames_new <- colnames_new[-which(colnames_new == "data")]
   colnames_added <- dplyr::setdiff(colnames_new, colnames_original)
-  
+
   message(paste(emo::ji("white_check_mark"), "Finish adding! There are", length(colnames_added), "new added variables:", paste(colnames_added, collapse = ", ")))
-  
-  if(time.taken > 60){
-    time.taken <- round(time.taken/60, 2)
+
+  if (time.taken > 60) {
+    time.taken <- round(time.taken / 60, 2)
     message(paste(emo::ji("hourglass"), "Adding time:", time.taken, "mins"))
-  }else{
+  } else {
     message(paste(emo::ji("hourglass"), "Adding time:", time.taken, "secs"))
   }
   message("\n")
-  
+
   return(output)
 }
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/R/nest.R b/R/nest.R
index 69741b3..993e208 100644
--- a/R/nest.R
+++ b/R/nest.R
@@ -1,245 +1,230 @@
-#' Nest dataframe 
-#' 
+#' Nest dataframe
+#'
 #' Nesting creates a list-column of dataframe
-#' @param df A dataframe 
-#' @param ... A selection of columns. 
-#' 
-#' 
+#' @param df A dataframe
+#' @param ... A selection of columns.
+#'
+#'
 #' @importFrom emo ji
 #' @importFrom dplyr progress_estimated
-#' @importFrom purrr map 
-#' 
+#' @importFrom purrr map
+#'
 #' @export
-nest_verbose <- function(df, ...){
-  
+nest_verbose <- function(df, ...) {
   if (!is.data.frame(df)) {
     stop(paste(emo::ji("bomb"), "Dataset is not a dataframe!"))
   }
-  
+
   var_expr <- enquos(..., .named = TRUE)
-  
+
   message(paste(emo::ji("hammer_and_wrench"), "Start nesting..."))
   start.time <- Sys.time()
   output <- df %>% nest_legacy(!!!var_expr)
   end.time <- Sys.time()
-  time.taken <-  difftime(end.time, start.time, units = "secs") %>% round(., 3)
-  
+  time.taken <- difftime(end.time, start.time, units = "secs") %>% round(., 3)
+
   message(paste(emo::ji("white_check_mark"), "Finish nesting!"))
-  
-  if(time.taken > 60){
-    time.taken <- round(time.taken/60, 2)
+
+  if (time.taken > 60) {
+    time.taken <- round(time.taken / 60, 2)
     message(paste(emo::ji("hourglass"), "Nesting time:", time.taken, "mins"))
-  }else{
+  } else {
     message(paste(emo::ji("hourglass"), "Nesting time:", time.taken, "secs"))
   }
   message("\n")
-  
+
   return(output)
 }
 
 
 #' Unnest dataframe
-#' 
+#'
 #' Unnesting makes each element of the list its own row.
 #' @param df A dataframe
-#' @param ... Specification of columns to unnest. 
-#' 
+#' @param ... Specification of columns to unnest.
+#'
 #' @importFrom emo ji
 #' @export
-unnest_verbose <- function(df, ...){
-  
+unnest_verbose <- function(df, ...) {
   if (!is.data.frame(df)) {
     stop(paste(emo::ji("bomb"), "Dataset is not a dataframe!"))
   }
-  
+
   var_expr <- enquos(..., .named = TRUE)
-  
+
   message(paste(emo::ji("hammer_and_wrench"), "Start unnesting..."))
   start.time <- Sys.time()
   output <- suppressWarnings(
-    df %>% unnest_legacy(!!!var_expr))
+    df %>% unnest_legacy(!!!var_expr)
+  )
   end.time <- Sys.time()
-  time.taken <-  difftime(end.time, start.time, units = "secs") %>% round(., 3)
-  
+  time.taken <- difftime(end.time, start.time, units = "secs") %>% round(., 3)
+
   message(paste(emo::ji("white_check_mark"), "Finish unnesting!"))
-  
-  if(time.taken > 60){
-    time.taken <- round(time.taken/60, 2)
+
+  if (time.taken > 60) {
+    time.taken <- round(time.taken / 60, 2)
     message(paste(emo::ji("hourglass"), "Unnesting time:", time.taken, "mins"))
-  }else{
+  } else {
     message(paste(emo::ji("hourglass"), "Unnesting time:", time.taken, "secs"))
   }
   message("\n")
-  
+
   return(output)
 }
 
 
-#' Unnest within a nested dataframe 
-#' 
+#' Unnest within a nested dataframe
+#'
 #' Flatten list-column to regular columns inside a nested dataframe
-#' 
+#'
 #' @param df A dataframe
-#' @param ... Specification of columns to unnest 
-#' 
+#' @param ... Specification of columns to unnest
+#'
 #' @importFrom emo ji
 #' @importFrom dplyr progress_estimated
 #' @importFrom purrr map
-#' 
+#'
 #' @export
-unnest_nested <- function(df, ...){
-  if(!is.list(df[ , grepl("^data$", names(df))])){
+unnest_nested <- function(df, ...) {
+  if (!is.list(df[, grepl("^data$", names(df))])) {
     stop(paste(emo::ji("bomb"), "Error: Dataset is not nested!"))
   }
-  
+
   var_expr <- enquos(..., .named = TRUE)
-  colname_nested_data <- names(df[ , grepl("^data$", names(df))])
-  
-  
-  unnest_with_progress <- function(data){
+  colname_nested_data <- names(df[, grepl("^data$", names(df))])
+
+
+  unnest_with_progress <- function(data) {
     pb$tick()$print()
     suppressWarnings(
       data %>% unnest_legacy(!!!var_expr)
     )
   }
-  #create the progress bar
+  # create the progress bar
   pb <- dplyr::progress_estimated(nrow(df))
-  
+
   message(paste(emo::ji("hammer_and_wrench"), "Start unnesting..."))
   start.time <- Sys.time()
   output <- df %>%
-    dplyr::mutate({{colname_nested_data}} := purrr::map(df[[colname_nested_data]], ~unnest_with_progress(.)))
+    dplyr::mutate({{ colname_nested_data }} := purrr::map(df[[colname_nested_data]], ~ unnest_with_progress(.)))
   end.time <- Sys.time()
-  time.taken <-  difftime(end.time, start.time, units = "secs") %>% round(., 3)
-  
+  time.taken <- difftime(end.time, start.time, units = "secs") %>% round(., 3)
+
   message("\n")
   message(paste(emo::ji("white_check_mark"), "Finish unnesting!"))
-  
-  if(time.taken > 60){
-    time.taken <- round(time.taken/60, 2)
+
+  if (time.taken > 60) {
+    time.taken <- round(time.taken / 60, 2)
     message(paste(emo::ji("hourglass"), "Unnesting time:", time.taken, "mins"))
-  }else{
+  } else {
     message(paste(emo::ji("hourglass"), "Unnesting time:", time.taken, "secs"))
   }
   message("\n")
-  
+
   return(output)
 }
 
 
 
-#' Nest within a nested dataframe 
-#' 
+#' Nest within a nested dataframe
+#'
 #' Double nesting creates a list-column of nested dataframe
-#' @param df A nested dataframe 
-#' @param ... A selection of columns. 
-#' 
+#' @param df A nested dataframe
+#' @param ... A selection of columns.
+#'
 #' @importFrom emo ji
 #' @importFrom dplyr progress_estimated
 #' @importFrom purrr map
 #' @export
-nest_nested <- function(df, ...){
-  
-  if(!is.list(df[ , grepl("^data$", names(df))])){
+nest_nested <- function(df, ...) {
+  if (!is.list(df[, grepl("^data$", names(df))])) {
     stop(paste(emo::ji("bomb"), "Error: Dataset is not nested!"))
   }
-  
+
   var_expr <- enquos(..., .named = TRUE)
-  colname_nested_data <- names(df[ , grepl("^data$", names(df))])
+  colname_nested_data <- names(df[, grepl("^data$", names(df))])
 
-  nest_with_progress <- function(data){
+  nest_with_progress <- function(data) {
     pb$tick()$print()
     suppressWarnings(
       data %>% nest_legacy(!!!var_expr)
-      )
+    )
   }
-  
-  #create the progress bar
+
+  # create the progress bar
   pb <- dplyr::progress_estimated(nrow(df))
-  
-  
+
+
   message(paste(emo::ji("hammer_and_wrench"), "Start nesting..."))
   start.time <- Sys.time()
   output <- df %>%
-    dplyr::mutate({{colname_nested_data}} := purrr::map(df[[colname_nested_data]], ~nest_with_progress(.)))
+    dplyr::mutate({{ colname_nested_data }} := purrr::map(df[[colname_nested_data]], ~ nest_with_progress(.)))
   end.time <- Sys.time()
-  time.taken <-  difftime(end.time, start.time, units = "secs") %>% round(., 3)
-  
+  time.taken <- difftime(end.time, start.time, units = "secs") %>% round(., 3)
+
   message("\n")
   message(paste(emo::ji("white_check_mark"), "Finish nesting!"))
-  
-  if(time.taken > 60){
-    time.taken <- round(time.taken/60, 2)
+
+  if (time.taken > 60) {
+    time.taken <- round(time.taken / 60, 2)
     message(paste(emo::ji("hourglass"), "Nesting time:", time.taken, "mins"))
-  }else{
+  } else {
     message(paste(emo::ji("hourglass"), "Nesting time:", time.taken, "secs"))
   }
   message("\n")
-  
+
   return(output)
 }
 
 
 
 #' Unnest within a double-nested dataframe
-#' 
+#'
 #' Unnesting makes each element of the list its own row.
 #' @param df A nested dataframe
-#' @param ... Specification of columns to unnest. 
-#' 
+#' @param ... Specification of columns to unnest.
+#'
 #' @importFrom emo ji
 #' @importFrom dplyr progress_estimated
 #' @importFrom purrr map
-#' 
-#' @export 
-unnest_double_nested <- function(df, ...){
-  
-  if(!is.list(df[ , grepl("^data$", names(df))])){
+#'
+#' @export
+unnest_double_nested <- function(df, ...) {
+  if (!is.list(df[, grepl("^data$", names(df))])) {
     stop(paste(emo::ji("bomb"), "Error: Dataset is not nested!"))
   }
-  
+
   var_expr <- enquos(..., .named = TRUE)
-  colname_nested_data <- names(df[ , grepl("^data$", names(df))])
-  
-  unnest_with_progress <- function(data){
+  colname_nested_data <- names(df[, grepl("^data$", names(df))])
+
+  unnest_with_progress <- function(data) {
     pb$tick()$print()
     suppressWarnings(
       data %>% unnest_legacy(!!!var_expr)
     )
   }
-  #create the progress bar
+  # create the progress bar
   pb <- dplyr::progress_estimated(nrow(df))
-  
+
   message(paste(emo::ji("hammer_and_wrench"), "Start unnesting..."))
   start.time <- Sys.time()
   output <- df %>%
-    dplyr::mutate({{colname_nested_data}} := purrr::map(df[[colname_nested_data]], ~unnest_with_progress(.))) %>% 
+    dplyr::mutate({{ colname_nested_data }} := purrr::map(df[[colname_nested_data]], ~ unnest_with_progress(.))) %>%
     unnest_legacy()
   end.time <- Sys.time()
-  time.taken <-  difftime(end.time, start.time, units = "secs") %>% round(., 3)
-  
+  time.taken <- difftime(end.time, start.time, units = "secs") %>% round(., 3)
+
   message("\n")
   message(paste(emo::ji("white_check_mark"), "Finish unnesting!"))
-  
-  if(time.taken > 60){
-    time.taken <- round(time.taken/60, 2)
+
+  if (time.taken > 60) {
+    time.taken <- round(time.taken / 60, 2)
     message(paste(emo::ji("hourglass"), "Unnesting time:", time.taken, "mins"))
-  }else{
+  } else {
     message(paste(emo::ji("hourglass"), "Unnesting time:", time.taken, "secs"))
   }
   message("\n")
-  
+
   return(output)
 }
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/R/remove_top_users.R b/R/remove_top_users.R
index b4f7a9d..8e8bcd6 100644
--- a/R/remove_top_users.R
+++ b/R/remove_top_users.R
@@ -1,63 +1,64 @@
 #' Remove top N percent of active users based on the frequency of data points per user
-#' 
-#' Remove top N percent of active users based on the frequency of data points per user. 
-#' Although the majority of users are real people, some accounts are run by algorithms or 'bots', whereas others can be considered as spam accounts. 
-#' Removing a certain top N percent of active users is an oft-used approach to remove such accounts and reduce the number of such users in the final dataset. 
-#' 
-#' @param df A dataframe with columns for the user id, counts point per user 
+#'
+#' Remove top N percent of active users based on the frequency of data points per user.
+#' Although the majority of users are real people, some accounts are run by algorithms or 'bots', whereas others can be considered as spam accounts.
+#' Removing a certain top N percent of active users is an oft-used approach to remove such accounts and reduce the number of such users in the final dataset.
+#'
+#' @param df A dataframe with columns for the user id, counts point per user
 #' @param user Name of column that holds unique identifier for each user
-#' @param counts Name of column that holds the data points frequency for each user         
+#' @param counts Name of column that holds the data points frequency for each user
 #' @param topNpct_user A decimal number that represent the certain percentage of users to remove
-#' @param rm_topNpct_user Option to remove or keep the top N percent active users 
-#' 
+#' @param rm_topNpct_user Option to remove or keep the top N percent active users
+#'
 #' @importFrom rlang has_name
 #' @importFrom rlang sym
 #' @importFrom emo ji
 #' @importFrom dplyr n_distinct
 #' @importFrom dplyr slice
-#' 
+#'
 #' @export
-remove_top_users <- function(df, user = "u_id", counts = "n_points", topNpct_user = 1, rm_topNpct_user = F){
-  
+remove_top_users <- function(df, user = "u_id", counts = "n_points", topNpct_user = 1, rm_topNpct_user = F) {
   if (!rlang::has_name(df, user)) {
     stop(paste(emo::ji("bomb"), "User column does not exist!"))
   }
   if (!rlang::has_name(df, counts)) {
     stop(paste(emo::ji("bomb"), "The column of counts of data points for each user does not exist!"))
   }
-  
-  user <- rlang::sym(user) 
+
+  user <- rlang::sym(user)
   counts <- rlang::sym(counts)
-  
-  n_original_users <- df %>% pull({{user}}) %>% dplyr::n_distinct()
+
+  n_original_users <- df %>%
+    pull({{ user }}) %>%
+    dplyr::n_distinct()
   message(paste(emo::ji("bust_in_silhouette"), "There are", n_original_users, "users at this moment."))
-  
-  if(rm_topNpct_user == F){
+
+  if (rm_topNpct_user == F) {
     message(paste(emo::ji("bust_in_silhouette"), "Skip removing active users"))
-    output <- df %>% arrange(desc({{counts}}))
-  }else{
-    message(paste0(emo::ji("hammer_and_wrench"), " Start removing top ",  topNpct_user, "% top active users..."))
+    output <- df %>% arrange(desc({{ counts }}))
+  } else {
+    message(paste0(emo::ji("hammer_and_wrench"), " Start removing top ", topNpct_user, "% top active users..."))
     start.time <- Sys.time()
-    output <- df %>% 
-      arrange(desc({{counts}})) %>% 
-      dplyr::slice(round(n_original_users*(topNpct_user/100)):n()) 
+    output <- df %>%
+      arrange(desc({{ counts }})) %>%
+      dplyr::slice(round(n_original_users * (topNpct_user / 100)):n())
     end.time <- Sys.time()
-    time.taken <-  difftime(end.time, start.time, units = "secs") %>% round(., 3)
-    
-    n_new_users <- output %>% pull({{user}}) %>% n_distinct()
+    time.taken <- difftime(end.time, start.time, units = "secs") %>% round(., 3)
+
+    n_new_users <- output %>%
+      pull({{ user }}) %>%
+      n_distinct()
     n_removed_users <- n_original_users - n_new_users
     message(paste(emo::ji("white_check_mark"), "Finish removing! Removed", n_removed_users, "active users!"))
     message(paste(emo::ji("bust_in_silhouette"), "There are", n_new_users, "users left."))
-    
-    if(time.taken > 60){
-      time.taken <-  round(time.taken/60, 2)
+
+    if (time.taken > 60) {
+      time.taken <- round(time.taken / 60, 2)
       message(paste(emo::ji("hourglass"), "Removing time:", time.taken, "mins"))
-    } else{
+    } else {
       message(paste(emo::ji("hourglass"), "Removing time:", time.taken, "secs"))
     }
     message("\n")
   }
   return(output)
 }
-
-
diff --git a/R/score.R b/R/score.R
index ddf3a27..e5bbd06 100644
--- a/R/score.R
+++ b/R/score.R
@@ -1,152 +1,150 @@
 #' Give a weighted value for one or more variables in a nested dataframe
-#' 
+#'
 #' Give a weighted value for one or more variables in a nested dataframe
 #' @param df A nested dataframe by user
 #' @param user Name of column that holds unique identifier for each user
 #' @param location Name of column that holds unique identifier for each location
-#' @param keep_ori_vars Option to keep or drop original varialbes 
+#' @param keep_ori_vars Option to keep or drop original varialbes
 #' @param ... Name-value pairs of expression
-#' 
+#'
 #' @importFrom rlang has_name
-#' @importFrom rlang sym 
+#' @importFrom rlang sym
 #' @importFrom emo ji
-#' @importFrom dplyr select 
+#' @importFrom dplyr select
 #' @importFrom dplyr setdiff
 #' @importFrom dplyr progress_estimated
 #' @importFrom purrr map
-#' 
+#'
 #' @export
-score_nested <- function(df, user = "u_id", location = "loc_id", keep_original_vars = F, ...){
+score_nested <- function(df, user = "u_id", location = "loc_id", keep_original_vars = F, ...) {
   if (!rlang::has_name(df, user)) {
     stop(paste(emo::ji("bomb"), "User column does not exist!"))
   }
-  
+
   if (!rlang::has_name(df, location)) {
     stop(paste(emo::ji("bomb"), "Location column does not exist!"))
   }
-  
+
   var_expr <- enquos(..., .named = TRUE)
-  user <- rlang::sym(user) 
+  user <- rlang::sym(user)
   location <- rlang::sym(location)
-  
+
   df_nest <- df %>%
-    nest_legacy(-({{user}}))
-  
-  colname_nested_data <- names(df_nest[ , grepl("^data$", names(df_nest))])
-  
-  transmute_with_progress <- function(data){
+    nest_legacy(-({{ user }}))
+
+  colname_nested_data <- names(df_nest[, grepl("^data$", names(df_nest))])
+
+  transmute_with_progress <- function(data) {
     pb$tick()$print()
-    transmute_column <- data %>% 
+    transmute_column <- data %>%
       transmute(!!!var_expr)
-    
-    data %>% 
-      dplyr::select({{location}}) %>% 
+
+    data %>%
+      dplyr::select({{ location }}) %>%
       bind_cols(transmute_column)
   }
-  
-  add_with_progress <- function(data){
+
+  add_with_progress <- function(data) {
     pb$tick()$print()
-    data %>% 
+    data %>%
       dplyr::mutate(!!!var_expr)
   }
-  
-  #create the progress bar
+
+  # create the progress bar
   pb <- dplyr::progress_estimated(nrow(df))
- 
-  
+
+
   message(paste(emo::ji("hammer_and_wrench"), "Start scoring ..."))
   start.time <- Sys.time()
-  if(keep_original_vars){
-    output <- df_nest %>% 
-      dplyr::mutate({{colname_nested_data}} := purrr::map(df_nest[[colname_nested_data]], ~add_with_progress(.)))
-  }else{
-    output <- df_nest %>% 
-      dplyr::mutate({{colname_nested_data}} := purrr::map(df_nest[[colname_nested_data]], ~transmute_with_progress(.)))
+  if (keep_original_vars) {
+    output <- df_nest %>%
+      dplyr::mutate({{ colname_nested_data }} := purrr::map(df_nest[[colname_nested_data]], ~ add_with_progress(.)))
+  } else {
+    output <- df_nest %>%
+      dplyr::mutate({{ colname_nested_data }} := purrr::map(df_nest[[colname_nested_data]], ~ transmute_with_progress(.)))
   }
   end.time <- Sys.time()
-  time.taken <-  difftime(end.time, start.time, units = "secs") %>% round(., 3)
-  
+  time.taken <- difftime(end.time, start.time, units = "secs") %>% round(., 3)
+
   colnames_original <- df_nest[[colname_nested_data]][[1]] %>% names()
   colnames_new <- output[[colname_nested_data]][[1]] %>% names()
   colnames_added <- dplyr::setdiff(colnames_new, colnames_original)
   message("\n")
   message(paste(emo::ji("white_check_mark"), "Finish scoring! There are", length(colnames_added), "new added variables:", paste(colnames_added, collapse = ", ")))
-  
-  if(time.taken > 60){
-    time.taken <-  round(time.taken/60, 2)
+
+  if (time.taken > 60) {
+    time.taken <- round(time.taken / 60, 2)
     message(paste(emo::ji("hourglass"), "Scoring time:", time.taken, "mins"))
-  } else{
+  } else {
     message(paste(emo::ji("hourglass"), "Scoring time:", time.taken, "secs"))
   }
   message("\n")
-  
+
   return(output)
 }
 
 
 #' Summarises all scored columns and return one single summary score per row
-#' 
+#'
 #' summarises all scored columns and return one single summary score per row
-#' @param df A  dataframe 
+#' @param df A  dataframe
 #' @param user Name of column that holds unique identifier for each user
 #' @param location Name of column that holds unique identifier for each location
 #' @param ... A selection of columns to sum
-#' 
-#' 
+#'
+#'
 #' @importFrom rlang has_name
-#' @importFrom rlang sym 
+#' @importFrom rlang sym
 #' @importFrom emo ji
-#' @importFrom dplyr select 
+#' @importFrom dplyr select
 #' @importFrom dplyr setdiff
 #' @importFrom dplyr progress_estimated
 #' @importFrom purrr map
-#' @export 
-score_summary <- function(df, user = "u_id", location = "loc_id", ...){
+#' @export
+score_summary <- function(df, user = "u_id", location = "loc_id", ...) {
   if (!rlang::has_name(df, user)) {
     stop(paste(emo::ji("bomb"), "User column does not exist!"))
   }
-  
-  user <- rlang::sym(user) 
+
+  user <- rlang::sym(user)
   location <- rlang::sym(location)
   var_expr <- enquos(...)
-  
-  colname_nested_data <- names(df[,grepl("^data$", names(df))])
- 
-  sum_score_with_progress <- function(data){
+
+  colname_nested_data <- names(df[, grepl("^data$", names(df))])
+
+  sum_score_with_progress <- function(data) {
     pb$tick()$print()
-    data_sub <- data %>% dplyr::select({{location}}, !!!var_expr) 
-    
+    data_sub <- data %>% dplyr::select({{ location }}, !!!var_expr)
+
     location_index <- which(colnames(data_sub) == location)
-    data_sub %>% 
-      dplyr::mutate(score = rowSums(.[ , -c(location_index)]))
+    data_sub %>%
+      dplyr::mutate(score = rowSums(.[, -c(location_index)]))
   }
-   
+
   # create the progress bar
   pb <- dplyr::progress_estimated(nrow(df))
-  
-  
+
+
   message(paste(emo::ji("hammer_and_wrench"), "Start summing scores..."))
   start.time <- Sys.time()
-  output <- df %>% 
-    dplyr::mutate({{colname_nested_data}} := purrr::map(df[[colname_nested_data]], ~sum_score_with_progress(.)))
+  output <- df %>%
+    dplyr::mutate({{ colname_nested_data }} := purrr::map(df[[colname_nested_data]], ~ sum_score_with_progress(.)))
   end.time <- Sys.time()
-  time.taken <-  difftime(end.time, start.time, units = "secs") %>% round(., 3)
-  
+  time.taken <- difftime(end.time, start.time, units = "secs") %>% round(., 3)
+
   colnames_original <- df[[colname_nested_data]][[1]] %>% names()
   colnames_new <- output[[colname_nested_data]][[1]] %>% names()
   colnames_added <- dplyr::setdiff(colnames_new, colnames_original)
   message("\n")
   message(paste(emo::ji("white_check_mark"), "Finish summing! There are", length(colnames_added), "new added variables:", paste(colnames_added, collapse = ", ")))
-  
-  if(time.taken > 60){
-    time.taken <-  round(time.taken/60, 2)
+
+  if (time.taken > 60) {
+    time.taken <- round(time.taken / 60, 2)
     message(paste(emo::ji("hourglass"), "Summing time:", time.taken, "mins"))
-  } else{
+  } else {
     message(paste(emo::ji("hourglass"), "Summing time:", time.taken, "secs"))
   }
   message("\n")
-  
+
   return(output)
 }
-
-
diff --git a/R/spread.R b/R/spread.R
index c942818..6217862 100644
--- a/R/spread.R
+++ b/R/spread.R
@@ -1,64 +1,53 @@
 #' Spread a key-value pair across multiple columns
-#' 
-#' Spread a key-value pair across multiple columns in nested dataframe 
-#' @param df A nested dataframe 
+#'
+#' Spread a key-value pair across multiple columns in nested dataframe
+#' @param df A nested dataframe
 #' @param var_key Column name or position
 #' @param var_value Column name or position
-#' 
-#' 
+#'
+#'
 #' @importFrom rlang sym
 #' @importFrom dplyr progress_estimated
 #' @importFrom dplyr setdiff
 #' @importFrom purrr map
 #' @importFrom emo ji
 #' @export
-spread_nested <- function(df, key_var, value_var){
-  
-  key_var_expr <-  rlang::sym(key_var)
-  value_var_expr <-  rlang::sym(value_var)
-  colname_nested_data <- names(df[ , grepl("^data$", names(df))])
+spread_nested <- function(df, key_var, value_var) {
+  key_var_expr <- rlang::sym(key_var)
+  value_var_expr <- rlang::sym(value_var)
+  colname_nested_data <- names(df[, grepl("^data$", names(df))])
 
-  spread_with_progress <- function(data){
+  spread_with_progress <- function(data) {
     pb$tick()$print()
     data %>%
-      spread(key = {{key_var_expr}}, value = {{value_var_expr}}) %>%
+      spread(key = {{ key_var_expr }}, value = {{ value_var_expr }}) %>%
       replace(., is.na(.), 0)
   }
 
-  #create the progress bar
+  # create the progress bar
   pb <- dplyr::progress_estimated(nrow(df))
-  
+
   message(paste(emo::ji("hammer_and_wrench"), "Start spreading", key_var, "variable..."))
   start.time <- Sys.time()
   output <- df %>%
-    dplyr::mutate({{colname_nested_data}} := purrr::map(df[[colname_nested_data]], ~spread_with_progress(.)))
+    dplyr::mutate({{ colname_nested_data }} := purrr::map(df[[colname_nested_data]], ~ spread_with_progress(.)))
   end.time <- Sys.time()
-  time.taken <-  difftime(end.time, start.time, units = "secs") %>% round(., 3)
-  
+  time.taken <- difftime(end.time, start.time, units = "secs") %>% round(., 3)
+
   colnames_original <- names(df[[colname_nested_data]][[1]])
   colnames_new <- names(output[[colname_nested_data]][[1]])
-  colnames_added <- dplyr::setdiff(colnames_new, colnames_original) 
-  
+  colnames_added <- dplyr::setdiff(colnames_new, colnames_original)
+
   message("\n")
   message(paste(emo::ji("white_check_mark"), "Finish spreading! There are", length(colnames_added), "new added variables:", paste(colnames_added, collapse = ", ")))
-  
-  if(time.taken > 60){
-    time.taken <-  round(time.taken/60, 2)
+
+  if (time.taken > 60) {
+    time.taken <- round(time.taken / 60, 2)
     message(paste(emo::ji("hourglass"), "Spreading time:", time.taken, "mins"))
-  } else{
+  } else {
     message(paste(emo::ji("hourglass"), "Spreading time:", time.taken, "secs"))
   }
   message("\n")
-  
+
   return(output)
 }
-
-
-
-
-
-
-
-
-
-
diff --git a/R/summarise.R b/R/summarise.R
index 647b249..2f93d45 100644
--- a/R/summarise.R
+++ b/R/summarise.R
@@ -1,24 +1,24 @@
 #' Aggregate multiple values to a single value within a nested tibble
-#' 
+#'
 #' Aggregate multiple values to a single value of existing table by name-value paired summary function within a nested tibble
-#' @param df A nested dataframe 
+#' @param df A nested dataframe
 #' @param ... Name-value pairs of summary functions.
-#' 
+#'
 #' @importFrom emo ji
 #' @importFrom dplyr progress_estimated
 #' @importFrom dplyr setdiff
 #' @importFrom purrr map
-#' 
+#'
 #' @export
-summarise_nested <- function(df, ...){
-  if(!is.list(df[ , grepl("^data$", names(df))])){
+summarise_nested <- function(df, ...) {
+  if (!is.list(df[, grepl("^data$", names(df))])) {
     stop(paste(emo::ji("bomb"), "Dataset is not nested!"))
   }
 
   var_expr <- enquos(..., .named = TRUE)
-  colname_nested_data <- names(df[ , grepl("^data$", names(df))])
-  
-  summarise_with_progress <- function(data){
+  colname_nested_data <- names(df[, grepl("^data$", names(df))])
+
+  summarise_with_progress <- function(data) {
     pb$tick()$print()
     data %>% dplyr::summarise(!!!var_expr)
   }
@@ -28,88 +28,84 @@ summarise_nested <- function(df, ...){
   message(paste(emo::ji("hammer_and_wrench"), "Start summarising values..."))
   start.time <- Sys.time()
   output <- df %>%
-    dplyr::mutate(adds = purrr::map(df[[colname_nested_data]], ~summarise_with_progress(.))) %>%
+    dplyr::mutate(adds = purrr::map(df[[colname_nested_data]], ~ summarise_with_progress(.))) %>%
     unnest_legacy(adds)
   end.time <- Sys.time()
-  time.taken <-  difftime(end.time, start.time, units = "secs") %>% round(., 3)
-  
+  time.taken <- difftime(end.time, start.time, units = "secs") %>% round(., 3)
+
   colnames_original <- names(df)
   colnames_new <- names(output)
-  colnames_added <- dplyr::setdiff(colnames_new, colnames_original) 
-  
+  colnames_added <- dplyr::setdiff(colnames_new, colnames_original)
+
   message("\n")
   message(paste(emo::ji("white_check_mark"), "Finish summarising! There are", length(colnames_added), "new added variables:", paste(colnames_added, collapse = ", ")))
-  
-  if(time.taken > 60){
-    time.taken <- round(time.taken/60, 2)
+
+  if (time.taken > 60) {
+    time.taken <- round(time.taken / 60, 2)
     message(paste(emo::ji("hourglass"), "Summarising time:", time.taken, "mins"))
-  } else{
+  } else {
     message(paste(emo::ji("hourglass"), "Summarising time:", time.taken, "secs"))
   }
   message("\n")
-  
+
   return(output)
 }
 
 
 #' Aggregate multiple values to a single value in a double-nested tibble
-#' 
+#'
 #' Create a list-column in existing list-column and aggregate multiple values in created list-column to a single value by name-value paired summary function
-#' @param df A nested dataframe 
+#' @param df A nested dataframe
 #' @param nest_cols A selection of columns to nest in existing list-column
 #' @param ... Name-value pairs of summary functions
-#' 
+#'
 #' @importFrom emo ji
 #' @importFrom dplyr setdiff
 #' @importFrom purrr map
-#' 
-#' @export 
-summarise_double_nested <- function(df, nest_cols, ...){
-  
-  if(nrow(df) == 0){
+#'
+#' @export
+summarise_double_nested <- function(df, nest_cols, ...) {
+  if (nrow(df) == 0) {
     stop(paste(emo::ji("bomb"), "No user left, tune your threshold and try again."))
   }
-  
+
   stopifnot(
-    is.list(df[ , grepl("^data$", names(df))])
+    is.list(df[, grepl("^data$", names(df))])
   )
-  
+
   var_expr <- enquos(..., .named = TRUE)
-  colname_nested_data <- names(df[ , grepl("^data$", names(df))])
-  
+  colname_nested_data <- names(df[, grepl("^data$", names(df))])
+
   cal_column <- . %>% dplyr::summarise(!!!var_expr)
-  
-  add_column <- . %>% 
-      dplyr::mutate(adds = purrr::map(data, cal_column)) %>% 
-      unnest_legacy(adds) 
-  
-  # double nest 
-  df[[colname_nested_data]] <- purrr::map(df[[colname_nested_data]], ~.x %>% nest(data = nest_cols))
-  
+
+  add_column <- . %>%
+    dplyr::mutate(adds = purrr::map(data, cal_column)) %>%
+    unnest_legacy(adds)
+
+  # double nest
+  df[[colname_nested_data]] <- purrr::map(df[[colname_nested_data]], ~ .x %>% nest(data = nest_cols))
+
   message(paste(emo::ji("hammer_and_wrench"), "Start summarising values..."))
   start.time <- Sys.time()
-  output <- df %>% 
-    dplyr::mutate({{colname_nested_data}} := purrr::map(df[[colname_nested_data]], add_column))
+  output <- df %>%
+    dplyr::mutate({{ colname_nested_data }} := purrr::map(df[[colname_nested_data]], add_column))
   end.time <- Sys.time()
-  time.taken <-  difftime(end.time, start.time, units = "secs") %>% round(., 3)
-  
+  time.taken <- difftime(end.time, start.time, units = "secs") %>% round(., 3)
+
   colnames_original <- df[[colname_nested_data]][[1]] %>% names()
   colnames_new <- output[[colname_nested_data]][[1]] %>% names()
   colnames_new <- colnames_new[-which(colnames_new == "data")]
   colnames_added <- dplyr::setdiff(colnames_new, colnames_original)
-  
+
   message(paste(emo::ji("white_check_mark"), "Finish summarising! There are", length(colnames_added), "new added variables:", paste(colnames_added, collapse = ", ")))
-  
-  if(time.taken > 60){
-    time.taken <- round(time.taken/60, 2)
+
+  if (time.taken > 60) {
+    time.taken <- round(time.taken / 60, 2)
     message(paste(emo::ji("hourglass"), "Summarising time:", time.taken, "mins"))
-  } else{
+  } else {
     message(paste(emo::ji("hourglass"), "Summarising time:", time.taken, "secs"))
   }
   message("\n")
-  
+
   return(output)
 }
-
-
-
diff --git a/R/topN.R b/R/topN.R
index 28c26f3..f066582 100644
--- a/R/topN.R
+++ b/R/topN.R
@@ -1,51 +1,44 @@
-#' Select top n rows by certain value 
-#' 
+#' Select top n rows by certain value
+#'
 #' Select top n rows in each group, ordered by wt within a nested dataframe
-#' @param df A nested dataframe 
-#' @param n Number of rows to return 
-#' @param wt The variable to use for ordering 
-#' 
+#' @param df A nested dataframe
+#' @param n Number of rows to return
+#' @param wt The variable to use for ordering
+#'
 #' @importFrom dplyr progress_estimated
 #' @importFrom rlang sym
 #' @importFrom emo ji
 #' @export
-top_n_nested <- function(df, n = 2, wt){
+top_n_nested <- function(df, n = 2, wt) {
   wt_expr <- rlang::sym(wt)
-  colname_nested_data <- names(df[ , grepl("^data$", names(df))])
-  
-  top_n_with_progress <- function(data){
+  colname_nested_data <- names(df[, grepl("^data$", names(df))])
+
+  top_n_with_progress <- function(data) {
     pb$tick()$print()
-    data %>% 
-      top_n(n = n, wt = {{wt_expr}})
+    data %>%
+      top_n(n = n, wt = {{ wt_expr }})
   }
-  
-  #create the progress bar
+
+  # create the progress bar
   pb <- dplyr::progress_estimated(nrow(df))
-  
-  message(paste(emo::ji("hammer_and_wrench"), "Start selecting top", n, "row(s)...")) 
+
+  message(paste(emo::ji("hammer_and_wrench"), "Start selecting top", n, "row(s)..."))
   start.time <- Sys.time()
   output <- df %>%
-    dplyr::mutate({{colname_nested_data}} := purrr::map(df[[colname_nested_data]], ~top_n_with_progress(.))) 
+    dplyr::mutate({{ colname_nested_data }} := purrr::map(df[[colname_nested_data]], ~ top_n_with_progress(.)))
   end.time <- Sys.time()
-  time.taken <-  difftime(end.time, start.time, units = "secs") %>% round(., 3)
-  
+  time.taken <- difftime(end.time, start.time, units = "secs") %>% round(., 3)
+
   message("\n")
   message(paste(emo::ji("white_check_mark"), "Finish selecting top", n, "row(s)!"))
-  
-  if(time.taken > 60){
-    time.taken <- round(time.taken/60, 2)
+
+  if (time.taken > 60) {
+    time.taken <- round(time.taken / 60, 2)
     message(paste(emo::ji("hourglass"), "Selecting time:", time.taken, "mins"))
-  } else{
+  } else {
     message(paste(emo::ji("hourglass"), "Selecting time:", time.taken, "secs"))
   }
   message("\n")
-  
+
   return(output)
 }
-
-
-
-
-
-
-
diff --git a/R/validate_dataset.R b/R/validate_dataset.R
index 7865aaa..577174a 100644
--- a/R/validate_dataset.R
+++ b/R/validate_dataset.R
@@ -1,23 +1,23 @@
-#' Validate input dataset 
-#' 
-#' To make sure the input dataset contains all three necessary variables: 
+#' Validate input dataset
+#'
+#' To make sure the input dataset contains all three necessary variables:
 #' a unique identifier for the person or user
-#' a unique identifier for the spatial locaiton for the data point 
-#' and a timestamp that relects the time the data point was created 
-#' 
+#' a unique identifier for the spatial locaiton for the data point
+#' and a timestamp that relects the time the data point was created
+#'
 #' @param df A dataframe with columns for the user id, location, timestamp
 #' @param user Name of column that holds unique identifier for each user
 #' @param timestamp Name of column that holds specific timestamp for each data point and it should be POSIXct
 #' @param location Name of column that holds unique identifier for each location
 #' @param keep_other_vars Option to keep or remove other variables of the input dataset
-#' 
+#'
 #' @importFrom emo ji
 #' @importFrom rlang sym
 #' @importFrom rlang has_name
 #' @importFrom dplyr select
-#' 
+#'
 #' @export
-validate_dataset <- function(df, user = "u_id", timestamp = "created_at", location = "loc_id", keep_other_vars = F){
+validate_dataset <- function(df, user = "u_id", timestamp = "created_at", location = "loc_id", keep_other_vars = F) {
   if (!rlang::has_name(df, user)) {
     stop(paste(emo::ji("bomb"), "User column does not exist!"))
   }
@@ -36,26 +36,22 @@ validate_dataset <- function(df, user = "u_id", timestamp = "created_at", locati
     stop(paste(emo::ji("bomb"), "Dataset is not a dataframe!"))
   }
 
-  if (!is(df %>% pull({{timestamp}}), "POSIXct")) {
+  if (!is(df %>% pull({{ timestamp }}), "POSIXct")) {
     stop("Timestamp is not of class POSIXct")
   }
 
-  unique_users <- df %>% pull({{user}}) %>% n_distinct()
+  unique_users <- df %>%
+    pull({{ user }}) %>%
+    n_distinct()
   message(paste(emo::ji("tada"), "Congratulations!! Your dataset has passed validation."))
   message(paste(emo::ji("bust_in_silhouette"), "There are", unique_users, "unique users in your dataset."))
   message(paste(emo::ji("earth_asia"), "Now start your journey identifying their meaningful location(s)!"))
   message(paste(emo::ji("clap"), "Good luck!"))
   message("\n")
 
-  if(keep_other_vars) {
+  if (keep_other_vars) {
     df
   } else {
-    df %>% dplyr::select(c({{user}}, {{location}}, {{timestamp}}))
+    df %>% dplyr::select(c({{ user }}, {{ location }}, {{ timestamp }}))
   }
 }
-
-
-
-
-
-
diff --git a/README.Rmd b/README.Rmd
index 872e9f6..57822a9 100644
--- a/README.Rmd
+++ b/README.Rmd
@@ -52,7 +52,7 @@ library(here)
 
 
 ```{r validate}
-# load test sample dataset 
+# load test sample dataset
 data("test_sample", package = "homelocator")
 df_validated <- validate_dataset(test_sample, user = "u_id", timestamp = "created_at", location = "grid_id")
 head(df_validated)
@@ -103,15 +103,19 @@ identify_location(test_sample, user = "u_id", timestamp = "created_at", location
 #### FREQ
 ```{r eval=F}
 # recipe: Frequency -- FREQ
-identify_location(test_sample, user = "u_id", timestamp = "created_at", location = "grid_id", 
-                  show_n_loc = 1, recipe = "FREQ")
+identify_location(test_sample,
+  user = "u_id", timestamp = "created_at", location = "grid_id",
+  show_n_loc = 1, recipe = "FREQ"
+)
 ```
 
 #### OSNA
 ```{r eval=F}
 # recipe: Online Social Network Activity -- OSNA
-identify_location(test_sample, user = "u_id", timestamp = "created_at", location = "grid_id", 
-                  show_n_loc = 1, recipe = "OSNA")
+identify_location(test_sample,
+  user = "u_id", timestamp = "created_at", location = "grid_id",
+  show_n_loc = 1, recipe = "OSNA"
+)
 ```
 
 #### APDM
@@ -120,10 +124,12 @@ identify_location(test_sample, user = "u_id", timestamp = "created_at", location
 ## APDM recipe strictly returns the most likely home location
 ## It is important to create your location neighbors table before you use the recipe!!
 ## example: st_queen <- function(a, b = a) st_relate(a, b, pattern = "F***T****")
-##          neighbors <- st_queen(df_sf) ===> convert result to dataframe 
+##          neighbors <- st_queen(df_sf) ===> convert result to dataframe
 data("df_neighbors", package = "homelocator")
-identify_location(test_sample, user = "u_id", timestamp = "created_at", location = "grid_id", 
-                  show_n_loc = 1, recipe = "APDM")
+identify_location(test_sample,
+  user = "u_id", timestamp = "created_at", location = "grid_id",
+  show_n_loc = 1, recipe = "APDM"
+)
 ```
 
 
diff --git a/data-raw/df_neighbors.R b/data-raw/df_neighbors.R
index a4f29cb..47b6332 100644
--- a/data-raw/df_neighbors.R
+++ b/data-raw/df_neighbors.R
@@ -2,20 +2,20 @@ library(tidyverse)
 library(sf)
 library(here)
 
-#generate grid neighbors 
+# generate grid neighbors
 grids <- st_read(here("data-raw/MP14_SUBZONE_NO_SEA_PL.shp"), quiet = T) %>%
-  st_transform(crs = 3414) %>% 
-  st_make_valid() %>% 
-  st_make_grid(., cellsize = 750, square = F) %>% 
-  st_sf() %>% 
+  st_transform(crs = 3414) %>%
+  st_make_valid() %>%
+  st_make_grid(., cellsize = 750, square = F) %>%
+  st_sf() %>%
   rowid_to_column("grid_id")
 
 st_queen <- function(a, b = a) st_relate(a, b, pattern = "F***T****")
 neighbors <- st_queen(grids)
 
-#convert list to tibble
-list_to_tibble <- function(index, neighbors){
-  tibble(grid_id = as.character(index)) %>% 
+# convert list to tibble
+list_to_tibble <- function(index, neighbors) {
+  tibble(grid_id = as.character(index)) %>%
     mutate(neighbor = list(neighbors[[index]]))
 }
 df_neighbors <- do.call(rbind, map(1:length(neighbors), function(x) list_to_tibble(x, neighbors)))
@@ -24,29 +24,21 @@ df_neighbors <- do.call(rbind, map(1:length(neighbors), function(x) list_to_tibb
 save(df_neighbors, file = "data/df_neighbors.rda")
 
 
-# generate sample file 
+# generate sample file
 df <- readRDS("~/GitProjects/identifyingmeaningfullocations/analysis/data/derived_data/sg_tweets_anonymized_20200929.rds")
 
-df_nested <- df %>% 
-  dplyr::select(u_id_anonymized, created_at_sg_anonymized, grid_id) %>% 
-  dplyr::rename(u_id = u_id_anonymized, created_at = created_at_sg_anonymized) %>% 
+df_nested <- df %>%
+  dplyr::select(u_id_anonymized, created_at_sg_anonymized, grid_id) %>%
+  dplyr::rename(u_id = u_id_anonymized, created_at = created_at_sg_anonymized) %>%
   nest(data = c(created_at, grid_id))
 
 set.seed(1234)
-test_sample <- df_nested %>% 
-  sample_n(size = 100) %>% 
-  unnest(cols = data) %>% 
+test_sample <- df_nested %>%
+  sample_n(size = 100) %>%
+  unnest(cols = data) %>%
   sample_n(size = nrow(.))
 
 save(test_sample, file = here("data/test_sample.rda"))
 
 
 load("data/test_sample.rda")
-
-
-
-
-
-
-
-
diff --git a/vignettes/homelocator.Rmd b/vignettes/homelocator.Rmd
index 34add21..1e087a9 100644
--- a/vignettes/homelocator.Rmd
+++ b/vignettes/homelocator.Rmd
@@ -45,7 +45,7 @@ To explore the basic data manipulation functions of `homelocator`, we'll use a `
 # load data
 data("test_sample", package = "homelocator")
 
-# show test sample 
+# show test sample
 test_sample %>% head(3)
 ```
 
@@ -68,7 +68,7 @@ The `validate_dataset()` function makes sure your input dataset contains all thr
 ```{r}
 df_validated <- validate_dataset(test_sample, user = "u_id", timestamp = "created_at", location = "grid_id", keep_other_vars = FALSE)
 
-# show result 
+# show result
 df_validated %>% head(3)
 ```
 
@@ -85,16 +85,16 @@ The `nest_verbose()` and `unnest_verbose()` functions work in the same way as  [
 # nest data
 df_nested <- nest_verbose(df_validated, c("created_at", "grid_id"))
 
-# show result 
+# show result
 df_nested %>% head(3)
 
-# show result 
+# show result
 df_nested$data[[1]] %>% head(3)
 
 # unnest data
 df_unnested <- unnest_verbose(df_nested, data)
 
-# show result 
+# show result
 df_unnested %>% head(3)
 ```
 
@@ -111,20 +111,20 @@ The `nest_double_nest()` and `unnest_double_nested()` functions work in a simila
 # double nest data (e.g., nesting column: created_at)
 df_double_nested <- nest_nested(df_nested %>% head(100), c("created_at"))
 
-# show result 
+# show result
 df_double_nested %>% head(3)
 
-# show result 
+# show result
 df_double_nested$data[[1]] %>% head(3)
 
 df_double_nested$data[[1]]$data[[1]] %>% head(3)
 
-# unnest nested data 
-df_double_unnested <- df_double_nested %>% 
-  head(100) %>%  ## take first 100 rows for example
-  unnest_double_nested(., data) 
+# unnest nested data
+df_double_unnested <- df_double_nested %>%
+  head(100) %>% ## take first 100 rows for example
+  unnest_double_nested(., data)
 
-# show result 
+# show result
 df_double_unnested %>% head(3)
 ```
 
@@ -138,14 +138,18 @@ The `enrich_timestamp()` function creates additional variables that are derived
 
 
 ```{r}
-#original variables 
-df_nested[1, ] %>% unnest(cols = c(data)) %>% head(3)
+# original variables
+df_nested[1, ] %>%
+  unnest(cols = c(data)) %>%
+  head(3)
 
-# create new variables from "created_at" timestamp column 
+# create new variables from "created_at" timestamp column
 df_enriched <- enrich_timestamp(df_nested, timestamp = "created_at")
 
-# show result 
-df_enriched[1, ] %>% unnest(cols = c(data)) %>% head(3)
+# show result
+df_enriched[1, ] %>%
+  unnest(cols = c(data)) %>%
+  head(3)
 ```
 
 ### Summarize in nested and double nested dataframe 
@@ -157,23 +161,25 @@ The `summarise_nested()` function  works similar to dplyr's regular `summarise`
   - `...`: name-value pairs of summary functions 
 
 ```{r}
-# summarize in nested dataframe 
+# summarize in nested dataframe
 # e.g., summarise total number of tweets and total number of places per user
-df_summarize_nested <- summarise_nested(df_enriched, 
-                                        n_tweets = n(),
-                                        n_locs = n_distinct(grid_id))
+df_summarize_nested <- summarise_nested(df_enriched,
+  n_tweets = n(),
+  n_locs = n_distinct(grid_id)
+)
 
-# show result 
+# show result
 df_summarize_nested %>% head(3)
 
-# summarize in double nested dataframe 
+# summarize in double nested dataframe
 # take first 100 users for example
-# e.g summarise total number of tweets and totla number of distinct days 
-df_summarize_double_nested <- summarise_double_nested(df_enriched %>% head(100), 
-                    nest_cols = c("created_at", "ymd", "year", "month", "day", "wday", "hour"), 
-                    n_tweets = n(), n_days = n_distinct(ymd))
+# e.g summarise total number of tweets and totla number of distinct days
+df_summarize_double_nested <- summarise_double_nested(df_enriched %>% head(100),
+  nest_cols = c("created_at", "ymd", "year", "month", "day", "wday", "hour"),
+  n_tweets = n(), n_days = n_distinct(ymd)
+)
 
-# show result 
+# show result
 df_summarize_double_nested[1, ]
 df_summarize_double_nested[1, ]$data[[1]] %>% head(3)
 ```
@@ -189,10 +195,12 @@ The `remove_top_users()` function allows to remove top `N` percent of active use
 
 ```{r}
 # remove top 1% active users (e.g based on the frequency of tweets sent by users)
-df_removed_active_users <- remove_top_users(df_summarize_nested, user = "u_id", 
-                                            counts = "n_tweets", topNpct_user = 1) 
+df_removed_active_users <- remove_top_users(df_summarize_nested,
+  user = "u_id",
+  counts = "n_tweets", topNpct_user = 1
+)
 
-# show result 
+# show result
 df_removed_active_users %>% head(3)
 ```
 
@@ -206,19 +214,23 @@ The `filter_verbose()` function works in the same way as [filter](https://www.rd
   - `...`: Logical predicates defined in terms of the variables in df. Only rows match conditions are kept.
 
 ```{r}
-# filter users with less than 10 tweets sent at less than 10 places 
-df_filtered <- filter_verbose(df_removed_active_users, user = "u_id", 
-                              n_tweets > 10 & n_locs > 10)
+# filter users with less than 10 tweets sent at less than 10 places
+df_filtered <- filter_verbose(df_removed_active_users,
+  user = "u_id",
+  n_tweets > 10 & n_locs > 10
+)
 
-# show result 
+# show result
 df_filtered %>% head(3)
 
 # filter tweets that sent on weekends and during daytime (8am to 6pm)
-df_filter_nested <- filter_nested(df_filtered, user = "u_id", 
-                                  !wday %in% c(1, 7), # 1 means Sunday and 7 means Saturday
-                                  !hour %in% seq(8, 18, 1)) 
+df_filter_nested <- filter_nested(df_filtered,
+  user = "u_id",
+  !wday %in% c(1, 7), # 1 means Sunday and 7 means Saturday
+  !hour %in% seq(8, 18, 1)
+)
 
-# show result 
+# show result
 df_filter_nested %>% head(3)
 
 df_filter_nested$data[[1]] %>% head(3)
@@ -238,31 +250,37 @@ Function `mutate_nested()` works in the similar way as `mutate_verbose()` but it
   
   
 ```{r}
-## let's use pre-discussed functions to filter some users first 
+## let's use pre-discussed functions to filter some users first
 colnmaes_data <- df_filtered$data[[1]] %>% names()
-colnmaes_to_nest <- colnmaes_data[-which(colnmaes_data == "grid_id")] 
-
-df_cleaned <- df_filtered %>% 
-  summarise_double_nested(., nest_cols = colnmaes_to_nest,
-                          n_tweets_loc = n(), # number of tweets sent at each location
-                          n_hrs_loc = n_distinct(hour), # number of unique hours of sent tweets 
-                          n_days_loc = n_distinct(ymd), # number of unique days of sent tweets 
-                          period_loc = as.numeric(max(created_at) - min(created_at), "days")) %>% # period of tweeting 
-  unnest_verbose(data) %>% 
-  filter_verbose(., user = "u_id",
-                 n_tweets_loc > 10 & n_hrs_loc > 10 & n_days_loc > 10 & period_loc > 10) 
-  
-# show cleaned dataset 
+colnmaes_to_nest <- colnmaes_data[-which(colnmaes_data == "grid_id")]
+
+df_cleaned <- df_filtered %>%
+  summarise_double_nested(.,
+    nest_cols = colnmaes_to_nest,
+    n_tweets_loc = n(), # number of tweets sent at each location
+    n_hrs_loc = n_distinct(hour), # number of unique hours of sent tweets
+    n_days_loc = n_distinct(ymd), # number of unique days of sent tweets
+    period_loc = as.numeric(max(created_at) - min(created_at), "days")
+  ) %>% # period of tweeting
+  unnest_verbose(data) %>%
+  filter_verbose(.,
+    user = "u_id",
+    n_tweets_loc > 10 & n_hrs_loc > 10 & n_days_loc > 10 & period_loc > 10
+  )
+
+# show cleaned dataset
 df_cleaned %>% head(3)
 
 # ok, then let's apply the mutate_nested function to add four new variables: wd_or_wk, time_numeric, rest_or_work, wk.am_or_wk.pm
-df_expanded <- df_cleaned %>% 
-  mutate_nested(wd_or_wk = if_else(wday %in% c(1,7), "weekend", "weekday"),
-                time_numeric = lubridate::hour(created_at) + lubridate::minute(created_at) / 60 + lubridate::second(created_at) / 3600, 
-                rest_or_work = if_else(time_numeric >= 9 & time_numeric <= 18, "work", "rest"), 
-                wk.am_or_wk.pm = if_else(time_numeric >= 6 & time_numeric <= 12 & wd_or_wk == "weekend", "weekend_am", "weekend_pm")) 
-
-# show result 
+df_expanded <- df_cleaned %>%
+  mutate_nested(
+    wd_or_wk = if_else(wday %in% c(1, 7), "weekend", "weekday"),
+    time_numeric = lubridate::hour(created_at) + lubridate::minute(created_at) / 60 + lubridate::second(created_at) / 3600,
+    rest_or_work = if_else(time_numeric >= 9 & time_numeric <= 18, "work", "rest"),
+    wk.am_or_wk.pm = if_else(time_numeric >= 6 & time_numeric <= 12 & wd_or_wk == "weekend", "weekend_am", "weekend_pm")
+  )
+
+# show result
 df_expanded %>% head(3)
 ```
 
@@ -277,10 +295,10 @@ The function `prop_factor_nested()` allows you to calculate the proportion of ca
 df_expanded$data[[1]] %>% head(3)
 
 # calculate proportion of categories for four variables: wd_or_wk, rest_or_work, wk.am_or_wk.pm
-df_expanded <- df_expanded %>% 
-  prop_factor_nested(wd_or_wk, rest_or_work, wk.am_or_wk.pm) 
+df_expanded <- df_expanded %>%
+  prop_factor_nested(wd_or_wk, rest_or_work, wk.am_or_wk.pm)
 
-# show result 
+# show result
 df_expanded %>% head(3)
 ```
 
@@ -303,41 +321,47 @@ The `score_summary()` function summarises all scored columns and return one sing
 
 
 ```{r}
-## let's add two more variables before we do the scoring 
-df_expanded <- df_expanded %>% 
-  summarise_nested(n_wdays_loc = n_distinct(wday),
-                   n_months_loc = n_distinct(month))
+## let's add two more variables before we do the scoring
+df_expanded <- df_expanded %>%
+  summarise_nested(
+    n_wdays_loc = n_distinct(wday),
+    n_months_loc = n_distinct(month)
+  )
 
 df_expanded %>% head(3)
 
 # when calculating scores, you can give weight to different variables, but the total weight should add up to 1
-df_scored <- df_expanded %>% 
-  score_nested(., user = "u_id", location = "grid_id", keep_original_vars = F,
-               s_n_tweets_loc = 0.1 * (n_tweets_loc/max(n_tweets_loc)),
-               s_n_hrs_loc = 0.1 * (n_hrs_loc/24), 
-               s_n_days_loc = 0.1 * (n_days_loc/max(n_days_loc)),
-               s_period_loc = 0.1 * (period_loc/max(period_loc)),
-               s_n_wdays_loc = 0.1 * (n_wdays_loc/7),
-               s_n_months_loc = 0.1 * (n_months_loc/12),
-               s_weekend = 0.1 * (weekend),
-               s_rest = 0.2 * (rest),
-               s_wk_am = 0.1 * (weekend_am))
+df_scored <- df_expanded %>%
+  score_nested(.,
+    user = "u_id", location = "grid_id", keep_original_vars = F,
+    s_n_tweets_loc = 0.1 * (n_tweets_loc / max(n_tweets_loc)),
+    s_n_hrs_loc = 0.1 * (n_hrs_loc / 24),
+    s_n_days_loc = 0.1 * (n_days_loc / max(n_days_loc)),
+    s_period_loc = 0.1 * (period_loc / max(period_loc)),
+    s_n_wdays_loc = 0.1 * (n_wdays_loc / 7),
+    s_n_months_loc = 0.1 * (n_months_loc / 12),
+    s_weekend = 0.1 * (weekend),
+    s_rest = 0.2 * (rest),
+    s_wk_am = 0.1 * (weekend_am)
+  )
 
 df_scored %>% head(3)
 df_scored$data[[1]]
 
-### we can replace the score function by mutate function 
-df_scored_2 <- df_expanded %>% 
-  nest_verbose(-u_id) %>% 
-  mutate_nested(s_n_tweets_loc = 0.1 * (n_tweets_loc/max(n_tweets_loc)),
-                s_n_hrs_loc = 0.1 * (n_hrs_loc/24), 
-                s_n_days_loc = 0.1 * (n_days_loc/max(n_days_loc)),
-                s_period_loc = 0.1 * (period_loc/max(period_loc)),
-                s_n_wdays_loc = 0.1 * (n_wdays_loc/7),
-                s_n_months_loc = 0.1 * (n_months_loc/12),
-                s_weekend = 0.1 * (weekend),
-                s_rest = 0.2 * (rest),
-                s_wk_am = 0.1 * (weekend_am))
+### we can replace the score function by mutate function
+df_scored_2 <- df_expanded %>%
+  nest_verbose(-u_id) %>%
+  mutate_nested(
+    s_n_tweets_loc = 0.1 * (n_tweets_loc / max(n_tweets_loc)),
+    s_n_hrs_loc = 0.1 * (n_hrs_loc / 24),
+    s_n_days_loc = 0.1 * (n_days_loc / max(n_days_loc)),
+    s_period_loc = 0.1 * (period_loc / max(period_loc)),
+    s_n_wdays_loc = 0.1 * (n_wdays_loc / 7),
+    s_n_months_loc = 0.1 * (n_months_loc / 12),
+    s_weekend = 0.1 * (weekend),
+    s_rest = 0.2 * (rest),
+    s_wk_am = 0.1 * (weekend_am)
+  )
 
 df_scored_2 %>% head(3)
 df_scored_2$data[[1]]
@@ -345,8 +369,8 @@ df_scored_2$data[[1]]
 
 
 ```{r}
-# sum varialbes score for each location 
-df_score_summed <- df_scored %>% 
+# sum varialbes score for each location
+df_score_summed <- df_scored %>%
   score_summary(., user = "u_id", location = "grid_id", starts_with("s_"))
 
 df_score_summed %>% head(3)
@@ -367,13 +391,13 @@ The `extract_location()` function allows you to sort the locations of each each
 
 ```{r}
 # extract homes for users based on score value (each user return 1 most possible home)
-df_home <- df_score_summed %>% 
+df_home <- df_score_summed %>%
   extract_location(., user = "u_id", location = "grid_id", show_n_loc = 1, keep_score = F, score)
 
 df_home %>% head(3)
 
 # extract homes for users and keep scores of locations
-df_home <- df_score_summed %>% 
+df_home <- df_score_summed %>%
   extract_location(., user = "u_id", location = "grid_id", show_n_loc = 1, keep_score = T, score)
 
 df_home %>% head(3)
@@ -391,7 +415,7 @@ The `spread_nested()` function works in the same way as [spread](https://www.rdo
 
 ```{r}
 # let's add one timeframe variable first and calculate the number of data points at each timeframe
-df_timeframe <- df_enriched %>% 
+df_timeframe <- df_enriched %>%
   mutate_nested(timeframe = if_else(hour >= 2 & hour < 8, "Rest", if_else(hour >= 8 & hour < 19, "Active", "Leisure")))
 
 colnames_nested_data <- df_timeframe$data[[1]] %>% names()
@@ -399,13 +423,15 @@ colnames_to_nest <- colnames_nested_data[-which(colnames_nested_data %in% c("gri
 
 df_timeframe <- df_timeframe %>%
   head(20) %>% # take first 20 users as example
-  summarise_double_nested(., nest_cols = colnames_to_nest, 
-                            n_points_timeframe = n()) 
+  summarise_double_nested(.,
+    nest_cols = colnames_to_nest,
+    n_points_timeframe = n()
+  )
 df_timeframe$data[[2]]
 
 # spread timeframe in nested dataframe with key is timeframe and value is n_points_timeframe
-df_timeframe_spreaded <- df_timeframe %>% 
-    spread_nested(., key_var = "timeframe", value_var = "n_points_timeframe") 
+df_timeframe_spreaded <- df_timeframe %>%
+  spread_nested(., key_var = "timeframe", value_var = "n_points_timeframe")
 
 df_timeframe_spreaded$data[[2]]
 ```
@@ -419,9 +445,9 @@ The `arrange_nested()` function works in the same way as [arrange](https://www.r
   - `...`: comma separated list of unquoted variable names
 
 ```{r}
-df_enriched$data[[3]]  
+df_enriched$data[[3]]
 
-df_arranged <- df_enriched %>% 
+df_arranged <- df_enriched %>%
   arrange_nested(desc(hour)) # arrange the hour in descending order
 
 df_arranged$data[[3]]
@@ -435,23 +461,25 @@ The `arrange_double_nested()` function works in the similar way as `arrange_nest
 
 
 ```{r}
-# get the name of columns to nest 
+# get the name of columns to nest
 colnames_nested_data <- df_enriched$data[[1]] %>% names()
 colnmaes_to_nest <- colnames_nested_data[-which(colnames_nested_data %in% c("grid_id"))]
 
-df_double_arranged <- df_enriched %>% 
+df_double_arranged <- df_enriched %>%
   head(20) %>% # take the first 20 users for example
-  arrange_double_nested(., nest_cols = colnmaes_to_nest, 
-                        desc(created_at)) # sort by time in descending order
+  arrange_double_nested(.,
+    nest_cols = colnmaes_to_nest,
+    desc(created_at)
+  ) # sort by time in descending order
 
 
 # original third user
 df_enriched[3, ]
-# third user data points 
+# third user data points
 df_enriched$data[[3]]
 # arranged third user
 df_double_arranged[3, ]
-# arranged time 
+# arranged time
 df_double_arranged$data[[3]]$data[[2]]
 ```
 
@@ -463,8 +491,8 @@ The `top_n_nested()` function works in the same way as [top_n](https://dplyr.tid
 ```{r}
 df_enriched$data[[2]]
 
-## get the top 1 row based on hour 
-df_top_1 <- df_enriched %>% 
+## get the top 1 row based on hour
+df_top_1 <- df_enriched %>%
   top_n_nested(., n = 1, wt = "hour")
 
 df_top_1$data[[2]]
@@ -494,23 +522,31 @@ Current available recipes:
 
 ```{r eval = FALSE}
 # recipe: homelocator -- HMLC
-identify_location(test_sample, user = "u_id", timestamp = "created_at", 
-                  location = "grid_id", show_n_loc = 1, recipe = "HMLC")
+identify_location(test_sample,
+  user = "u_id", timestamp = "created_at",
+  location = "grid_id", show_n_loc = 1, recipe = "HMLC"
+)
 
 # recipe: Frequency -- FREQ
-identify_location(test_sample, user = "u_id", timestamp = "created_at", 
-                  location = "grid_id", show_n_loc = 1, recipe = "FREQ")
+identify_location(test_sample,
+  user = "u_id", timestamp = "created_at",
+  location = "grid_id", show_n_loc = 1, recipe = "FREQ"
+)
 
 # recipe: Online Social Network Activity -- OSNA
-identify_location(test_sample, user = "u_id", timestamp = "created_at", 
-                  location = "grid_id", show_n_loc = 1, recipe = "OSNA")
+identify_location(test_sample,
+  user = "u_id", timestamp = "created_at",
+  location = "grid_id", show_n_loc = 1, recipe = "OSNA"
+)
 
 # recipe: Online Social Network Activity -- APDM
 ## APDM recipe strictly returns the most likely home location
 ## It is important to load the neighbors table before you use the recipe!!
 ## example: st_queen <- function(a, b = a) st_relate(a, b, pattern = "F***T****")
-##          neighbors <- st_queen(df_sf) ===> convert result to dataframe 
+##          neighbors <- st_queen(df_sf) ===> convert result to dataframe
 data("df_neighbors", package = "homelocator")
-identify_location(test_sample, user = "u_id", timestamp = "created_at", 
-                  location = "grid_id", recipe = "APDM", keep_score = F)
+identify_location(test_sample,
+  user = "u_id", timestamp = "created_at",
+  location = "grid_id", recipe = "APDM", keep_score = F
+)
 ```
diff --git a/vignettes/quick-start.Rmd b/vignettes/quick-start.Rmd
index ccf9455..3f31c3e 100644
--- a/vignettes/quick-start.Rmd
+++ b/vignettes/quick-start.Rmd
@@ -27,7 +27,7 @@ install_github("spatialnetworkslab/homelocator")
 ## Load library
 
 ```{r setup}
-#load homelocator package 
+# load homelocator package
 library(homelocator)
 ```
 
@@ -98,7 +98,7 @@ Calculates the average and standard deviation of start time data points by a sin
 ## APDM recipe strictly returns the most likely home location
 ## It is important to load the neighbors table before you use the recipe!!
 ## example: st_queen <- function(a, b = a) st_relate(a, b, pattern = "F***T****")
-##          neighbors <- st_queen(df_sf) ===> convert result to dataframe 
+##          neighbors <- st_queen(df_sf) ===> convert result to dataframe
 data("df_neighbors", package = "homelocator")
 identify_location(test_sample, user = "u_id", timestamp = "created_at", location = "grid_id", recipe = "APDM", keep_score = F)
 ```