Skip to content

Commit

Permalink
Rename 'split.data.by.bins' to 'split.dataframe.by.bins'
Browse files Browse the repository at this point in the history
This renaming makes sense as the method only splits dataframes. Rename
'split.data.by.bins.vector' to 'split.data.by.bins' as it is more readable
and easier to understand.

This works towards #239.

Signed-off-by: Maximilian Löffler <[email protected]>
  • Loading branch information
maxloeffler committed Oct 18, 2023
1 parent 48ef4fa commit ed5feb2
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 14 deletions.
8 changes: 4 additions & 4 deletions tests/test-split-misc.R
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,10 @@ if (!dir.exists(CF.DATA)) CF.DATA = file.path(".", "tests", "codeface-data")
## Split raw data (data and networks by bins) ------------------------------

##
## Tests for split.data.by.bins and split.network.by.bins
## Tests for split.dataframe.by.bins and split.network.by.bins
##

test_that("Split network and data on low level (split.data.by.bins, split.network.by.bins).", {
test_that("Split network and data on low level (split.dataframe.by.bins, split.network.by.bins).", {

length.dates = 15
length.bins = 5
Expand All @@ -69,7 +69,7 @@ test_that("Split network and data on low level (split.data.by.bins, split.networ
## sprintf("c(\"%s\")", paste( sample(bins, size = length.dates, replace = TRUE), collapse = "', '") )

##
## split.data.by.bins
## split.dataframe.by.bins
##

## generate data frame with dates and IDs
Expand All @@ -86,7 +86,7 @@ test_that("Split network and data on low level (split.data.by.bins, split.networ
"4" = df[ c(4, 11, 13), ],
"5" = df[ c(3, 10, 15), ]
)
results = split.data.by.bins(df, bins.vector)
results = split.dataframe.by.bins(df, bins.vector)

## check result
expect_equal(results, expected, info = "Split data by bins.")
Expand Down
2 changes: 1 addition & 1 deletion util-misc.R
Original file line number Diff line number Diff line change
Expand Up @@ -998,7 +998,7 @@ get.data.from.range = function(range, data) {

## split data by this bin; this gives a list of three data frames, "0" contains the data before the range, "1" the
## data within the range and "2" holds the data after the range
split.data = split.data.by.bins(data, df.bins)
split.data = split.dataframe.by.bins(data, df.bins)

## look for the element with name "1", as we are interested in the data within the range
## if there is no data, return an empty data frame corresponding to the data we want to cut
Expand Down
18 changes: 9 additions & 9 deletions util-split.R
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ split.data.time.based = function(project.data, time.period = "3 months", bins =
#' @return the list of RangeData objects, each referring to one bin
#'
#' @seealso split.get.bins.activity.based
split.data.by.bins.vector = function(project.data, activity.amount, bins, split.basis = c("commits", "mails", "issues"),
split.data.by.bins = function(project.data, activity.amount, bins, split.basis = c("commits", "mails", "issues"),
sliding.window) {
split = split.data.by.time.or.bins(project.data, activity.amount, bins, split.by.time = FALSE,
sliding.window = sliding.window, split.basis = split.basis)
Expand All @@ -94,9 +94,9 @@ split.data.by.bins.vector = function(project.data, activity.amount, bins, split.
#'
#' @param project.data the *Data object from which the data is retrieved
#' @param splitting.length either \code{time.period} from \code{split.data.time.based}
#' or \code{activity.amount} from\code{split.data.by.bins.vector}
#' or \code{activity.amount} from \code{split.data.by.bins}
#' @param bins either formatted as the \code{bins} parameter of \code{split.data.time.based}
#' or as the \code{bins} parameter of \code{split.data.by.bins.vector}
#' or as the \code{bins} parameter of \code{split.data.by.bins}
#' @param split.by.time logical indicating whether splitting is done time-based or activity-bins-based
#' @param number.windows see \code{number.windows} from \code{split.data.time.by.bins.vector}
#' [default: NULL]
Expand All @@ -111,7 +111,7 @@ split.data.by.bins.vector = function(project.data, activity.amount, bins, split.
#' @return the list of RangeData objects, each referring to one time period
#'
#' @seealso split.data.time.based
#' @seealso split.data.by.bins.vector
#' @seealso split.data.by.bins
split.data.by.time.or.bins = function(project.data, splitting.length, bins, split.by.time,
number.windows = NULL, split.basis = c("commits", "mails", "issues"),
sliding.window = FALSE, project.conf.new = NULL) {
Expand Down Expand Up @@ -435,7 +435,7 @@ split.data.activity.based = function(project.data, activity.type = c("commits",

## split the data based on the extracted timestamps
logging::logdebug("Splitting data based on time windows arising from activity bins.")
cf.data = split.data.by.bins.vector(project.data, bins = bins.data, activity.amount = activity.amount,
cf.data = split.data.by.bins(project.data, bins = bins.data, activity.amount = activity.amount,
sliding.window = sliding.window, split.basis = activity.type)

## perform additional steps for sliding-window approach:
Expand Down Expand Up @@ -1015,16 +1015,16 @@ split.network.time.based.by.ranges = function(network, ranges, remove.isolates =
## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / /
## Split raw data ----------------------------------------------------------

#' Split the given data by the given bins.
#' Split the given data frame by the given bins.
#'
#' @param df a data.frame to be split
#' @param bins a vector with the length of 'nrow(df)' assigning a bin for each row of 'df'
#'
#' @return a list of data.frames, with the length of 'unique(bins)'
split.data.by.bins = function(df, bins) {
logging::logdebug("split.data.by.bins: starting.")
split.dataframe.by.bins = function(df, bins) {
logging::logdebug("split.dataframe.by.bins: starting.")
df.split = split(df, bins)
logging::logdebug("split.data.by.bins: finished.")
logging::logdebug("split.dataframe.by.bins: finished.")
return(df.split)
}

Expand Down

0 comments on commit ed5feb2

Please sign in to comment.