Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/branch-22.02' into remove-meth…
Browse files Browse the repository at this point in the history
…od-parameter-from-merge-and-join
  • Loading branch information
bdice committed Jan 5, 2022
2 parents bfaf321 + eba4f03 commit 9590998
Show file tree
Hide file tree
Showing 59 changed files with 1,925 additions and 1,565 deletions.
2 changes: 1 addition & 1 deletion CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ git submodule update --init --remote --recursive
```bash
# create the conda environment (assuming in base `cudf` directory)
# note: RAPIDS currently doesn't support `channel_priority: strict`; use `channel_priority: flexible` instead
conda env create --name cudf_dev --file conda/environments/cudf_dev_cuda11.0.yml
conda env create --name cudf_dev --file conda/environments/cudf_dev_cuda11.5.yml
# activate the environment
conda activate cudf_dev
```
Expand Down
4 changes: 3 additions & 1 deletion cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -343,6 +343,7 @@ add_library(
src/lists/lists_column_factories.cu
src/lists/lists_column_view.cu
src/lists/segmented_sort.cu
src/lists/sequences.cu
src/merge/merge.cu
src/partitioning/partitioning.cu
src/partitioning/round_robin.cu
Expand Down Expand Up @@ -416,7 +417,8 @@ add_library(
src/strings/copying/concatenate.cu
src/strings/copying/copying.cu
src/strings/copying/shift.cu
src/strings/extract.cu
src/strings/extract/extract.cu
src/strings/extract/extract_all.cu
src/strings/filling/fill.cu
src/strings/filter_chars.cu
src/strings/findall.cu
Expand Down
2 changes: 1 addition & 1 deletion cpp/cmake/thirdparty/get_cucollections.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ function(find_and_configure_cucollections)
cuco 0.0
GLOBAL_TARGETS cuco::cuco
CPM_ARGS GITHUB_REPOSITORY NVIDIA/cuCollections
GIT_TAG 6433e8ad7571f14cc5384051b049029c60dd1ce0
GIT_TAG 193de1aa74f5721717f991ca757dc610c852bb17
OPTIONS "BUILD_TESTS OFF" "BUILD_BENCHMARKS OFF" "BUILD_EXAMPLES OFF"
)

Expand Down
2 changes: 1 addition & 1 deletion cpp/cmake/thirdparty/get_thrust.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,6 @@ function(find_and_configure_thrust VERSION)
endif()
endfunction()

set(CUDF_MIN_VERSION_Thrust 1.12.0)
set(CUDF_MIN_VERSION_Thrust 1.15.0)

find_and_configure_thrust(${CUDF_MIN_VERSION_Thrust})
278 changes: 32 additions & 246 deletions cpp/include/cudf/datetime.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -285,280 +285,66 @@ std::unique_ptr<cudf::column> extract_quarter(
cudf::column_view const& column,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/** @} */ // end of group

/**
* @brief Round up to the nearest day
*
* @param column cudf::column_view of the input datetime values
* @param mr Device memory resource used to allocate device memory of the returned column.
*
* @throw cudf::logic_error if input column datatype is not TIMESTAMP
* @return cudf::column of the same datetime resolution as the input column
*/
std::unique_ptr<cudf::column> ceil_day(
cudf::column_view const& column,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Round up to the nearest hour
*
* @param column cudf::column_view of the input datetime values
* @param mr Device memory resource used to allocate device memory of the returned column.
*
* @throw cudf::logic_error if input column datatype is not TIMESTAMP
* @return cudf::column of the same datetime resolution as the input column
*/
std::unique_ptr<cudf::column> ceil_hour(
cudf::column_view const& column,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Round up to the nearest minute
*
* @param column cudf::column_view of the input datetime values
* @param mr Device memory resource used to allocate device memory of the returned column.
*
* @throw cudf::logic_error if input column datatype is not TIMESTAMP
* @return cudf::column of the same datetime resolution as the input column
*/
std::unique_ptr<cudf::column> ceil_minute(
cudf::column_view const& column,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Round up to the nearest second
*
* @param column cudf::column_view of the input datetime values
* @param mr Device memory resource used to allocate device memory of the returned column.
*
* @throw cudf::logic_error if input column datatype is not TIMESTAMP
* @return cudf::column of the same datetime resolution as the input column
*/
std::unique_ptr<cudf::column> ceil_second(
cudf::column_view const& column,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Round up to the nearest millisecond
*
* @param column cudf::column_view of the input datetime values
* @param mr Device memory resource used to allocate device memory of the returned column.
*
* @throw cudf::logic_error if input column datatype is not TIMESTAMP
* @return cudf::column of the same datetime resolution as the input column
*/
std::unique_ptr<column> ceil_millisecond(
column_view const& column,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Round up to the nearest microsecond
*
* @param column cudf::column_view of the input datetime values
* @param mr Device memory resource used to allocate device memory of the returned column.
*
* @throw cudf::logic_error if input column datatype is not TIMESTAMP
* @return cudf::column of the same datetime resolution as the input column
*/
std::unique_ptr<column> ceil_microsecond(
column_view const& column,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Round up to the nearest nanosecond
*
* @param column cudf::column_view of the input datetime values
* @param mr Device memory resource used to allocate device memory of the returned column.
*
* @throw cudf::logic_error if input column datatype is not TIMESTAMP
* @return cudf::column of the same datetime resolution as the input column
*/
std::unique_ptr<column> ceil_nanosecond(
column_view const& column,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Round down to the nearest day
*
* @param column cudf::column_view of the input datetime values
* @param mr Device memory resource used to allocate device memory of the returned column.
* @brief Fixed frequencies supported by datetime rounding functions ceil, floor, round.
*
* @throw cudf::logic_error if input column datatype is not TIMESTAMP
* @return cudf::column of the same datetime resolution as the input column
*/
std::unique_ptr<cudf::column> floor_day(
cudf::column_view const& column,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
enum class rounding_frequency : int32_t {
DAY,
HOUR,
MINUTE,
SECOND,
MILLISECOND,
MICROSECOND,
NANOSECOND
};

/**
* @brief Round down to the nearest hour
* @brief Round datetimes up to the nearest multiple of the given frequency.
*
* @param column cudf::column_view of the input datetime values
* @param column cudf::column_view of the input datetime values.
* @param freq rounding_frequency indicating the frequency to round up to.
* @param mr Device memory resource used to allocate device memory of the returned column.
*
* @throw cudf::logic_error if input column datatype is not TIMESTAMP
* @return cudf::column of the same datetime resolution as the input column
* @throw cudf::logic_error if input column datatype is not TIMESTAMP.
* @return cudf::column of the same datetime resolution as the input column.
*/
std::unique_ptr<cudf::column> floor_hour(
std::unique_ptr<cudf::column> ceil_datetimes(
cudf::column_view const& column,
rounding_frequency freq,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Round down to the nearest minute
* @brief Round datetimes down to the nearest multiple of the given frequency.
*
* @param column cudf::column_view of the input datetime values
* @param column cudf::column_view of the input datetime values.
* @param freq rounding_frequency indicating the frequency to round down to.
* @param mr Device memory resource used to allocate device memory of the returned column.
*
* @throw cudf::logic_error if input column datatype is not TIMESTAMP
* @return cudf::column of the same datetime resolution as the input column
* @throw cudf::logic_error if input column datatype is not TIMESTAMP.
* @return cudf::column of the same datetime resolution as the input column.
*/
std::unique_ptr<cudf::column> floor_minute(
std::unique_ptr<cudf::column> floor_datetimes(
cudf::column_view const& column,
rounding_frequency freq,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Round down to the nearest second
* @brief Round datetimes to the nearest multiple of the given frequency.
*
* @param column cudf::column_view of the input datetime values
* @param column cudf::column_view of the input datetime values.
* @param freq rounding_frequency indicating the frequency to round to.
* @param mr Device memory resource used to allocate device memory of the returned column.
*
* @throw cudf::logic_error if input column datatype is not TIMESTAMP
* @return cudf::column of the same datetime resolution as the input column
* @throw cudf::logic_error if input column datatype is not TIMESTAMP.
* @return cudf::column of the same datetime resolution as the input column.
*/
std::unique_ptr<cudf::column> floor_second(
std::unique_ptr<cudf::column> round_datetimes(
cudf::column_view const& column,
rounding_frequency freq,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Round down to the nearest millisecond
*
* @param column cudf::column_view of the input datetime values
* @param mr Device memory resource used to allocate device memory of the returned column.
*
* @throw cudf::logic_error if input column datatype is not TIMESTAMP
* @return cudf::column of the same datetime resolution as the input column
*/
std::unique_ptr<column> floor_millisecond(
column_view const& column,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Round down to the nearest microsecond
*
* @param column cudf::column_view of the input datetime values
* @param mr Device memory resource used to allocate device memory of the returned column.
*
* @throw cudf::logic_error if input column datatype is not TIMESTAMP
* @return cudf::column of the same datetime resolution as the input column
*/
std::unique_ptr<column> floor_microsecond(
column_view const& column,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Round down to the nearest nanosecond
*
* @param column cudf::column_view of the input datetime values
* @param mr Device memory resource used to allocate device memory of the returned column.
*
* @throw cudf::logic_error if input column datatype is not TIMESTAMP
* @return cudf::column of the same datetime resolution as the input column
*/
std::unique_ptr<column> floor_nanosecond(
column_view const& column,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Round to the nearest day
*
* @param column cudf::column_view of the input datetime values
* @param mr Device memory resource used to allocate device memory of the returned column.
*
* @throw cudf::logic_error if input column datatype is not TIMESTAMP
* @return cudf::column of the same datetime resolution as the input column
*/
std::unique_ptr<cudf::column> round_day(
cudf::column_view const& column,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Round to the nearest hour
*
* @param column cudf::column_view of the input datetime values
* @param mr Device memory resource used to allocate device memory of the returned column.
*
* @throw cudf::logic_error if input column datatype is not TIMESTAMP
* @return cudf::column of the same datetime resolution as the input column
*/
std::unique_ptr<cudf::column> round_hour(
cudf::column_view const& column,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Round to the nearest minute
*
* @param column cudf::column_view of the input datetime values
* @param mr Device memory resource used to allocate device memory of the returned column.
*
* @throw cudf::logic_error if input column datatype is not TIMESTAMP
* @return cudf::column of the same datetime resolution as the input column
*/
std::unique_ptr<cudf::column> round_minute(
cudf::column_view const& column,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Round to the nearest second
*
* @param column cudf::column_view of the input datetime values
* @param mr Device memory resource used to allocate device memory of the returned column.
*
* @throw cudf::logic_error if input column datatype is not TIMESTAMP
* @return cudf::column of the same datetime resolution as the input column
*/
std::unique_ptr<cudf::column> round_second(
cudf::column_view const& column,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Round to the nearest millisecond
*
* @param column cudf::column_view of the input datetime values
* @param mr Device memory resource used to allocate device memory of the returned column.
*
* @throw cudf::logic_error if input column datatype is not TIMESTAMP
* @return cudf::column of the same datetime resolution as the input column
*/
std::unique_ptr<column> round_millisecond(
column_view const& column,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Round to the nearest microsecond
*
* @param column cudf::column_view of the input datetime values
* @param mr Device memory resource used to allocate device memory of the returned column.
*
* @throw cudf::logic_error if input column datatype is not TIMESTAMP
* @return cudf::column of the same datetime resolution as the input column
*/
std::unique_ptr<column> round_microsecond(
column_view const& column,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Round to the nearest nanosecond
*
* @param column cudf::column_view of the input datetime values
* @param mr Device memory resource used to allocate device memory of the returned column.
*
* @throw cudf::logic_error if input column datatype is not TIMESTAMP
* @return cudf::column of the same datetime resolution as the input column
*/
std::unique_ptr<column> round_nanosecond(
column_view const& column,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
/** @} */ // end of group

} // namespace datetime
} // namespace cudf
14 changes: 6 additions & 8 deletions cpp/include/cudf/detail/hashing.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,17 +32,15 @@ namespace detail {
*/
std::unique_ptr<column> hash(
table_view const& input,
hash_id hash_function = hash_id::HASH_MURMUR3,
cudf::host_span<uint32_t const> initial_hash = {},
uint32_t seed = 0,
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
hash_id hash_function = hash_id::HASH_MURMUR3,
uint32_t seed = 0,
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

std::unique_ptr<column> murmur_hash3_32(
table_view const& input,
cudf::host_span<uint32_t const> initial_hash = {},
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

std::unique_ptr<column> md5_hash(
table_view const& input,
Expand Down
Loading

0 comments on commit 9590998

Please sign in to comment.