diff --git a/build.sh b/build.sh
index bee66d819b4..7cbd0fceb5a 100755
--- a/build.sh
+++ b/build.sh
@@ -300,8 +300,7 @@ if buildAll || hasArg libcudf; then
# Record build times
if [[ "$BUILD_REPORT_METRICS" == "ON" && -f "${LIB_BUILD_DIR}/.ninja_log" ]]; then
echo "Formatting build metrics"
- python ${REPODIR}/cpp/scripts/sort_ninja_log.py ${LIB_BUILD_DIR}/.ninja_log --fmt xml > ${LIB_BUILD_DIR}/ninja_log.xml
- MSG="
"
+ MSG=""
# get some sccache stats after the compile
if [[ "$BUILD_REPORT_INCL_CACHE_STATS" == "ON" && -x "$(command -v sccache)" ]]; then
COMPILE_REQUESTS=$(sccache -s | grep "Compile requests \+ [0-9]\+$" | awk '{ print $NF }')
@@ -318,7 +317,9 @@ if buildAll || hasArg libcudf; then
BMR_DIR=${RAPIDS_ARTIFACTS_DIR:-"${LIB_BUILD_DIR}"}
echo "Metrics output dir: [$BMR_DIR]"
mkdir -p ${BMR_DIR}
- python ${REPODIR}/cpp/scripts/sort_ninja_log.py ${LIB_BUILD_DIR}/.ninja_log --fmt html --msg "$MSG" > ${BMR_DIR}/ninja_log.html
+ MSG_OUTFILE="$(mktemp)"
+ echo "$MSG" > "${MSG_OUTFILE}"
+ python ${REPODIR}/cpp/scripts/sort_ninja_log.py ${LIB_BUILD_DIR}/.ninja_log --fmt html --msg "${MSG_OUTFILE}" > ${BMR_DIR}/ninja_log.html
cp ${LIB_BUILD_DIR}/.ninja_log ${BMR_DIR}/ninja.log
fi
diff --git a/ci/build_cpp.sh b/ci/build_cpp.sh
index b68c2bdbef6..bc27e7d76b0 100755
--- a/ci/build_cpp.sh
+++ b/ci/build_cpp.sh
@@ -14,29 +14,3 @@ rapids-logger "Begin cpp build"
rapids-mamba-retry mambabuild conda/recipes/libcudf
rapids-upload-conda-to-s3 cpp
-
-echo "++++++++++++++++++++++++++++++++++++++++++++"
-
-if [[ -d $RAPIDS_ARTIFACTS_DIR ]]; then
- ls -l ${RAPIDS_ARTIFACTS_DIR}
-fi
-
-echo "++++++++++++++++++++++++++++++++++++++++++++"
-
-FILE=${RAPIDS_ARTIFACTS_DIR}/ninja.log
-if [[ -f $FILE ]]; then
- echo -e "\x1B[33;1m\x1B[48;5;240m Ninja log for this build available at the following link \x1B[0m"
- UPLOAD_NAME=cpp_cuda${RAPIDS_CUDA_VERSION%%.*}_$(arch).ninja.log
- rapids-upload-to-s3 "${UPLOAD_NAME}" "${FILE}"
-fi
-
-echo "++++++++++++++++++++++++++++++++++++++++++++"
-
-FILE=${RAPIDS_ARTIFACTS_DIR}/ninja_log.html
-if [[ -f $FILE ]]; then
- echo -e "\x1B[33;1m\x1B[48;5;240m Build Metrics Report for this build available at the following link \x1B[0m"
- UPLOAD_NAME=cpp_cuda${RAPIDS_CUDA_VERSION%%.*}_$(arch).BuildMetricsReport.html
- rapids-upload-to-s3 "${UPLOAD_NAME}" "${FILE}"
-fi
-
-echo "++++++++++++++++++++++++++++++++++++++++++++"
diff --git a/ci/release/apply_wheel_modifications.sh b/ci/release/apply_wheel_modifications.sh
index 9d9758f1f15..0c55c4b9141 100755
--- a/ci/release/apply_wheel_modifications.sh
+++ b/ci/release/apply_wheel_modifications.sh
@@ -6,12 +6,6 @@
VERSION=${1}
CUDA_SUFFIX=${2}
-# __init__.py versions
-sed -i "s/__version__ = .*/__version__ = \"${VERSION}\"/g" python/cudf/cudf/__init__.py
-sed -i "s/__version__ = .*/__version__ = \"${VERSION}\"/g" python/dask_cudf/dask_cudf/__init__.py
-sed -i "s/__version__ = .*/__version__ = \"${VERSION}\"/g" python/cudf_kafka/cudf_kafka/__init__.py
-sed -i "s/__version__ = .*/__version__ = \"${VERSION}\"/g" python/custreamz/custreamz/__init__.py
-
# pyproject.toml versions
sed -i "s/^version = .*/version = \"${VERSION}\"/g" python/cudf/pyproject.toml
sed -i "s/^version = .*/version = \"${VERSION}\"/g" python/dask_cudf/pyproject.toml
diff --git a/conda/recipes/libcudf/meta.yaml b/conda/recipes/libcudf/meta.yaml
index caa807bd7ec..0b2fc71aacd 100644
--- a/conda/recipes/libcudf/meta.yaml
+++ b/conda/recipes/libcudf/meta.yaml
@@ -92,6 +92,7 @@ outputs:
- test -f $PREFIX/include/cudf/concatenate.hpp
- test -f $PREFIX/include/cudf/copying.hpp
- test -f $PREFIX/include/cudf/datetime.hpp
+ - test -f $PREFIX/include/cudf/timezone.hpp
- test -f $PREFIX/include/cudf/detail/aggregation/aggregation.hpp
- test -f $PREFIX/include/cudf/detail/aggregation/result_cache.hpp
- test -f $PREFIX/include/cudf/detail/binaryop.hpp
@@ -128,6 +129,8 @@ outputs:
- test -f $PREFIX/include/cudf/detail/stream_compaction.hpp
- test -f $PREFIX/include/cudf/detail/structs/utilities.hpp
- test -f $PREFIX/include/cudf/detail/tdigest/tdigest.hpp
+ - test -f $PREFIX/include/cudf/detail/timezone.cuh
+ - test -f $PREFIX/include/cudf/detail/timezone.hpp
- test -f $PREFIX/include/cudf/detail/transform.hpp
- test -f $PREFIX/include/cudf/detail/transpose.hpp
- test -f $PREFIX/include/cudf/detail/unary.hpp
diff --git a/conda/recipes/libcudf/post-link.sh b/conda/recipes/libcudf/post-link.sh
index 64e0b1ad305..8ae2349f791 100644
--- a/conda/recipes/libcudf/post-link.sh
+++ b/conda/recipes/libcudf/post-link.sh
@@ -1,6 +1,6 @@
#!/bin/bash
-# Copyright (c) 2022, NVIDIA CORPORATION.
+# Copyright (c) 2022-2023, NVIDIA CORPORATION.
# Only add the license notice to libcudf and not our examples / tests
if [[ "$PKG_NAME" == "libcudf" ]]; then
- cat ./nvlink.txt >> $PREFIX/.messages.txt
+ cat ./nvcomp.txt >> $PREFIX/.messages.txt
fi
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 0fcd1895972..13583378134 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -369,7 +369,7 @@ add_library(
src/io/orc/stripe_data.cu
src/io/orc/stripe_enc.cu
src/io/orc/stripe_init.cu
- src/io/orc/timezone.cpp
+ src/datetime/timezone.cpp
src/io/orc/writer_impl.cu
src/io/parquet/compact_protocol_reader.cpp
src/io/parquet/compact_protocol_writer.cpp
@@ -890,31 +890,20 @@ install(
EXPORT cudf-exports
)
-install(DIRECTORY ${CUDF_SOURCE_DIR}/include/cudf ${CUDF_SOURCE_DIR}/include/cudf_test
- ${CUDF_SOURCE_DIR}/include/nvtext DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
-)
-
-if(CUDF_BUILD_TESTUTIL)
+set(_components_export_string)
+if(TARGET cudftestutil)
install(
TARGETS cudftest_default_stream cudftestutil
DESTINATION ${lib_dir}
EXPORT cudf-testing-exports
)
-
- install(
- EXPORT cudf-testing-exports
- FILE cudf-testing-targets.cmake
- NAMESPACE cudf::
- DESTINATION "${lib_dir}/cmake/cudf"
- )
-
- include("${rapids-cmake-dir}/export/write_dependencies.cmake")
- rapids_export_write_dependencies(
- INSTALL cudf-testing-exports
- "${PROJECT_BINARY_DIR}/rapids-cmake/cudf/export/cudf-testing-dependencies.cmake"
- )
+ set(_components_export_string COMPONENTS testing COMPONENTS_EXPORT_SET cudf-testing-exports)
endif()
+install(DIRECTORY ${CUDF_SOURCE_DIR}/include/cudf ${CUDF_SOURCE_DIR}/include/cudf_test
+ ${CUDF_SOURCE_DIR}/include/nvtext DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
+)
+
if(CUDF_BUILD_STREAMS_TEST_UTIL)
install(TARGETS cudf_identify_stream_usage_mode_cudf DESTINATION ${lib_dir})
install(TARGETS cudf_identify_stream_usage_mode_testing DESTINATION ${lib_dir})
@@ -976,12 +965,6 @@ string(
[=[
if(testing IN_LIST cudf_FIND_COMPONENTS)
enable_language(CUDA)
- if(EXISTS "${CMAKE_CURRENT_LIST_DIR}/cudf-testing-dependencies.cmake")
- include("${CMAKE_CURRENT_LIST_DIR}/cudf-testing-dependencies.cmake")
- endif()
- if(EXISTS "${CMAKE_CURRENT_LIST_DIR}/cudf-testing-targets.cmake")
- include("${CMAKE_CURRENT_LIST_DIR}/cudf-testing-targets.cmake")
- endif()
endif()
]=]
)
@@ -989,8 +972,8 @@ string(APPEND install_code_string "${common_code_string}")
rapids_export(
INSTALL cudf
- EXPORT_SET cudf-exports
- GLOBAL_TARGETS cudf
+ EXPORT_SET cudf-exports ${_components_export_string}
+ GLOBAL_TARGETS cudf cudftestutil
NAMESPACE cudf::
DOCUMENTATION doc_string
FINAL_CODE_BLOCK install_code_string
@@ -1013,23 +996,13 @@ string(APPEND build_code_string "${common_code_string}")
rapids_export(
BUILD cudf
- EXPORT_SET cudf-exports
- GLOBAL_TARGETS cudf
+ EXPORT_SET cudf-exports ${_components_export_string}
+ GLOBAL_TARGETS cudf cudftestutil
NAMESPACE cudf::
DOCUMENTATION doc_string
FINAL_CODE_BLOCK build_code_string
)
-if(CUDF_BUILD_TESTUTIL)
- export(
- EXPORT cudf-testing-exports
- FILE ${CUDF_BINARY_DIR}/cudf-testing-targets.cmake
- NAMESPACE cudf::
- )
- rapids_export_write_dependencies(
- BUILD cudf-testing-exports "${CUDF_BINARY_DIR}/cudf-testing-dependencies.cmake"
- )
-endif()
# ##################################################################################################
# * make documentation ----------------------------------------------------------------------------
diff --git a/cpp/benchmarks/common/generate_input.cu b/cpp/benchmarks/common/generate_input.cu
index 545028260b8..762e9640d12 100644
--- a/cpp/benchmarks/common/generate_input.cu
+++ b/cpp/benchmarks/common/generate_input.cu
@@ -430,8 +430,12 @@ std::unique_ptr create_random_column(data_profile const& profile,
null_mask.begin());
}
- auto [result_bitmask, null_count] = cudf::detail::valid_if(
- null_mask.begin(), null_mask.end(), thrust::identity{}, cudf::get_default_stream());
+ auto [result_bitmask, null_count] =
+ cudf::detail::valid_if(null_mask.begin(),
+ null_mask.end(),
+ thrust::identity{},
+ cudf::get_default_stream(),
+ rmm::mr::get_current_device_resource());
return std::make_unique(
dtype,
@@ -509,8 +513,12 @@ std::unique_ptr create_random_utf8_string_column(data_profile cons
thrust::make_zip_iterator(offsets.begin(), offsets.begin() + 1),
num_rows,
string_generator{chars.data(), engine});
- auto [result_bitmask, null_count] = cudf::detail::valid_if(
- null_mask.begin(), null_mask.end() - 1, thrust::identity{}, cudf::get_default_stream());
+ auto [result_bitmask, null_count] =
+ cudf::detail::valid_if(null_mask.begin(),
+ null_mask.end() - 1,
+ thrust::identity{},
+ cudf::get_default_stream(),
+ rmm::mr::get_current_device_resource());
return cudf::make_strings_column(
num_rows,
std::move(offsets),
@@ -628,8 +636,11 @@ std::unique_ptr create_random_column(data_profi
auto [null_mask, null_count] = [&]() {
if (profile.get_null_probability().has_value()) {
auto valids = valid_dist(engine, num_rows);
- return cudf::detail::valid_if(
- valids.begin(), valids.end(), thrust::identity{}, cudf::get_default_stream());
+ return cudf::detail::valid_if(valids.begin(),
+ valids.end(),
+ thrust::identity{},
+ cudf::get_default_stream(),
+ rmm::mr::get_current_device_resource());
}
return std::pair{};
}();
@@ -712,9 +723,12 @@ std::unique_ptr create_random_column(data_profile
auto offsets_column = std::make_unique(
cudf::data_type{cudf::type_id::INT32}, num_rows + 1, offsets.release());
- auto [null_mask, null_count] = cudf::detail::valid_if(
- valids.begin(), valids.end(), thrust::identity{}, cudf::get_default_stream());
- list_column = cudf::make_lists_column(
+ auto [null_mask, null_count] = cudf::detail::valid_if(valids.begin(),
+ valids.end(),
+ thrust::identity{},
+ cudf::get_default_stream(),
+ rmm::mr::get_current_device_resource());
+ list_column = cudf::make_lists_column(
num_rows,
std::move(offsets_column),
std::move(current_child_column),
@@ -840,7 +854,8 @@ std::pair create_random_null_mask(
return cudf::detail::valid_if(thrust::make_counting_iterator(0),
thrust::make_counting_iterator(size),
bool_generator{seed, 1.0 - *null_probability},
- cudf::get_default_stream());
+ cudf::get_default_stream(),
+ rmm::mr::get_current_device_resource());
}
}
diff --git a/cpp/benchmarks/iterator/iterator.cu b/cpp/benchmarks/iterator/iterator.cu
index 73060200d00..1b1cf9b7e9d 100644
--- a/cpp/benchmarks/iterator/iterator.cu
+++ b/cpp/benchmarks/iterator/iterator.cu
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -140,8 +140,8 @@ void BM_iterator(benchmark::State& state)
cudf::column_view hasnull_F = wrap_hasnull_F;
// Initialize dev_result to false
- auto dev_result =
- cudf::detail::make_zeroed_device_uvector_sync(1, cudf::get_default_stream());
+ auto dev_result = cudf::detail::make_zeroed_device_uvector_sync(
+ 1, cudf::get_default_stream(), rmm::mr::get_current_device_resource());
for (auto _ : state) {
cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0
if (cub_or_thrust) {
@@ -210,7 +210,7 @@ void BM_pair_iterator(benchmark::State& state)
// Initialize dev_result to false
auto dev_result = cudf::detail::make_zeroed_device_uvector_sync>(
- 1, cudf::get_default_stream());
+ 1, cudf::get_default_stream(), rmm::mr::get_current_device_resource());
for (auto _ : state) {
cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0
if (cub_or_thrust) {
diff --git a/cpp/benchmarks/join/join_common.hpp b/cpp/benchmarks/join/join_common.hpp
index e37a4ca1193..70036a95377 100644
--- a/cpp/benchmarks/join/join_common.hpp
+++ b/cpp/benchmarks/join/join_common.hpp
@@ -104,8 +104,11 @@ void BM_join(state_type& state, Join JoinFunc)
// roughly 75% nulls
auto validity =
thrust::make_transform_iterator(thrust::make_counting_iterator(0), null75_generator{});
- return cudf::detail::valid_if(
- validity, validity + size, thrust::identity{}, cudf::get_default_stream())
+ return cudf::detail::valid_if(validity,
+ validity + size,
+ thrust::identity{},
+ cudf::get_default_stream(),
+ rmm::mr::get_current_device_resource())
.first;
};
diff --git a/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md b/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md
index a88f621095c..91c3dccfdc6 100644
--- a/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md
+++ b/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md
@@ -121,8 +121,8 @@ recommend watching Sean Parent's [C++ Seasoning talk](https://www.youtube.com/wa
and we try to follow his rules: "No raw loops. No raw pointers. No raw synchronization primitives."
* Prefer algorithms from STL and Thrust to raw loops.
- * Prefer libcudf and RMM [owning data structures and views](#libcudf-data-structures) to raw pointers
- and raw memory allocation.
+ * Prefer libcudf and RMM [owning data structures and views](#libcudf-data-structures) to raw
+ pointers and raw memory allocation.
* libcudf doesn't have a lot of CPU-thread concurrency, but there is some. And currently libcudf
does use raw synchronization primitives. So we should revisit Parent's third rule and improve
here.
@@ -146,8 +146,8 @@ The following guidelines apply to organizing `#include` lines.
* Separate groups by a blank line.
* Order the groups from "nearest" to "farthest". In other words, local includes, then includes
from other RAPIDS libraries, then includes from related libraries, like ``, then
- includes from dependencies installed with cuDF, and then standard headers (for example ``,
- ``).
+ includes from dependencies installed with cuDF, and then standard headers (for example
+ ``, ``).
* Use `<>` instead of `""` unless the header is in the same directory as the source file.
* Tools like `clangd` often auto-insert includes when they can, but they usually get the grouping
and brackets wrong.
@@ -271,10 +271,12 @@ A *mutable*, non-owning view of a table.
## cudf::size_type
-The `cudf::size_type` is the type used for the number of elements in a column, offsets to elements within a column, indices to address specific elements, segments for subsets of column elements, etc.
+The `cudf::size_type` is the type used for the number of elements in a column, offsets to elements
+within a column, indices to address specific elements, segments for subsets of column elements, etc.
It is equivalent to a signed, 32-bit integer type and therefore has a maximum value of 2147483647.
-Some APIs also accept negative index values and those functions support a minimum value of -2147483648.
-This fundamental type also influences output values not just for column size limits but for counting elements as well.
+Some APIs also accept negative index values and those functions support a minimum value of
+-2147483648. This fundamental type also influences output values not just for column size limits
+but for counting elements as well.
## Spans
@@ -343,8 +345,8 @@ auto s1 = static_cast(s.get());
```
### Passing to device
-Each scalar type, except `list_scalar`, has a corresponding non-owning device view class which allows
-access to the value and its validity from the device. This can be obtained using the function
+Each scalar type, except `list_scalar`, has a corresponding non-owning device view class which
+allows access to the value and its validity from the device. This can be obtained using the function
`get_scalar_device_view(ScalarType s)`. Note that a device view is not provided for a base scalar
object, only for the derived typed scalar class objects.
@@ -355,68 +357,84 @@ data, a specialized device view for list columns can be constructed via
# libcudf Policies and Design Principles
-`libcudf` is designed to provide thread-safe, single-GPU accelerated algorithm primitives for solving a wide variety of problems that arise in data science.
-APIs are written to execute on the default GPU, which can be controlled by the caller through standard CUDA device APIs or environment variables like `CUDA_VISIBLE_DEVICES`.
-Our goal is to enable diverse use cases like Spark or Pandas to benefit from the performance of GPUs, and libcudf relies on these higher-level layers like Spark or Dask to orchestrate multi-GPU tasks.
+`libcudf` is designed to provide thread-safe, single-GPU accelerated algorithm primitives for
+solving a wide variety of problems that arise in data science. APIs are written to execute on the
+default GPU, which can be controlled by the caller through standard CUDA device APIs or environment
+variables like `CUDA_VISIBLE_DEVICES`. Our goal is to enable diverse use cases like Spark or Pandas
+to benefit from the performance of GPUs, and libcudf relies on these higher-level layers like Spark
+or Dask to orchestrate multi-GPU tasks.
-To best satisfy these use-cases, libcudf prioritizes performance and flexibility, which sometimes may come at the cost of convenience.
-While we welcome users to use libcudf directly, we design with the expectation that most users will be consuming libcudf through higher-level layers like Spark or cuDF Python that handle some of details that direct users of libcudf must handle on their own.
-We document these policies and the reasons behind them here.
+To best satisfy these use-cases, libcudf prioritizes performance and flexibility, which sometimes
+may come at the cost of convenience. While we welcome users to use libcudf directly, we design with
+the expectation that most users will be consuming libcudf through higher-level layers like Spark or
+cuDF Python that handle some of the details that direct users of libcudf must handle on their own. We
+document these policies and the reasons behind them here.
## libcudf does not introspect data
libcudf APIs generally do not perform deep introspection and validation of input data.
There are numerous reasons for this:
1. It violates the single responsibility principle: validation is separate from execution.
-2. Since libcudf data structures store data on the GPU, any validation incurs _at minimum_ the overhead of a kernel launch, and may in general be prohibitively expensive.
+2. Since libcudf data structures store data on the GPU, any validation incurs _at minimum_ the
+ overhead of a kernel launch, and may in general be prohibitively expensive.
3. API promises around data introspection often significantly complicate implementation.
Users are therefore responsible for passing valid data into such APIs.
_Note that this policy does not mean that libcudf performs no validation whatsoever_.
libcudf APIs should still perform any validation that does not require introspection.
-To give some idea of what should or should not be validated, here are (non-exhaustive) lists of examples.
+To give some idea of what should or should not be validated, here are (non-exhaustive) lists of
+examples.
**Things that libcudf should validate**:
- Input column/table sizes or data types
**Things that libcudf should not validate**:
- Integer overflow
-- Ensuring that outputs will not exceed the [2GB size](#cudfsize_type) limit for a given set of inputs
+- Ensuring that outputs will not exceed the [2GB size](#cudfsize_type) limit for a given set of
+ inputs
## libcudf expects nested types to have sanitized null masks
-Various libcudf APIs accepting columns of nested data types (such as `LIST` or `STRUCT`) may assume that these columns have been sanitized.
-In this context, sanitization refers to ensuring that the null elements in a column with a nested dtype are compatible with the elements of nested columns.
+Various libcudf APIs accepting columns of nested data types (such as `LIST` or `STRUCT`) may assume
+that these columns have been sanitized. In this context, sanitization refers to ensuring that the
+null elements in a column with a nested dtype are compatible with the elements of nested columns.
Specifically:
-- Null elements of list columns should also be empty. The starting offset of a null element should be equal to the ending offset.
+- Null elements of list columns should also be empty. The starting offset of a null element should
+ be equal to the ending offset.
- Null elements of struct columns should also be null elements in the underlying structs.
-- For compound columns, nulls should only be present at the level of the parent column. Child columns should not contain nulls.
+- For compound columns, nulls should only be present at the level of the parent column. Child
+ columns should not contain nulls.
- Slice operations on nested columns do not propagate offsets to child columns.
-libcudf APIs _should_ promise to never return "dirty" columns, i.e. columns containing unsanitized data.
-Therefore, the only problem is if users construct input columns that are not correctly sanitized and then pass those into libcudf APIs.
+libcudf APIs _should_ promise to never return "dirty" columns, i.e. columns containing unsanitized
+data. Therefore, the only problem is if users construct input columns that are not correctly
+sanitized and then pass those into libcudf APIs.
## Treat libcudf APIs as if they were asynchronous
libcudf APIs called on the host do not guarantee that the stream is synchronized before returning.
-Work in libcudf occurs on `cudf::get_default_stream().value`, which defaults to the CUDA default stream (stream 0).
-Note that the stream 0 behavior differs if [per-thread default stream is enabled](https://docs.nvidia.com/cuda/cuda-runtime-api/stream-sync-behavior.html) via `CUDF_USE_PER_THREAD_DEFAULT_STREAM`.
-Any data provided to or returned by libcudf that uses a separate non-blocking stream requires synchronization with the default libcudf stream to ensure stream safety.
+Work in libcudf occurs on `cudf::get_default_stream().value`, which defaults to the CUDA default
+stream (stream 0). Note that the stream 0 behavior differs if [per-thread default stream is
+enabled](https://docs.nvidia.com/cuda/cuda-runtime-api/stream-sync-behavior.html) via
+`CUDF_USE_PER_THREAD_DEFAULT_STREAM`. Any data provided to or returned by libcudf that uses a
+separate non-blocking stream requires synchronization with the default libcudf stream to ensure
+stream safety.
## libcudf generally does not make ordering guarantees
-Functions like merge or groupby in libcudf make no guarantees about the order of entries in the output.
-Promising deterministic ordering is not, in general, conducive to fast parallel algorithms.
+Functions like merge or groupby in libcudf make no guarantees about the order of entries in the
+output. Promising deterministic ordering is not, in general, conducive to fast parallel algorithms.
Calling code is responsible for performing sorts after the fact if sorted outputs are needed.
## libcudf does not promise specific exception messages
-libcudf documents the exceptions that will be thrown by an API for different kinds of invalid inputs.
-The types of those exceptions (e.g. `cudf::logic_error`) are part of the public API.
-However, the explanatory string returned by the `what` method of those exceptions is not part of the API and is subject to change.
-Calling code should not rely on the contents of libcudf error messages to determine the nature of the error.
-For information on the types of exceptions that libcudf throws under different circumstances, see the [section on error handling](#errors).
+libcudf documents the exceptions that will be thrown by an API for different kinds of invalid
+inputs. The types of those exceptions (e.g. `cudf::logic_error`) are part of the public API.
+However, the explanatory string returned by the `what` method of those exceptions is not part of the
+API and is subject to change. Calling code should not rely on the contents of libcudf error
+messages to determine the nature of the error. For information on the types of exceptions that
+libcudf throws under different circumstances, see the [section on error handling](#errors).
# libcudf API and Implementation
@@ -475,14 +493,6 @@ asynchrony if and when we add an asynchronous API to libcudf.
**Note:** `cudaDeviceSynchronize()` should *never* be used.
This limits the ability to do any multi-stream/multi-threaded work with libcudf APIs.
- ### NVTX Ranges
-
-In order to aid in performance optimization and debugging, all compute intensive libcudf functions
-should have a corresponding NVTX range. In libcudf, we have a convenience macro `CUDF_FUNC_RANGE()`
-that will automatically annotate the lifetime of the enclosing function and use the function's name
-as the name of the NVTX range. For more information about NVTX, see
-[here](https://github.com/NVIDIA/NVTX/tree/dev/c).
-
### Stream Creation
There may be times in implementing libcudf features where it would be advantageous to use streams
@@ -494,8 +504,8 @@ should avoid creating streams (even if it is slightly less efficient). It is a g
## Memory Allocation
-Device [memory resources](#rmmdevice_memory_resource) are used in libcudf to abstract and control how device
-memory is allocated.
+Device [memory resources](#rmmdevice_memory_resource) are used in libcudf to abstract and control
+how device memory is allocated.
### Output Memory
@@ -515,6 +525,12 @@ std::unique_ptr returns_output_memory(
void does_not_allocate_output_memory(...);
```
+This rule automatically applies to all detail APIs that allocate memory. Any detail API may be
+called by any public API, and therefore could be allocating memory that is returned to the user.
+To support such use cases, all detail APIs allocating memory resources should accept an `mr`
+parameter. Callers are responsible for either passing through a provided `mr` or
+`rmm::mr::get_current_device_resource()` as needed.
+
### Temporary Memory
Not all memory allocated within a libcudf API is returned to the caller. Often algorithms must
@@ -535,7 +551,7 @@ rmm::device_buffer some_function(
### Memory Management
libcudf code generally eschews raw pointers and direct memory allocation. Use RMM classes built to
-use `device_memory_resource`(*)s for device memory allocation with automated lifetime management.
+use `device_memory_resource`s for device memory allocation with automated lifetime management.
#### rmm::device_buffer
Allocates a specified number of bytes of untyped, uninitialized device memory using a
@@ -617,6 +633,32 @@ rmm::mr::device_memory_resource * mr = new my_custom_resource{...};
rmm::device_uvector v2{100, s, mr};
```
+## Default Parameters
+
+While public libcudf APIs are free to include default function parameters, detail functions should
+not. Default memory resource parameters make it easy for developers to accidentally allocate memory
+using the incorrect resource. Avoiding default memory resources forces developers to consider each
+memory allocation carefully.
+
+While streams are not currently exposed in libcudf's API, we plan to do so eventually. As a result,
+the same reasons for memory resources also apply to streams. Public APIs default to using
+`cudf::get_default_stream()`. However, including the same default in detail APIs opens the door for
+developers to forget to pass in a user-provided stream if one is passed to a public API. Forcing
+every detail API call to explicitly pass a stream is intended to prevent such mistakes.
+
+The memory resources (and eventually, the stream) are the final parameters for essentially all
+public APIs. For API consistency, the same is true throughout libcudf's internals. Therefore, a
+consequence of not allowing default streams or MRs is that no parameters in detail APIs may have
+defaults.
+
+## NVTX Ranges
+
+In order to aid in performance optimization and debugging, all compute intensive libcudf functions
+should have a corresponding NVTX range. libcudf has a convenience macro `CUDF_FUNC_RANGE()` that
+automatically annotates the lifetime of the enclosing function and uses the function's name as
+the name of the NVTX range. For more information about NVTX, see
+[here](https://github.com/NVIDIA/NVTX/tree/dev/c).
+
## Input/Output Style
The preferred style for how inputs are passed in and outputs are returned is the following:
@@ -886,9 +928,9 @@ CUDF_FAIL("This code path should not be reached.");
### CUDA Error Checking
-Use the `CUDF_CUDA_TRY` macro to check for the successful completion of CUDA runtime API functions. This
-macro throws a `cudf::cuda_error` exception if the CUDA API return value is not `cudaSuccess`. The
-thrown exception includes a description of the CUDA error code in its `what()` message.
+Use the `CUDF_CUDA_TRY` macro to check for the successful completion of CUDA runtime API functions.
+This macro throws a `cudf::cuda_error` exception if the CUDA API return value is not `cudaSuccess`.
+The thrown exception includes a description of the CUDA error code in its `what()` message.
Example:
@@ -1111,8 +1153,8 @@ For list columns, the parent column's type is `LIST` and contains no data, but i
the number of lists in the column, and its null mask represents the validity of each list element.
The parent has two children.
-1. A non-nullable column of [`size_type`](#cudfsize_type) elements that indicates the offset to the beginning of each list
- in a dense column of elements.
+1. A non-nullable column of [`size_type`](#cudfsize_type) elements that indicates the offset to the
+ beginning of each list in a dense column of elements.
2. A column containing the actual data and optional null mask for all elements of all the lists
packed together.
@@ -1271,9 +1313,9 @@ libcudf provides view types for nested column types as well as for the data elem
`cudf::strings_column_view` is a view of a strings column, like `cudf::column_view` is a view of
any `cudf::column`. `cudf::string_view` is a view of a single string, and therefore
`cudf::string_view` is the data type of a `cudf::column` of type `STRING` just like `int32_t` is the
-data type for a `cudf::column` of type [`size_type`](#cudfsize_type). As its name implies, this is a read-only object
-instance that points to device memory inside the strings column. It's lifespan is the same (or less)
-as the column it views.
+data type for a `cudf::column` of type [`size_type`](#cudfsize_type). As its name implies, this is a
+read-only object instance that points to device memory inside the strings column. Its lifespan is
+the same (or less) as the column it views.
Use the `column_device_view::element` method to access an individual row element. Like any other
column, do not call `element()` on a row that is null.
diff --git a/cpp/include/cudf/detail/gather.cuh b/cpp/include/cudf/detail/gather.cuh
index ac2865c05c5..5460a0e5a76 100644
--- a/cpp/include/cudf/detail/gather.cuh
+++ b/cpp/include/cudf/detail/gather.cuh
@@ -583,10 +583,12 @@ void gather_bitmask(table_view const& source,
std::transform(target.begin(), target.end(), target_masks.begin(), [](auto const& col) {
return col->mutable_view().null_mask();
});
- auto d_target_masks = make_device_uvector_async(target_masks, stream);
+ auto d_target_masks =
+ make_device_uvector_async(target_masks, stream, rmm::mr::get_current_device_resource());
auto const device_source = table_device_view::create(source, stream);
- auto d_valid_counts = make_zeroed_device_uvector_async(target.size(), stream);
+ auto d_valid_counts = make_zeroed_device_uvector_async(
+ target.size(), stream, rmm::mr::get_current_device_resource());
// Dispatch operation enum to get implementation
auto const impl = [op]() {
diff --git a/cpp/include/cudf/detail/null_mask.cuh b/cpp/include/cudf/detail/null_mask.cuh
index cb9ced6fc28..ce2619d767e 100644
--- a/cpp/include/cudf/detail/null_mask.cuh
+++ b/cpp/include/cudf/detail/null_mask.cuh
@@ -426,7 +426,8 @@ std::vector segmented_count_bits(bitmask_type const* bitmask,
// Construct a contiguous host buffer of indices and copy to device.
auto const h_indices = std::vector(indices_begin, indices_end);
- auto const d_indices = make_device_uvector_async(h_indices, stream);
+ auto const d_indices =
+ make_device_uvector_async(h_indices, stream, rmm::mr::get_current_device_resource());
// Compute the bit counts over each segment.
auto first_bit_indices_begin = thrust::make_transform_iterator(
diff --git a/cpp/include/cudf/detail/reshape.hpp b/cpp/include/cudf/detail/reshape.hpp
index ccffcbc61df..5ab53690a23 100644
--- a/cpp/include/cudf/detail/reshape.hpp
+++ b/cpp/include/cudf/detail/reshape.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -30,21 +30,19 @@ namespace detail {
*
* @param stream CUDA stream used for device memory operations and kernel launches
*/
-std::unique_ptr tile(
- table_view const& input,
- size_type count,
- rmm::cuda_stream_view,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr tile(table_view const& input,
+ size_type count,
+ rmm::cuda_stream_view,
+ rmm::mr::device_memory_resource* mr);
/**
* @copydoc cudf::interleave_columns
*
* @param stream CUDA stream used for device memory operations and kernel launches
*/
-std::unique_ptr interleave_columns(
- table_view const& input,
- rmm::cuda_stream_view,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr interleave_columns(table_view const& input,
+ rmm::cuda_stream_view,
+ rmm::mr::device_memory_resource* mr);
} // namespace detail
} // namespace cudf
diff --git a/cpp/include/cudf/detail/round.hpp b/cpp/include/cudf/detail/round.hpp
index 1e5612919f4..cdfc7caef37 100644
--- a/cpp/include/cudf/detail/round.hpp
+++ b/cpp/include/cudf/detail/round.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -31,12 +31,11 @@ namespace detail {
*
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
-std::unique_ptr round(
- column_view const& input,
- int32_t decimal_places,
- rounding_method method,
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr round(column_view const& input,
+ int32_t decimal_places,
+ rounding_method method,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr);
} // namespace detail
} // namespace cudf
diff --git a/cpp/include/cudf/detail/scatter.cuh b/cpp/include/cudf/detail/scatter.cuh
index c8b17e22df2..dbf7bfa9527 100644
--- a/cpp/include/cudf/detail/scatter.cuh
+++ b/cpp/include/cudf/detail/scatter.cuh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -386,13 +386,12 @@ struct column_scatterer_impl {
* @return Result of scattering values from source to target
*/
template
-std::unique_ptr scatter(
- table_view const& source,
- MapIterator scatter_map_begin,
- MapIterator scatter_map_end,
- table_view const& target,
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
+std::unique_ptr scatter(table_view const& source,
+ MapIterator scatter_map_begin,
+ MapIterator scatter_map_end,
+ table_view const& target,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
diff --git a/cpp/include/cudf/detail/scatter.hpp b/cpp/include/cudf/detail/scatter.hpp
index 7c4b04537ea..39ae4fe1944 100644
--- a/cpp/include/cudf/detail/scatter.hpp
+++ b/cpp/include/cudf/detail/scatter.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -59,12 +59,11 @@ namespace detail {
* @param mr Device memory resource used to allocate the returned table's device memory
* @return Result of scattering values from source to target
*/
-std::unique_ptr scatter(
- table_view const& source,
- column_view const& scatter_map,
- table_view const& target,
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr scatter(table_view const& source,
+ column_view const& scatter_map,
+ table_view const& target,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr);
/**
* @copydoc cudf::detail::scatter(table_view const&,column_view const&,table_view
@@ -72,12 +71,11 @@ std::unique_ptr scatter(
*
* @throws cudf::logic_error if `scatter_map` span size is larger than max of `size_type`.
*/
-std::unique_ptr scatter(
- table_view const& source,
- device_span const scatter_map,
- table_view const& target,
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr scatter(table_view const& source,
+ device_span const scatter_map,
+ table_view const& target,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr);
/**
* @brief Scatters a row of scalar values into a copy of the target table
@@ -108,12 +106,11 @@ std::unique_ptr scatter(
* @param mr Device memory resource used to allocate the returned table's device memory
* @return Result of scattering values from source to target
*/
-std::unique_ptr scatter(
- std::vector> const& source,
- column_view const& indices,
- table_view const& target,
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr scatter(std::vector> const& source,
+ column_view const& indices,
+ table_view const& target,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr);
/**
* @copydoc cudf::boolean_mask_scatter(
@@ -123,12 +120,11 @@ std::unique_ptr scatter(
*
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
-std::unique_ptr boolean_mask_scatter(
- table_view const& source,
- table_view const& target,
- column_view const& boolean_mask,
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr boolean_mask_scatter(table_view const& source,
+ table_view const& target,
+ column_view const& boolean_mask,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr);
/**
* @copydoc cudf::boolean_mask_scatter(
@@ -144,7 +140,7 @@ std::unique_ptr boolean_mask_scatter(
table_view const& target,
column_view const& boolean_mask,
rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+ rmm::mr::device_memory_resource* mr);
} // namespace detail
} // namespace cudf
diff --git a/cpp/include/cudf/detail/search.hpp b/cpp/include/cudf/detail/search.hpp
index 56d41fd635c..4c4ad7834f4 100644
--- a/cpp/include/cudf/detail/search.hpp
+++ b/cpp/include/cudf/detail/search.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -89,12 +89,11 @@ std::unique_ptr contains(column_view const& haystack,
* @param mr Device memory resource used to allocate the returned vector
* @return A vector of bools indicating if each row in `needles` has matching rows in `haystack`
*/
-rmm::device_uvector contains(
- table_view const& haystack,
- table_view const& needles,
- null_equality compare_nulls,
- nan_equality compare_nans,
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+rmm::device_uvector contains(table_view const& haystack,
+ table_view const& needles,
+ null_equality compare_nulls,
+ nan_equality compare_nans,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr);
} // namespace cudf::detail
diff --git a/cpp/include/cudf/detail/sequence.hpp b/cpp/include/cudf/detail/sequence.hpp
index 4a9bf5c74e1..3c3d1d0ed9e 100644
--- a/cpp/include/cudf/detail/sequence.hpp
+++ b/cpp/include/cudf/detail/sequence.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -32,12 +32,11 @@ namespace detail {
*
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
-std::unique_ptr sequence(
- size_type size,
- scalar const& init,
- scalar const& step,
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr sequence(size_type size,
+ scalar const& init,
+ scalar const& step,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr);
/**
* @copydoc cudf::sequence(size_type size, scalar const& init,
@@ -46,11 +45,10 @@ std::unique_ptr sequence(
*
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
-std::unique_ptr sequence(
- size_type size,
- scalar const& init,
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr sequence(size_type size,
+ scalar const& init,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr);
/**
* @copydoc cudf::calendrical_month_sequence(size_type size,
@@ -60,12 +58,11 @@ std::unique_ptr sequence(
*
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
-std::unique_ptr calendrical_month_sequence(
- size_type size,
- scalar const& init,
- size_type months,
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr calendrical_month_sequence(size_type size,
+ scalar const& init,
+ size_type months,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr);
} // namespace detail
} // namespace cudf
diff --git a/cpp/include/cudf/detail/timezone.cuh b/cpp/include/cudf/detail/timezone.cuh
new file mode 100644
index 00000000000..830ee1a7fa6
--- /dev/null
+++ b/cpp/include/cudf/detail/timezone.cuh
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+namespace cudf::detail {
+
+/**
+ * @brief Returns the UT offset for a given date and given timezone table.
+ *
+ * @param tz_table Timezone transition table; column 0 holds transition times and column 1 holds the
+ * corresponding UT offsets. The trailing `solar_cycle_entry_count` entries of each are used for all
+ * times beyond the range covered by the TZif file
+ * @param ts Timestamp (seconds since epoch) to look up
+ *
+ * @return offset from UT, in seconds
+ */
+inline __device__ duration_s get_ut_offset(table_device_view tz_table, timestamp_s ts)
+{
+ if (tz_table.num_rows() == 0) { return duration_s{0}; }
+
+ cudf::device_span transition_times(tz_table.column(0).head(),
+ static_cast(tz_table.num_rows()));
+
+ auto const ts_ttime_it = [&]() {
+ auto last_less_equal = [](auto begin, auto end, auto value) {
+ auto const first_larger = thrust::upper_bound(thrust::seq, begin, end, value);
+ // Return start of the range if all elements are larger than the value
+ if (first_larger == begin) return begin;
+ // Element before the first larger element is the last one less or equal
+ return first_larger - 1;
+ };
+
+ auto const file_entry_end =
+ transition_times.begin() + (transition_times.size() - solar_cycle_entry_count);
+
+ if (ts <= *(file_entry_end - 1)) {
+ // Search the file entries if the timestamp is in range
+ return last_less_equal(transition_times.begin(), file_entry_end, ts);
+ } else {
+ auto project_to_cycle = [](timestamp_s ts) {
+ // Years divisible by four are leap years
+ // Exceptions are years divisible by 100, but not divisible by 400
+ static constexpr int32_t num_leap_years_in_cycle =
+ solar_cycle_years / 4 - (solar_cycle_years / 100 - solar_cycle_years / 400);
+ static constexpr duration_s cycle_s = cuda::std::chrono::duration_cast(
+ duration_D{365 * solar_cycle_years + num_leap_years_in_cycle});
+ return timestamp_s{(ts.time_since_epoch() + cycle_s) % cycle_s};
+ };
+ // Search the 400-year cycle if outside of the file entries range
+ return last_less_equal(file_entry_end, transition_times.end(), project_to_cycle(ts));
+ }
+ }();
+
+ return tz_table.column(1).element(ts_ttime_it - transition_times.begin());
+}
+
+} // namespace cudf::detail
diff --git a/cpp/include/cudf/detail/timezone.hpp b/cpp/include/cudf/detail/timezone.hpp
new file mode 100644
index 00000000000..f7f97c0a7c2
--- /dev/null
+++ b/cpp/include/cudf/detail/timezone.hpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include
+
+#include
+
+namespace cudf::detail {
+
+/**
+ * @copydoc cudf::make_timezone_transition_table(std::optional, std::string_view,
+ * rmm::mr::device_memory_resource*)
+ *
+ * @param stream CUDA stream used for device memory operations and kernel launches.
+ */
+std::unique_ptr make_timezone_transition_table(
+ std::optional tzif_dir,
+ std::string_view timezone_name,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+
+} // namespace cudf::detail
diff --git a/cpp/include/cudf/detail/transform.hpp b/cpp/include/cudf/detail/transform.hpp
index 8e19ebb8da7..5b64f61f11a 100644
--- a/cpp/include/cudf/detail/transform.hpp
+++ b/cpp/include/cudf/detail/transform.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -29,24 +29,22 @@ namespace detail {
*
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
-std::unique_ptr transform(
- column_view const& input,
- std::string const& unary_udf,
- data_type output_type,
- bool is_ptx,
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr transform(column_view const& input,
+ std::string const& unary_udf,
+ data_type output_type,
+ bool is_ptx,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr);
/**
* @copydoc cudf::compute_column
*
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
-std::unique_ptr compute_column(
- table_view const table,
- ast::operation const& expr,
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr compute_column(table_view const table,
+ ast::operation const& expr,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr);
/**
* @copydoc cudf::nans_to_nulls
@@ -54,9 +52,7 @@ std::unique_ptr compute_column(
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
std::pair, size_type> nans_to_nulls(
- column_view const& input,
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+ column_view const& input, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr);
/**
* @copydoc cudf::bools_to_mask
@@ -64,9 +60,7 @@ std::pair, size_type> nans_to_nulls(
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
std::pair, cudf::size_type> bools_to_mask(
- column_view const& input,
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+ column_view const& input, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr);
/**
* @copydoc cudf::encode
@@ -74,42 +68,37 @@ std::pair, cudf::size_type> bools_to_mask(
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
std::pair, std::unique_ptr> encode(
- cudf::table_view const& input,
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+ cudf::table_view const& input, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr);
/**
* @copydoc cudf::one_hot_encode
*
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
-std::pair, table_view> one_hot_encode(
- column_view const& input,
- column_view const& categories,
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::pair, table_view> one_hot_encode(column_view const& input,
+ column_view const& categories,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr);
/**
* @copydoc cudf::mask_to_bools
*
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
-std::unique_ptr mask_to_bools(
- bitmask_type const* null_mask,
- size_type begin_bit,
- size_type end_bit,
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr mask_to_bools(bitmask_type const* null_mask,
+ size_type begin_bit,
+ size_type end_bit,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr);
/**
* @copydoc cudf::row_bit_count
*
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
-std::unique_ptr row_bit_count(
- table_view const& t,
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr row_bit_count(table_view const& t,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr);
} // namespace detail
} // namespace cudf
diff --git a/cpp/include/cudf/detail/transpose.hpp b/cpp/include/cudf/detail/transpose.hpp
index 0470d625edc..d0be51860b2 100644
--- a/cpp/include/cudf/detail/transpose.hpp
+++ b/cpp/include/cudf/detail/transpose.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -28,10 +28,9 @@ namespace detail {
*
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
-std::pair, table_view> transpose(
- table_view const& input,
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::pair, table_view> transpose(table_view const& input,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr);
} // namespace detail
} // namespace cudf
diff --git a/cpp/include/cudf/detail/unary.hpp b/cpp/include/cudf/detail/unary.hpp
index b7ecedc1489..3fbdf4a5a8f 100644
--- a/cpp/include/cudf/detail/unary.hpp
+++ b/cpp/include/cudf/detail/unary.hpp
@@ -45,13 +45,12 @@ namespace detail {
*/
template
-std::unique_ptr true_if(
- InputIterator begin,
- InputIterator end,
- size_type size,
- Predicate p,
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
+std::unique_ptr true_if(InputIterator begin,
+ InputIterator end,
+ size_type size,
+ Predicate p,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr)
{
auto output =
make_numeric_column(data_type(type_id::BOOL8), size, mask_state::UNALLOCATED, stream, mr);
@@ -68,52 +67,47 @@ std::unique_ptr true_if(
*
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
-std::unique_ptr unary_operation(
- cudf::column_view const& input,
- cudf::unary_operator op,
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr unary_operation(cudf::column_view const& input,
+ cudf::unary_operator op,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr);
/**
* @copydoc cudf::is_valid
*
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
-std::unique_ptr is_valid(
- cudf::column_view const& input,
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr is_valid(cudf::column_view const& input,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr);
/**
* @copydoc cudf::cast
*
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
-std::unique_ptr cast(
- column_view const& input,
- data_type type,
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr cast(column_view const& input,
+ data_type type,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr);
/**
* @copydoc cudf::is_nan
*
* @param[in] stream CUDA stream used for device memory operations and kernel launches.
*/
-std::unique_ptr is_nan(
- cudf::column_view const& input,
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr is_nan(cudf::column_view const& input,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr);
/**
* @copydoc cudf::is_not_nan
*
* @param[in] stream CUDA stream used for device memory operations and kernel launches.
*/
-std::unique_ptr is_not_nan(
- cudf::column_view const& input,
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr is_not_nan(cudf::column_view const& input,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr);
} // namespace detail
} // namespace cudf
diff --git a/cpp/include/cudf/detail/utilities/vector_factories.hpp b/cpp/include/cudf/detail/utilities/vector_factories.hpp
index 75df0d92d0a..c446a7b5148 100644
--- a/cpp/include/cudf/detail/utilities/vector_factories.hpp
+++ b/cpp/include/cudf/detail/utilities/vector_factories.hpp
@@ -48,10 +48,9 @@ namespace detail {
* @return A device_uvector containing zeros
*/
template
-rmm::device_uvector make_zeroed_device_uvector_async(
- std::size_t size,
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
+rmm::device_uvector make_zeroed_device_uvector_async(std::size_t size,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr)
{
rmm::device_uvector ret(size, stream, mr);
CUDF_CUDA_TRY(cudaMemsetAsync(ret.data(), 0, size * sizeof(T), stream.value()));
@@ -70,10 +69,9 @@ rmm::device_uvector make_zeroed_device_uvector_async(
* @return A device_uvector containing zeros
*/
template
-rmm::device_uvector make_zeroed_device_uvector_sync(
- std::size_t size,
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
+rmm::device_uvector make_zeroed_device_uvector_sync(std::size_t size,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr)
{
rmm::device_uvector ret(size, stream, mr);
CUDF_CUDA_TRY(cudaMemsetAsync(ret.data(), 0, size * sizeof(T), stream.value()));
@@ -94,10 +92,9 @@ rmm::device_uvector make_zeroed_device_uvector_sync(
* @return A device_uvector containing the copied data
*/
template
-rmm::device_uvector make_device_uvector_async(
- host_span source_data,
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
+rmm::device_uvector make_device_uvector_async(host_span source_data,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr)
{
rmm::device_uvector ret(source_data.size(), stream, mr);
CUDF_CUDA_TRY(cudaMemcpyAsync(ret.data(),
@@ -126,9 +123,7 @@ template <
std::enable_if_t<
std::is_convertible_v>>* = nullptr>
rmm::device_uvector make_device_uvector_async(
- Container const& c,
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
+ Container const& c, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr)
{
return make_device_uvector_async(host_span{c}, stream, mr);
}
@@ -146,10 +141,9 @@ rmm::device_uvector make_device_uvector_async(
* @return A device_uvector containing the copied data
*/
template
-rmm::device_uvector make_device_uvector_async(
- device_span source_data,
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
+rmm::device_uvector make_device_uvector_async(device_span source_data,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr)
{
rmm::device_uvector ret(source_data.size(), stream, mr);
CUDF_CUDA_TRY(cudaMemcpyAsync(ret.data(),
@@ -178,9 +172,7 @@ template <
std::enable_if_t<
std::is_convertible_v>>* = nullptr>
rmm::device_uvector make_device_uvector_async(
- Container const& c,
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
+ Container const& c, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr)
{
return make_device_uvector_async(
device_span{c}, stream, mr);
@@ -199,10 +191,9 @@ rmm::device_uvector make_device_uvector_async(
* @return A device_uvector containing the copied data
*/
template
-rmm::device_uvector make_device_uvector_sync(
- host_span source_data,
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
+rmm::device_uvector make_device_uvector_sync(host_span source_data,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr)
{
auto ret = make_device_uvector_async(source_data, stream, mr);
stream.synchronize();
@@ -227,9 +218,7 @@ template <
std::enable_if_t<
std::is_convertible_v>>* = nullptr>
rmm::device_uvector make_device_uvector_sync(
- Container const& c,
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
+ Container const& c, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr)
{
return make_device_uvector_sync(host_span{c}, stream, mr);
}
@@ -247,10 +236,9 @@ rmm::device_uvector make_device_uvector_sync(
* @return A device_uvector containing the copied data
*/
template
-rmm::device_uvector make_device_uvector_sync(
- device_span source_data,
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
+rmm::device_uvector make_device_uvector_sync(device_span source_data,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr)
{
auto ret = make_device_uvector_async(source_data, stream, mr);
stream.synchronize();
@@ -275,9 +263,7 @@ template <
std::enable_if_t<
std::is_convertible_v>>* = nullptr>
rmm::device_uvector make_device_uvector_sync(
- Container const& c,
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
+ Container const& c, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr)
{
return make_device_uvector_sync(device_span{c}, stream, mr);
}
diff --git a/cpp/include/cudf/detail/valid_if.cuh b/cpp/include/cudf/detail/valid_if.cuh
index 04c78bed17d..76d6fd719a4 100644
--- a/cpp/include/cudf/detail/valid_if.cuh
+++ b/cpp/include/cudf/detail/valid_if.cuh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -86,12 +86,11 @@ __global__ void valid_if_kernel(
* null count
*/
template
-std::pair valid_if(
- InputIterator begin,
- InputIterator end,
- Predicate p,
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
+std::pair valid_if(InputIterator begin,
+ InputIterator end,
+ Predicate p,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr)
{
CUDF_EXPECTS(begin <= end, "Invalid range.");
diff --git a/cpp/include/cudf/io/detail/csv.hpp b/cpp/include/cudf/io/detail/csv.hpp
index 90d730338fc..9fdc7a47fb9 100644
--- a/cpp/include/cudf/io/detail/csv.hpp
+++ b/cpp/include/cudf/io/detail/csv.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -56,7 +56,7 @@ void write_csv(data_sink* sink,
host_span column_names,
csv_writer_options const& options,
rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+ rmm::mr::device_memory_resource* mr);
} // namespace csv
} // namespace detail
diff --git a/cpp/include/cudf/io/detail/tokenize_json.hpp b/cpp/include/cudf/io/detail/tokenize_json.hpp
index b03dbd4fb70..4914f434c98 100644
--- a/cpp/include/cudf/io/detail/tokenize_json.hpp
+++ b/cpp/include/cudf/io/detail/tokenize_json.hpp
@@ -131,7 +131,7 @@ std::pair, rmm::device_uvector> ge
device_span json_in,
cudf::io::json_reader_options const& options,
rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+ rmm::mr::device_memory_resource* mr);
} // namespace detail
diff --git a/cpp/include/cudf/io/text/detail/tile_state.hpp b/cpp/include/cudf/io/text/detail/tile_state.hpp
index bf833d4720c..6ae399fbe75 100644
--- a/cpp/include/cudf/io/text/detail/tile_state.hpp
+++ b/cpp/include/cudf/io/text/detail/tile_state.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -82,7 +82,7 @@ struct scan_tile_state {
scan_tile_state(cudf::size_type num_tiles,
rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
+ rmm::mr::device_memory_resource* mr)
: tile_status(rmm::device_uvector>(
num_tiles, stream, mr)),
tile_state_partial(rmm::device_uvector(num_tiles, stream, mr)),
diff --git a/cpp/include/cudf/io/text/detail/trie.hpp b/cpp/include/cudf/io/text/detail/trie.hpp
index a908a9fa227..7bb2e4e2ece 100644
--- a/cpp/include/cudf/io/text/detail/trie.hpp
+++ b/cpp/include/cudf/io/text/detail/trie.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -165,7 +165,7 @@ struct trie {
*/
static trie create(std::string const& pattern,
rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
+ rmm::mr::device_memory_resource* mr)
{
return create(std::vector{pattern}, stream, mr);
@@ -181,7 +181,7 @@ struct trie {
*/
static trie create(std::vector const& patterns,
rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
+ rmm::mr::device_memory_resource* mr)
{
std::vector tokens;
std::vector transitions;
diff --git a/cpp/include/cudf/lists/detail/combine.hpp b/cpp/include/cudf/lists/detail/combine.hpp
index 9f28074173a..4bc45e48a9f 100644
--- a/cpp/include/cudf/lists/detail/combine.hpp
+++ b/cpp/include/cudf/lists/detail/combine.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -27,22 +27,20 @@ namespace detail {
*
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
-std::unique_ptr concatenate_rows(
- table_view const& input,
- concatenate_null_policy null_policy,
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr concatenate_rows(table_view const& input,
+ concatenate_null_policy null_policy,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr);
/**
* @copydoc cudf::lists::concatenate_list_elements
*
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
-std::unique_ptr concatenate_list_elements(
- column_view const& input,
- concatenate_null_policy null_policy,
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr concatenate_list_elements(column_view const& input,
+ concatenate_null_policy null_policy,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr);
} // namespace detail
} // namespace lists
diff --git a/cpp/include/cudf/lists/detail/contains.hpp b/cpp/include/cudf/lists/detail/contains.hpp
index 24318e72e98..58ec18cb9ef 100644
--- a/cpp/include/cudf/lists/detail/contains.hpp
+++ b/cpp/include/cudf/lists/detail/contains.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -29,12 +29,11 @@ namespace detail {
* rmm::mr::device_memory_resource*)
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
-std::unique_ptr index_of(
- cudf::lists_column_view const& lists,
- cudf::scalar const& search_key,
- cudf::lists::duplicate_find_option find_option,
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr index_of(cudf::lists_column_view const& lists,
+ cudf::scalar const& search_key,
+ cudf::lists::duplicate_find_option find_option,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr);
/**
* @copydoc cudf::lists::index_of(cudf::lists_column_view const&,
@@ -43,12 +42,11 @@ std::unique_ptr index_of(
* rmm::mr::device_memory_resource*)
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
-std::unique_ptr index_of(
- cudf::lists_column_view const& lists,
- cudf::column_view const& search_keys,
- cudf::lists::duplicate_find_option find_option,
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr index_of(cudf::lists_column_view const& lists,
+ cudf::column_view const& search_keys,
+ cudf::lists::duplicate_find_option find_option,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr);
/**
* @copydoc cudf::lists::contains(cudf::lists_column_view const&,
@@ -56,11 +54,10 @@ std::unique_ptr index_of(
* rmm::mr::device_memory_resource*)
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
-std::unique_ptr contains(
- cudf::lists_column_view const& lists,
- cudf::scalar const& search_key,
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr contains(cudf::lists_column_view const& lists,
+ cudf::scalar const& search_key,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr);
/**
* @copydoc cudf::lists::contains(cudf::lists_column_view const&,
@@ -68,11 +65,10 @@ std::unique_ptr contains(
* rmm::mr::device_memory_resource*)
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
-std::unique_ptr contains(
- cudf::lists_column_view const& lists,
- cudf::column_view const& search_keys,
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr contains(cudf::lists_column_view const& lists,
+ cudf::column_view const& search_keys,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr);
} // namespace detail
} // namespace lists
} // namespace cudf
diff --git a/cpp/include/cudf/lists/detail/set_operations.hpp b/cpp/include/cudf/lists/detail/set_operations.hpp
index ef4255de430..1411c65448e 100644
--- a/cpp/include/cudf/lists/detail/set_operations.hpp
+++ b/cpp/include/cudf/lists/detail/set_operations.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -30,52 +30,48 @@ namespace cudf::lists::detail {
*
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
-std::unique_ptr have_overlap(
- lists_column_view const& lhs,
- lists_column_view const& rhs,
- null_equality nulls_equal,
- nan_equality nans_equal,
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr have_overlap(lists_column_view const& lhs,
+ lists_column_view const& rhs,
+ null_equality nulls_equal,
+ nan_equality nans_equal,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr);
/**
* @copydoc cudf::list::intersect_distinct
*
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
-std::unique_ptr intersect_distinct(
- lists_column_view const& lhs,
- lists_column_view const& rhs,
- null_equality nulls_equal,
- nan_equality nans_equal,
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr intersect_distinct(lists_column_view const& lhs,
+ lists_column_view const& rhs,
+ null_equality nulls_equal,
+ nan_equality nans_equal,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr);
/**
* @copydoc cudf::list::union_distinct
*
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
-std::unique_ptr union_distinct(
- lists_column_view const& lhs,
- lists_column_view const& rhs,
- null_equality nulls_equal,
- nan_equality nans_equal,
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr union_distinct(lists_column_view const& lhs,
+ lists_column_view const& rhs,
+ null_equality nulls_equal,
+ nan_equality nans_equal,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr);
/**
* @copydoc cudf::list::difference_distinct
*
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
-std::unique_ptr difference_distinct(
- lists_column_view const& lhs,
- lists_column_view const& rhs,
- null_equality nulls_equal,
- nan_equality nans_equal,
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr difference_distinct(lists_column_view const& lhs,
+ lists_column_view const& rhs,
+ null_equality nulls_equal,
+ nan_equality nans_equal,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr);
/** @} */ // end of group
} // namespace cudf::lists::detail
diff --git a/cpp/include/cudf/lists/lists_column_factories.hpp b/cpp/include/cudf/lists/lists_column_factories.hpp
index a6eacb97e91..fea1118748c 100644
--- a/cpp/include/cudf/lists/lists_column_factories.hpp
+++ b/cpp/include/cudf/lists/lists_column_factories.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -35,11 +35,10 @@ namespace detail {
* @param[in] stream CUDA stream used for device memory operations and kernel launches.
* @param[in] mr Device memory resource used to allocate the returned column's device memory.
*/
-std::unique_ptr make_lists_column_from_scalar(
- list_scalar const& value,
- size_type size,
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr make_lists_column_from_scalar(list_scalar const& value,
+ size_type size,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr);
} // namespace detail
} // namespace lists
diff --git a/cpp/include/cudf/structs/detail/concatenate.hpp b/cpp/include/cudf/structs/detail/concatenate.hpp
index a098703e4b0..82ccca188e2 100644
--- a/cpp/include/cudf/structs/detail/concatenate.hpp
+++ b/cpp/include/cudf/structs/detail/concatenate.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020-2021, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -48,10 +48,9 @@ namespace detail {
* @param mr Device memory resource used to allocate the returned column's device memory.
* @return New column with concatenated results.
*/
-std::unique_ptr concatenate(
- host_span columns,
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr concatenate(host_span columns,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr);
} // namespace detail
} // namespace structs
diff --git a/cpp/include/cudf/timezone.hpp b/cpp/include/cudf/timezone.hpp
new file mode 100644
index 00000000000..56678c73811
--- /dev/null
+++ b/cpp/include/cudf/timezone.hpp
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include
+
+#include
+#include
+#include
+
+namespace cudf {
+class table;
+
+// Cycle in which the time offsets repeat in Gregorian calendar
+static constexpr int32_t solar_cycle_years = 400;
+// Number of future entries in the timezone transition table:
+// Two entries per year, over the length of the Gregorian calendar's solar cycle
+static constexpr uint32_t solar_cycle_entry_count = 2 * solar_cycle_years;
+
+/**
+ * @brief Creates a transition table to convert ORC timestamps to UTC.
+ *
+ * Uses system's TZif files. Assumes little-endian platform when parsing these files.
+ * The transition table starts with the entries from the TZif file. For timestamps after the file's
+ * last transition, the table includes entries that form a `solar_cycle_years`-year cycle (future
+ * entries). This portion of the table has `solar_cycle_entry_count` elements, as it assumes two
+ * transitions per year from Daylight Saving Time. If the timezone does not have DST, the table will
+ * still include the future entries, which will all have the same offset.
+ *
+ * @param tzif_dir The directory where the TZif files are located
+ * @param timezone_name standard timezone name (for example, "America/Los_Angeles")
+ * @param mr Device memory resource used to allocate the returned table's device memory.
+ *
+ * @return The transition table for the given timezone
+ */
+std::unique_ptr make_timezone_transition_table(
+ std::optional tzif_dir,
+ std::string_view timezone_name,
+ rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+
+} // namespace cudf
diff --git a/cpp/include/cudf/utilities/type_checks.hpp b/cpp/include/cudf/utilities/type_checks.hpp
index 4fa712fe7c3..b925fc8ae92 100644
--- a/cpp/include/cudf/utilities/type_checks.hpp
+++ b/cpp/include/cudf/utilities/type_checks.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -36,4 +36,15 @@ namespace cudf {
*/
bool column_types_equal(column_view const& lhs, column_view const& rhs);
+/**
+ * @brief Compare the type IDs of two `column_view`s
+ * This function returns true if the type of `lhs` equals that of `rhs`.
+ * - For fixed point types, the scale is ignored.
+ *
+ * @param lhs The first `column_view` to compare
+ * @param rhs The second `column_view` to compare
+ * @return true if column types match
+ */
+bool column_types_equivalent(column_view const& lhs, column_view const& rhs);
+
} // namespace cudf
diff --git a/cpp/include/cudf_test/column_wrapper.hpp b/cpp/include/cudf_test/column_wrapper.hpp
index 91773b2c3f1..6341e2e10b0 100644
--- a/cpp/include/cudf_test/column_wrapper.hpp
+++ b/cpp/include/cudf_test/column_wrapper.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -732,9 +732,11 @@ class strings_column_wrapper : public detail::column_wrapper {
{
auto all_valid = thrust::make_constant_iterator(true);
auto [chars, offsets] = detail::make_chars_and_offsets(begin, end, all_valid);
- auto d_chars = cudf::detail::make_device_uvector_sync(chars, cudf::get_default_stream());
- auto d_offsets = cudf::detail::make_device_uvector_sync(offsets, cudf::get_default_stream());
- wrapped = cudf::make_strings_column(d_chars, d_offsets);
+ auto d_chars = cudf::detail::make_device_uvector_sync(
+ chars, cudf::get_default_stream(), rmm::mr::get_current_device_resource());
+ auto d_offsets = cudf::detail::make_device_uvector_sync(
+ offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource());
+ wrapped = cudf::make_strings_column(d_chars, d_offsets);
}
/**
@@ -772,10 +774,13 @@ class strings_column_wrapper : public detail::column_wrapper {
size_type num_strings = std::distance(begin, end);
auto [chars, offsets] = detail::make_chars_and_offsets(begin, end, v);
auto null_mask = detail::make_null_mask_vector(v, v + num_strings);
- auto d_chars = cudf::detail::make_device_uvector_sync(chars, cudf::get_default_stream());
- auto d_offsets = cudf::detail::make_device_uvector_sync(offsets, cudf::get_default_stream());
- auto d_bitmask = cudf::detail::make_device_uvector_sync(null_mask, cudf::get_default_stream());
- wrapped = cudf::make_strings_column(d_chars, d_offsets, d_bitmask);
+ auto d_chars = cudf::detail::make_device_uvector_sync(
+ chars, cudf::get_default_stream(), rmm::mr::get_current_device_resource());
+ auto d_offsets = cudf::detail::make_device_uvector_sync(
+ offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource());
+ auto d_bitmask = cudf::detail::make_device_uvector_sync(
+ null_mask, cudf::get_default_stream(), rmm::mr::get_current_device_resource());
+ wrapped = cudf::make_strings_column(d_chars, d_offsets, d_bitmask);
}
/**
diff --git a/cpp/include/cudf_test/tdigest_utilities.cuh b/cpp/include/cudf_test/tdigest_utilities.cuh
index ce45ad91be1..d23d7f29a6c 100644
--- a/cpp/include/cudf_test/tdigest_utilities.cuh
+++ b/cpp/include/cudf_test/tdigest_utilities.cuh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -168,7 +168,8 @@ void tdigest_minmax_compare(cudf::tdigest::tdigest_column_view const& tdv,
// verify min/max
thrust::host_vector> h_spans;
h_spans.push_back({input_values.begin(), static_cast(input_values.size())});
- auto spans = cudf::detail::make_device_uvector_async(h_spans, cudf::get_default_stream());
+ auto spans = cudf::detail::make_device_uvector_async(
+ h_spans, cudf::get_default_stream(), rmm::mr::get_current_device_resource());
auto expected_min = cudf::make_fixed_width_column(
data_type{type_id::FLOAT64}, spans.size(), mask_state::UNALLOCATED);
diff --git a/cpp/scripts/sort_ninja_log.py b/cpp/scripts/sort_ninja_log.py
index 9cb8afbff9f..3fe503f749e 100755
--- a/cpp/scripts/sort_ninja_log.py
+++ b/cpp/scripts/sort_ninja_log.py
@@ -1,10 +1,11 @@
#
-# Copyright (c) 2021-2022, NVIDIA CORPORATION.
+# Copyright (c) 2021-2023, NVIDIA CORPORATION.
#
import argparse
import os
import sys
import xml.etree.ElementTree as ET
+from pathlib import Path
from xml.dom import minidom
parser = argparse.ArgumentParser()
@@ -22,52 +23,50 @@
"--msg",
type=str,
default=None,
- help="optional message to include in html output",
+ help="optional text file to include at the top of the html output",
+)
+parser.add_argument(
+ "--cmp_log",
+ type=str,
+ default=None,
+ help="optional baseline ninja_log to compare results",
)
args = parser.parse_args()
log_file = args.log_file
-log_path = os.path.dirname(os.path.abspath(log_file))
-
output_fmt = args.fmt
+cmp_file = args.cmp_log
# build a map of the log entries
-entries = {}
-with open(log_file) as log:
- last = 0
- files = {}
- for line in log:
- entry = line.split()
- if len(entry) > 4:
- obj_file = entry[3]
- file_size = (
- os.path.getsize(os.path.join(log_path, obj_file))
- if os.path.exists(obj_file)
- else 0
- )
- start = int(entry[0])
- end = int(entry[1])
- # logic based on ninjatracing
- if end < last:
- files = {}
- last = end
- files.setdefault(entry[4], (entry[3], start, end, file_size))
-
- # build entries from files dict
- for entry in files.values():
- entries[entry[0]] = (entry[1], entry[2], entry[3])
-
-# check file could be loaded and we have entries to report
-if len(entries) == 0:
- print("Could not parse", log_file)
- exit()
+def build_log_map(log_file):
+ entries = {}
+ log_path = os.path.dirname(os.path.abspath(log_file))
+ with open(log_file) as log:
+ last = 0
+ files = {}
+ for line in log:
+ entry = line.split()
+ if len(entry) > 4:
+ obj_file = entry[3]
+ file_size = (
+ os.path.getsize(os.path.join(log_path, obj_file))
+ if os.path.exists(obj_file)
+ else 0
+ )
+ start = int(entry[0])
+ end = int(entry[1])
+ # logic based on ninjatracing
+ if end < last:
+ files = {}
+ last = end
+ files.setdefault(entry[4], (entry[3], start, end, file_size))
+
+ # build entries from files dict
+ for entry in files.values():
+ entries[entry[0]] = (entry[1], entry[2], entry[3])
+
+ return entries
-# sort the entries by build-time (descending order)
-sorted_list = sorted(
- list(entries.keys()),
- key=lambda k: entries[k][1] - entries[k][0],
- reverse=True,
-)
# output results in XML format
def output_xml(entries, sorted_list, args):
@@ -147,14 +146,46 @@ def assign_entries_to_threads(entries):
return (results, end_time)
-# output chart results in HTML format
-def output_html(entries, sorted_list, args):
+# format the build-time
+def format_build_time(input_time):
+ build_time = abs(input_time)
+ build_time_str = str(build_time) + " ms"
+ if build_time > 120000: # 2 minutes
+ minutes = int(build_time / 60000)
+ seconds = int(((build_time / 60000) - minutes) * 60)
+ build_time_str = "{:d}:{:02d} min".format(minutes, seconds)
+ elif build_time > 1000:
+ build_time_str = "{:.3f} s".format(build_time / 1000)
+ if input_time < 0:
+ build_time_str = "-" + build_time_str
+ return build_time_str
+
+
+# format file size
+def format_file_size(input_size):
+ file_size = abs(input_size)
+ file_size_str = ""
+ if file_size > 1000000:
+ file_size_str = "{:.3f} MB".format(file_size / 1000000)
+ elif file_size > 1000:
+ file_size_str = "{:.3f} KB".format(file_size / 1000)
+ elif file_size > 0:
+ file_size_str = str(file_size) + " bytes"
+ if input_size < 0:
+ file_size_str = "-" + file_size_str
+ return file_size_str
+
+
+# Output chart results in HTML format
+# Builds a standalone html file with no javascript or styles
+def output_html(entries, sorted_list, cmp_entries, args):
print("Build Metrics Report")
- # Note: Jenkins does not support javascript nor style defined in the html
- # https://www.jenkins.io/doc/book/security/configuring-content-security-policy/
print("")
if args.msg is not None:
- print("", args.msg, "
")
+ msg_file = Path(args.msg)
+ if msg_file.is_file():
+ msg = msg_file.read_text()
+ print("", msg, "
")
# map entries to threads
# the end_time is used to scale all the entries to a fixed output width
@@ -201,15 +232,8 @@ def output_html(entries, sorted_list, args):
# adjust for the cellspacing
prev_end = end + int(end_time / 500)
- # format the build-time
build_time = end - start
- build_time_str = str(build_time) + " ms"
- if build_time > 120000: # 2 minutes
- minutes = int(build_time / 60000)
- seconds = int(((build_time / 60000) - minutes) * 60)
- build_time_str = "{:d}:{:02d} min".format(minutes, seconds)
- elif build_time > 1000:
- build_time_str = "{:.3f} s".format(build_time / 1000)
+ build_time_str = format_build_time(build_time)
# assign color and accumulate legend values
color = white
@@ -248,7 +272,7 @@ def output_html(entries, sorted_list, args):
# done with this entry
print("")
# update the entry with just the computed output info
- entries[name] = (build_time_str, color, entry[2])
+ entries[name] = (build_time, color, entry[2])
# add a filler column at the end of each row
print(" |
")
@@ -259,30 +283,53 @@ def output_html(entries, sorted_list, args):
# output detail table in build-time descending order
print("")
print(
- "File | ",
- "Compile time | ",
- "Size |
---|
",
- sep="",
+ "
File | ", "Compile time | ", "Size | ", sep=""
)
+ if cmp_entries:
+ print("t-cmp | ", sep="")
+ print("
")
+
for name in sorted_list:
entry = entries[name]
- build_time_str = entry[0]
+ build_time = entry[0]
color = entry[1]
file_size = entry[2]
- # format file size
- file_size_str = ""
- if file_size > 1000000:
- file_size_str = "{:.3f} MB".format(file_size / 1000000)
- elif file_size > 1000:
- file_size_str = "{:.3f} KB".format(file_size / 1000)
- elif file_size > 0:
- file_size_str = str(file_size) + " bytes"
+ build_time_str = format_build_time(build_time)
+ file_size_str = format_file_size(file_size)
# output entry row
print("", name, " | ", sep="", end="")
print("", build_time_str, " | ", sep="", end="")
- print("", file_size_str, " |
", sep="")
+ print("", file_size_str, " | ", sep="", end="")
+ # output diff column
+ cmp_entry = (
+ cmp_entries[name] if cmp_entries and name in cmp_entries else None
+ )
+ if cmp_entry:
+ diff_time = build_time - (cmp_entry[1] - cmp_entry[0])
+ diff_time_str = format_build_time(diff_time)
+ diff_color = white
+ diff_percent = int((diff_time / build_time) * 100)
+ if build_time > 60000:
+ if diff_percent > 20:
+ diff_color = red
+ diff_time_str = "" + diff_time_str + ""
+ elif diff_percent < -20:
+ diff_color = green
+ diff_time_str = "" + diff_time_str + ""
+ elif diff_percent > 0:
+ diff_color = yellow
+ print(
+ "",
+ diff_time_str,
+ " | ",
+ sep="",
+ end="",
+ )
+ print("")
print("
")
@@ -296,22 +343,62 @@ def output_html(entries, sorted_list, args):
print("", summary["green"], " | ")
print("time < 1 second | ")
print("", summary["white"], " |
")
- print("
")
+ print("
")
+
+ if cmp_entries:
+ print("")
+ print("time increase > 20% |
")
+ print("time increase > 0 |
")
+ print("time decrease > 20% |
")
+ print(
+ "time change < 20%% or build time < 1 minute |
",
+ )
+ print("
")
+
+ print("