From f846eccbfb0cc7cfe96610fc84deafe5df600e81 Mon Sep 17 00:00:00 2001 From: psamfass Date: Thu, 12 Dec 2024 16:37:22 +0000 Subject: [PATCH] (upstream branch, commit 4dc8300c6104697b1d9313a48d1c4c7f5dabf81a:) GALI PREM SAGAR Thu Nov 16 16:22:44 2023 -0600 Raise error in `reindex` when `index` is not unique (#14400) (#14429) Bacport of #14400 Fixes: #14398 This PR raises an error in `reindex` API when reindexing is performed on a non-unique index column. Authors: - GALI PREM SAGAR (https://github.com/galipremsagar) Approvers: - Matthew Roeschke (https://github.com/mroeschke) - Lawrence Mitchell (https://github.com/wence-) URL: https://github.com/rapidsai/cudf/pull/14400 Authors: - GALI PREM SAGAR (https://github.com/galipremsagar) Approvers: - Richard (Rick) Zamora (https://github.com/rjzamora) - Ashwin Srinath (https://github.com/shwina) - Ray Douglass (https://github.com/raydouglass) --- .clang-format | 155 + .github/CODEOWNERS | 22 + .github/ISSUE_TEMPLATE/bug_report.md | 28 + .../ISSUE_TEMPLATE/documentation-request.md | 35 + .github/ISSUE_TEMPLATE/feature_request.md | 20 + .../ISSUE_TEMPLATE/pandas_function_request.md | 22 + .github/ISSUE_TEMPLATE/submit-question.md | 10 + .github/PULL_REQUEST_TEMPLATE.md | 9 + .github/copy-pr-bot.yaml | 4 + .github/labeler.yml | 21 + .github/ops-bot.yaml | 7 + .github/workflows/build.yaml | 109 + .github/workflows/jni-docker-build.yml | 53 + .github/workflows/labeler.yml | 11 + .github/workflows/pr.yaml | 180 + .github/workflows/test.yaml | 120 + .gitignore | 175 + .pre-commit-config.yaml | 160 + CHANGELOG.md | 6900 ++++++++ CONTRIBUTING.md | 398 + LICENSE | 201 + README.md | 97 + build.sh | 383 + ci/build_cpp.sh | 18 + ci/build_docs.sh | 57 + ci/build_python.sh | 43 + ci/build_wheel.sh | 55 + ci/build_wheel_cudf.sh | 26 + ci/build_wheel_dask_cudf.sh | 11 + ci/check_style.sh | 23 + ci/checks/copyright.py | 277 + ci/checks/doxygen.sh | 35 + 
ci/cudf_pandas_scripts/pandas-tests/diff.sh | 24 + .../pandas-tests/job-summary.py | 100 + ci/cudf_pandas_scripts/pandas-tests/run.sh | 37 + ci/cudf_pandas_scripts/run_tests.sh | 39 + ci/release/update-version.sh | 125 + ci/test_cpp.sh | 45 + ci/test_cpp_common.sh | 32 + ci/test_cpp_memcheck.sh | 25 + ci/test_java.sh | 45 + ci/test_notebooks.sh | 61 + ci/test_python_common.sh | 36 + ci/test_python_cudf.sh | 62 + ci/test_python_other.sh | 48 + ci/test_wheel_cudf.sh | 17 + ci/test_wheel_dask_cudf.sh | 19 + ci/utils/nbtest.sh | 53 + ci/utils/nbtestlog2junitxml.py | 163 + ci/wheel_smoke_test_cudf.py | 13 + codecov.yml | 11 + .../all_cuda-118_arch-x86_64.yaml | 103 + .../all_cuda-120_arch-x86_64.yaml | 100 + conda/recipes/cudf/build.sh | 4 + conda/recipes/cudf/conda_build_config.yaml | 17 + conda/recipes/cudf/meta.yaml | 119 + conda/recipes/cudf_kafka/build.sh | 16 + .../cudf_kafka/conda_build_config.yaml | 11 + conda/recipes/cudf_kafka/meta.yaml | 79 + conda/recipes/custreamz/build.sh | 4 + conda/recipes/custreamz/meta.yaml | 65 + conda/recipes/dask-cudf/build.sh | 4 + conda/recipes/dask-cudf/meta.yaml | 65 + conda/recipes/dask-cudf/run_test.sh | 36 + conda/recipes/libcudf/build.sh | 9 + conda/recipes/libcudf/conda_build_config.yaml | 59 + conda/recipes/libcudf/install_libcudf.sh | 4 + .../libcudf/install_libcudf_example.sh | 4 + .../recipes/libcudf/install_libcudf_kafka.sh | 4 + .../recipes/libcudf/install_libcudf_tests.sh | 5 + conda/recipes/libcudf/meta.yaml | 227 + cpp/.clang-tidy | 27 + cpp/CMakeLists.txt | 1099 ++ cpp/benchmarks/CMakeLists.txt | 336 + cpp/benchmarks/ast/transform.cpp | 134 + cpp/benchmarks/binaryop/binaryop.cpp | 115 + cpp/benchmarks/binaryop/compiled_binaryop.cpp | 115 + cpp/benchmarks/column/concatenate.cpp | 169 + cpp/benchmarks/common/generate_input.cu | 946 ++ cpp/benchmarks/common/generate_input.hpp | 694 + .../common/random_distribution_factory.cuh | 181 + cpp/benchmarks/copying/contiguous_split.cu | 263 + 
cpp/benchmarks/copying/copy_if_else.cpp | 74 + cpp/benchmarks/copying/gather.cu | 75 + cpp/benchmarks/copying/scatter.cu | 78 + cpp/benchmarks/copying/shift.cu | 96 + cpp/benchmarks/filling/repeat.cpp | 71 + cpp/benchmarks/fixture/benchmark_fixture.hpp | 124 + cpp/benchmarks/fixture/nvbench_fixture.hpp | 94 + cpp/benchmarks/fixture/nvbench_main.cpp | 41 + .../fixture/templated_benchmark_fixture.hpp | 73 + cpp/benchmarks/groupby/group_common.hpp | 29 + cpp/benchmarks/groupby/group_max.cpp | 62 + cpp/benchmarks/groupby/group_no_requests.cpp | 92 + cpp/benchmarks/groupby/group_nth.cpp | 65 + cpp/benchmarks/groupby/group_nunique.cpp | 74 + cpp/benchmarks/groupby/group_rank.cpp | 104 + cpp/benchmarks/groupby/group_scan.cpp | 101 + cpp/benchmarks/groupby/group_shift.cpp | 65 + cpp/benchmarks/groupby/group_struct_keys.cpp | 94 + .../groupby/group_struct_values.cpp | 94 + cpp/benchmarks/groupby/group_sum.cpp | 98 + cpp/benchmarks/hashing/hash.cpp | 85 + cpp/benchmarks/hashing/partition.cpp | 72 + cpp/benchmarks/io/csv/csv_reader_input.cpp | 110 + cpp/benchmarks/io/csv/csv_reader_options.cpp | 131 + cpp/benchmarks/io/csv/csv_writer.cpp | 128 + cpp/benchmarks/io/cuio_common.cpp | 203 + cpp/benchmarks/io/cuio_common.hpp | 140 + cpp/benchmarks/io/fst.cu | 243 + cpp/benchmarks/io/json/json_reader_input.cpp | 132 + cpp/benchmarks/io/json/json_writer.cpp | 131 + cpp/benchmarks/io/json/nested_json.cpp | 217 + cpp/benchmarks/io/nvbench_helpers.hpp | 171 + cpp/benchmarks/io/orc/orc_reader_input.cpp | 139 + cpp/benchmarks/io/orc/orc_reader_options.cpp | 169 + cpp/benchmarks/io/orc/orc_writer.cpp | 214 + cpp/benchmarks/io/orc/orc_writer_chunks.cpp | 142 + .../io/parquet/parquet_reader_input.cpp | 245 + .../io/parquet/parquet_reader_options.cpp | 179 + cpp/benchmarks/io/parquet/parquet_writer.cpp | 221 + .../io/parquet/parquet_writer_chunks.cpp | 113 + cpp/benchmarks/io/text/multibyte_split.cpp | 225 + cpp/benchmarks/iterator/iterator.cu | 255 + cpp/benchmarks/join/conditional_join.cu 
| 287 + cpp/benchmarks/join/generate_input_tables.cuh | 181 + cpp/benchmarks/join/join.cu | 194 + cpp/benchmarks/join/join_common.hpp | 229 + cpp/benchmarks/join/left_join.cu | 128 + cpp/benchmarks/join/mixed_join.cu | 332 + cpp/benchmarks/lists/copying/scatter_lists.cu | 147 + cpp/benchmarks/lists/set_operations.cpp | 83 + cpp/benchmarks/merge/merge.cpp | 92 + cpp/benchmarks/null_mask/set_null_mask.cpp | 45 + cpp/benchmarks/quantiles/quantiles.cpp | 69 + cpp/benchmarks/reduction/anyall.cpp | 74 + cpp/benchmarks/reduction/dictionary.cpp | 86 + cpp/benchmarks/reduction/minmax.cpp | 63 + cpp/benchmarks/reduction/rank.cpp | 61 + cpp/benchmarks/reduction/reduce.cpp | 91 + cpp/benchmarks/reduction/scan.cpp | 61 + cpp/benchmarks/reduction/scan_structs.cpp | 70 + cpp/benchmarks/reduction/segmented_reduce.cpp | 150 + cpp/benchmarks/replace/clamp.cpp | 75 + cpp/benchmarks/replace/nans.cpp | 60 + cpp/benchmarks/search/contains_scalar.cpp | 57 + cpp/benchmarks/search/contains_table.cpp | 73 + cpp/benchmarks/search/search.cpp | 151 + cpp/benchmarks/sort/nested_types_common.hpp | 93 + cpp/benchmarks/sort/rank.cpp | 60 + cpp/benchmarks/sort/rank_lists.cpp | 47 + cpp/benchmarks/sort/rank_structs.cpp | 45 + cpp/benchmarks/sort/rank_types_common.hpp | 52 + cpp/benchmarks/sort/segmented_sort.cpp | 63 + cpp/benchmarks/sort/sort.cpp | 62 + cpp/benchmarks/sort/sort_lists.cpp | 101 + cpp/benchmarks/sort/sort_strings.cpp | 48 + cpp/benchmarks/sort/sort_structs.cpp | 37 + .../stream_compaction/apply_boolean_mask.cpp | 133 + cpp/benchmarks/stream_compaction/distinct.cpp | 96 + .../stream_compaction/distinct_count.cpp | 64 + .../stream_compaction/stable_distinct.cpp | 96 + cpp/benchmarks/stream_compaction/unique.cpp | 145 + .../stream_compaction/unique_count.cpp | 52 + cpp/benchmarks/string/case.cpp | 77 + cpp/benchmarks/string/char_types.cpp | 66 + cpp/benchmarks/string/combine.cpp | 69 + cpp/benchmarks/string/contains.cpp | 117 + cpp/benchmarks/string/convert_datetime.cpp | 76 + 
cpp/benchmarks/string/convert_durations.cpp | 105 + cpp/benchmarks/string/convert_fixed_point.cpp | 106 + cpp/benchmarks/string/convert_numerics.cpp | 130 + cpp/benchmarks/string/copy.cu | 94 + cpp/benchmarks/string/count.cpp | 62 + cpp/benchmarks/string/extract.cpp | 84 + cpp/benchmarks/string/factory.cu | 92 + cpp/benchmarks/string/filter.cpp | 85 + cpp/benchmarks/string/find.cpp | 93 + cpp/benchmarks/string/gather.cpp | 59 + cpp/benchmarks/string/join_strings.cpp | 58 + cpp/benchmarks/string/json.cu | 223 + cpp/benchmarks/string/lengths.cpp | 55 + cpp/benchmarks/string/like.cpp | 115 + cpp/benchmarks/string/repeat_strings.cpp | 112 + cpp/benchmarks/string/replace.cpp | 85 + cpp/benchmarks/string/replace_re.cpp | 67 + cpp/benchmarks/string/reverse.cpp | 55 + cpp/benchmarks/string/slice.cpp | 84 + cpp/benchmarks/string/split.cpp | 71 + cpp/benchmarks/string/split_re.cpp | 60 + cpp/benchmarks/string/string_bench_args.hpp | 56 + cpp/benchmarks/string/translate.cpp | 83 + cpp/benchmarks/string/url_decode.cu | 112 + .../synchronization/synchronization.cpp | 60 + .../synchronization/synchronization.hpp | 103 + cpp/benchmarks/text/edit_distance.cpp | 58 + cpp/benchmarks/text/hash_ngrams.cpp | 60 + cpp/benchmarks/text/jaccard.cpp | 62 + cpp/benchmarks/text/minhash.cpp | 71 + cpp/benchmarks/text/ngrams.cpp | 72 + cpp/benchmarks/text/normalize.cpp | 65 + cpp/benchmarks/text/replace.cpp | 75 + cpp/benchmarks/text/subword.cpp | 91 + cpp/benchmarks/text/tokenize.cpp | 85 + cpp/benchmarks/transpose/transpose.cpp | 55 + .../type_dispatcher/type_dispatcher.cu | 231 + cpp/cmake/Modules/ConfigureCUDA.cmake | 47 + cpp/cmake/Modules/FindcuFile.cmake | 120 + .../Modules/JitifyPreprocessKernels.cmake | 66 + cpp/cmake/config.json | 47 + cpp/cmake/thirdparty/get_arrow.cmake | 414 + cpp/cmake/thirdparty/get_cucollections.cmake | 25 + cpp/cmake/thirdparty/get_cufile.cmake | 32 + cpp/cmake/thirdparty/get_dlpack.cmake | 41 + cpp/cmake/thirdparty/get_fmt.cmake | 22 + 
cpp/cmake/thirdparty/get_gtest.cmake | 39 + cpp/cmake/thirdparty/get_jitify.cmake | 32 + cpp/cmake/thirdparty/get_kvikio.cmake | 36 + cpp/cmake/thirdparty/get_libcudacxx.cmake | 37 + cpp/cmake/thirdparty/get_nvbench.cmake | 28 + cpp/cmake/thirdparty/get_nvcomp.cmake | 31 + cpp/cmake/thirdparty/get_rmm.cmake | 24 + cpp/cmake/thirdparty/get_spdlog.cmake | 34 + cpp/cmake/thirdparty/get_thrust.cmake | 46 + .../cub_segmented_sort_with_bool_key.diff | 14 + .../patches/nvbench_global_setup.diff | 29 + .../thirdparty/patches/nvbench_override.json | 19 + .../thrust_disable_64bit_dispatching.diff | 29 + .../thrust_faster_scan_compile_times.diff | 39 + .../thrust_faster_sort_compile_times.diff | 48 + .../thirdparty/patches/thrust_override.json | 39 + ...ust_transform_iter_with_reduce_by_key.diff | 26 + cpp/doxygen/Doxyfile | 2590 +++ cpp/doxygen/DoxygenLayout.xml | 227 + cpp/doxygen/developer_guide/BENCHMARKING.md | 56 + .../developer_guide/DEVELOPER_GUIDE.md | 1375 ++ cpp/doxygen/developer_guide/DOCUMENTATION.md | 450 + cpp/doxygen/developer_guide/TESTING.md | 526 + cpp/doxygen/developer_guide/strings.png | Bin 0 -> 41562 bytes cpp/doxygen/header.html | 61 + cpp/doxygen/main_page.md | 5 + cpp/doxygen/modify_fences.sh | 9 + cpp/doxygen/regex.md | 124 + cpp/doxygen/unicode.md | 23 + cpp/examples/README.md | 9 + cpp/examples/basic/4stock_5day.csv | 21 + cpp/examples/basic/CMakeLists.txt | 32 + cpp/examples/basic/README.md | 23 + cpp/examples/basic/src/process_csv.cpp | 104 + cpp/examples/build.sh | 31 + cpp/examples/strings/CMakeLists.txt | 49 + cpp/examples/strings/README.md | 37 + cpp/examples/strings/common.hpp | 114 + cpp/examples/strings/custom_optimized.cu | 166 + cpp/examples/strings/custom_prealloc.cu | 126 + cpp/examples/strings/custom_with_malloc.cu | 158 + cpp/examples/strings/libcudf_apis.cpp | 62 + cpp/examples/strings/names.csv | 20 + cpp/include/cudf/aggregation.hpp | 734 + .../cudf/ast/detail/expression_evaluator.cuh | 710 + 
.../cudf/ast/detail/expression_parser.hpp | 333 + .../ast/detail/expression_transformer.hpp | 64 + cpp/include/cudf/ast/detail/operators.hpp | 1226 ++ cpp/include/cudf/ast/expressions.hpp | 549 + cpp/include/cudf/binaryop.hpp | 273 + cpp/include/cudf/column/column.hpp | 335 + .../cudf/column/column_device_view.cuh | 1526 ++ cpp/include/cudf/column/column_factories.hpp | 627 + cpp/include/cudf/column/column_view.hpp | 766 + cpp/include/cudf/concatenate.hpp | 99 + cpp/include/cudf/contiguous_split.hpp | 345 + cpp/include/cudf/copying.hpp | 958 ++ cpp/include/cudf/datetime.hpp | 403 + .../cudf/detail/aggregation/aggregation.cuh | 676 + .../cudf/detail/aggregation/aggregation.hpp | 1625 ++ .../cudf/detail/aggregation/result_cache.hpp | 69 + cpp/include/cudf/detail/binaryop.hpp | 79 + .../detail/calendrical_month_sequence.cuh | 73 + cpp/include/cudf/detail/concatenate.hpp | 50 + cpp/include/cudf/detail/concatenate_masks.hpp | 71 + cpp/include/cudf/detail/contiguous_split.hpp | 127 + cpp/include/cudf/detail/copy.hpp | 282 + cpp/include/cudf/detail/copy_if.cuh | 409 + cpp/include/cudf/detail/copy_if_else.cuh | 191 + cpp/include/cudf/detail/copy_range.cuh | 210 + cpp/include/cudf/detail/datetime.hpp | 175 + cpp/include/cudf/detail/datetime_ops.cuh | 42 + cpp/include/cudf/detail/fill.hpp | 54 + cpp/include/cudf/detail/gather.cuh | 690 + cpp/include/cudf/detail/gather.hpp | 86 + cpp/include/cudf/detail/get_value.cuh | 59 + cpp/include/cudf/detail/groupby.hpp | 53 + .../detail/groupby/group_replace_nulls.hpp | 47 + .../cudf/detail/groupby/sort_helper.hpp | 234 + .../cudf/detail/hash_reduce_by_row.cuh | 171 + cpp/include/cudf/detail/indexalator.cuh | 308 + cpp/include/cudf/detail/interop.hpp | 198 + cpp/include/cudf/detail/is_element_valid.hpp | 47 + cpp/include/cudf/detail/iterator.cuh | 656 + cpp/include/cudf/detail/join.hpp | 191 + cpp/include/cudf/detail/label_bins.hpp | 58 + .../cudf/detail/labeling/label_segments.cuh | 198 + cpp/include/cudf/detail/merge.cuh | 166 + 
.../cudf/detail/normalizing_iterator.cuh | 367 + cpp/include/cudf/detail/null_mask.cuh | 591 + cpp/include/cudf/detail/null_mask.hpp | 264 + cpp/include/cudf/detail/nvtx/nvtx3.hpp | 1933 +++ cpp/include/cudf/detail/nvtx/ranges.hpp | 51 + cpp/include/cudf/detail/quantiles.hpp | 66 + cpp/include/cudf/detail/repeat.hpp | 53 + cpp/include/cudf/detail/replace.hpp | 104 + cpp/include/cudf/detail/replace/nulls.cuh | 44 + cpp/include/cudf/detail/reshape.hpp | 48 + cpp/include/cudf/detail/rolling.hpp | 51 + cpp/include/cudf/detail/round.hpp | 41 + cpp/include/cudf/detail/scan.hpp | 119 + cpp/include/cudf/detail/scatter.cuh | 457 + cpp/include/cudf/detail/scatter.hpp | 146 + cpp/include/cudf/detail/search.hpp | 101 + cpp/include/cudf/detail/sequence.hpp | 68 + .../cudf/detail/sizes_to_offsets_iterator.cuh | 330 + cpp/include/cudf/detail/sorting.hpp | 154 + cpp/include/cudf/detail/stream_compaction.hpp | 161 + cpp/include/cudf/detail/structs/utilities.hpp | 268 + cpp/include/cudf/detail/tdigest/tdigest.hpp | 240 + cpp/include/cudf/detail/timezone.cuh | 79 + cpp/include/cudf/detail/timezone.hpp | 36 + cpp/include/cudf/detail/transform.hpp | 104 + cpp/include/cudf/detail/transpose.hpp | 36 + cpp/include/cudf/detail/unary.hpp | 113 + .../cudf/detail/utilities/algorithm.cuh | 95 + .../cudf/detail/utilities/alignment.hpp | 46 + cpp/include/cudf/detail/utilities/assert.cuh | 61 + cpp/include/cudf/detail/utilities/cuda.cuh | 233 + .../cudf/detail/utilities/default_stream.hpp | 36 + .../cudf/detail/utilities/device_atomics.cuh | 541 + .../detail/utilities/device_operators.cuh | 242 + .../detail/utilities/element_argminmax.cuh | 61 + .../cudf/detail/utilities/int_fastdiv.h | 175 + .../cudf/detail/utilities/integer_utils.hpp | 193 + .../cudf/detail/utilities/linked_column.hpp | 71 + cpp/include/cudf/detail/utilities/logger.hpp | 27 + .../detail/utilities/pinned_host_vector.hpp | 211 + .../cudf/detail/utilities/stacktrace.hpp | 47 + .../cudf/detail/utilities/stream_pool.hpp | 64 + 
.../utilities/transform_unary_functions.cuh | 140 + .../detail/utilities/vector_factories.hpp | 443 + .../detail/utilities/visitor_overload.hpp | 30 + cpp/include/cudf/detail/valid_if.cuh | 191 + .../cudf/dictionary/detail/concatenate.hpp | 46 + cpp/include/cudf/dictionary/detail/encode.hpp | 88 + .../cudf/dictionary/detail/iterator.cuh | 125 + cpp/include/cudf/dictionary/detail/merge.hpp | 51 + .../cudf/dictionary/detail/replace.hpp | 66 + cpp/include/cudf/dictionary/detail/search.hpp | 65 + .../cudf/dictionary/detail/update_keys.hpp | 109 + .../dictionary/dictionary_column_view.hpp | 127 + .../cudf/dictionary/dictionary_factories.hpp | 124 + cpp/include/cudf/dictionary/encode.hpp | 88 + cpp/include/cudf/dictionary/search.hpp | 52 + cpp/include/cudf/dictionary/update_keys.hpp | 171 + cpp/include/cudf/filling.hpp | 246 + cpp/include/cudf/fixed_point/fixed_point.hpp | 833 + cpp/include/cudf/fixed_point/temporary.hpp | 87 + cpp/include/cudf/groupby.hpp | 422 + cpp/include/cudf/hashing.hpp | 169 + .../cudf/hashing/detail/default_hash.cuh | 35 + .../cudf/hashing/detail/hash_allocator.cuh | 62 + .../cudf/hashing/detail/hash_functions.cuh | 71 + cpp/include/cudf/hashing/detail/hashing.hpp | 110 + .../cudf/hashing/detail/helper_functions.cuh | 243 + .../hashing/detail/murmurhash3_x64_128.cuh | 223 + .../hashing/detail/murmurhash3_x86_32.cuh | 194 + cpp/include/cudf/interop.hpp | 182 + cpp/include/cudf/io/arrow_io_source.hpp | 85 + cpp/include/cudf/io/avro.hpp | 223 + cpp/include/cudf/io/csv.hpp | 1725 ++ cpp/include/cudf/io/data_sink.hpp | 212 + cpp/include/cudf/io/datasource.hpp | 379 + cpp/include/cudf/io/detail/avro.hpp | 47 + cpp/include/cudf/io/detail/csv.hpp | 64 + cpp/include/cudf/io/detail/json.hpp | 55 + cpp/include/cudf/io/detail/orc.hpp | 133 + cpp/include/cudf/io/detail/parquet.hpp | 254 + cpp/include/cudf/io/detail/tokenize_json.hpp | 140 + cpp/include/cudf/io/detail/utils.hpp | 28 + cpp/include/cudf/io/json.hpp | 871 + cpp/include/cudf/io/orc.hpp | 1256 ++ 
cpp/include/cudf/io/orc_metadata.hpp | 371 + cpp/include/cudf/io/orc_types.hpp | 86 + cpp/include/cudf/io/parquet.hpp | 1758 ++ cpp/include/cudf/io/parquet_metadata.hpp | 231 + cpp/include/cudf/io/text/byte_range_info.hpp | 109 + .../cudf/io/text/data_chunk_source.hpp | 115 + .../io/text/data_chunk_source_factories.hpp | 87 + .../cudf/io/text/detail/bgzip_utils.hpp | 112 + .../cudf/io/text/detail/multistate.hpp | 128 + .../cudf/io/text/detail/tile_state.hpp | 149 + cpp/include/cudf/io/text/detail/trie.hpp | 250 + cpp/include/cudf/io/text/multibyte_split.hpp | 103 + cpp/include/cudf/io/types.hpp | 936 ++ cpp/include/cudf/join.hpp | 1181 ++ cpp/include/cudf/labeling/label_bins.hpp | 79 + cpp/include/cudf/lists/combine.hpp | 100 + cpp/include/cudf/lists/contains.hpp | 174 + cpp/include/cudf/lists/count_elements.hpp | 58 + cpp/include/cudf/lists/detail/combine.hpp | 47 + cpp/include/cudf/lists/detail/concatenate.hpp | 52 + cpp/include/cudf/lists/detail/contains.hpp | 74 + cpp/include/cudf/lists/detail/copying.hpp | 53 + cpp/include/cudf/lists/detail/dremel.hpp | 216 + cpp/include/cudf/lists/detail/extract.hpp | 47 + cpp/include/cudf/lists/detail/gather.cuh | 315 + .../cudf/lists/detail/interleave_columns.hpp | 54 + .../lists/detail/lists_column_factories.hpp | 71 + cpp/include/cudf/lists/detail/reverse.hpp | 30 + cpp/include/cudf/lists/detail/scatter.cuh | 283 + .../cudf/lists/detail/scatter_helper.cuh | 143 + .../cudf/lists/detail/set_operations.hpp | 77 + cpp/include/cudf/lists/detail/sorting.hpp | 50 + .../cudf/lists/detail/stream_compaction.hpp | 47 + cpp/include/cudf/lists/explode.hpp | 209 + cpp/include/cudf/lists/extract.hpp | 111 + cpp/include/cudf/lists/filling.hpp | 109 + cpp/include/cudf/lists/gather.hpp | 80 + cpp/include/cudf/lists/list_device_view.cuh | 380 + cpp/include/cudf/lists/list_view.hpp | 32 + .../cudf/lists/lists_column_device_view.cuh | 120 + cpp/include/cudf/lists/lists_column_view.hpp | 139 + cpp/include/cudf/lists/reverse.hpp | 54 + 
cpp/include/cudf/lists/set_operations.hpp | 171 + cpp/include/cudf/lists/sorting.hpp | 73 + cpp/include/cudf/lists/stream_compaction.hpp | 92 + cpp/include/cudf/merge.hpp | 108 + cpp/include/cudf/null_mask.hpp | 190 + cpp/include/cudf/partitioning.hpp | 248 + cpp/include/cudf/quantiles.hpp | 131 + cpp/include/cudf/reduction.hpp | 222 + .../cudf/reduction/detail/histogram.hpp | 57 + .../cudf/reduction/detail/reduction.cuh | 235 + .../cudf/reduction/detail/reduction.hpp | 40 + .../reduction/detail/reduction_functions.hpp | 356 + .../reduction/detail/reduction_operators.cuh | 295 + .../reduction/detail/segmented_reduction.cuh | 198 + .../detail/segmented_reduction_functions.hpp | 358 + cpp/include/cudf/replace.hpp | 310 + cpp/include/cudf/reshape.hpp | 107 + cpp/include/cudf/rolling.hpp | 597 + .../cudf/rolling/range_window_bounds.hpp | 108 + cpp/include/cudf/round.hpp | 79 + cpp/include/cudf/scalar/scalar.hpp | 893 + .../cudf/scalar/scalar_device_view.cuh | 443 + cpp/include/cudf/scalar/scalar_factories.hpp | 229 + cpp/include/cudf/search.hpp | 170 + cpp/include/cudf/sorting.hpp | 355 + cpp/include/cudf/stream_compaction.hpp | 378 + cpp/include/cudf/strings/attributes.hpp | 93 + cpp/include/cudf/strings/capitalize.hpp | 131 + cpp/include/cudf/strings/case.hpp | 91 + .../cudf/strings/char_types/char_cases.hpp | 34 + .../cudf/strings/char_types/char_types.hpp | 117 + .../strings/char_types/char_types_enum.hpp | 83 + cpp/include/cudf/strings/combine.hpp | 328 + cpp/include/cudf/strings/contains.hpp | 201 + .../cudf/strings/convert/convert_booleans.hpp | 70 + .../cudf/strings/convert/convert_datetime.hpp | 251 + .../strings/convert/convert_durations.hpp | 131 + .../strings/convert/convert_fixed_point.hpp | 126 + .../cudf/strings/convert/convert_floats.hpp | 99 + .../cudf/strings/convert/convert_integers.hpp | 223 + .../cudf/strings/convert/convert_ipv4.hpp | 109 + .../cudf/strings/convert/convert_lists.hpp | 68 + .../cudf/strings/convert/convert_urls.hpp | 73 + 
.../cudf/strings/detail/char_tables.hpp | 106 + cpp/include/cudf/strings/detail/combine.hpp | 72 + .../cudf/strings/detail/concatenate.hpp | 51 + .../strings/detail/convert/fixed_point.cuh | 166 + .../detail/convert/fixed_point_to_string.cuh | 80 + .../strings/detail/convert/int_to_string.cuh | 97 + .../cudf/strings/detail/convert/is_float.cuh | 120 + .../detail/convert/string_to_float.cuh | 139 + .../strings/detail/convert/string_to_int.cuh | 54 + .../cudf/strings/detail/converters.hpp | 157 + .../cudf/strings/detail/copy_if_else.cuh | 116 + .../cudf/strings/detail/copy_range.cuh | 216 + cpp/include/cudf/strings/detail/copying.hpp | 87 + cpp/include/cudf/strings/detail/fill.hpp | 54 + cpp/include/cudf/strings/detail/gather.cuh | 361 + cpp/include/cudf/strings/detail/json.hpp | 43 + cpp/include/cudf/strings/detail/merge.cuh | 112 + cpp/include/cudf/strings/detail/pad_impl.cuh | 126 + cpp/include/cudf/strings/detail/replace.hpp | 103 + cpp/include/cudf/strings/detail/scatter.cuh | 89 + .../cudf/strings/detail/split_utils.cuh | 161 + .../cudf/strings/detail/strings_children.cuh | 127 + .../detail/strings_column_factories.cuh | 199 + cpp/include/cudf/strings/detail/strip.cuh | 71 + cpp/include/cudf/strings/detail/utf8.hpp | 214 + cpp/include/cudf/strings/detail/utilities.cuh | 104 + cpp/include/cudf/strings/detail/utilities.hpp | 58 + cpp/include/cudf/strings/extract.hpp | 102 + cpp/include/cudf/strings/find.hpp | 264 + cpp/include/cudf/strings/find_multiple.hpp | 65 + cpp/include/cudf/strings/findall.hpp | 72 + cpp/include/cudf/strings/json.hpp | 174 + cpp/include/cudf/strings/padding.hpp | 94 + cpp/include/cudf/strings/regex/flags.hpp | 89 + .../cudf/strings/regex/regex_program.hpp | 138 + cpp/include/cudf/strings/repeat_strings.hpp | 129 + cpp/include/cudf/strings/replace.hpp | 158 + cpp/include/cudf/strings/replace_re.hpp | 108 + cpp/include/cudf/strings/reverse.hpp | 53 + cpp/include/cudf/strings/side_type.hpp | 37 + cpp/include/cudf/strings/slice.hpp | 112 + 
cpp/include/cudf/strings/split/partition.hpp | 99 + cpp/include/cudf/strings/split/split.hpp | 247 + cpp/include/cudf/strings/split/split_re.hpp | 257 + cpp/include/cudf/strings/string_view.cuh | 451 + cpp/include/cudf/strings/string_view.hpp | 409 + .../cudf/strings/strings_column_view.hpp | 153 + cpp/include/cudf/strings/strip.hpp | 71 + cpp/include/cudf/strings/translate.hpp | 107 + cpp/include/cudf/strings/wrap.hpp | 72 + .../cudf/structs/detail/concatenate.hpp | 57 + cpp/include/cudf/structs/struct_view.hpp | 32 + .../structs/structs_column_device_view.cuh | 87 + .../cudf/structs/structs_column_view.hpp | 101 + .../cudf/table/experimental/row_operators.cuh | 1963 +++ cpp/include/cudf/table/row_operators.cuh | 639 + cpp/include/cudf/table/table.hpp | 190 + cpp/include/cudf/table/table_device_view.cuh | 274 + cpp/include/cudf/table/table_view.hpp | 399 + .../cudf/tdigest/tdigest_column_view.hpp | 127 + cpp/include/cudf/timezone.hpp | 54 + cpp/include/cudf/transform.hpp | 228 + cpp/include/cudf/transpose.hpp | 50 + cpp/include/cudf/types.hpp | 333 + cpp/include/cudf/unary.hpp | 156 + cpp/include/cudf/utilities/bit.hpp | 218 + cpp/include/cudf/utilities/default_stream.hpp | 40 + cpp/include/cudf/utilities/error.hpp | 284 + cpp/include/cudf/utilities/logger.hpp | 46 + cpp/include/cudf/utilities/span.hpp | 491 + cpp/include/cudf/utilities/traits.cuh | 67 + cpp/include/cudf/utilities/traits.hpp | 598 + cpp/include/cudf/utilities/type_checks.hpp | 50 + .../cudf/utilities/type_dispatcher.hpp | 615 + cpp/include/cudf/wrappers/dictionary.hpp | 219 + cpp/include/cudf/wrappers/durations.hpp | 68 + cpp/include/cudf/wrappers/timestamps.hpp | 85 + cpp/include/cudf_test/base_fixture.hpp | 399 + cpp/include/cudf_test/column_utilities.hpp | 310 + cpp/include/cudf_test/column_wrapper.hpp | 1910 +++ cpp/include/cudf_test/cudf_gtest.hpp | 139 + cpp/include/cudf_test/cxxopts.hpp | 1504 ++ cpp/include/cudf_test/default_stream.hpp | 41 + .../cudf_test/detail/column_utilities.hpp | 85 
+ cpp/include/cudf_test/file_utilities.hpp | 71 + .../cudf_test/io_metadata_utilities.hpp | 31 + cpp/include/cudf_test/iterator_utilities.hpp | 139 + cpp/include/cudf_test/print_utilities.cuh | 140 + .../stream_checking_resource_adaptor.hpp | 202 + cpp/include/cudf_test/table_utilities.hpp | 79 + cpp/include/cudf_test/tdigest_utilities.cuh | 586 + cpp/include/cudf_test/timestamp_utilities.cuh | 78 + cpp/include/cudf_test/type_list_utilities.hpp | 630 + cpp/include/cudf_test/type_lists.hpp | 435 + cpp/include/doxygen_groups.h | 184 + cpp/include/nvtext/bpe_tokenize.hpp | 136 + cpp/include/nvtext/detail/generate_ngrams.hpp | 37 + cpp/include/nvtext/detail/load_hash_file.hpp | 49 + cpp/include/nvtext/detail/tokenize.hpp | 72 + cpp/include/nvtext/edit_distance.hpp | 101 + cpp/include/nvtext/generate_ngrams.hpp | 131 + cpp/include/nvtext/jaccard.hpp | 79 + cpp/include/nvtext/minhash.hpp | 152 + cpp/include/nvtext/ngrams_tokenize.hpp | 87 + cpp/include/nvtext/normalize.hpp | 105 + cpp/include/nvtext/replace.hpp | 139 + cpp/include/nvtext/stemmer.hpp | 167 + cpp/include/nvtext/subword_tokenize.hpp | 161 + cpp/include/nvtext/tokenize.hpp | 297 + cpp/libcudf_kafka/CMakeLists.txt | 111 + .../cmake/thirdparty/get_cudf.cmake | 55 + .../cmake/thirdparty/get_rdkafka.cmake | 41 + .../include/cudf_kafka/kafka_callback.hpp | 86 + .../include/cudf_kafka/kafka_consumer.hpp | 231 + cpp/libcudf_kafka/src/kafka_callback.cpp | 48 + cpp/libcudf_kafka/src/kafka_consumer.cpp | 268 + cpp/libcudf_kafka/tests/CMakeLists.txt | 49 + .../tests/kafka_consumer_tests.cpp | 67 + cpp/scripts/gdb-pretty-printers.py | 84 + cpp/scripts/load-pretty-printers.in | 3 + cpp/scripts/run-clang-tidy.py | 253 + cpp/scripts/run-cmake-format.sh | 84 + cpp/scripts/sort_ninja_log.py | 404 + cpp/src/aggregation/aggregation.cpp | 888 + cpp/src/aggregation/aggregation.cu | 36 + cpp/src/aggregation/result_cache.cpp | 55 + cpp/src/ast/expression_parser.cpp | 240 + cpp/src/ast/expressions.cpp | 83 + 
cpp/src/binaryop/binaryop.cpp | 442 + cpp/src/binaryop/compiled/ATan2.cu | 26 + cpp/src/binaryop/compiled/Add.cu | 26 + cpp/src/binaryop/compiled/BitwiseAnd.cu | 26 + cpp/src/binaryop/compiled/BitwiseOr.cu | 26 + cpp/src/binaryop/compiled/BitwiseXor.cu | 26 + cpp/src/binaryop/compiled/Div.cu | 26 + cpp/src/binaryop/compiled/FloorDiv.cu | 26 + cpp/src/binaryop/compiled/Greater.cu | 26 + cpp/src/binaryop/compiled/GreaterEqual.cu | 26 + cpp/src/binaryop/compiled/IntPow.cu | 26 + cpp/src/binaryop/compiled/Less.cu | 26 + cpp/src/binaryop/compiled/LessEqual.cu | 26 + cpp/src/binaryop/compiled/LogBase.cu | 26 + cpp/src/binaryop/compiled/LogicalAnd.cu | 26 + cpp/src/binaryop/compiled/LogicalOr.cu | 26 + cpp/src/binaryop/compiled/Mod.cu | 26 + cpp/src/binaryop/compiled/Mul.cu | 26 + cpp/src/binaryop/compiled/NullEquals.cu | 26 + cpp/src/binaryop/compiled/NullLogicalAnd.cu | 26 + cpp/src/binaryop/compiled/NullLogicalOr.cu | 26 + cpp/src/binaryop/compiled/NullMax.cu | 26 + cpp/src/binaryop/compiled/NullMin.cu | 26 + cpp/src/binaryop/compiled/PMod.cu | 26 + cpp/src/binaryop/compiled/Pow.cu | 26 + cpp/src/binaryop/compiled/PyMod.cu | 26 + cpp/src/binaryop/compiled/ShiftLeft.cu | 26 + cpp/src/binaryop/compiled/ShiftRight.cu | 26 + .../binaryop/compiled/ShiftRightUnsigned.cu | 26 + cpp/src/binaryop/compiled/Sub.cu | 26 + cpp/src/binaryop/compiled/TrueDiv.cu | 26 + cpp/src/binaryop/compiled/binary_ops.cu | 469 + cpp/src/binaryop/compiled/binary_ops.cuh | 307 + cpp/src/binaryop/compiled/binary_ops.hpp | 218 + cpp/src/binaryop/compiled/equality_ops.cu | 62 + cpp/src/binaryop/compiled/operation.cuh | 527 + .../binaryop/compiled/struct_binary_ops.cuh | 192 + cpp/src/binaryop/compiled/util.cpp | 221 + cpp/src/binaryop/jit/kernel.cu | 97 + cpp/src/binaryop/jit/operation-udf.hpp | 20 + cpp/src/bitmask/is_element_valid.cpp | 47 + cpp/src/bitmask/null_mask.cu | 543 + cpp/src/column/column.cu | 264 + cpp/src/column/column_device_view.cu | 176 + cpp/src/column/column_factories.cpp | 186 + 
cpp/src/column/column_factories.cu | 121 + cpp/src/column/column_view.cpp | 207 + cpp/src/copying/concatenate.cu | 603 + cpp/src/copying/contiguous_split.cu | 2086 +++ cpp/src/copying/copy.cpp | 195 + cpp/src/copying/copy.cu | 451 + cpp/src/copying/copy_range.cu | 295 + cpp/src/copying/gather.cu | 94 + cpp/src/copying/get_element.cu | 211 + cpp/src/copying/pack.cpp | 304 + cpp/src/copying/purge_nonempty_nulls.cu | 141 + cpp/src/copying/reverse.cu | 73 + cpp/src/copying/sample.cu | 99 + cpp/src/copying/scatter.cu | 536 + cpp/src/copying/segmented_shift.cu | 162 + cpp/src/copying/shift.cu | 180 + cpp/src/copying/slice.cu | 183 + cpp/src/copying/split.cpp | 116 + cpp/src/datetime/datetime_ops.cu | 725 + cpp/src/datetime/timezone.cpp | 515 + cpp/src/dictionary/add_keys.cu | 141 + cpp/src/dictionary/decode.cu | 76 + cpp/src/dictionary/detail/concatenate.cu | 298 + cpp/src/dictionary/detail/merge.cu | 76 + cpp/src/dictionary/dictionary_column_view.cpp | 55 + cpp/src/dictionary/dictionary_factories.cu | 151 + cpp/src/dictionary/encode.cu | 100 + cpp/src/dictionary/remove_keys.cu | 214 + cpp/src/dictionary/replace.cu | 145 + cpp/src/dictionary/search.cu | 185 + cpp/src/dictionary/set_keys.cu | 261 + cpp/src/filling/calendrical_month_sequence.cu | 50 + cpp/src/filling/fill.cu | 267 + cpp/src/filling/repeat.cu | 175 + cpp/src/filling/sequence.cu | 169 + cpp/src/groupby/common/utils.hpp | 62 + cpp/src/groupby/groupby.cu | 339 + cpp/src/groupby/hash/groupby.cu | 682 + cpp/src/groupby/hash/groupby_kernels.cuh | 119 + cpp/src/groupby/hash/multi_pass_kernels.cuh | 116 + cpp/src/groupby/sort/aggregate.cpp | 818 + cpp/src/groupby/sort/common_utils.cuh | 62 + cpp/src/groupby/sort/functors.hpp | 110 + cpp/src/groupby/sort/group_argmax.cu | 64 + cpp/src/groupby/sort/group_argmin.cu | 64 + cpp/src/groupby/sort/group_collect.cu | 127 + cpp/src/groupby/sort/group_correlation.cu | 214 + cpp/src/groupby/sort/group_count.cu | 98 + cpp/src/groupby/sort/group_count_scan.cu | 52 + 
cpp/src/groupby/sort/group_histogram.cu | 152 + cpp/src/groupby/sort/group_m2.cu | 147 + cpp/src/groupby/sort/group_max.cu | 44 + cpp/src/groupby/sort/group_max_scan.cu | 41 + cpp/src/groupby/sort/group_merge_lists.cu | 74 + cpp/src/groupby/sort/group_merge_m2.cu | 201 + cpp/src/groupby/sort/group_min.cu | 44 + cpp/src/groupby/sort/group_min_scan.cu | 41 + cpp/src/groupby/sort/group_nth_element.cu | 135 + cpp/src/groupby/sort/group_nunique.cu | 137 + cpp/src/groupby/sort/group_product.cu | 46 + cpp/src/groupby/sort/group_quantiles.cu | 180 + cpp/src/groupby/sort/group_rank_scan.cu | 330 + cpp/src/groupby/sort/group_reductions.hpp | 543 + cpp/src/groupby/sort/group_replace_nulls.cu | 86 + cpp/src/groupby/sort/group_scan.hpp | 199 + cpp/src/groupby/sort/group_scan_util.cuh | 246 + .../sort/group_single_pass_reduction_util.cuh | 268 + cpp/src/groupby/sort/group_std.cu | 190 + cpp/src/groupby/sort/group_sum.cu | 46 + cpp/src/groupby/sort/group_sum_scan.cu | 41 + cpp/src/groupby/sort/scan.cpp | 219 + cpp/src/groupby/sort/sort_helper.cu | 319 + cpp/src/hash/concurrent_unordered_map.cuh | 558 + cpp/src/hash/hashing.cu | 53 + cpp/src/hash/managed.cuh | 44 + cpp/src/hash/md5_hash.cu | 384 + cpp/src/hash/murmurhash3_x64_128.cu | 150 + cpp/src/hash/murmurhash3_x86_32.cu | 72 + cpp/src/hash/spark_murmurhash3_x86_32.cu | 442 + cpp/src/hash/unordered_multiset.cuh | 159 + cpp/src/hash/xxhash_64.cu | 337 + cpp/src/interop/detail/arrow_allocator.cpp | 82 + cpp/src/interop/detail/arrow_allocator.hpp | 31 + cpp/src/interop/dlpack.cpp | 313 + cpp/src/interop/from_arrow.cu | 561 + cpp/src/interop/to_arrow.cu | 473 + cpp/src/io/avro/avro.cpp | 495 + cpp/src/io/avro/avro.hpp | 184 + cpp/src/io/avro/avro_common.hpp | 168 + cpp/src/io/avro/avro_gpu.cu | 444 + cpp/src/io/avro/avro_gpu.hpp | 62 + cpp/src/io/avro/reader_impl.cu | 624 + cpp/src/io/comp/brotli_dict.cpp | 6535 ++++++++ cpp/src/io/comp/brotli_dict.hpp | 85 + cpp/src/io/comp/brotli_tables.hpp | 2653 +++ 
cpp/src/io/comp/cpu_unbz2.cpp | 601 + cpp/src/io/comp/debrotli.cu | 2124 +++ cpp/src/io/comp/gpuinflate.cu | 1230 ++ cpp/src/io/comp/gpuinflate.hpp | 168 + cpp/src/io/comp/io_uncomp.hpp | 59 + cpp/src/io/comp/nvcomp_adapter.cpp | 655 + cpp/src/io/comp/nvcomp_adapter.cu | 130 + cpp/src/io/comp/nvcomp_adapter.cuh | 79 + cpp/src/io/comp/nvcomp_adapter.hpp | 162 + cpp/src/io/comp/snap.cu | 360 + cpp/src/io/comp/statistics.cu | 62 + cpp/src/io/comp/unbz2.hpp | 106 + cpp/src/io/comp/uncomp.cpp | 562 + cpp/src/io/comp/unsnap.cu | 728 + cpp/src/io/csv/csv_common.hpp | 43 + cpp/src/io/csv/csv_gpu.cu | 866 + cpp/src/io/csv/csv_gpu.hpp | 236 + cpp/src/io/csv/datetime.cuh | 402 + cpp/src/io/csv/durations.cu | 235 + cpp/src/io/csv/durations.hpp | 38 + cpp/src/io/csv/reader_impl.cu | 1008 ++ cpp/src/io/csv/writer_impl.cu | 495 + cpp/src/io/fst/agent_dfa.cuh | 695 + cpp/src/io/fst/device_dfa.cuh | 94 + cpp/src/io/fst/dispatch_dfa.cuh | 456 + cpp/src/io/fst/in_reg_array.cuh | 140 + cpp/src/io/fst/logical_stack.cuh | 473 + cpp/src/io/fst/lookup_tables.cuh | 924 ++ cpp/src/io/functions.cpp | 892 + cpp/src/io/json/byte_range_info.cu | 36 + cpp/src/io/json/json_column.cu | 1037 ++ cpp/src/io/json/json_tree.cu | 852 + cpp/src/io/json/legacy/json_gpu.cu | 616 + cpp/src/io/json/legacy/json_gpu.hpp | 100 + cpp/src/io/json/legacy/read_json.hpp | 33 + cpp/src/io/json/legacy/reader_impl.cu | 657 + cpp/src/io/json/nested_json.hpp | 311 + cpp/src/io/json/nested_json_gpu.cu | 2195 +++ cpp/src/io/json/read_json.cu | 226 + cpp/src/io/json/read_json.hpp | 45 + cpp/src/io/json/write_json.cu | 912 ++ cpp/src/io/orc/aggregate_orc_metadata.cpp | 276 + cpp/src/io/orc/aggregate_orc_metadata.hpp | 138 + cpp/src/io/orc/dict_enc.cu | 277 + cpp/src/io/orc/orc.cpp | 530 + cpp/src/io/orc/orc.hpp | 683 + cpp/src/io/orc/orc_field_reader.hpp | 94 + cpp/src/io/orc/orc_field_writer.hpp | 134 + cpp/src/io/orc/orc_gpu.hpp | 484 + cpp/src/io/orc/reader_impl.cu | 1366 ++ cpp/src/io/orc/reader_impl.hpp | 89 + 
cpp/src/io/orc/stats_enc.cu | 484 + cpp/src/io/orc/stripe_data.cu | 1902 +++ cpp/src/io/orc/stripe_enc.cu | 1374 ++ cpp/src/io/orc/stripe_init.cu | 612 + cpp/src/io/orc/writer_impl.cu | 2684 +++ cpp/src/io/orc/writer_impl.hpp | 369 + cpp/src/io/parquet/chunk_dict.cu | 303 + .../io/parquet/compact_protocol_reader.cpp | 875 + .../io/parquet/compact_protocol_reader.hpp | 152 + .../io/parquet/compact_protocol_writer.cpp | 396 + .../io/parquet/compact_protocol_writer.hpp | 120 + cpp/src/io/parquet/decode_preprocess.cu | 417 + cpp/src/io/parquet/delta_binary.cuh | 294 + cpp/src/io/parquet/page_data.cu | 654 + cpp/src/io/parquet/page_decode.cuh | 1387 ++ cpp/src/io/parquet/page_delta_decode.cu | 187 + cpp/src/io/parquet/page_enc.cu | 2349 +++ cpp/src/io/parquet/page_hdr.cu | 534 + cpp/src/io/parquet/page_string_decode.cu | 808 + cpp/src/io/parquet/page_string_utils.cuh | 110 + cpp/src/io/parquet/parquet.hpp | 410 + cpp/src/io/parquet/parquet_common.hpp | 161 + cpp/src/io/parquet/parquet_gpu.cuh | 84 + cpp/src/io/parquet/parquet_gpu.hpp | 867 + cpp/src/io/parquet/predicate_pushdown.cpp | 530 + cpp/src/io/parquet/reader.cpp | 62 + cpp/src/io/parquet/reader_impl.cpp | 574 + cpp/src/io/parquet/reader_impl.hpp | 304 + cpp/src/io/parquet/reader_impl_helpers.cpp | 659 + cpp/src/io/parquet/reader_impl_helpers.hpp | 278 + cpp/src/io/parquet/reader_impl_preprocess.cu | 2017 +++ cpp/src/io/parquet/rle_stream.cuh | 365 + cpp/src/io/parquet/writer_impl.cu | 2398 +++ cpp/src/io/parquet/writer_impl.hpp | 186 + cpp/src/io/statistics/byte_array_view.cuh | 180 + cpp/src/io/statistics/column_statistics.cuh | 417 + .../io/statistics/conversion_type_select.cuh | 142 + .../io/statistics/orc_column_statistics.cu | 43 + .../statistics/parquet_column_statistics.cu | 43 + cpp/src/io/statistics/statistics.cuh | 142 + .../statistics_type_identification.cuh | 288 + .../io/statistics/temp_storage_wrapper.cuh | 112 + .../io/statistics/typed_statistics_chunk.cuh | 276 + 
cpp/src/io/text/bgzip_data_chunk_source.cu | 382 + cpp/src/io/text/bgzip_utils.cpp | 179 + cpp/src/io/text/byte_range_info.cpp | 47 + .../io/text/data_chunk_source_factories.cpp | 339 + cpp/src/io/text/device_data_chunks.hpp | 47 + cpp/src/io/text/multibyte_split.cu | 589 + cpp/src/io/utilities/arrow_io_source.cpp | 85 + cpp/src/io/utilities/block_utils.cuh | 191 + cpp/src/io/utilities/column_buffer.cpp | 356 + cpp/src/io/utilities/column_buffer.hpp | 258 + .../io/utilities/column_type_histogram.hpp | 44 + cpp/src/io/utilities/column_utils.cuh | 92 + cpp/src/io/utilities/config_utils.cpp | 83 + cpp/src/io/utilities/config_utils.hpp | 81 + cpp/src/io/utilities/data_casting.cu | 984 ++ cpp/src/io/utilities/data_sink.cpp | 217 + cpp/src/io/utilities/datasource.cpp | 425 + cpp/src/io/utilities/file_io_utilities.cpp | 348 + cpp/src/io/utilities/file_io_utilities.hpp | 224 + cpp/src/io/utilities/hostdevice_span.hpp | 179 + cpp/src/io/utilities/hostdevice_vector.hpp | 258 + cpp/src/io/utilities/output_builder.cuh | 357 + cpp/src/io/utilities/parsing_utils.cu | 221 + cpp/src/io/utilities/parsing_utils.cuh | 716 + cpp/src/io/utilities/row_selection.cpp | 42 + cpp/src/io/utilities/row_selection.hpp | 40 + cpp/src/io/utilities/string_parsing.hpp | 79 + cpp/src/io/utilities/thread_pool.hpp | 381 + cpp/src/io/utilities/time_utils.cuh | 53 + cpp/src/io/utilities/trie.cu | 111 + cpp/src/io/utilities/trie.cuh | 104 + cpp/src/io/utilities/type_inference.cu | 292 + cpp/src/jit/cache.cpp | 143 + cpp/src/jit/cache.hpp | 28 + cpp/src/jit/parser.cpp | 416 + cpp/src/jit/parser.hpp | 242 + cpp/src/jit/util.cpp | 71 + cpp/src/jit/util.hpp | 37 + cpp/src/join/conditional_join.cu | 426 + cpp/src/join/conditional_join.hpp | 74 + cpp/src/join/conditional_join_kernels.cuh | 276 + cpp/src/join/cross_join.cu | 83 + cpp/src/join/hash_join.cu | 649 + cpp/src/join/join.cu | 150 + cpp/src/join/join_common_utils.cuh | 327 + cpp/src/join/join_common_utils.hpp | 69 + cpp/src/join/join_utils.cu | 158 + 
cpp/src/join/mixed_join.cu | 594 + cpp/src/join/mixed_join_common_utils.cuh | 165 + cpp/src/join/mixed_join_kernel.cu | 39 + cpp/src/join/mixed_join_kernel.cuh | 114 + cpp/src/join/mixed_join_kernel_nulls.cu | 39 + cpp/src/join/mixed_join_kernels.cuh | 123 + cpp/src/join/mixed_join_kernels_semi.cu | 110 + cpp/src/join/mixed_join_kernels_semi.cuh | 121 + cpp/src/join/mixed_join_semi.cu | 626 + cpp/src/join/mixed_join_size_kernel.cu | 37 + cpp/src/join/mixed_join_size_kernel.cuh | 105 + cpp/src/join/mixed_join_size_kernel_nulls.cu | 37 + cpp/src/join/mixed_join_size_kernels_semi.cu | 122 + cpp/src/join/semi_join.cu | 116 + cpp/src/labeling/label_bins.cu | 250 + .../combine/concatenate_list_elements.cu | 281 + cpp/src/lists/combine/concatenate_rows.cu | 315 + cpp/src/lists/contains.cu | 406 + cpp/src/lists/copying/concatenate.cu | 144 + cpp/src/lists/copying/copying.cu | 102 + cpp/src/lists/copying/gather.cu | 184 + cpp/src/lists/copying/scatter_helper.cu | 483 + cpp/src/lists/copying/segmented_gather.cu | 127 + cpp/src/lists/count_elements.cu | 83 + cpp/src/lists/dremel.cu | 486 + cpp/src/lists/explode.cu | 347 + cpp/src/lists/extract.cu | 221 + cpp/src/lists/interleave_columns.cu | 423 + cpp/src/lists/lists_column_factories.cu | 156 + cpp/src/lists/lists_column_view.cu | 69 + cpp/src/lists/reverse.cu | 95 + cpp/src/lists/segmented_sort.cu | 139 + cpp/src/lists/sequences.cu | 226 + cpp/src/lists/set_operations.cu | 319 + .../stream_compaction/apply_boolean_mask.cu | 110 + cpp/src/lists/stream_compaction/distinct.cu | 85 + cpp/src/lists/utilities.cu | 79 + cpp/src/lists/utilities.hpp | 67 + cpp/src/merge/merge.cu | 555 + cpp/src/partitioning/partitioning.cu | 843 + cpp/src/partitioning/round_robin.cu | 277 + cpp/src/quantiles/quantile.cu | 196 + cpp/src/quantiles/quantiles.cu | 113 + cpp/src/quantiles/quantiles_util.hpp | 219 + cpp/src/quantiles/tdigest/tdigest.cu | 413 + .../quantiles/tdigest/tdigest_aggregation.cu | 1294 ++ 
.../quantiles/tdigest/tdigest_column_view.cpp | 78 + cpp/src/quantiles/tdigest/tdigest_util.cuh | 56 + cpp/src/reductions/all.cu | 106 + cpp/src/reductions/any.cu | 106 + cpp/src/reductions/collect_ops.cu | 118 + cpp/src/reductions/compound.cuh | 159 + cpp/src/reductions/histogram.cu | 273 + cpp/src/reductions/max.cu | 47 + cpp/src/reductions/mean.cu | 43 + cpp/src/reductions/min.cu | 43 + cpp/src/reductions/minmax.cu | 283 + .../reductions/nested_type_minmax_util.cuh | 182 + cpp/src/reductions/nth_element.cu | 66 + cpp/src/reductions/product.cu | 45 + cpp/src/reductions/reductions.cpp | 224 + cpp/src/reductions/scan/rank_scan.cu | 155 + cpp/src/reductions/scan/scan.cpp | 67 + cpp/src/reductions/scan/scan.cuh | 66 + cpp/src/reductions/scan/scan_exclusive.cu | 107 + cpp/src/reductions/scan/scan_inclusive.cu | 310 + cpp/src/reductions/segmented/all.cu | 45 + cpp/src/reductions/segmented/any.cu | 45 + cpp/src/reductions/segmented/compound.cuh | 175 + cpp/src/reductions/segmented/counts.cu | 54 + cpp/src/reductions/segmented/counts.hpp | 55 + cpp/src/reductions/segmented/max.cu | 42 + cpp/src/reductions/segmented/mean.cu | 42 + cpp/src/reductions/segmented/min.cu | 42 + cpp/src/reductions/segmented/nunique.cu | 113 + cpp/src/reductions/segmented/product.cu | 39 + cpp/src/reductions/segmented/reductions.cpp | 170 + cpp/src/reductions/segmented/simple.cuh | 496 + cpp/src/reductions/segmented/std.cu | 42 + cpp/src/reductions/segmented/sum.cu | 40 + .../reductions/segmented/sum_of_squares.cu | 41 + .../reductions/segmented/update_validity.cu | 49 + .../reductions/segmented/update_validity.hpp | 58 + cpp/src/reductions/segmented/var.cu | 41 + cpp/src/reductions/simple.cuh | 478 + cpp/src/reductions/std.cu | 49 + cpp/src/reductions/sum.cu | 46 + cpp/src/reductions/sum_of_squares.cu | 45 + cpp/src/reductions/var.cu | 49 + cpp/src/replace/clamp.cu | 408 + cpp/src/replace/nans.cu | 249 + cpp/src/replace/nulls.cu | 474 + cpp/src/replace/replace.cu | 550 + 
cpp/src/reshape/byte_cast.cu | 192 + cpp/src/reshape/interleave_columns.cu | 299 + cpp/src/reshape/tile.cu | 71 + cpp/src/rolling/detail/lead_lag_nested.cuh | 208 + cpp/src/rolling/detail/nth_element.cuh | 174 + .../detail/optimized_unbounded_window.cpp | 161 + .../detail/optimized_unbounded_window.hpp | 56 + .../rolling/detail/range_comparator_utils.cuh | 143 + .../rolling/detail/range_window_bounds.hpp | 166 + cpp/src/rolling/detail/rolling.cuh | 1390 ++ cpp/src/rolling/detail/rolling.hpp | 99 + .../rolling/detail/rolling_collect_list.cu | 163 + .../rolling/detail/rolling_collect_list.cuh | 229 + .../rolling/detail/rolling_fixed_window.cu | 87 + cpp/src/rolling/detail/rolling_jit.hpp | 61 + .../rolling/detail/rolling_variable_window.cu | 85 + cpp/src/rolling/grouped_rolling.cu | 1292 ++ cpp/src/rolling/jit/kernel.cu | 106 + cpp/src/rolling/jit/operation-udf.hpp | 20 + cpp/src/rolling/jit/operation.hpp | 41 + cpp/src/rolling/range_window_bounds.cpp | 89 + cpp/src/rolling/rolling.cu | 80 + cpp/src/round/round.cu | 357 + cpp/src/scalar/scalar.cpp | 600 + cpp/src/scalar/scalar_factories.cpp | 202 + cpp/src/search/contains_column.cu | 164 + cpp/src/search/contains_scalar.cu | 169 + cpp/src/search/contains_table.cu | 286 + cpp/src/search/search_ordered.cu | 165 + cpp/src/sort/is_sorted.cu | 96 + cpp/src/sort/rank.cu | 376 + cpp/src/sort/segmented_sort.cu | 105 + cpp/src/sort/segmented_sort_impl.cuh | 328 + cpp/src/sort/sort.cu | 140 + cpp/src/sort/sort_column.cu | 55 + cpp/src/sort/sort_column_impl.cuh | 203 + cpp/src/sort/sort_impl.cuh | 100 + cpp/src/sort/stable_segmented_sort.cu | 79 + cpp/src/sort/stable_sort.cu | 83 + cpp/src/sort/stable_sort_column.cu | 55 + .../stream_compaction/apply_boolean_mask.cu | 98 + cpp/src/stream_compaction/distinct.cu | 179 + cpp/src/stream_compaction/distinct_count.cu | 226 + cpp/src/stream_compaction/distinct_helpers.cu | 109 + .../stream_compaction/distinct_helpers.hpp | 87 + cpp/src/stream_compaction/drop_nans.cu | 135 + 
cpp/src/stream_compaction/drop_nulls.cu | 108 + cpp/src/stream_compaction/stable_distinct.cu | 87 + .../stream_compaction_common.cuh | 113 + .../stream_compaction_common.hpp | 38 + cpp/src/stream_compaction/unique.cu | 128 + cpp/src/stream_compaction/unique_count.cu | 76 + .../stream_compaction/unique_count_column.cu | 110 + cpp/src/strings/attributes.cu | 284 + cpp/src/strings/capitalize.cu | 315 + cpp/src/strings/case.cu | 337 + cpp/src/strings/char_types/char_cases.cu | 201 + cpp/src/strings/char_types/char_cases.h | 5236 ++++++ cpp/src/strings/char_types/char_flags.h | 3488 ++++ cpp/src/strings/char_types/char_types.cu | 239 + cpp/src/strings/combine/concatenate.cu | 295 + cpp/src/strings/combine/join.cu | 190 + cpp/src/strings/combine/join_list_elements.cu | 336 + cpp/src/strings/contains.cu | 151 + cpp/src/strings/convert/convert_booleans.cu | 166 + cpp/src/strings/convert/convert_datetime.cu | 1178 ++ cpp/src/strings/convert/convert_durations.cu | 740 + .../strings/convert/convert_fixed_point.cu | 351 + cpp/src/strings/convert/convert_floats.cu | 484 + cpp/src/strings/convert/convert_hex.cu | 304 + cpp/src/strings/convert/convert_integers.cu | 417 + cpp/src/strings/convert/convert_ipv4.cu | 240 + cpp/src/strings/convert/convert_lists.cu | 243 + cpp/src/strings/convert/convert_urls.cu | 439 + cpp/src/strings/copying/concatenate.cu | 316 + cpp/src/strings/copying/copying.cu | 88 + cpp/src/strings/copying/shift.cu | 134 + cpp/src/strings/count_matches.cu | 81 + cpp/src/strings/count_matches.hpp | 52 + cpp/src/strings/extract/extract.cu | 143 + cpp/src/strings/extract/extract_all.cu | 176 + cpp/src/strings/filling/fill.cu | 110 + cpp/src/strings/filter_chars.cu | 169 + cpp/src/strings/json/json_path.cu | 1057 ++ cpp/src/strings/like.cu | 204 + cpp/src/strings/padding.cu | 186 + cpp/src/strings/regex/regcomp.cpp | 1274 ++ cpp/src/strings/regex/regcomp.h | 162 + cpp/src/strings/regex/regex.cuh | 311 + cpp/src/strings/regex/regex.inl | 431 + 
cpp/src/strings/regex/regex_program.cpp | 63 + cpp/src/strings/regex/regex_program_impl.h | 51 + cpp/src/strings/regex/regexec.cpp | 172 + cpp/src/strings/regex/utilities.cuh | 156 + cpp/src/strings/repeat_strings.cu | 286 + cpp/src/strings/replace/backref_re.cu | 158 + cpp/src/strings/replace/backref_re.cuh | 123 + cpp/src/strings/replace/multi.cu | 500 + cpp/src/strings/replace/multi_re.cu | 216 + cpp/src/strings/replace/replace.cu | 772 + cpp/src/strings/replace/replace_re.cu | 145 + cpp/src/strings/reverse.cu | 89 + cpp/src/strings/search/find.cu | 676 + cpp/src/strings/search/find_multiple.cu | 99 + cpp/src/strings/search/findall.cu | 145 + cpp/src/strings/slice.cu | 268 + cpp/src/strings/split/partition.cu | 259 + cpp/src/strings/split/split.cu | 452 + cpp/src/strings/split/split.cuh | 406 + cpp/src/strings/split/split_re.cu | 377 + cpp/src/strings/split/split_record.cu | 225 + cpp/src/strings/strings_column_factories.cu | 183 + cpp/src/strings/strings_column_view.cpp | 67 + cpp/src/strings/strings_scalar_factories.cpp | 31 + cpp/src/strings/strip.cu | 96 + cpp/src/strings/translate.cu | 136 + cpp/src/strings/utilities.cu | 133 + cpp/src/strings/wrap.cu | 143 + cpp/src/structs/copying/concatenate.cu | 79 + cpp/src/structs/structs_column_factories.cu | 63 + cpp/src/structs/structs_column_view.cpp | 52 + cpp/src/structs/utilities.cpp | 437 + cpp/src/table/row_operators.cu | 876 + cpp/src/table/table.cpp | 92 + cpp/src/table/table_device_view.cu | 56 + cpp/src/table/table_view.cpp | 144 + cpp/src/text/detokenize.cu | 181 + cpp/src/text/edit_distance.cu | 319 + cpp/src/text/generate_ngrams.cu | 337 + cpp/src/text/jaccard.cu | 307 + cpp/src/text/minhash.cu | 279 + cpp/src/text/ngrams_tokenize.cu | 275 + cpp/src/text/normalize.cu | 263 + cpp/src/text/replace.cu | 299 + cpp/src/text/stemmer.cu | 285 + cpp/src/text/subword/bpe_tokenizer.cu | 564 + cpp/src/text/subword/bpe_tokenizer.cuh | 114 + cpp/src/text/subword/data_normalizer.cu | 349 + 
.../text/subword/detail/codepoint_metadata.ah | 13466 ++++++++++++++++ cpp/src/text/subword/detail/cp_data.h | 37 + .../text/subword/detail/data_normalizer.hpp | 101 + cpp/src/text/subword/detail/hash_utils.cuh | 172 + .../text/subword/detail/tokenizer_utils.cuh | 76 + .../subword/detail/wordpiece_tokenizer.hpp | 110 + cpp/src/text/subword/load_hash_file.cu | 297 + cpp/src/text/subword/load_merges_file.cu | 174 + cpp/src/text/subword/subword_tokenize.cu | 312 + cpp/src/text/subword/wordpiece_tokenizer.cu | 562 + cpp/src/text/tokenize.cu | 274 + cpp/src/text/utilities/tokenize_ops.cuh | 255 + cpp/src/text/vocabulary_tokenize.cu | 257 + cpp/src/transform/bools_to_mask.cu | 67 + cpp/src/transform/compute_column.cu | 146 + cpp/src/transform/encode.cu | 79 + cpp/src/transform/jit/kernel.cu | 52 + cpp/src/transform/jit/operation-udf.hpp | 20 + cpp/src/transform/mask_to_bools.cu | 69 + cpp/src/transform/nans_to_nulls.cu | 101 + cpp/src/transform/one_hot_encode.cu | 118 + cpp/src/transform/row_bit_count.cu | 546 + cpp/src/transform/transform.cpp | 105 + cpp/src/transpose/transpose.cu | 69 + cpp/src/unary/cast_ops.cu | 424 + cpp/src/unary/math_ops.cu | 651 + cpp/src/unary/nan_ops.cu | 107 + cpp/src/unary/null_ops.cu | 71 + cpp/src/unary/unary_ops.cuh | 79 + cpp/src/utilities/default_stream.cpp | 46 + cpp/src/utilities/linked_column.cpp | 55 + cpp/src/utilities/logger.cpp | 81 + cpp/src/utilities/stacktrace.cpp | 88 + cpp/src/utilities/stream_pool.cpp | 256 + cpp/src/utilities/traits.cpp | 409 + cpp/src/utilities/type_checks.cpp | 78 + cpp/src/utilities/type_dispatcher.cpp | 23 + cpp/tests/CMakeLists.txt | 644 + cpp/tests/ast/transform_tests.cpp | 728 + cpp/tests/binaryop/assert-binops.h | 227 + .../binop-compiled-fixed_point-test.cpp | 845 + cpp/tests/binaryop/binop-compiled-test.cpp | 786 + cpp/tests/binaryop/binop-fixture.hpp | 76 + cpp/tests/binaryop/binop-generic-ptx-test.cpp | 212 + cpp/tests/binaryop/binop-null-test.cpp | 138 + .../binaryop/binop-verify-input-test.cpp 
| 46 + cpp/tests/binaryop/util/operation.h | 447 + cpp/tests/binaryop/util/runtime_support.h | 29 + cpp/tests/bitmask/bitmask_tests.cpp | 752 + cpp/tests/bitmask/is_element_valid_tests.cpp | 85 + cpp/tests/bitmask/set_nullmask_tests.cu | 141 + cpp/tests/bitmask/valid_if_tests.cu | 103 + cpp/tests/column/bit_cast_test.cpp | 133 + cpp/tests/column/column_device_view_test.cu | 69 + cpp/tests/column/column_test.cpp | 648 + .../column/column_view_device_span_test.cpp | 73 + cpp/tests/column/column_view_shallow_test.cpp | 436 + cpp/tests/column/compound_test.cu | 216 + cpp/tests/column/factories_test.cpp | 770 + cpp/tests/copying/concatenate_tests.cpp | 1690 ++ .../copying/copy_if_else_nested_tests.cpp | 513 + cpp/tests/copying/copy_range_tests.cpp | 520 + cpp/tests/copying/copy_tests.cpp | 719 + cpp/tests/copying/detail_gather_tests.cu | 122 + cpp/tests/copying/gather_list_tests.cpp | 437 + cpp/tests/copying/gather_str_tests.cpp | 159 + cpp/tests/copying/gather_struct_tests.cpp | 472 + cpp/tests/copying/gather_tests.cpp | 244 + cpp/tests/copying/get_value_tests.cpp | 923 ++ cpp/tests/copying/pack_tests.cpp | 542 + .../copying/purge_nonempty_nulls_tests.cpp | 485 + cpp/tests/copying/reverse_tests.cpp | 182 + cpp/tests/copying/sample_tests.cpp | 112 + .../copying/scatter_list_scalar_tests.cpp | 453 + cpp/tests/copying/scatter_list_tests.cpp | 968 ++ .../copying/scatter_struct_scalar_tests.cpp | 267 + cpp/tests/copying/scatter_struct_tests.cpp | 259 + cpp/tests/copying/scatter_tests.cpp | 847 + .../copying/segmented_gather_list_tests.cpp | 621 + cpp/tests/copying/shift_tests.cpp | 282 + cpp/tests/copying/slice_tests.cpp | 532 + cpp/tests/copying/slice_tests.cuh | 223 + cpp/tests/copying/split_tests.cpp | 2531 +++ cpp/tests/copying/utility_tests.cpp | 225 + cpp/tests/datetime/datetime_ops_test.cpp | 1053 ++ .../device_atomics/device_atomics_test.cu | 259 + cpp/tests/dictionary/add_keys_test.cpp | 89 + cpp/tests/dictionary/decode_test.cpp | 68 + 
cpp/tests/dictionary/encode_test.cpp | 77 + cpp/tests/dictionary/factories_test.cpp | 120 + cpp/tests/dictionary/fill_test.cpp | 80 + cpp/tests/dictionary/gather_test.cpp | 95 + cpp/tests/dictionary/remove_keys_test.cpp | 124 + cpp/tests/dictionary/scatter_test.cpp | 144 + cpp/tests/dictionary/search_test.cpp | 84 + cpp/tests/dictionary/set_keys_test.cpp | 110 + cpp/tests/dictionary/slice_test.cpp | 88 + cpp/tests/encode/encode_tests.cpp | 139 + cpp/tests/error/error_handling_test.cu | 140 + cpp/tests/filling/fill_tests.cpp | 407 + cpp/tests/filling/repeat_tests.cpp | 307 + cpp/tests/filling/sequence_tests.cpp | 179 + cpp/tests/fixed_point/fixed_point_tests.cpp | 577 + cpp/tests/fixed_point/fixed_point_tests.cu | 124 + cpp/tests/groupby/argmax_tests.cpp | 256 + cpp/tests/groupby/argmin_tests.cpp | 255 + cpp/tests/groupby/collect_list_tests.cpp | 229 + cpp/tests/groupby/collect_set_tests.cpp | 399 + cpp/tests/groupby/correlation_tests.cpp | 241 + cpp/tests/groupby/count_scan_tests.cpp | 208 + cpp/tests/groupby/count_tests.cpp | 222 + cpp/tests/groupby/covariance_tests.cpp | 255 + cpp/tests/groupby/groupby_test_util.cpp | 141 + cpp/tests/groupby/groupby_test_util.hpp | 52 + cpp/tests/groupby/groups_tests.cpp | 118 + cpp/tests/groupby/histogram_tests.cpp | 396 + cpp/tests/groupby/keys_tests.cpp | 421 + cpp/tests/groupby/lists_tests.cpp | 107 + cpp/tests/groupby/m2_tests.cpp | 241 + cpp/tests/groupby/max_scan_tests.cpp | 272 + cpp/tests/groupby/max_tests.cpp | 551 + cpp/tests/groupby/mean_tests.cpp | 206 + cpp/tests/groupby/median_tests.cpp | 144 + cpp/tests/groupby/merge_lists_tests.cpp | 386 + cpp/tests/groupby/merge_m2_tests.cpp | 477 + cpp/tests/groupby/merge_sets_tests.cpp | 358 + cpp/tests/groupby/min_scan_tests.cpp | 273 + cpp/tests/groupby/min_tests.cpp | 547 + cpp/tests/groupby/nth_element_tests.cpp | 515 + cpp/tests/groupby/nunique_tests.cpp | 228 + cpp/tests/groupby/product_tests.cpp | 186 + cpp/tests/groupby/quantile_tests.cpp | 214 + 
cpp/tests/groupby/rank_scan_tests.cpp | 596 + cpp/tests/groupby/replace_nulls_tests.cpp | 369 + cpp/tests/groupby/shift_tests.cpp | 529 + cpp/tests/groupby/std_tests.cpp | 163 + cpp/tests/groupby/structs_tests.cpp | 318 + cpp/tests/groupby/sum_of_squares_tests.cpp | 142 + cpp/tests/groupby/sum_scan_tests.cpp | 172 + cpp/tests/groupby/sum_tests.cpp | 231 + cpp/tests/groupby/tdigest_tests.cu | 508 + cpp/tests/groupby/var_tests.cpp | 199 + cpp/tests/hash_map/map_test.cu | 216 + cpp/tests/hashing/md5_test.cpp | 290 + .../hashing/murmurhash3_x64_128_test.cpp | 113 + cpp/tests/hashing/murmurhash3_x86_32_test.cpp | 405 + .../hashing/spark_murmurhash3_x86_32_test.cpp | 576 + cpp/tests/hashing/xxhash_64_test.cpp | 177 + .../test_default_stream_identification.cu | 39 + cpp/tests/interop/arrow_utils.hpp | 206 + cpp/tests/interop/dlpack_test.cpp | 496 + cpp/tests/interop/from_arrow_test.cpp | 553 + cpp/tests/interop/to_arrow_test.cpp | 684 + cpp/tests/io/arrow_io_source_test.cpp | 102 + cpp/tests/io/comp/decomp_test.cpp | 226 + cpp/tests/io/csv_test.cpp | 2500 +++ cpp/tests/io/file_io_test.cpp | 45 + cpp/tests/io/fst/common.hpp | 82 + cpp/tests/io/fst/fst_test.cu | 212 + cpp/tests/io/fst/logical_stack_test.cu | 249 + cpp/tests/io/json_chunked_reader.cpp | 126 + cpp/tests/io/json_test.cpp | 1994 +++ cpp/tests/io/json_tree.cpp | 895 + cpp/tests/io/json_type_cast_test.cu | 253 + cpp/tests/io/json_writer.cpp | 556 + cpp/tests/io/metadata_utilities.cpp | 65 + cpp/tests/io/nested_json_test.cpp | 926 ++ cpp/tests/io/orc_test.cpp | 1937 +++ cpp/tests/io/parquet_chunked_reader_test.cpp | 1016 ++ cpp/tests/io/parquet_test.cpp | 6735 ++++++++ cpp/tests/io/row_selection_test.cpp | 137 + cpp/tests/io/text/data_chunk_source_test.cpp | 389 + cpp/tests/io/text/multibyte_split_test.cpp | 561 + cpp/tests/io/type_inference_test.cu | 273 + cpp/tests/iterator/README.md | 18 + cpp/tests/iterator/indexalator_test.cu | 96 + cpp/tests/iterator/iterator_tests.cuh | 132 + 
cpp/tests/iterator/optional_iterator_test.cuh | 95 + .../iterator/optional_iterator_test_chrono.cu | 27 + .../optional_iterator_test_numeric.cu | 134 + cpp/tests/iterator/pair_iterator_test.cuh | 84 + .../iterator/pair_iterator_test_chrono.cu | 24 + .../iterator/pair_iterator_test_numeric.cu | 138 + cpp/tests/iterator/scalar_iterator_test.cu | 81 + .../sizes_to_offsets_iterator_test.cu | 97 + cpp/tests/iterator/value_iterator.cpp | 18 + cpp/tests/iterator/value_iterator_test.cuh | 78 + .../iterator/value_iterator_test_chrono.cu | 27 + .../iterator/value_iterator_test_numeric.cu | 27 + .../iterator/value_iterator_test_strings.cu | 140 + .../iterator/value_iterator_test_transform.cu | 142 + cpp/tests/join/conditional_join_tests.cu | 891 + cpp/tests/join/cross_join_tests.cpp | 141 + cpp/tests/join/join_tests.cpp | 2147 +++ cpp/tests/join/mixed_join_tests.cu | 943 ++ cpp/tests/join/semi_anti_join_tests.cpp | 311 + cpp/tests/labeling/label_bins_tests.cpp | 436 + .../concatenate_list_elements_tests.cpp | 809 + .../lists/combine/concatenate_rows_tests.cpp | 961 ++ cpp/tests/lists/contains_tests.cpp | 1841 +++ cpp/tests/lists/count_elements_tests.cpp | 101 + cpp/tests/lists/explode_tests.cpp | 1177 ++ cpp/tests/lists/extract_tests.cpp | 428 + cpp/tests/lists/reverse_tests.cpp | 472 + cpp/tests/lists/sequences_tests.cpp | 248 + .../difference_distinct_tests.cpp | 670 + .../set_operations/have_overlap_tests.cpp | 551 + .../intersect_distinct_tests.cpp | 637 + .../set_operations/union_distinct_tests.cpp | 622 + cpp/tests/lists/sort_lists_tests.cpp | 291 + .../apply_boolean_mask_tests.cpp | 232 + .../stream_compaction/distinct_tests.cpp | 758 + cpp/tests/merge/merge_dictionary_test.cpp | 150 + cpp/tests/merge/merge_string_test.cpp | 413 + cpp/tests/merge/merge_test.cpp | 908 ++ .../partitioning/hash_partition_test.cpp | 435 + cpp/tests/partitioning/partition_test.cpp | 347 + cpp/tests/partitioning/round_robin_test.cpp | 744 + .../quantiles/percentile_approx_test.cpp | 455 + 
cpp/tests/quantiles/quantile_test.cpp | 475 + cpp/tests/quantiles/quantiles_test.cpp | 167 + cpp/tests/reductions/collect_ops_tests.cpp | 369 + cpp/tests/reductions/list_rank_test.cpp | 229 + cpp/tests/reductions/rank_tests.cpp | 328 + cpp/tests/reductions/reduction_tests.cpp | 3147 ++++ cpp/tests/reductions/scan_tests.cpp | 755 + cpp/tests/reductions/scan_tests.hpp | 109 + .../reductions/segmented_reduction_tests.cpp | 1494 ++ cpp/tests/reductions/tdigest_tests.cu | 162 + cpp/tests/replace/clamp_test.cpp | 660 + cpp/tests/replace/normalize_replace_tests.cpp | 82 + cpp/tests/replace/replace_nans_tests.cpp | 195 + cpp/tests/replace/replace_nulls_tests.cpp | 752 + cpp/tests/replace/replace_tests.cpp | 624 + cpp/tests/reshape/byte_cast_tests.cpp | 408 + .../reshape/interleave_columns_tests.cpp | 1365 ++ cpp/tests/reshape/tile_tests.cpp | 115 + cpp/tests/rolling/collect_ops_test.cpp | 2288 +++ cpp/tests/rolling/empty_input_test.cpp | 411 + .../rolling/grouped_rolling_range_test.cpp | 941 ++ cpp/tests/rolling/grouped_rolling_test.cpp | 2472 +++ cpp/tests/rolling/lead_lag_test.cpp | 1131 ++ cpp/tests/rolling/nth_element_test.cpp | 632 + cpp/tests/rolling/offset_row_window_test.cpp | 343 + cpp/tests/rolling/range_comparator_test.cu | 147 + .../rolling/range_rolling_window_test.cpp | 596 + .../rolling/range_window_bounds_test.cpp | 217 + cpp/tests/rolling/rolling_test.cpp | 1628 ++ cpp/tests/rolling/rolling_test.hpp | 61 + cpp/tests/round/round_tests.cpp | 785 + cpp/tests/scalar/factories_test.cpp | 182 + cpp/tests/scalar/scalar_device_view_test.cu | 139 + cpp/tests/scalar/scalar_test.cpp | 295 + cpp/tests/search/search_dictionary_test.cpp | 106 + cpp/tests/search/search_list_test.cpp | 670 + cpp/tests/search/search_struct_test.cpp | 749 + cpp/tests/search/search_test.cpp | 1873 +++ cpp/tests/sort/is_sorted_tests.cpp | 500 + cpp/tests/sort/rank_test.cpp | 921 ++ cpp/tests/sort/segmented_sort_tests.cpp | 340 + cpp/tests/sort/sort_nested_types_tests.cpp | 463 + 
cpp/tests/sort/sort_test.cpp | 1116 ++ cpp/tests/sort/stable_sort_tests.cpp | 291 + .../apply_boolean_mask_tests.cpp | 375 + .../distinct_count_tests.cpp | 367 + .../stream_compaction/distinct_tests.cpp | 1414 ++ .../stream_compaction/drop_nans_tests.cpp | 139 + .../stream_compaction/drop_nulls_tests.cpp | 228 + .../stable_distinct_tests.cpp | 1354 ++ .../stream_compaction/unique_count_tests.cpp | 288 + cpp/tests/stream_compaction/unique_tests.cpp | 811 + cpp/tests/streams/concatenate_test.cpp | 51 + cpp/tests/streams/copying_test.cpp | 339 + cpp/tests/streams/dictionary_test.cpp | 105 + cpp/tests/streams/filling_test.cpp | 76 + cpp/tests/streams/groupby_test.cpp | 67 + cpp/tests/streams/hash_test.cpp | 54 + cpp/tests/streams/interop_test.cpp | 68 + cpp/tests/streams/replace_test.cpp | 109 + cpp/tests/streams/search_test.cpp | 69 + cpp/tests/streams/sorting_test.cpp | 132 + cpp/tests/streams/strings/case_test.cpp | 55 + cpp/tests/streams/strings/find_test.cpp | 49 + cpp/tests/streams/text/ngrams_test.cpp | 59 + cpp/tests/strings/array_tests.cpp | 225 + cpp/tests/strings/attrs_tests.cpp | 106 + cpp/tests/strings/booleans_tests.cpp | 85 + cpp/tests/strings/case_tests.cpp | 291 + cpp/tests/strings/chars_types_tests.cpp | 336 + .../strings/combine/concatenate_tests.cpp | 515 + .../combine/join_list_elements_tests.cpp | 583 + .../strings/combine/join_strings_tests.cpp | 100 + cpp/tests/strings/concatenate_tests.cpp | 109 + cpp/tests/strings/contains_tests.cpp | 813 + cpp/tests/strings/datetime_tests.cpp | 641 + cpp/tests/strings/durations_tests.cpp | 765 + cpp/tests/strings/extract_tests.cpp | 322 + cpp/tests/strings/factories_test.cu | 229 + cpp/tests/strings/fill_tests.cpp | 85 + cpp/tests/strings/find_multiple_tests.cpp | 82 + cpp/tests/strings/find_tests.cpp | 438 + cpp/tests/strings/findall_tests.cpp | 122 + cpp/tests/strings/fixed_point_tests.cpp | 345 + cpp/tests/strings/floats_tests.cpp | 209 + cpp/tests/strings/format_lists_tests.cpp | 164 + 
cpp/tests/strings/integers_tests.cpp | 458 + cpp/tests/strings/ipv4_tests.cpp | 111 + cpp/tests/strings/json_tests.cpp | 1024 ++ cpp/tests/strings/like_tests.cpp | 208 + cpp/tests/strings/pad_tests.cpp | 221 + cpp/tests/strings/repeat_strings_tests.cpp | 587 + cpp/tests/strings/replace_regex_tests.cpp | 442 + cpp/tests/strings/replace_tests.cpp | 480 + cpp/tests/strings/reverse_tests.cpp | 53 + cpp/tests/strings/slice_tests.cpp | 314 + cpp/tests/strings/split_tests.cpp | 944 ++ cpp/tests/strings/strip_tests.cpp | 119 + cpp/tests/strings/translate_tests.cpp | 121 + cpp/tests/strings/urls_tests.cpp | 235 + cpp/tests/structs/structs_column_tests.cpp | 642 + cpp/tests/structs/utilities_tests.cpp | 661 + .../table/experimental_row_operator_tests.cu | 297 + .../table/row_operator_tests_utilities.cu | 214 + .../table/row_operator_tests_utilities.hpp | 45 + cpp/tests/table/row_operators_tests.cpp | 97 + cpp/tests/table/table_tests.cpp | 153 + cpp/tests/table/table_view_tests.cu | 145 + cpp/tests/text/bpe_tests.cpp | 110 + cpp/tests/text/edit_distance_tests.cpp | 101 + cpp/tests/text/jaccard_tests.cpp | 80 + cpp/tests/text/minhash_tests.cpp | 172 + cpp/tests/text/ngrams_tests.cpp | 172 + cpp/tests/text/ngrams_tokenize_tests.cpp | 127 + cpp/tests/text/normalize_tests.cpp | 161 + cpp/tests/text/replace_tests.cpp | 152 + cpp/tests/text/stemmer_tests.cpp | 179 + cpp/tests/text/subword_tests.cpp | 442 + cpp/tests/text/tokenize_tests.cpp | 250 + cpp/tests/transform/bools_to_mask_test.cpp | 92 + .../transform/integration/assert_unary.h | 51 + .../integration/unary_transform_test.cpp | 222 + cpp/tests/transform/mask_to_bools_test.cpp | 79 + cpp/tests/transform/nans_to_null_test.cpp | 138 + cpp/tests/transform/one_hot_encode_tests.cpp | 272 + cpp/tests/transform/row_bit_count_test.cu | 764 + cpp/tests/transpose/transpose_test.cpp | 202 + cpp/tests/types/traits_test.cpp | 134 + cpp/tests/types/type_dispatcher_test.cu | 177 + cpp/tests/unary/cast_tests.cpp | 1088 ++ 
cpp/tests/unary/math_ops_test.cpp | 438 + cpp/tests/unary/unary_ops_test.cpp | 408 + cpp/tests/utilities/base_fixture.cpp | 34 + cpp/tests/utilities/column_utilities.cu | 1337 ++ cpp/tests/utilities/default_stream.cpp | 27 + cpp/tests/utilities/identify_stream_usage.cpp | 295 + cpp/tests/utilities/table_utilities.cu | 48 + cpp/tests/utilities/tdigest_utilities.cu | 157 + .../column_utilities_tests.cpp | 563 + .../utilities_tests/column_wrapper_tests.cpp | 282 + .../utilities_tests/default_stream_tests.cpp | 25 + .../lists_column_wrapper_tests.cpp | 1561 ++ cpp/tests/utilities_tests/logger_tests.cpp | 77 + cpp/tests/utilities_tests/span_tests.cu | 456 + .../utilities_tests/type_check_tests.cpp | 218 + cpp/tests/utilities_tests/type_list_tests.cpp | 250 + cpp/tests/wrappers/timestamps_test.cu | 213 + dependencies.yaml | 676 + docs/cudf/Makefile | 20 + docs/cudf/README.md | 6 + docs/cudf/make.bat | 36 + docs/cudf/source/_ext/PandasCompat.py | 157 + docs/cudf/source/_static/EMPTY | 0 .../source/_static/RAPIDS-logo-purple.png | Bin 0 -> 22593 bytes docs/cudf/source/_static/colab.png | Bin 0 -> 59280 bytes .../_static/cudf-pandas-execution-flow.png | Bin 0 -> 46508 bytes .../source/_static/cudf-pandas-profile.png | Bin 0 -> 70027 bytes .../source/_static/cudf.pandas-duckdb.png | Bin 0 -> 54659 bytes .../_static/duckdb-benchmark-groupby-join.png | Bin 0 -> 196389 bytes .../source/_templates/autosummary/class.rst | 9 + docs/cudf/source/conf.py | 263 + docs/cudf/source/cudf_pandas/benchmarks.md | 111 + docs/cudf/source/cudf_pandas/faq.md | 161 + docs/cudf/source/cudf_pandas/how-it-works.md | 39 + docs/cudf/source/cudf_pandas/index.rst | 49 + docs/cudf/source/cudf_pandas/usage.md | 64 + .../source/developer_guide/benchmarking.md | 242 + .../developer_guide/contributing_guide.md | 160 + .../source/developer_guide/documentation.md | 232 + .../developer_guide/frame_class_diagram.png | Bin 0 -> 22555 bytes docs/cudf/source/developer_guide/index.md | 30 + 
.../source/developer_guide/library_design.md | 498 + docs/cudf/source/developer_guide/options.md | 22 + docs/cudf/source/developer_guide/pylibcudf.md | 155 + docs/cudf/source/developer_guide/testing.md | 251 + docs/cudf/source/index.rst | 32 + docs/cudf/source/user_guide/10min.ipynb | 6162 +++++++ docs/cudf/source/user_guide/PandasCompat.md | 5 + .../source/user_guide/api_docs/dataframe.rst | 280 + .../user_guide/api_docs/extension_dtypes.rst | 170 + .../user_guide/api_docs/general_functions.rst | 48 + .../user_guide/api_docs/general_utilities.rst | 13 + .../source/user_guide/api_docs/groupby.rst | 110 + .../cudf/source/user_guide/api_docs/index.rst | 25 + .../user_guide/api_docs/index_objects.rst | 338 + docs/cudf/source/user_guide/api_docs/io.rst | 83 + .../user_guide/api_docs/list_handling.rst | 27 + .../source/user_guide/api_docs/options.rst | 25 + .../source/user_guide/api_docs/series.rst | 405 + .../user_guide/api_docs/string_handling.rst | 105 + .../user_guide/api_docs/struct_handling.rst | 19 + .../user_guide/api_docs/subword_tokenize.rst | 12 + .../source/user_guide/api_docs/window.rst | 25 + docs/cudf/source/user_guide/copy-on-write.md | 179 + .../cudf/source/user_guide/cupy-interop.ipynb | 1429 ++ docs/cudf/source/user_guide/data-types.md | 155 + docs/cudf/source/user_guide/groupby.md | 274 + .../source/user_guide/guide-to-udfs.ipynb | 2658 +++ docs/cudf/source/user_guide/index.md | 19 + docs/cudf/source/user_guide/io/index.md | 9 + docs/cudf/source/user_guide/io/io.md | 180 + docs/cudf/source/user_guide/io/read-json.md | 231 + .../cudf/source/user_guide/missing-data.ipynb | 3578 ++++ docs/cudf/source/user_guide/options.md | 14 + .../source/user_guide/pandas-comparison.md | 188 + .../performance-comparisons/index.md | 8 + .../performance-comparisons.ipynb | 1651 ++ docs/dask_cudf/Makefile | 21 + docs/dask_cudf/make.bat | 35 + .../source/_static/RAPIDS-logo-purple.png | Bin 0 -> 22593 bytes docs/dask_cudf/source/api.rst | 79 + 
docs/dask_cudf/source/conf.py | 82 + docs/dask_cudf/source/index.rst | 112 + fetch_rapids.cmake | 19 + img/GDF_community.png | Bin 0 -> 81567 bytes img/goai_logo.png | Bin 0 -> 30695 bytes img/rapids_arrow.png | Bin 0 -> 192477 bytes img/rapids_logo.png | Bin 0 -> 113880 bytes java/README.md | 126 + java/buildscripts/build-info | 31 + java/ci/Dockerfile.centos7 | 56 + java/ci/README.md | 50 + java/ci/build-in-docker.sh | 99 + java/dev/cudf_java_styles.xml | 22 + java/pom.xml | 611 + .../main/java/ai/rapids/cudf/Aggregation.java | 993 ++ .../ai/rapids/cudf/Aggregation128Utils.java | 67 + .../ai/rapids/cudf/AggregationOverWindow.java | 73 + .../ai/rapids/cudf/ArrowColumnBuilder.java | 113 + .../java/ai/rapids/cudf/ArrowIPCOptions.java | 65 + .../ai/rapids/cudf/ArrowIPCWriterOptions.java | 138 + .../java/ai/rapids/cudf/AssertEmptyNulls.java | 36 + .../main/java/ai/rapids/cudf/AvroOptions.java | 41 + .../rapids/cudf/BaseDeviceMemoryBuffer.java | 158 + .../main/java/ai/rapids/cudf/BinaryOp.java | 79 + .../java/ai/rapids/cudf/BinaryOperable.java | 577 + .../java/ai/rapids/cudf/BitVectorHelper.java | 113 + .../main/java/ai/rapids/cudf/BufferType.java | 28 + .../main/java/ai/rapids/cudf/CSVOptions.java | 205 + .../java/ai/rapids/cudf/CSVWriterOptions.java | 162 + .../java/ai/rapids/cudf/CaptureGroups.java | 36 + .../main/java/ai/rapids/cudf/ChunkedPack.java | 103 + .../java/ai/rapids/cudf/CloseableArray.java | 106 + .../ai/rapids/cudf/ColumnFilterOptions.java | 65 + .../java/ai/rapids/cudf/ColumnVector.java | 1767 ++ .../main/java/ai/rapids/cudf/ColumnView.java | 5261 ++++++ .../ai/rapids/cudf/ColumnWriterOptions.java | 675 + .../cudf/CompressedMetadataWriterOptions.java | 82 + .../CompressionMetadataWriterOptions.java | 128 + .../java/ai/rapids/cudf/CompressionType.java | 64 + .../rapids/cudf/ContigSplitGroupByResult.java | 114 + .../java/ai/rapids/cudf/ContiguousTable.java | 121 + java/src/main/java/ai/rapids/cudf/CuFile.java | 162 + 
.../java/ai/rapids/cudf/CuFileBuffer.java | 84 + .../java/ai/rapids/cudf/CuFileDriver.java | 38 + .../java/ai/rapids/cudf/CuFileHandle.java | 44 + .../java/ai/rapids/cudf/CuFileReadHandle.java | 46 + .../ai/rapids/cudf/CuFileResourceCleaner.java | 78 + .../rapids/cudf/CuFileResourceDestroyer.java | 24 + .../ai/rapids/cudf/CuFileWriteHandle.java | 60 + java/src/main/java/ai/rapids/cudf/Cuda.java | 605 + .../java/ai/rapids/cudf/CudaComputeMode.java | 61 + .../java/ai/rapids/cudf/CudaException.java | 329 + .../ai/rapids/cudf/CudaFatalException.java | 35 + .../main/java/ai/rapids/cudf/CudaMemInfo.java | 35 + .../java/ai/rapids/cudf/CudaMemcpyKind.java | 35 + .../java/ai/rapids/cudf/CudaMemoryBuffer.java | 124 + .../cudf/CudfColumnSizeOverflowException.java | 34 + .../java/ai/rapids/cudf/CudfException.java | 41 + java/src/main/java/ai/rapids/cudf/DType.java | 528 + .../java/ai/rapids/cudf/DecimalUtils.java | 164 + .../cudf/DefaultHostMemoryAllocator.java | 36 + .../ai/rapids/cudf/DeviceMemoryBuffer.java | 174 + .../rapids/cudf/DeviceMemoryBufferView.java | 39 + .../main/java/ai/rapids/cudf/GatherMap.java | 85 + .../ai/rapids/cudf/GroupByAggregation.java | 340 + .../cudf/GroupByAggregationOnColumn.java | 56 + .../java/ai/rapids/cudf/GroupByOptions.java | 124 + .../rapids/cudf/GroupByScanAggregation.java | 125 + .../cudf/GroupByScanAggregationOnColumn.java | 64 + .../main/java/ai/rapids/cudf/HashJoin.java | 127 + .../main/java/ai/rapids/cudf/HashType.java | 49 + .../ai/rapids/cudf/HostBufferConsumer.java | 38 + .../ai/rapids/cudf/HostBufferProvider.java | 38 + .../java/ai/rapids/cudf/HostColumnVector.java | 2202 +++ .../ai/rapids/cudf/HostColumnVectorCore.java | 663 + .../ai/rapids/cudf/HostMemoryAllocator.java | 39 + .../java/ai/rapids/cudf/HostMemoryBuffer.java | 668 + .../cudf/HostMemoryBufferNativeUtils.java | 57 + .../ai/rapids/cudf/HostMemoryReservation.java | 32 + .../ai/rapids/cudf/JCudfSerialization.java | 2007 +++ .../main/java/ai/rapids/cudf/JSONOptions.java | 
118 + .../main/java/ai/rapids/cudf/MaskState.java | 41 + .../java/ai/rapids/cudf/MemoryBuffer.java | 284 + .../java/ai/rapids/cudf/MemoryCleaner.java | 350 + .../java/ai/rapids/cudf/MixedJoinSize.java | 43 + .../main/java/ai/rapids/cudf/NaNEquality.java | 41 + .../java/ai/rapids/cudf/NativeDepsLoader.java | 232 + .../java/ai/rapids/cudf/NullEquality.java | 33 + .../main/java/ai/rapids/cudf/NullPolicy.java | 33 + .../main/java/ai/rapids/cudf/NvtxColor.java | 34 + .../main/java/ai/rapids/cudf/NvtxRange.java | 69 + .../java/ai/rapids/cudf/NvtxUniqueRange.java | 82 + .../main/java/ai/rapids/cudf/ORCOptions.java | 107 + .../java/ai/rapids/cudf/ORCWriterOptions.java | 42 + .../main/java/ai/rapids/cudf/OrderByArg.java | 59 + .../ai/rapids/cudf/OutOfBoundsPolicy.java | 39 + .../ai/rapids/cudf/PackedColumnMetadata.java | 74 + .../src/main/java/ai/rapids/cudf/PadSide.java | 42 + .../ai/rapids/cudf/ParquetChunkedReader.java | 155 + .../java/ai/rapids/cudf/ParquetOptions.java | 112 + .../ai/rapids/cudf/ParquetWriterOptions.java | 75 + .../java/ai/rapids/cudf/PartitionedTable.java | 76 + .../java/ai/rapids/cudf/PinnedMemoryPool.java | 462 + .../java/ai/rapids/cudf/QuantileMethod.java | 53 + .../main/java/ai/rapids/cudf/QuoteStyle.java | 33 + java/src/main/java/ai/rapids/cudf/Range.java | 247 + .../ai/rapids/cudf/ReductionAggregation.java | 307 + .../main/java/ai/rapids/cudf/RegexFlag.java | 37 + .../java/ai/rapids/cudf/RegexProgram.java | 134 + .../java/ai/rapids/cudf/ReplacePolicy.java | 46 + .../rapids/cudf/ReplacePolicyWithColumn.java | 46 + java/src/main/java/ai/rapids/cudf/Rmm.java | 584 + .../ai/rapids/cudf/RmmAllocationMode.java | 39 + .../rapids/cudf/RmmArenaMemoryResource.java | 67 + .../cudf/RmmCudaAsyncMemoryResource.java | 59 + .../ai/rapids/cudf/RmmCudaMemoryResource.java | 44 + .../rapids/cudf/RmmDeviceMemoryResource.java | 31 + .../java/ai/rapids/cudf/RmmEventHandler.java | 98 + .../cudf/RmmEventHandlerResourceAdaptor.java | 76 + 
.../java/ai/rapids/cudf/RmmException.java | 30 + .../cudf/RmmLimitingResourceAdaptor.java | 59 + .../cudf/RmmLoggingResourceAdaptor.java | 58 + .../rapids/cudf/RmmManagedMemoryResource.java | 45 + .../ai/rapids/cudf/RmmPoolMemoryResource.java | 64 + .../cudf/RmmTrackingResourceAdaptor.java | 69 + .../cudf/RmmWrappingDeviceMemoryResource.java | 56 + .../ai/rapids/cudf/RollingAggregation.java | 230 + .../cudf/RollingAggregationOnColumn.java | 65 + .../main/java/ai/rapids/cudf/RoundMode.java | 31 + java/src/main/java/ai/rapids/cudf/Scalar.java | 969 ++ .../java/ai/rapids/cudf/ScanAggregation.java | 107 + .../main/java/ai/rapids/cudf/ScanType.java | 39 + java/src/main/java/ai/rapids/cudf/Schema.java | 103 + .../cudf/SegmentedReductionAggregation.java | 104 + .../ai/rapids/cudf/StreamedTableReader.java | 42 + java/src/main/java/ai/rapids/cudf/Table.java | 4782 ++++++ .../main/java/ai/rapids/cudf/TableDebug.java | 280 + .../java/ai/rapids/cudf/TableWithMeta.java | 67 + .../main/java/ai/rapids/cudf/TableWriter.java | 75 + .../src/main/java/ai/rapids/cudf/UnaryOp.java | 60 + .../ai/rapids/cudf/UnsafeMemoryAccessor.java | 374 + .../java/ai/rapids/cudf/WindowOptions.java | 370 + .../java/ai/rapids/cudf/WriterOptions.java | 72 + .../ai/rapids/cudf/ast/AstExpression.java | 69 + .../ai/rapids/cudf/ast/BinaryOperation.java | 48 + .../ai/rapids/cudf/ast/BinaryOperator.java | 66 + .../ai/rapids/cudf/ast/ColumnReference.java | 51 + .../rapids/cudf/ast/CompiledExpression.java | 110 + .../main/java/ai/rapids/cudf/ast/Literal.java | 276 + .../ai/rapids/cudf/ast/TableReference.java | 47 + .../ai/rapids/cudf/ast/UnaryOperation.java | 44 + .../ai/rapids/cudf/ast/UnaryOperator.java | 69 + .../cudf/nvcomp/BatchedLZ4Compressor.java | 320 + .../cudf/nvcomp/BatchedLZ4Decompressor.java | 199 + .../rapids/cudf/nvcomp/CompressionType.java | 53 + .../cudf/nvcomp/NvcompCudaException.java | 28 + .../rapids/cudf/nvcomp/NvcompException.java | 28 + .../java/ai/rapids/cudf/nvcomp/NvcompJni.java | 117 + 
java/src/main/native/.clang-format | 204 + java/src/main/native/CMakeLists.txt | 266 + java/src/main/native/clang-format.README | 13 + java/src/main/native/include/jni_utils.hpp | 942 ++ .../main/native/include/maps_column_view.hpp | 152 + .../native/src/Aggregation128UtilsJni.cpp | 47 + java/src/main/native/src/AggregationJni.cpp | 280 + java/src/main/native/src/ChunkedPackJni.cpp | 75 + java/src/main/native/src/ChunkedReaderJni.cpp | 124 + java/src/main/native/src/ColumnVectorJni.cpp | 387 + java/src/main/native/src/ColumnViewJni.cpp | 2575 +++ java/src/main/native/src/ColumnViewJni.cu | 224 + java/src/main/native/src/ColumnViewJni.hpp | 94 + .../main/native/src/CompiledExpression.cpp | 412 + .../main/native/src/ContiguousTableJni.cpp | 146 + java/src/main/native/src/CuFileJni.cpp | 517 + java/src/main/native/src/CudaJni.cpp | 414 + java/src/main/native/src/CudfJni.cpp | 197 + java/src/main/native/src/HashJoinJni.cpp | 45 + .../src/HostMemoryBufferNativeUtilsJni.cpp | 74 + java/src/main/native/src/NvcompJni.cpp | 171 + java/src/main/native/src/NvtxRangeJni.cpp | 42 + .../main/native/src/NvtxUniqueRangeJni.cpp | 46 + .../native/src/PackedColumnMetadataJni.cpp | 41 + java/src/main/native/src/RmmJni.cpp | 765 + java/src/main/native/src/ScalarJni.cpp | 576 + java/src/main/native/src/TableJni.cpp | 3605 +++++ .../main/native/src/aggregation128_utils.cu | 129 + .../main/native/src/aggregation128_utils.hpp | 70 + .../native/src/check_nvcomp_output_sizes.cu | 47 + .../native/src/check_nvcomp_output_sizes.hpp | 33 + .../main/native/src/csv_chunked_writer.hpp | 73 + java/src/main/native/src/cudf_jni_apis.hpp | 138 + java/src/main/native/src/dtype_utils.hpp | 65 + java/src/main/native/src/emptyfile.cpp | 17 + .../src/main/native/src/jni_compiled_expr.hpp | 66 + .../main/native/src/jni_writer_data_sink.hpp | 170 + java/src/main/native/src/maps_column_view.cu | 112 + java/src/main/native/src/nvtx_common.hpp | 27 + java/src/main/native/src/row_conversion.cu | 2341 +++ 
java/src/main/native/src/row_conversion.hpp | 52 + .../rapids/cudf/Aggregation128UtilsTest.java | 80 + .../ai/rapids/cudf/ArrowColumnVectorTest.java | 337 + .../test/java/ai/rapids/cudf/AssertUtils.java | 272 + .../java/ai/rapids/cudf/BinaryOpTest.java | 1925 +++ .../ai/rapids/cudf/ByteColumnVectorTest.java | 243 + .../ai/rapids/cudf/ColumnBuilderHelper.java | 170 + .../java/ai/rapids/cudf/ColumnVectorTest.java | 6889 ++++++++ .../cudf/ColumnViewNonEmptyNullsTest.java | 138 + .../test/java/ai/rapids/cudf/CuFileTest.java | 157 + .../java/ai/rapids/cudf/CudaFatalTest.java | 67 + .../test/java/ai/rapids/cudf/CudaTest.java | 55 + .../java/ai/rapids/cudf/CudfTestBase.java | 96 + .../rapids/cudf/Date32ColumnVectorTest.java | 72 + .../rapids/cudf/Date64ColumnVectorTest.java | 95 + .../rapids/cudf/DecimalColumnVectorTest.java | 425 + .../rapids/cudf/DoubleColumnVectorTest.java | 174 + .../ai/rapids/cudf/FloatColumnVectorTest.java | 164 + .../java/ai/rapids/cudf/GatherMapTest.java | 102 + .../java/ai/rapids/cudf/HashJoinTest.java | 45 + .../ai/rapids/cudf/HostMemoryBufferTest.java | 301 + .../test/java/ai/rapids/cudf/IfElseTest.java | 1181 ++ .../ai/rapids/cudf/IntColumnVectorTest.java | 227 + .../java/ai/rapids/cudf/LargeTableTest.java | 66 + .../ai/rapids/cudf/LongColumnVectorTest.java | 217 + .../java/ai/rapids/cudf/MemoryBufferTest.java | 220 + .../test/java/ai/rapids/cudf/NvtxTest.java | 72 + .../ai/rapids/cudf/PinnedMemoryPoolTest.java | 134 + .../java/ai/rapids/cudf/ReductionTest.java | 627 + .../ai/rapids/cudf/RmmMemoryAccessorTest.java | 99 + .../src/test/java/ai/rapids/cudf/RmmTest.java | 614 + .../test/java/ai/rapids/cudf/ScalarTest.java | 461 + .../rapids/cudf/SegmentedReductionTest.java | 115 + .../ai/rapids/cudf/ShortColumnVectorTest.java | 178 + .../test/java/ai/rapids/cudf/TableTest.java | 9209 +++++++++++ .../test/java/ai/rapids/cudf/TestUtils.java | 229 + .../cudf/TimestampColumnVectorTest.java | 492 + .../test/java/ai/rapids/cudf/UnaryOpTest.java | 345 + 
.../rapids/cudf/UnsafeMemoryAccessorTest.java | 100 + .../cudf/ast/CompiledExpressionTest.java | 614 + .../ai/rapids/cudf/nvcomp/NvcompTest.java | 129 + java/src/test/resources/TestOrcFile.orc | Bin 0 -> 1025 bytes java/src/test/resources/acq.parquet | Bin 0 -> 32027 bytes java/src/test/resources/alltypes_plain.avro | Bin 0 -> 868 bytes java/src/test/resources/binary.parquet | Bin 0 -> 653 bytes java/src/test/resources/decimal.parquet | Bin 0 -> 6604 bytes java/src/test/resources/people.json | 3 + .../resources/people_with_invalid_lines.json | 4 + java/src/test/resources/simple.csv | 10 + java/src/test/resources/splittable.parquet | Bin 0 -> 320341 bytes .../test/resources/timestamp-date-test.orc | Bin 0 -> 409 bytes notebooks/10min.ipynb | 1 + notebooks/README.md | 6 + notebooks/cupy-interop.ipynb | 1 + notebooks/guide-to-udfs.ipynb | 1 + notebooks/missing-data.ipynb | 1 + notebooks/performance-comparisons | 1 + print_env.sh | 88 + pyproject.toml | 74 + python/cudf/.coveragerc | 3 + python/cudf/CMakeLists.txt | 121 + python/cudf/LICENSE | 1 + python/cudf/README.md | 1 + python/cudf/benchmarks/API/bench_dataframe.py | 183 + .../benchmarks/API/bench_dataframe_cases.py | 14 + .../benchmarks/API/bench_frame_or_index.py | 97 + python/cudf/benchmarks/API/bench_functions.py | 82 + .../benchmarks/API/bench_functions_cases.py | 148 + python/cudf/benchmarks/API/bench_index.py | 17 + .../benchmarks/API/bench_indexed_frame.py | 30 + .../cudf/benchmarks/API/bench_multiindex.py | 44 + .../cudf/benchmarks/API/bench_rangeindex.py | 47 + python/cudf/benchmarks/API/bench_series.py | 23 + python/cudf/benchmarks/common/config.py | 69 + python/cudf/benchmarks/common/utils.py | 257 + python/cudf/benchmarks/conftest.py | 234 + .../cudf/benchmarks/internal/bench_column.py | 115 + .../internal/bench_dataframe_internal.py | 13 + .../internal/bench_rangeindex_internal.py | 11 + python/cudf/benchmarks/internal/conftest.py | 56 + python/cudf/benchmarks/pytest.ini | 8 + 
.../cudf/cmake/Modules/ProtobufHelpers.cmake | 50 + python/cudf/cmake/Modules/WheelHelpers.cmake | 71 + python/cudf/cudf/__init__.py | 169 + python/cudf/cudf/_fuzz_testing/__init__.py | 0 python/cudf/cudf/_fuzz_testing/avro.py | 116 + python/cudf/cudf/_fuzz_testing/csv.py | 207 + python/cudf/cudf/_fuzz_testing/fuzzer.py | 116 + python/cudf/cudf/_fuzz_testing/io.py | 109 + python/cudf/cudf/_fuzz_testing/json.py | 191 + python/cudf/cudf/_fuzz_testing/main.py | 45 + python/cudf/cudf/_fuzz_testing/orc.py | 209 + python/cudf/cudf/_fuzz_testing/parquet.py | 169 + .../_fuzz_testing/tests/fuzz_test_avro.py | 38 + .../cudf/_fuzz_testing/tests/fuzz_test_csv.py | 132 + .../_fuzz_testing/tests/fuzz_test_json.py | 94 + .../cudf/_fuzz_testing/tests/fuzz_test_orc.py | 98 + .../_fuzz_testing/tests/fuzz_test_parquet.py | 104 + .../cudf/cudf/_fuzz_testing/tests/readme.md | 100 + python/cudf/cudf/_fuzz_testing/utils.py | 407 + python/cudf/cudf/_lib/CMakeLists.txt | 119 + python/cudf/cudf/_lib/__init__.pxd | 0 python/cudf/cudf/_lib/__init__.py | 47 + python/cudf/cudf/_lib/aggregation.pxd | 33 + python/cudf/cudf/_lib/aggregation.pyx | 983 ++ python/cudf/cudf/_lib/avro.pyx | 55 + python/cudf/cudf/_lib/binaryop.pxd | 5 + python/cudf/cudf/_lib/binaryop.pyx | 239 + python/cudf/cudf/_lib/column.pxd | 42 + python/cudf/cudf/_lib/column.pyi | 75 + python/cudf/cudf/_lib/column.pyx | 788 + python/cudf/cudf/_lib/concat.pyx | 63 + python/cudf/cudf/_lib/copying.pxd | 10 + python/cudf/cudf/_lib/copying.pyx | 852 + python/cudf/cudf/_lib/cpp/CMakeLists.txt | 23 + python/cudf/cudf/_lib/cpp/__init__.pxd | 0 python/cudf/cudf/_lib/cpp/__init__.py | 0 python/cudf/cudf/_lib/cpp/aggregation.pxd | 153 + python/cudf/cudf/_lib/cpp/binaryop.pxd | 88 + python/cudf/cudf/_lib/cpp/column/__init__.pxd | 0 python/cudf/cudf/_lib/cpp/column/__init__.py | 0 python/cudf/cudf/_lib/cpp/column/column.pxd | 31 + .../cudf/_lib/cpp/column/column_factories.pxd | 16 + .../cudf/cudf/_lib/cpp/column/column_view.pxd | 119 + 
python/cudf/cudf/_lib/cpp/concatenate.pxd | 31 + .../cudf/cudf/_lib/cpp/contiguous_split.pxd | 29 + python/cudf/cudf/_lib/cpp/copying.pxd | 153 + python/cudf/cudf/_lib/cpp/copying.pyx | 0 python/cudf/cudf/_lib/cpp/datetime.pxd | 57 + python/cudf/cudf/_lib/cpp/expressions.pxd | 95 + python/cudf/cudf/_lib/cpp/filling.pxd | 49 + python/cudf/cudf/_lib/cpp/groupby.pxd | 105 + python/cudf/cudf/_lib/cpp/hash.pxd | 24 + python/cudf/cudf/_lib/cpp/interop.pxd | 44 + python/cudf/cudf/_lib/cpp/io/__init__.pxd | 0 python/cudf/cudf/_lib/cpp/io/__init__.py | 0 .../cudf/cudf/_lib/cpp/io/arrow_io_source.pxd | 15 + python/cudf/cudf/_lib/cpp/io/avro.pxd | 44 + python/cudf/cudf/_lib/cpp/io/csv.pxd | 239 + python/cudf/cudf/_lib/cpp/io/data_sink.pxd | 8 + python/cudf/cudf/_lib/cpp/io/datasource.pxd | 8 + python/cudf/cudf/_lib/cpp/io/json.pxd | 142 + python/cudf/cudf/_lib/cpp/io/orc.pxd | 173 + python/cudf/cudf/_lib/cpp/io/orc_metadata.pxd | 19 + python/cudf/cudf/_lib/cpp/io/parquet.pxd | 233 + python/cudf/cudf/_lib/cpp/io/text.pxd | 49 + python/cudf/cudf/_lib/cpp/io/timezone.pxd | 15 + python/cudf/cudf/_lib/cpp/io/types.pxd | 124 + python/cudf/cudf/_lib/cpp/join.pxd | 41 + python/cudf/cudf/_lib/cpp/labeling.pxd | 20 + python/cudf/cudf/_lib/cpp/libcpp/__init__.pxd | 0 python/cudf/cudf/_lib/cpp/libcpp/__init__.py | 0 .../cudf/cudf/_lib/cpp/libcpp/functional.pxd | 7 + python/cudf/cudf/_lib/cpp/libcpp/memory.pxd | 12 + python/cudf/cudf/_lib/cpp/libcpp/optional.pxd | 50 + python/cudf/cudf/_lib/cpp/lists/__init__.pxd | 0 python/cudf/cudf/_lib/cpp/lists/__init__.py | 0 python/cudf/cudf/_lib/cpp/lists/combine.pxd | 29 + python/cudf/cudf/_lib/cpp/lists/contains.pxd | 26 + .../cudf/_lib/cpp/lists/count_elements.pxd | 10 + python/cudf/cudf/_lib/cpp/lists/explode.pxd | 14 + python/cudf/cudf/_lib/cpp/lists/extract.pxd | 18 + python/cudf/cudf/_lib/cpp/lists/gather.pxd | 13 + .../cudf/_lib/cpp/lists/lists_column_view.pxd | 16 + python/cudf/cudf/_lib/cpp/lists/sorting.pxd | 15 + 
.../cudf/_lib/cpp/lists/stream_compaction.pxd | 16 + python/cudf/cudf/_lib/cpp/merge.pxd | 17 + python/cudf/cudf/_lib/cpp/null_mask.pxd | 46 + python/cudf/cudf/_lib/cpp/nvtext/__init__.pxd | 0 python/cudf/cudf/_lib/cpp/nvtext/__init__.py | 0 .../cudf/_lib/cpp/nvtext/edit_distance.pxd | 19 + .../cudf/_lib/cpp/nvtext/generate_ngrams.pxd | 27 + python/cudf/cudf/_lib/cpp/nvtext/jaccard.pxd | 16 + python/cudf/cudf/_lib/cpp/nvtext/minhash.pxd | 22 + .../cudf/_lib/cpp/nvtext/ngrams_tokenize.pxd | 18 + .../cudf/cudf/_lib/cpp/nvtext/normalize.pxd | 19 + python/cudf/cudf/_lib/cpp/nvtext/replace.pxd | 25 + python/cudf/cudf/_lib/cpp/nvtext/stemmer.pxd | 30 + .../cudf/_lib/cpp/nvtext/subword_tokenize.pxd | 54 + python/cudf/cudf/_lib/cpp/nvtext/tokenize.pxd | 55 + python/cudf/cudf/_lib/cpp/partitioning.pxd | 28 + python/cudf/cudf/_lib/cpp/quantiles.pxd | 37 + python/cudf/cudf/_lib/cpp/reduce.pxd | 33 + python/cudf/cudf/_lib/cpp/replace.pxd | 49 + python/cudf/cudf/_lib/cpp/reshape.pxd | 17 + python/cudf/cudf/_lib/cpp/rolling.pxd | 26 + python/cudf/cudf/_lib/cpp/round.pxd | 20 + python/cudf/cudf/_lib/cpp/scalar/__init__.pxd | 0 python/cudf/cudf/_lib/cpp/scalar/__init__.py | 0 python/cudf/cudf/_lib/cpp/scalar/scalar.pxd | 75 + python/cudf/cudf/_lib/cpp/search.pxd | 31 + python/cudf/cudf/_lib/cpp/sorting.pxd | 70 + .../cudf/cudf/_lib/cpp/stream_compaction.pxd | 47 + .../cudf/cudf/_lib/cpp/strings/__init__.pxd | 0 python/cudf/cudf/_lib/cpp/strings/__init__.py | 0 .../cudf/cudf/_lib/cpp/strings/attributes.pxd | 18 + .../cudf/cudf/_lib/cpp/strings/capitalize.pxd | 16 + python/cudf/cudf/_lib/cpp/strings/case.pxd | 16 + .../cudf/cudf/_lib/cpp/strings/char_types.pxd | 37 + python/cudf/cudf/_lib/cpp/strings/combine.pxd | 44 + .../cudf/cudf/_lib/cpp/strings/contains.pxd | 28 + .../_lib/cpp/strings/convert/__init__.pxd | 0 .../cudf/_lib/cpp/strings/convert/__init__.py | 0 .../cpp/strings/convert/convert_booleans.pxd | 18 + .../cpp/strings/convert/convert_datetime.pxd | 25 + 
.../cpp/strings/convert/convert_durations.pxd | 20 + .../strings/convert/convert_fixed_point.pxd | 22 + .../cpp/strings/convert/convert_floats.pxd | 21 + .../cpp/strings/convert/convert_integers.pxd | 32 + .../_lib/cpp/strings/convert/convert_ipv4.pxd | 19 + .../cpp/strings/convert/convert_lists.pxd | 15 + .../_lib/cpp/strings/convert/convert_urls.pxd | 15 + python/cudf/cudf/_lib/cpp/strings/extract.pxd | 15 + python/cudf/cudf/_lib/cpp/strings/find.pxd | 48 + .../cudf/_lib/cpp/strings/find_multiple.pxd | 14 + python/cudf/cudf/_lib/cpp/strings/findall.pxd | 14 + python/cudf/cudf/_lib/cpp/strings/json.pxd | 28 + python/cudf/cudf/_lib/cpp/strings/padding.pxd | 23 + .../cudf/_lib/cpp/strings/regex_flags.pxd | 9 + .../cudf/_lib/cpp/strings/regex_program.pxd | 18 + python/cudf/cudf/_lib/cpp/strings/repeat.pxd | 19 + python/cudf/cudf/_lib/cpp/strings/replace.pxd | 29 + .../cudf/cudf/_lib/cpp/strings/replace_re.pxd | 31 + .../cudf/cudf/_lib/cpp/strings/side_type.pxd | 12 + .../cudf/_lib/cpp/strings/split/__init__.pxd | 0 .../cudf/_lib/cpp/strings/split/__init__.py | 0 .../cudf/_lib/cpp/strings/split/partition.pxd | 21 + .../cudf/_lib/cpp/strings/split/split.pxd | 59 + python/cudf/cudf/_lib/cpp/strings/strip.pxd | 16 + .../cudf/cudf/_lib/cpp/strings/substring.pxd | 21 + .../cudf/cudf/_lib/cpp/strings/translate.pxd | 28 + python/cudf/cudf/_lib/cpp/strings/wrap.pxd | 14 + python/cudf/cudf/_lib/cpp/strings_udf.pxd | 33 + python/cudf/cudf/_lib/cpp/table/__init__.pxd | 0 python/cudf/cudf/_lib/cpp/table/__init__.py | 0 python/cudf/cudf/_lib/cpp/table/table.pxd | 19 + .../cudf/cudf/_lib/cpp/table/table_view.pxd | 23 + python/cudf/cudf/_lib/cpp/transform.pxd | 50 + python/cudf/cudf/_lib/cpp/transpose.pxd | 16 + python/cudf/cudf/_lib/cpp/types.pxd | 106 + python/cudf/cudf/_lib/cpp/types.pyx | 0 python/cudf/cudf/_lib/cpp/unary.pxd | 49 + .../cudf/cudf/_lib/cpp/utilities/__init__.pxd | 0 .../cudf/cudf/_lib/cpp/utilities/__init__.py | 0 .../cudf/_lib/cpp/utilities/host_span.pxd | 9 + 
.../cudf/cudf/_lib/cpp/wrappers/__init__.pxd | 0 .../cudf/cudf/_lib/cpp/wrappers/__init__.py | 0 .../cudf/cudf/_lib/cpp/wrappers/decimals.pxd | 17 + .../cudf/cudf/_lib/cpp/wrappers/durations.pxd | 10 + .../cudf/_lib/cpp/wrappers/timestamps.pxd | 10 + python/cudf/cudf/_lib/csv.pyx | 602 + python/cudf/cudf/_lib/datetime.pyx | 214 + python/cudf/cudf/_lib/exception_handler.pxd | 69 + python/cudf/cudf/_lib/expressions.pxd | 31 + python/cudf/cudf/_lib/expressions.pyx | 126 + python/cudf/cudf/_lib/filling.pyx | 103 + python/cudf/cudf/_lib/groupby.pyx | 404 + python/cudf/cudf/_lib/hash.pyx | 63 + python/cudf/cudf/_lib/interop.pyx | 277 + python/cudf/cudf/_lib/io/CMakeLists.txt | 27 + python/cudf/cudf/_lib/io/__init__.pxd | 0 python/cudf/cudf/_lib/io/__init__.py | 0 python/cudf/cudf/_lib/io/datasource.pxd | 15 + python/cudf/cudf/_lib/io/datasource.pyx | 27 + python/cudf/cudf/_lib/io/utils.pxd | 21 + python/cudf/cudf/_lib/io/utils.pyx | 183 + python/cudf/cudf/_lib/join.pyx | 74 + python/cudf/cudf/_lib/json.pyx | 268 + python/cudf/cudf/_lib/labeling.pyx | 43 + python/cudf/cudf/_lib/lists.pyx | 246 + python/cudf/cudf/_lib/merge.pyx | 66 + python/cudf/cudf/_lib/null_mask.pyx | 130 + python/cudf/cudf/_lib/nvtext/CMakeLists.txt | 24 + python/cudf/cudf/_lib/nvtext/__init__.pxd | 0 python/cudf/cudf/_lib/nvtext/__init__.py | 0 .../cudf/cudf/_lib/nvtext/edit_distance.pyx | 37 + .../cudf/cudf/_lib/nvtext/generate_ngrams.pyx | 75 + python/cudf/cudf/_lib/nvtext/jaccard.pyx | 31 + python/cudf/cudf/_lib/nvtext/minhash.pyx | 55 + .../cudf/cudf/_lib/nvtext/ngrams_tokenize.pyx | 48 + python/cudf/cudf/_lib/nvtext/normalize.pyx | 37 + python/cudf/cudf/_lib/nvtext/replace.pyx | 87 + python/cudf/cudf/_lib/nvtext/stemmer.pyx | 68 + .../cudf/_lib/nvtext/subword_tokenize.pyx | 61 + python/cudf/cudf/_lib/nvtext/tokenize.pyx | 162 + python/cudf/cudf/_lib/orc.pyx | 494 + python/cudf/cudf/_lib/parquet.pyx | 709 + python/cudf/cudf/_lib/partitioning.pyx | 73 + .../cudf/cudf/_lib/pylibcudf/CMakeLists.txt 
| 21 + python/cudf/cudf/_lib/pylibcudf/__init__.pxd | 18 + python/cudf/cudf/_lib/pylibcudf/__init__.py | 16 + python/cudf/cudf/_lib/pylibcudf/column.pxd | 50 + python/cudf/cudf/_lib/pylibcudf/column.pyx | 194 + python/cudf/cudf/_lib/pylibcudf/copying.pxd | 15 + python/cudf/cudf/_lib/pylibcudf/copying.pyx | 57 + .../cudf/_lib/pylibcudf/gpumemoryview.pxd | 9 + .../cudf/_lib/pylibcudf/gpumemoryview.pyx | 27 + python/cudf/cudf/_lib/pylibcudf/table.pxd | 18 + python/cudf/cudf/_lib/pylibcudf/table.pyx | 62 + python/cudf/cudf/_lib/pylibcudf/types.pxd | 16 + python/cudf/cudf/_lib/pylibcudf/types.pyx | 45 + python/cudf/cudf/_lib/pylibcudf/utils.pxd | 7 + python/cudf/cudf/_lib/pylibcudf/utils.pyx | 13 + python/cudf/cudf/_lib/quantiles.pyx | 119 + python/cudf/cudf/_lib/reduce.pyx | 163 + python/cudf/cudf/_lib/replace.pyx | 261 + python/cudf/cudf/_lib/reshape.pyx | 40 + python/cudf/cudf/_lib/rolling.pyx | 95 + python/cudf/cudf/_lib/round.pyx | 49 + python/cudf/cudf/_lib/scalar.pxd | 26 + python/cudf/cudf/_lib/scalar.pyx | 318 + python/cudf/cudf/_lib/search.pyx | 101 + python/cudf/cudf/_lib/sort.pyx | 477 + python/cudf/cudf/_lib/stream_compaction.pyx | 200 + python/cudf/cudf/_lib/string_casting.pyx | 800 + python/cudf/cudf/_lib/strings/CMakeLists.txt | 45 + python/cudf/cudf/_lib/strings/__init__.pxd | 0 python/cudf/cudf/_lib/strings/__init__.py | 105 + python/cudf/cudf/_lib/strings/attributes.pyx | 60 + python/cudf/cudf/_lib/strings/capitalize.pyx | 48 + python/cudf/cudf/_lib/strings/case.pyx | 48 + python/cudf/cudf/_lib/strings/char_types.pyx | 202 + python/cudf/cudf/_lib/strings/combine.pyx | 161 + python/cudf/cudf/_lib/strings/contains.pyx | 122 + .../cudf/_lib/strings/convert/CMakeLists.txt | 24 + .../cudf/_lib/strings/convert/__init__.pxd | 0 .../cudf/_lib/strings/convert/__init__.py | 0 .../strings/convert/convert_fixed_point.pyx | 110 + .../_lib/strings/convert/convert_floats.pyx | 30 + .../_lib/strings/convert/convert_integers.pyx | 30 + 
.../_lib/strings/convert/convert_lists.pyx | 51 + .../_lib/strings/convert/convert_urls.pyx | 69 + python/cudf/cudf/_lib/strings/extract.pyx | 45 + python/cudf/cudf/_lib/strings/find.pyx | 217 + .../cudf/cudf/_lib/strings/find_multiple.pyx | 32 + python/cudf/cudf/_lib/strings/findall.pyx | 39 + python/cudf/cudf/_lib/strings/json.pyx | 83 + python/cudf/cudf/_lib/strings/padding.pyx | 149 + python/cudf/cudf/_lib/strings/repeat.pyx | 53 + python/cudf/cudf/_lib/strings/replace.pyx | 135 + python/cudf/cudf/_lib/strings/replace_re.pyx | 116 + .../cudf/_lib/strings/split/CMakeLists.txt | 22 + .../cudf/cudf/_lib/strings/split/__init__.pxd | 0 .../cudf/cudf/_lib/strings/split/__init__.py | 0 .../cudf/_lib/strings/split/partition.pyx | 73 + python/cudf/cudf/_lib/strings/split/split.pyx | 263 + python/cudf/cudf/_lib/strings/strip.pyx | 98 + python/cudf/cudf/_lib/strings/substring.pyx | 119 + python/cudf/cudf/_lib/strings/translate.pyx | 106 + python/cudf/cudf/_lib/strings/wrap.pyx | 33 + python/cudf/cudf/_lib/strings_udf.pyx | 66 + python/cudf/cudf/_lib/text.pyx | 88 + python/cudf/cudf/_lib/timezone.pyx | 28 + python/cudf/cudf/_lib/transform.pyx | 201 + python/cudf/cudf/_lib/transpose.pyx | 31 + python/cudf/cudf/_lib/types.pxd | 21 + python/cudf/cudf/_lib/types.pyx | 333 + python/cudf/cudf/_lib/unary.pyx | 122 + python/cudf/cudf/_lib/utils.pxd | 21 + python/cudf/cudf/_lib/utils.pyx | 397 + python/cudf/cudf/_typing.py | 48 + python/cudf/cudf/api/__init__.py | 5 + python/cudf/cudf/api/extensions/__init__.py | 16 + python/cudf/cudf/api/extensions/accessor.py | 161 + python/cudf/cudf/api/types.py | 506 + python/cudf/cudf/benchmarks/README.md | 32 + python/cudf/cudf/benchmarks/bench_cudf_io.py | 86 + python/cudf/cudf/benchmarks/conftest.py | 20 + python/cudf/cudf/benchmarks/get_datasets.py | 91 + python/cudf/cudf/comm/__init__.py | 0 python/cudf/cudf/comm/serialize.py | 33 + python/cudf/cudf/core/__init__.py | 1 + python/cudf/cudf/core/_base_index.py | 2117 +++ 
python/cudf/cudf/core/_compat.py | 13 + python/cudf/cudf/core/_internals/__init__.py | 1 + .../cudf/cudf/core/_internals/expressions.py | 224 + python/cudf/cudf/core/_internals/timezones.py | 254 + python/cudf/cudf/core/_internals/where.py | 127 + python/cudf/cudf/core/abc.py | 186 + python/cudf/cudf/core/algorithms.py | 207 + python/cudf/cudf/core/buffer/__init__.py | 10 + python/cudf/cudf/core/buffer/buffer.py | 388 + .../core/buffer/exposure_tracked_buffer.py | 311 + python/cudf/cudf/core/buffer/spill_manager.py | 446 + .../cudf/cudf/core/buffer/spillable_buffer.py | 598 + python/cudf/cudf/core/buffer/utils.py | 135 + python/cudf/cudf/core/column/__init__.py | 39 + python/cudf/cudf/core/column/categorical.py | 1654 ++ python/cudf/cudf/core/column/column.py | 2835 ++++ python/cudf/cudf/core/column/datetime.py | 737 + python/cudf/cudf/core/column/decimal.py | 443 + python/cudf/cudf/core/column/interval.py | 147 + python/cudf/cudf/core/column/lists.py | 728 + python/cudf/cudf/core/column/methods.py | 104 + python/cudf/cudf/core/column/numerical.py | 791 + .../cudf/cudf/core/column/numerical_base.py | 230 + python/cudf/cudf/core/column/string.py | 6009 +++++++ python/cudf/cudf/core/column/struct.py | 244 + python/cudf/cudf/core/column/timedelta.py | 590 + python/cudf/cudf/core/column_accessor.py | 750 + python/cudf/cudf/core/common.py | 37 + python/cudf/cudf/core/copy_types.py | 171 + python/cudf/cudf/core/cut.py | 308 + python/cudf/cudf/core/dataframe.py | 8163 ++++++++++ python/cudf/cudf/core/df_protocol.py | 900 ++ python/cudf/cudf/core/dtypes.py | 1168 ++ python/cudf/cudf/core/frame.py | 2884 ++++ python/cudf/cudf/core/groupby/__init__.py | 8 + python/cudf/cudf/core/groupby/groupby.py | 2800 ++++ python/cudf/cudf/core/index.py | 3627 +++++ python/cudf/cudf/core/indexed_frame.py | 5476 +++++++ python/cudf/cudf/core/indexing_utils.py | 243 + python/cudf/cudf/core/join/__init__.py | 3 + python/cudf/cudf/core/join/_join_helpers.py | 186 + 
python/cudf/cudf/core/join/join.py | 451 + python/cudf/cudf/core/missing.py | 9 + python/cudf/cudf/core/mixins/__init__.py | 7 + python/cudf/cudf/core/mixins/binops.py | 72 + python/cudf/cudf/core/mixins/binops.pyi | 39 + python/cudf/cudf/core/mixins/mixin_factory.py | 263 + python/cudf/cudf/core/mixins/reductions.py | 35 + python/cudf/cudf/core/mixins/reductions.pyi | 29 + python/cudf/cudf/core/mixins/scans.py | 16 + python/cudf/cudf/core/mixins/scans.pyi | 11 + python/cudf/cudf/core/multiindex.py | 2117 +++ python/cudf/cudf/core/resample.py | 421 + python/cudf/cudf/core/reshape.py | 1455 ++ python/cudf/cudf/core/scalar.py | 404 + python/cudf/cudf/core/series.py | 5282 ++++++ python/cudf/cudf/core/single_column_frame.py | 438 + python/cudf/cudf/core/subword_tokenizer.py | 298 + python/cudf/cudf/core/tokenize_vocabulary.py | 48 + python/cudf/cudf/core/tools/__init__.py | 0 python/cudf/cudf/core/tools/datetimes.py | 1022 ++ python/cudf/cudf/core/tools/numeric.py | 254 + python/cudf/cudf/core/udf/__init__.py | 9 + python/cudf/cudf/core/udf/_ops.py | 66 + python/cudf/cudf/core/udf/api.py | 27 + python/cudf/cudf/core/udf/groupby_lowering.py | 190 + python/cudf/cudf/core/udf/groupby_typing.py | 284 + python/cudf/cudf/core/udf/groupby_utils.py | 229 + python/cudf/cudf/core/udf/masked_lowering.py | 400 + python/cudf/cudf/core/udf/masked_typing.py | 677 + python/cudf/cudf/core/udf/row_function.py | 164 + python/cudf/cudf/core/udf/scalar_function.py | 69 + python/cudf/cudf/core/udf/strings_lowering.py | 723 + python/cudf/cudf/core/udf/strings_typing.py | 280 + python/cudf/cudf/core/udf/strings_utils.py | 0 python/cudf/cudf/core/udf/templates.py | 80 + python/cudf/cudf/core/udf/utils.py | 362 + python/cudf/cudf/core/window/__init__.py | 3 + python/cudf/cudf/core/window/rolling.py | 562 + python/cudf/cudf/datasets.py | 177 + python/cudf/cudf/errors.py | 9 + python/cudf/cudf/io/__init__.py | 16 + python/cudf/cudf/io/avro.py | 39 + python/cudf/cudf/io/csv.py | 255 + 
python/cudf/cudf/io/dlpack.py | 92 + python/cudf/cudf/io/feather.py | 32 + python/cudf/cudf/io/hdf.py | 30 + python/cudf/cudf/io/json.py | 244 + python/cudf/cudf/io/orc.py | 474 + python/cudf/cudf/io/parquet.py | 1458 ++ python/cudf/cudf/io/text.py | 43 + python/cudf/cudf/options.py | 355 + python/cudf/cudf/pandas/__init__.py | 34 + python/cudf/cudf/pandas/__main__.py | 91 + python/cudf/cudf/pandas/_wrappers/__init__.py | 5 + python/cudf/cudf/pandas/_wrappers/common.py | 50 + python/cudf/cudf/pandas/_wrappers/numpy.py | 133 + python/cudf/cudf/pandas/_wrappers/pandas.py | 1306 ++ python/cudf/cudf/pandas/annotation.py | 35 + python/cudf/cudf/pandas/fast_slow_proxy.py | 1121 ++ python/cudf/cudf/pandas/magics.py | 33 + python/cudf/cudf/pandas/module_accelerator.py | 620 + python/cudf/cudf/pandas/profiler.py | 322 + .../pandas/scripts/analyze-test-failures.py | 70 + .../cudf/pandas/scripts/conftest-patch.py | 39 + .../cudf/pandas/scripts/run-pandas-tests.sh | 198 + .../pandas/scripts/summarize-test-results.py | 115 + python/cudf/cudf/testing/__init__.py | 7 + python/cudf/cudf/testing/_utils.py | 475 + python/cudf/cudf/testing/dataset_generator.py | 857 + python/cudf/cudf/testing/testing.py | 724 + python/cudf/cudf/tests/conftest.py | 178 + python/cudf/cudf/tests/data/__init__.py | 0 python/cudf/cudf/tests/data/avro/__init__.py | 0 python/cudf/cudf/tests/data/avro/example.avro | Bin 0 -> 313 bytes python/cudf/cudf/tests/data/ipums.pkl | Bin 0 -> 99199 bytes .../data/orc/TestOrcFile.Hive.AllNulls.orc | Bin 0 -> 293 bytes .../orc/TestOrcFile.Hive.EmptyListStripe.orc | Bin 0 -> 311 bytes .../orc/TestOrcFile.Hive.NullStructStripe.orc | Bin 0 -> 292 bytes .../orc/TestOrcFile.Hive.OneEmptyList.orc | Bin 0 -> 174 bytes .../data/orc/TestOrcFile.Hive.OneEmptyMap.orc | Bin 0 -> 156 bytes .../orc/TestOrcFile.Hive.OneNullStruct.orc | Bin 0 -> 158 bytes .../orc/TestOrcFile.NestedStructDataFrame.orc | Bin 0 -> 757 bytes .../TestOrcFile.NoIndStrm.IntWithNulls.orc | Bin 0 -> 101 bytes 
...dStrm.StructAndIntWithNulls.TwoStripes.orc | Bin 0 -> 232 bytes ...rcFile.NoIndStrm.StructAndIntWithNulls.orc | Bin 0 -> 193 bytes ...estOrcFile.NoIndStrm.StructWithNoNulls.orc | Bin 0 -> 167 bytes .../cudf/tests/data/orc/TestOrcFile.RLEv2.orc | Bin 0 -> 445 bytes .../orc/TestOrcFile.Spark.EmptyDecompData.orc | Bin 0 -> 373 bytes ...tOrcFile.Spark.NestedNotNullableStruct.orc | Bin 0 -> 310 bytes .../data/orc/TestOrcFile.apache_timestamp.orc | Bin 0 -> 302 bytes ...TestOrcFile.boolean_corruption_PR_6636.orc | Bin 0 -> 4692 bytes ...TestOrcFile.boolean_corruption_PR_6702.orc | Bin 0 -> 7844 bytes .../TestOrcFile.decimal.multiple.values.orc | Bin 0 -> 580 bytes .../tests/data/orc/TestOrcFile.decimal.orc | Bin 0 -> 16337 bytes .../orc/TestOrcFile.decimal.runpos.issue.orc | Bin 0 -> 1365 bytes .../orc/TestOrcFile.decimal.same.values.orc | Bin 0 -> 481 bytes .../data/orc/TestOrcFile.demo-12-zlib.orc | Bin 0 -> 45979 bytes .../tests/data/orc/TestOrcFile.emptyFile.orc | Bin 0 -> 523 bytes .../cudf/tests/data/orc/TestOrcFile.gmt.orc | Bin 0 -> 2582 bytes .../data/orc/TestOrcFile.int16.rle.size.orc | Bin 0 -> 3935 bytes .../TestOrcFile.int_decimal.precision_19.orc | Bin 0 -> 488 bytes .../data/orc/TestOrcFile.largeTimestamps.orc | Bin 0 -> 6327 bytes .../data/orc/TestOrcFile.lima_timezone.orc | Bin 0 -> 741 bytes .../orc/TestOrcFile.nulls-at-end-snappy.orc | Bin 0 -> 366818 bytes .../cudf/tests/data/orc/TestOrcFile.test1.orc | Bin 0 -> 1711 bytes .../data/orc/TestOrcFile.testDate1900.orc | Bin 0 -> 30941 bytes .../data/orc/TestOrcFile.testDate2038.orc | Bin 0 -> 95787 bytes .../orc/TestOrcFile.testPySparkStruct.orc | Bin 0 -> 425 bytes .../tests/data/orc/TestOrcFile.testSnappy.orc | Bin 0 -> 126370 bytes ...tOrcFile.testStringAndBinaryStatistics.orc | Bin 0 -> 341 bytes .../orc/TestOrcFile.testStripeLevelStats.orc | Bin 0 -> 851 bytes .../data/orc/TestOrcFile.timestamp.issue.orc | Bin 0 -> 53588 bytes python/cudf/cudf/tests/data/orc/__init__.py | 0 
python/cudf/cudf/tests/data/orc/nodata.orc | Bin 0 -> 242 bytes .../tests/data/orc/uncompressed_snappy.orc | Bin 0 -> 319 bytes .../cudf/cudf/tests/data/parquet/__init__.py | 0 .../tests/data/parquet/binary_decimal.parquet | Bin 0 -> 669 bytes .../tests/data/parquet/brotli_int16.parquet | Bin 0 -> 32453 bytes .../tests/data/parquet/delta_encoding.parquet | Bin 0 -> 577 bytes .../data/parquet/fixed_len_byte_array.parquet | Bin 0 -> 259 bytes .../data/parquet/mixed_compression.parquet | Bin 0 -> 637 bytes .../parquet/nested-unsigned-malformed.parquet | Bin 0 -> 2256 bytes .../data/parquet/nested_column_map.parquet | Bin 0 -> 798 bytes .../parquet/nested_decimal128_file.parquet | Bin 0 -> 1692 bytes .../tests/data/parquet/one_level_list.parquet | Bin 0 -> 255 bytes .../data/parquet/one_level_list2.parquet | Bin 0 -> 656 bytes .../data/parquet/one_level_list3.parquet | Bin 0 -> 214 bytes .../data/parquet/rle_boolean_encoding.parquet | Bin 0 -> 192 bytes .../tests/data/parquet/spark_decimal.parquet | Bin 0 -> 982 bytes .../parquet/spark_timestamp.snappy.parquet | Bin 0 -> 485 bytes .../tests/data/parquet/spark_zstd.parquet | Bin 0 -> 459 bytes .../tests/data/parquet/trailing_nans.parquet | Bin 0 -> 630 bytes .../tests/data/parquet/usec_timestamp.parquet | Bin 0 -> 1128 bytes python/cudf/cudf/tests/data/pkl/__init__.py | 0 .../stringColumnWithRangeIndex_cudf_0.16.pkl | Bin 0 -> 1709 bytes python/cudf/cudf/tests/data/sas/cars.sas7bdat | Bin 0 -> 13312 bytes .../data/subword_tokenizer_data/__init__.py | 0 .../bert_base_cased_sampled/__init__.py | 0 .../bert_base_cased_sampled/vocab-hash.txt | 4382 +++++ .../bert_base_cased_sampled/vocab.txt | 3500 ++++ .../subword_tokenizer_data/test_sentences.txt | 100 + python/cudf/cudf/tests/data/text/__init__.py | 0 python/cudf/cudf/tests/data/text/chess.pgn | 16 + python/cudf/cudf/tests/data/text/chess.pgn.gz | Bin 0 -> 881 bytes python/cudf/cudf/tests/dataframe/__init__.py | 0 .../cudf/tests/dataframe/test_attributes.py | 1 + 
.../tests/dataframe/test_binary_operations.py | 1 + .../cudf/tests/dataframe/test_combining.py | 1 + .../cudf/tests/dataframe/test_computation.py | 1 + .../cudf/tests/dataframe/test_constructing.py | 1 + .../cudf/tests/dataframe/test_conversion.py | 38 + .../dataframe/test_function_application.py | 1 + .../cudf/tests/dataframe/test_indexing.py | 1 + .../tests/dataframe/test_io_serialization.py | 1 + .../cudf/cudf/tests/dataframe/test_missing.py | 1 + .../cudf/tests/dataframe/test_reindexing.py | 1 + .../cudf/tests/dataframe/test_reshaping.py | 1 + .../cudf/tests/dataframe/test_selecting.py | 1 + .../cudf/cudf/tests/dataframe/test_sorting.py | 1 + .../cudf/tests/dataframe/test_timeseries.py | 1 + .../cudf/tests/general_functions/__init__.py | 0 .../general_functions/test_conversion.py | 1 + .../test_data_manipulation.py | 1 + .../general_functions/test_datetimelike.py | 1 + .../cudf/tests/general_utilities/__init__.py | 0 .../tests/general_utilities/test_testing.py | 1 + python/cudf/cudf/tests/groupby/__init__.py | 0 .../cudf/tests/groupby/test_computation.py | 18 + .../groupby/test_function_application.py | 1 + .../cudf/cudf/tests/groupby/test_indexing.py | 1 + python/cudf/cudf/tests/groupby/test_stats.py | 1 + python/cudf/cudf/tests/indexes/__init__.py | 0 .../cudf/tests/indexes/datetime/__init__.py | 0 .../tests/indexes/datetime/test_components.py | 1 + .../indexes/datetime/test_constructing.py | 1 + .../tests/indexes/datetime/test_conversion.py | 1 + .../tests/indexes/datetime/test_indexing.py | 19 + .../indexes/datetime/test_time_specific.py | 32 + .../cudf/tests/indexes/multiindex/__init__.py | 0 .../indexes/multiindex/test_constructing.py | 1 + .../indexes/multiindex/test_properties.py | 1 + .../indexes/multiindex/test_selecting.py | 1 + .../cudf/tests/indexes/test_categorical.py | 1 + .../cudf/cudf/tests/indexes/test_combining.py | 1 + .../cudf/tests/indexes/test_computation.py | 1 + .../cudf/tests/indexes/test_constructing.py | 1 + 
.../cudf/tests/indexes/test_conversion.py | 1 + .../cudf/cudf/tests/indexes/test_interval.py | 317 + .../cudf/tests/indexes/test_memory_usage.py | 1 + .../cudf/cudf/tests/indexes/test_missing.py | 1 + .../cudf/cudf/tests/indexes/test_modifying.py | 1 + .../tests/indexes/test_multiindex_compat.py | 1 + .../cudf/cudf/tests/indexes/test_numeric.py | 1 + .../cudf/tests/indexes/test_properties.py | 1 + .../cudf/cudf/tests/indexes/test_selecting.py | 1 + .../cudf/cudf/tests/indexes/test_sorting.py | 1 + .../cudf/tests/indexes/test_time_specific.py | 1 + .../cudf/tests/indexes/timedelta/__init__.py | 0 .../indexes/timedelta/test_components.py | 1 + .../indexes/timedelta/test_constructing.py | 1 + .../indexes/timedelta/test_conversion.py | 1 + .../cudf/cudf/tests/input_output/__init__.py | 0 .../cudf/cudf/tests/input_output/test_avro.py | 1 + .../cudf/cudf/tests/input_output/test_csv.py | 1 + .../cudf/tests/input_output/test_feather.py | 1 + .../cudf/cudf/tests/input_output/test_hdf5.py | 1 + .../cudf/cudf/tests/input_output/test_json.py | 1 + .../cudf/cudf/tests/input_output/test_orc.py | 1 + .../cudf/tests/input_output/test_parquet.py | 1 + .../cudf/cudf/tests/input_output/test_text.py | 164 + python/cudf/cudf/tests/lists/__init__.py | 0 .../cudf/tests/lists/test_list_methods.py | 1 + python/cudf/cudf/tests/options/__init__.py | 0 .../cudf/cudf/tests/options/test_options.py | 1 + python/cudf/cudf/tests/pytest.ini | 10 + python/cudf/cudf/tests/series/__init__.py | 0 .../cudf/cudf/tests/series/test_accessors.py | 1 + .../cudf/cudf/tests/series/test_attributes.py | 1 + .../tests/series/test_binary_operations.py | 1 + .../cudf/cudf/tests/series/test_categorial.py | 1 + .../cudf/cudf/tests/series/test_combining.py | 1 + .../cudf/tests/series/test_computation.py | 1 + .../cudf/tests/series/test_constructing.py | 12 + .../cudf/cudf/tests/series/test_conversion.py | 34 + .../cudf/tests/series/test_datetimelike.py | 192 + .../tests/series/test_function_application.py | 1 + 
.../cudf/cudf/tests/series/test_indexing.py | 1 + .../tests/series/test_io_serialization.py | 1 + python/cudf/cudf/tests/series/test_missing.py | 1 + .../cudf/cudf/tests/series/test_reshaping.py | 1 + .../cudf/cudf/tests/series/test_selecting.py | 1 + python/cudf/cudf/tests/series/test_sorting.py | 1 + .../cudf/cudf/tests/series/test_timeseries.py | 1 + python/cudf/cudf/tests/strings/__init__.py | 0 .../cudf/tests/strings/test_string_methods.py | 1 + python/cudf/cudf/tests/structs/__init__.py | 0 .../cudf/tests/structs/test_struct_methods.py | 1 + python/cudf/cudf/tests/test_api_types.py | 1124 ++ python/cudf/cudf/tests/test_apply_rows.py | 54 + python/cudf/cudf/tests/test_applymap.py | 48 + python/cudf/cudf/tests/test_array_function.py | 169 + python/cudf/cudf/tests/test_array_ufunc.py | 437 + .../test_avro_reader_fastavro_integration.py | 655 + python/cudf/cudf/tests/test_binops.py | 3337 ++++ python/cudf/cudf/tests/test_buffer.py | 113 + python/cudf/cudf/tests/test_categorical.py | 957 ++ python/cudf/cudf/tests/test_column.py | 551 + .../cudf/cudf/tests/test_column_accessor.py | 287 + python/cudf/cudf/tests/test_compile_udf.py | 67 + python/cudf/cudf/tests/test_concat.py | 1884 +++ python/cudf/cudf/tests/test_contains.py | 123 + python/cudf/cudf/tests/test_copying.py | 429 + python/cudf/cudf/tests/test_csv.py | 2250 +++ python/cudf/cudf/tests/test_cuda_apply.py | 224 + .../cudf/tests/test_cuda_array_interface.py | 229 + .../cudf/cudf/tests/test_custom_accessor.py | 92 + python/cudf/cudf/tests/test_cut.py | 316 + python/cudf/cudf/tests/test_dask.py | 21 + python/cudf/cudf/tests/test_dataframe.py | 10737 ++++++++++++ python/cudf/cudf/tests/test_dataframe_copy.py | 175 + python/cudf/cudf/tests/test_datasets.py | 57 + python/cudf/cudf/tests/test_datetime.py | 2299 +++ python/cudf/cudf/tests/test_decimal.py | 391 + python/cudf/cudf/tests/test_df_protocol.py | 285 + python/cudf/cudf/tests/test_dlpack.py | 216 + python/cudf/cudf/tests/test_doctests.py | 112 + 
python/cudf/cudf/tests/test_dropna.py | 296 + python/cudf/cudf/tests/test_dtypes.py | 370 + python/cudf/cudf/tests/test_duplicates.py | 631 + .../cudf/tests/test_extension_compilation.py | 335 + python/cudf/cudf/tests/test_factorize.py | 176 + python/cudf/cudf/tests/test_feather.py | 85 + python/cudf/cudf/tests/test_gcs.py | 73 + python/cudf/cudf/tests/test_groupby.py | 3542 ++++ python/cudf/cudf/tests/test_hash_vocab.py | 29 + python/cudf/cudf/tests/test_hdf.py | 151 + python/cudf/cudf/tests/test_hdfs.py | 300 + python/cudf/cudf/tests/test_index.py | 2951 ++++ python/cudf/cudf/tests/test_indexing.py | 2197 +++ python/cudf/cudf/tests/test_interpolate.py | 121 + python/cudf/cudf/tests/test_interval.py | 184 + python/cudf/cudf/tests/test_joining.py | 2272 +++ python/cudf/cudf/tests/test_json.py | 1372 ++ python/cudf/cudf/tests/test_list.py | 932 ++ python/cudf/cudf/tests/test_monotonic.py | 394 + python/cudf/cudf/tests/test_multiindex.py | 2081 +++ python/cudf/cudf/tests/test_no_cuinit.py | 110 + python/cudf/cudf/tests/test_numba_import.py | 48 + python/cudf/cudf/tests/test_numerical.py | 427 + python/cudf/cudf/tests/test_numpy_interop.py | 95 + python/cudf/cudf/tests/test_offset.py | 61 + python/cudf/cudf/tests/test_onehot.py | 174 + python/cudf/cudf/tests/test_options.py | 129 + python/cudf/cudf/tests/test_orc.py | 1939 +++ python/cudf/cudf/tests/test_pack.py | 317 + python/cudf/cudf/tests/test_pandas_interop.py | 90 + python/cudf/cudf/tests/test_parquet.py | 2882 ++++ python/cudf/cudf/tests/test_pickling.py | 151 + python/cudf/cudf/tests/test_quantiles.py | 92 + python/cudf/cudf/tests/test_query.py | 233 + python/cudf/cudf/tests/test_query_mask.py | 71 + python/cudf/cudf/tests/test_rank.py | 151 + python/cudf/cudf/tests/test_reductions.py | 366 + python/cudf/cudf/tests/test_replace.py | 1353 ++ python/cudf/cudf/tests/test_repr.py | 1484 ++ python/cudf/cudf/tests/test_resampling.py | 160 + python/cudf/cudf/tests/test_reshape.py | 808 + 
python/cudf/cudf/tests/test_rolling.py | 562 + python/cudf/cudf/tests/test_s3.py | 535 + python/cudf/cudf/tests/test_scalar.py | 472 + python/cudf/cudf/tests/test_scan.py | 279 + python/cudf/cudf/tests/test_search.py | 170 + python/cudf/cudf/tests/test_serialize.py | 403 + python/cudf/cudf/tests/test_series.py | 2652 +++ python/cudf/cudf/tests/test_seriesmap.py | 95 + python/cudf/cudf/tests/test_setitem.py | 506 + python/cudf/cudf/tests/test_sorting.py | 399 + python/cudf/cudf/tests/test_sparse_df.py | 18 + python/cudf/cudf/tests/test_spilling.py | 661 + python/cudf/cudf/tests/test_stats.py | 650 + python/cudf/cudf/tests/test_string.py | 3476 ++++ python/cudf/cudf/tests/test_string_udfs.py | 371 + python/cudf/cudf/tests/test_struct.py | 450 + python/cudf/cudf/tests/test_testing.py | 439 + python/cudf/cudf/tests/test_timedelta.py | 1471 ++ python/cudf/cudf/tests/test_transform.py | 34 + python/cudf/cudf/tests/test_udf_binops.py | 51 + python/cudf/cudf/tests/test_udf_masked_ops.py | 994 ++ python/cudf/cudf/tests/test_unaops.py | 131 + python/cudf/cudf/tests/text/__init__.py | 0 .../cudf/tests/text/test_subword_tokenizer.py | 238 + .../cudf/cudf/tests/text/test_text_methods.py | 1026 ++ python/cudf/cudf/tests/window/__init__.py | 0 python/cudf/cudf/tests/window/test_rolling.py | 1 + python/cudf/cudf/utils/__init__.py | 1 + python/cudf/cudf/utils/_numba.py | 191 + python/cudf/cudf/utils/_ptxcompiler.py | 107 + python/cudf/cudf/utils/applyutils.py | 375 + python/cudf/cudf/utils/cudautils.py | 138 + python/cudf/cudf/utils/docutils.py | 352 + python/cudf/cudf/utils/dtypes.py | 857 + python/cudf/cudf/utils/gpu_utils.py | 148 + python/cudf/cudf/utils/hash_vocab_utils.py | 299 + python/cudf/cudf/utils/ioutils.py | 2068 +++ python/cudf/cudf/utils/metadata/__init__.py | 1 + .../metadata/orc_column_statistics.proto | 62 + python/cudf/cudf/utils/queryutils.py | 255 + python/cudf/cudf/utils/string.py | 13 + python/cudf/cudf/utils/utils.py | 431 + 
.../cudf_pandas_tests/_magics_cpu_test.py | 30 + .../cudf_pandas_tests/_magics_gpu_test.py | 26 + .../cudf_pandas_tests/test_array_function.py | 91 + .../cudf_pandas_tests/test_cudf_pandas.py | 1228 ++ .../test_cudf_pandas_cudf_interop.py | 17 + .../cudf_pandas_tests/test_fast_slow_proxy.py | 522 + python/cudf/cudf_pandas_tests/test_magics.py | 50 + .../cudf/cudf_pandas_tests/test_profiler.py | 70 + python/cudf/pyproject.toml | 171 + python/cudf/setup.py | 11 + python/cudf/udf_cpp/CMakeLists.txt | 113 + python/cudf/udf_cpp/shim.cu | 723 + .../strings/include/cudf/strings/udf/case.cuh | 211 + .../include/cudf/strings/udf/char_types.cuh | 212 + .../include/cudf/strings/udf/numeric.cuh | 72 + .../strings/include/cudf/strings/udf/pad.cuh | 72 + .../include/cudf/strings/udf/replace.cuh | 64 + .../include/cudf/strings/udf/search.cuh | 64 + .../include/cudf/strings/udf/split.cuh | 282 + .../include/cudf/strings/udf/starts_with.cuh | 89 + .../include/cudf/strings/udf/strip.cuh | 80 + .../include/cudf/strings/udf/udf_apis.hpp | 73 + .../include/cudf/strings/udf/udf_string.cuh | 458 + .../include/cudf/strings/udf/udf_string.hpp | 550 + .../strings/src/strings/udf/udf_apis.cu | 123 + python/cudf_kafka/cudf_kafka/__init__.py | 0 .../cudf_kafka/cudf_kafka/_lib/__init__.pxd | 0 python/cudf_kafka/cudf_kafka/_lib/kafka.pxd | 84 + python/cudf_kafka/cudf_kafka/_lib/kafka.pyx | 102 + python/cudf_kafka/pyproject.toml | 85 + python/cudf_kafka/setup.py | 96 + python/custreamz/.coveragerc | 3 + python/custreamz/LICENSE | 1 + python/custreamz/README.md | 69 + python/custreamz/custreamz/__init__.py | 3 + python/custreamz/custreamz/kafka.py | 291 + python/custreamz/custreamz/tests/__init__.py | 0 python/custreamz/custreamz/tests/conftest.py | 30 + .../custreamz/tests/test_dataframes.py | 940 ++ .../custreamz/custreamz/tests/test_kafka.py | 20 + python/custreamz/pyproject.toml | 100 + python/custreamz/setup.py | 5 + python/dask_cudf/.coveragerc | 3 + python/dask_cudf/LICENSE | 1 + 
python/dask_cudf/README.md | 1 + python/dask_cudf/dask_cudf/DASK_LICENSE.txt | 31 + python/dask_cudf/dask_cudf/__init__.py | 30 + python/dask_cudf/dask_cudf/accessors.py | 286 + python/dask_cudf/dask_cudf/backends.py | 625 + python/dask_cudf/dask_cudf/core.py | 743 + python/dask_cudf/dask_cudf/groupby.py | 892 + python/dask_cudf/dask_cudf/io/__init__.py | 11 + python/dask_cudf/dask_cudf/io/csv.py | 222 + python/dask_cudf/dask_cudf/io/json.py | 75 + python/dask_cudf/dask_cudf/io/orc.py | 199 + python/dask_cudf/dask_cudf/io/parquet.py | 531 + .../dask_cudf/dask_cudf/io/tests/__init__.py | 0 .../dask_cudf/io/tests/data/orc/sample.orc | Bin 0 -> 30941 bytes .../dask_cudf/io/tests/data/text/sample.pgn | 53 + .../dask_cudf/dask_cudf/io/tests/test_csv.py | 255 + .../dask_cudf/dask_cudf/io/tests/test_json.py | 94 + .../dask_cudf/dask_cudf/io/tests/test_orc.py | 145 + .../dask_cudf/io/tests/test_parquet.py | 597 + .../dask_cudf/dask_cudf/io/tests/test_s3.py | 142 + .../dask_cudf/dask_cudf/io/tests/test_text.py | 32 + python/dask_cudf/dask_cudf/io/text.py | 55 + python/dask_cudf/dask_cudf/sorting.py | 333 + python/dask_cudf/dask_cudf/tests/__init__.py | 0 .../dask_cudf/tests/test_accessor.py | 559 + .../dask_cudf/tests/test_applymap.py | 29 + .../dask_cudf/dask_cudf/tests/test_binops.py | 90 + python/dask_cudf/dask_cudf/tests/test_core.py | 946 ++ .../dask_cudf/tests/test_delayed_io.py | 169 + .../dask_cudf/tests/test_dispatch.py | 97 + .../dask_cudf/tests/test_distributed.py | 99 + .../dask_cudf/dask_cudf/tests/test_groupby.py | 859 + python/dask_cudf/dask_cudf/tests/test_join.py | 361 + .../dask_cudf/dask_cudf/tests/test_onehot.py | 125 + .../dask_cudf/tests/test_reductions.py | 80 + python/dask_cudf/dask_cudf/tests/test_sort.py | 116 + .../dask_cudf/dask_cudf/tests/test_struct.py | 61 + python/dask_cudf/dask_cudf/tests/utils.py | 21 + python/dask_cudf/pyproject.toml | 98 + python/dask_cudf/setup.py | 14 + 2588 files changed, 738118 insertions(+) create mode 100644 
.clang-format create mode 100644 .github/CODEOWNERS create mode 100644 .github/ISSUE_TEMPLATE/bug_report.md create mode 100644 .github/ISSUE_TEMPLATE/documentation-request.md create mode 100644 .github/ISSUE_TEMPLATE/feature_request.md create mode 100644 .github/ISSUE_TEMPLATE/pandas_function_request.md create mode 100644 .github/ISSUE_TEMPLATE/submit-question.md create mode 100644 .github/PULL_REQUEST_TEMPLATE.md create mode 100644 .github/copy-pr-bot.yaml create mode 100644 .github/labeler.yml create mode 100644 .github/ops-bot.yaml create mode 100644 .github/workflows/build.yaml create mode 100644 .github/workflows/jni-docker-build.yml create mode 100644 .github/workflows/labeler.yml create mode 100644 .github/workflows/pr.yaml create mode 100644 .github/workflows/test.yaml create mode 100644 .gitignore create mode 100644 .pre-commit-config.yaml create mode 100644 CHANGELOG.md create mode 100644 CONTRIBUTING.md create mode 100644 LICENSE create mode 100644 README.md create mode 100755 build.sh create mode 100755 ci/build_cpp.sh create mode 100755 ci/build_docs.sh create mode 100755 ci/build_python.sh create mode 100755 ci/build_wheel.sh create mode 100755 ci/build_wheel_cudf.sh create mode 100755 ci/build_wheel_dask_cudf.sh create mode 100755 ci/check_style.sh create mode 100644 ci/checks/copyright.py create mode 100755 ci/checks/doxygen.sh create mode 100755 ci/cudf_pandas_scripts/pandas-tests/diff.sh create mode 100644 ci/cudf_pandas_scripts/pandas-tests/job-summary.py create mode 100755 ci/cudf_pandas_scripts/pandas-tests/run.sh create mode 100755 ci/cudf_pandas_scripts/run_tests.sh create mode 100755 ci/release/update-version.sh create mode 100755 ci/test_cpp.sh create mode 100644 ci/test_cpp_common.sh create mode 100755 ci/test_cpp_memcheck.sh create mode 100755 ci/test_java.sh create mode 100755 ci/test_notebooks.sh create mode 100755 ci/test_python_common.sh create mode 100755 ci/test_python_cudf.sh create mode 100755 ci/test_python_other.sh create mode 
100755 ci/test_wheel_cudf.sh create mode 100755 ci/test_wheel_dask_cudf.sh create mode 100755 ci/utils/nbtest.sh create mode 100644 ci/utils/nbtestlog2junitxml.py create mode 100644 ci/wheel_smoke_test_cudf.py create mode 100644 codecov.yml create mode 100644 conda/environments/all_cuda-118_arch-x86_64.yaml create mode 100644 conda/environments/all_cuda-120_arch-x86_64.yaml create mode 100644 conda/recipes/cudf/build.sh create mode 100644 conda/recipes/cudf/conda_build_config.yaml create mode 100644 conda/recipes/cudf/meta.yaml create mode 100644 conda/recipes/cudf_kafka/build.sh create mode 100644 conda/recipes/cudf_kafka/conda_build_config.yaml create mode 100644 conda/recipes/cudf_kafka/meta.yaml create mode 100644 conda/recipes/custreamz/build.sh create mode 100644 conda/recipes/custreamz/meta.yaml create mode 100644 conda/recipes/dask-cudf/build.sh create mode 100644 conda/recipes/dask-cudf/meta.yaml create mode 100644 conda/recipes/dask-cudf/run_test.sh create mode 100644 conda/recipes/libcudf/build.sh create mode 100644 conda/recipes/libcudf/conda_build_config.yaml create mode 100644 conda/recipes/libcudf/install_libcudf.sh create mode 100644 conda/recipes/libcudf/install_libcudf_example.sh create mode 100644 conda/recipes/libcudf/install_libcudf_kafka.sh create mode 100644 conda/recipes/libcudf/install_libcudf_tests.sh create mode 100644 conda/recipes/libcudf/meta.yaml create mode 100644 cpp/.clang-tidy create mode 100644 cpp/CMakeLists.txt create mode 100644 cpp/benchmarks/CMakeLists.txt create mode 100644 cpp/benchmarks/ast/transform.cpp create mode 100644 cpp/benchmarks/binaryop/binaryop.cpp create mode 100644 cpp/benchmarks/binaryop/compiled_binaryop.cpp create mode 100644 cpp/benchmarks/column/concatenate.cpp create mode 100644 cpp/benchmarks/common/generate_input.cu create mode 100644 cpp/benchmarks/common/generate_input.hpp create mode 100644 cpp/benchmarks/common/random_distribution_factory.cuh create mode 100644 
cpp/benchmarks/copying/contiguous_split.cu create mode 100644 cpp/benchmarks/copying/copy_if_else.cpp create mode 100644 cpp/benchmarks/copying/gather.cu create mode 100644 cpp/benchmarks/copying/scatter.cu create mode 100644 cpp/benchmarks/copying/shift.cu create mode 100644 cpp/benchmarks/filling/repeat.cpp create mode 100644 cpp/benchmarks/fixture/benchmark_fixture.hpp create mode 100644 cpp/benchmarks/fixture/nvbench_fixture.hpp create mode 100644 cpp/benchmarks/fixture/nvbench_main.cpp create mode 100644 cpp/benchmarks/fixture/templated_benchmark_fixture.hpp create mode 100644 cpp/benchmarks/groupby/group_common.hpp create mode 100644 cpp/benchmarks/groupby/group_max.cpp create mode 100644 cpp/benchmarks/groupby/group_no_requests.cpp create mode 100644 cpp/benchmarks/groupby/group_nth.cpp create mode 100644 cpp/benchmarks/groupby/group_nunique.cpp create mode 100644 cpp/benchmarks/groupby/group_rank.cpp create mode 100644 cpp/benchmarks/groupby/group_scan.cpp create mode 100644 cpp/benchmarks/groupby/group_shift.cpp create mode 100644 cpp/benchmarks/groupby/group_struct_keys.cpp create mode 100644 cpp/benchmarks/groupby/group_struct_values.cpp create mode 100644 cpp/benchmarks/groupby/group_sum.cpp create mode 100644 cpp/benchmarks/hashing/hash.cpp create mode 100644 cpp/benchmarks/hashing/partition.cpp create mode 100644 cpp/benchmarks/io/csv/csv_reader_input.cpp create mode 100644 cpp/benchmarks/io/csv/csv_reader_options.cpp create mode 100644 cpp/benchmarks/io/csv/csv_writer.cpp create mode 100644 cpp/benchmarks/io/cuio_common.cpp create mode 100644 cpp/benchmarks/io/cuio_common.hpp create mode 100644 cpp/benchmarks/io/fst.cu create mode 100644 cpp/benchmarks/io/json/json_reader_input.cpp create mode 100644 cpp/benchmarks/io/json/json_writer.cpp create mode 100644 cpp/benchmarks/io/json/nested_json.cpp create mode 100644 cpp/benchmarks/io/nvbench_helpers.hpp create mode 100644 cpp/benchmarks/io/orc/orc_reader_input.cpp create mode 100644 
cpp/benchmarks/io/orc/orc_reader_options.cpp create mode 100644 cpp/benchmarks/io/orc/orc_writer.cpp create mode 100644 cpp/benchmarks/io/orc/orc_writer_chunks.cpp create mode 100644 cpp/benchmarks/io/parquet/parquet_reader_input.cpp create mode 100644 cpp/benchmarks/io/parquet/parquet_reader_options.cpp create mode 100644 cpp/benchmarks/io/parquet/parquet_writer.cpp create mode 100644 cpp/benchmarks/io/parquet/parquet_writer_chunks.cpp create mode 100644 cpp/benchmarks/io/text/multibyte_split.cpp create mode 100644 cpp/benchmarks/iterator/iterator.cu create mode 100644 cpp/benchmarks/join/conditional_join.cu create mode 100644 cpp/benchmarks/join/generate_input_tables.cuh create mode 100644 cpp/benchmarks/join/join.cu create mode 100644 cpp/benchmarks/join/join_common.hpp create mode 100644 cpp/benchmarks/join/left_join.cu create mode 100644 cpp/benchmarks/join/mixed_join.cu create mode 100644 cpp/benchmarks/lists/copying/scatter_lists.cu create mode 100644 cpp/benchmarks/lists/set_operations.cpp create mode 100644 cpp/benchmarks/merge/merge.cpp create mode 100644 cpp/benchmarks/null_mask/set_null_mask.cpp create mode 100644 cpp/benchmarks/quantiles/quantiles.cpp create mode 100644 cpp/benchmarks/reduction/anyall.cpp create mode 100644 cpp/benchmarks/reduction/dictionary.cpp create mode 100644 cpp/benchmarks/reduction/minmax.cpp create mode 100644 cpp/benchmarks/reduction/rank.cpp create mode 100644 cpp/benchmarks/reduction/reduce.cpp create mode 100644 cpp/benchmarks/reduction/scan.cpp create mode 100644 cpp/benchmarks/reduction/scan_structs.cpp create mode 100644 cpp/benchmarks/reduction/segmented_reduce.cpp create mode 100644 cpp/benchmarks/replace/clamp.cpp create mode 100644 cpp/benchmarks/replace/nans.cpp create mode 100644 cpp/benchmarks/search/contains_scalar.cpp create mode 100644 cpp/benchmarks/search/contains_table.cpp create mode 100644 cpp/benchmarks/search/search.cpp create mode 100644 cpp/benchmarks/sort/nested_types_common.hpp create mode 100644 
cpp/benchmarks/sort/rank.cpp create mode 100644 cpp/benchmarks/sort/rank_lists.cpp create mode 100644 cpp/benchmarks/sort/rank_structs.cpp create mode 100644 cpp/benchmarks/sort/rank_types_common.hpp create mode 100644 cpp/benchmarks/sort/segmented_sort.cpp create mode 100644 cpp/benchmarks/sort/sort.cpp create mode 100644 cpp/benchmarks/sort/sort_lists.cpp create mode 100644 cpp/benchmarks/sort/sort_strings.cpp create mode 100644 cpp/benchmarks/sort/sort_structs.cpp create mode 100644 cpp/benchmarks/stream_compaction/apply_boolean_mask.cpp create mode 100644 cpp/benchmarks/stream_compaction/distinct.cpp create mode 100644 cpp/benchmarks/stream_compaction/distinct_count.cpp create mode 100644 cpp/benchmarks/stream_compaction/stable_distinct.cpp create mode 100644 cpp/benchmarks/stream_compaction/unique.cpp create mode 100644 cpp/benchmarks/stream_compaction/unique_count.cpp create mode 100644 cpp/benchmarks/string/case.cpp create mode 100644 cpp/benchmarks/string/char_types.cpp create mode 100644 cpp/benchmarks/string/combine.cpp create mode 100644 cpp/benchmarks/string/contains.cpp create mode 100644 cpp/benchmarks/string/convert_datetime.cpp create mode 100644 cpp/benchmarks/string/convert_durations.cpp create mode 100644 cpp/benchmarks/string/convert_fixed_point.cpp create mode 100644 cpp/benchmarks/string/convert_numerics.cpp create mode 100644 cpp/benchmarks/string/copy.cu create mode 100644 cpp/benchmarks/string/count.cpp create mode 100644 cpp/benchmarks/string/extract.cpp create mode 100644 cpp/benchmarks/string/factory.cu create mode 100644 cpp/benchmarks/string/filter.cpp create mode 100644 cpp/benchmarks/string/find.cpp create mode 100644 cpp/benchmarks/string/gather.cpp create mode 100644 cpp/benchmarks/string/join_strings.cpp create mode 100644 cpp/benchmarks/string/json.cu create mode 100644 cpp/benchmarks/string/lengths.cpp create mode 100644 cpp/benchmarks/string/like.cpp create mode 100644 cpp/benchmarks/string/repeat_strings.cpp create mode 100644 
cpp/benchmarks/string/replace.cpp create mode 100644 cpp/benchmarks/string/replace_re.cpp create mode 100644 cpp/benchmarks/string/reverse.cpp create mode 100644 cpp/benchmarks/string/slice.cpp create mode 100644 cpp/benchmarks/string/split.cpp create mode 100644 cpp/benchmarks/string/split_re.cpp create mode 100644 cpp/benchmarks/string/string_bench_args.hpp create mode 100644 cpp/benchmarks/string/translate.cpp create mode 100644 cpp/benchmarks/string/url_decode.cu create mode 100644 cpp/benchmarks/synchronization/synchronization.cpp create mode 100644 cpp/benchmarks/synchronization/synchronization.hpp create mode 100644 cpp/benchmarks/text/edit_distance.cpp create mode 100644 cpp/benchmarks/text/hash_ngrams.cpp create mode 100644 cpp/benchmarks/text/jaccard.cpp create mode 100644 cpp/benchmarks/text/minhash.cpp create mode 100644 cpp/benchmarks/text/ngrams.cpp create mode 100644 cpp/benchmarks/text/normalize.cpp create mode 100644 cpp/benchmarks/text/replace.cpp create mode 100644 cpp/benchmarks/text/subword.cpp create mode 100644 cpp/benchmarks/text/tokenize.cpp create mode 100644 cpp/benchmarks/transpose/transpose.cpp create mode 100644 cpp/benchmarks/type_dispatcher/type_dispatcher.cu create mode 100644 cpp/cmake/Modules/ConfigureCUDA.cmake create mode 100644 cpp/cmake/Modules/FindcuFile.cmake create mode 100644 cpp/cmake/Modules/JitifyPreprocessKernels.cmake create mode 100644 cpp/cmake/config.json create mode 100644 cpp/cmake/thirdparty/get_arrow.cmake create mode 100644 cpp/cmake/thirdparty/get_cucollections.cmake create mode 100644 cpp/cmake/thirdparty/get_cufile.cmake create mode 100644 cpp/cmake/thirdparty/get_dlpack.cmake create mode 100644 cpp/cmake/thirdparty/get_fmt.cmake create mode 100644 cpp/cmake/thirdparty/get_gtest.cmake create mode 100644 cpp/cmake/thirdparty/get_jitify.cmake create mode 100644 cpp/cmake/thirdparty/get_kvikio.cmake create mode 100644 cpp/cmake/thirdparty/get_libcudacxx.cmake create mode 100644 
cpp/cmake/thirdparty/get_nvbench.cmake create mode 100644 cpp/cmake/thirdparty/get_nvcomp.cmake create mode 100644 cpp/cmake/thirdparty/get_rmm.cmake create mode 100644 cpp/cmake/thirdparty/get_spdlog.cmake create mode 100644 cpp/cmake/thirdparty/get_thrust.cmake create mode 100644 cpp/cmake/thirdparty/patches/cub_segmented_sort_with_bool_key.diff create mode 100644 cpp/cmake/thirdparty/patches/nvbench_global_setup.diff create mode 100644 cpp/cmake/thirdparty/patches/nvbench_override.json create mode 100644 cpp/cmake/thirdparty/patches/thrust_disable_64bit_dispatching.diff create mode 100644 cpp/cmake/thirdparty/patches/thrust_faster_scan_compile_times.diff create mode 100644 cpp/cmake/thirdparty/patches/thrust_faster_sort_compile_times.diff create mode 100644 cpp/cmake/thirdparty/patches/thrust_override.json create mode 100644 cpp/cmake/thirdparty/patches/thrust_transform_iter_with_reduce_by_key.diff create mode 100644 cpp/doxygen/Doxyfile create mode 100644 cpp/doxygen/DoxygenLayout.xml create mode 100644 cpp/doxygen/developer_guide/BENCHMARKING.md create mode 100644 cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md create mode 100644 cpp/doxygen/developer_guide/DOCUMENTATION.md create mode 100644 cpp/doxygen/developer_guide/TESTING.md create mode 100644 cpp/doxygen/developer_guide/strings.png create mode 100644 cpp/doxygen/header.html create mode 100644 cpp/doxygen/main_page.md create mode 100755 cpp/doxygen/modify_fences.sh create mode 100644 cpp/doxygen/regex.md create mode 100644 cpp/doxygen/unicode.md create mode 100644 cpp/examples/README.md create mode 100644 cpp/examples/basic/4stock_5day.csv create mode 100644 cpp/examples/basic/CMakeLists.txt create mode 100644 cpp/examples/basic/README.md create mode 100644 cpp/examples/basic/src/process_csv.cpp create mode 100755 cpp/examples/build.sh create mode 100644 cpp/examples/strings/CMakeLists.txt create mode 100644 cpp/examples/strings/README.md create mode 100644 cpp/examples/strings/common.hpp create mode 
100644 cpp/examples/strings/custom_optimized.cu create mode 100644 cpp/examples/strings/custom_prealloc.cu create mode 100644 cpp/examples/strings/custom_with_malloc.cu create mode 100644 cpp/examples/strings/libcudf_apis.cpp create mode 100644 cpp/examples/strings/names.csv create mode 100644 cpp/include/cudf/aggregation.hpp create mode 100644 cpp/include/cudf/ast/detail/expression_evaluator.cuh create mode 100644 cpp/include/cudf/ast/detail/expression_parser.hpp create mode 100644 cpp/include/cudf/ast/detail/expression_transformer.hpp create mode 100644 cpp/include/cudf/ast/detail/operators.hpp create mode 100644 cpp/include/cudf/ast/expressions.hpp create mode 100644 cpp/include/cudf/binaryop.hpp create mode 100644 cpp/include/cudf/column/column.hpp create mode 100644 cpp/include/cudf/column/column_device_view.cuh create mode 100644 cpp/include/cudf/column/column_factories.hpp create mode 100644 cpp/include/cudf/column/column_view.hpp create mode 100644 cpp/include/cudf/concatenate.hpp create mode 100644 cpp/include/cudf/contiguous_split.hpp create mode 100644 cpp/include/cudf/copying.hpp create mode 100644 cpp/include/cudf/datetime.hpp create mode 100644 cpp/include/cudf/detail/aggregation/aggregation.cuh create mode 100644 cpp/include/cudf/detail/aggregation/aggregation.hpp create mode 100644 cpp/include/cudf/detail/aggregation/result_cache.hpp create mode 100644 cpp/include/cudf/detail/binaryop.hpp create mode 100644 cpp/include/cudf/detail/calendrical_month_sequence.cuh create mode 100644 cpp/include/cudf/detail/concatenate.hpp create mode 100644 cpp/include/cudf/detail/concatenate_masks.hpp create mode 100644 cpp/include/cudf/detail/contiguous_split.hpp create mode 100644 cpp/include/cudf/detail/copy.hpp create mode 100644 cpp/include/cudf/detail/copy_if.cuh create mode 100644 cpp/include/cudf/detail/copy_if_else.cuh create mode 100644 cpp/include/cudf/detail/copy_range.cuh create mode 100644 cpp/include/cudf/detail/datetime.hpp create mode 100644 
cpp/include/cudf/detail/datetime_ops.cuh create mode 100644 cpp/include/cudf/detail/fill.hpp create mode 100644 cpp/include/cudf/detail/gather.cuh create mode 100644 cpp/include/cudf/detail/gather.hpp create mode 100644 cpp/include/cudf/detail/get_value.cuh create mode 100644 cpp/include/cudf/detail/groupby.hpp create mode 100644 cpp/include/cudf/detail/groupby/group_replace_nulls.hpp create mode 100644 cpp/include/cudf/detail/groupby/sort_helper.hpp create mode 100644 cpp/include/cudf/detail/hash_reduce_by_row.cuh create mode 100644 cpp/include/cudf/detail/indexalator.cuh create mode 100644 cpp/include/cudf/detail/interop.hpp create mode 100644 cpp/include/cudf/detail/is_element_valid.hpp create mode 100644 cpp/include/cudf/detail/iterator.cuh create mode 100644 cpp/include/cudf/detail/join.hpp create mode 100644 cpp/include/cudf/detail/label_bins.hpp create mode 100644 cpp/include/cudf/detail/labeling/label_segments.cuh create mode 100644 cpp/include/cudf/detail/merge.cuh create mode 100644 cpp/include/cudf/detail/normalizing_iterator.cuh create mode 100644 cpp/include/cudf/detail/null_mask.cuh create mode 100644 cpp/include/cudf/detail/null_mask.hpp create mode 100644 cpp/include/cudf/detail/nvtx/nvtx3.hpp create mode 100644 cpp/include/cudf/detail/nvtx/ranges.hpp create mode 100644 cpp/include/cudf/detail/quantiles.hpp create mode 100644 cpp/include/cudf/detail/repeat.hpp create mode 100644 cpp/include/cudf/detail/replace.hpp create mode 100644 cpp/include/cudf/detail/replace/nulls.cuh create mode 100644 cpp/include/cudf/detail/reshape.hpp create mode 100644 cpp/include/cudf/detail/rolling.hpp create mode 100644 cpp/include/cudf/detail/round.hpp create mode 100644 cpp/include/cudf/detail/scan.hpp create mode 100644 cpp/include/cudf/detail/scatter.cuh create mode 100644 cpp/include/cudf/detail/scatter.hpp create mode 100644 cpp/include/cudf/detail/search.hpp create mode 100644 cpp/include/cudf/detail/sequence.hpp create mode 100644 
cpp/include/cudf/detail/sizes_to_offsets_iterator.cuh create mode 100644 cpp/include/cudf/detail/sorting.hpp create mode 100644 cpp/include/cudf/detail/stream_compaction.hpp create mode 100644 cpp/include/cudf/detail/structs/utilities.hpp create mode 100644 cpp/include/cudf/detail/tdigest/tdigest.hpp create mode 100644 cpp/include/cudf/detail/timezone.cuh create mode 100644 cpp/include/cudf/detail/timezone.hpp create mode 100644 cpp/include/cudf/detail/transform.hpp create mode 100644 cpp/include/cudf/detail/transpose.hpp create mode 100644 cpp/include/cudf/detail/unary.hpp create mode 100644 cpp/include/cudf/detail/utilities/algorithm.cuh create mode 100644 cpp/include/cudf/detail/utilities/alignment.hpp create mode 100644 cpp/include/cudf/detail/utilities/assert.cuh create mode 100644 cpp/include/cudf/detail/utilities/cuda.cuh create mode 100644 cpp/include/cudf/detail/utilities/default_stream.hpp create mode 100644 cpp/include/cudf/detail/utilities/device_atomics.cuh create mode 100644 cpp/include/cudf/detail/utilities/device_operators.cuh create mode 100644 cpp/include/cudf/detail/utilities/element_argminmax.cuh create mode 100644 cpp/include/cudf/detail/utilities/int_fastdiv.h create mode 100644 cpp/include/cudf/detail/utilities/integer_utils.hpp create mode 100644 cpp/include/cudf/detail/utilities/linked_column.hpp create mode 100644 cpp/include/cudf/detail/utilities/logger.hpp create mode 100644 cpp/include/cudf/detail/utilities/pinned_host_vector.hpp create mode 100644 cpp/include/cudf/detail/utilities/stacktrace.hpp create mode 100644 cpp/include/cudf/detail/utilities/stream_pool.hpp create mode 100644 cpp/include/cudf/detail/utilities/transform_unary_functions.cuh create mode 100644 cpp/include/cudf/detail/utilities/vector_factories.hpp create mode 100644 cpp/include/cudf/detail/utilities/visitor_overload.hpp create mode 100644 cpp/include/cudf/detail/valid_if.cuh create mode 100644 cpp/include/cudf/dictionary/detail/concatenate.hpp create mode 100644 
cpp/include/cudf/dictionary/detail/encode.hpp create mode 100644 cpp/include/cudf/dictionary/detail/iterator.cuh create mode 100644 cpp/include/cudf/dictionary/detail/merge.hpp create mode 100644 cpp/include/cudf/dictionary/detail/replace.hpp create mode 100644 cpp/include/cudf/dictionary/detail/search.hpp create mode 100644 cpp/include/cudf/dictionary/detail/update_keys.hpp create mode 100644 cpp/include/cudf/dictionary/dictionary_column_view.hpp create mode 100644 cpp/include/cudf/dictionary/dictionary_factories.hpp create mode 100644 cpp/include/cudf/dictionary/encode.hpp create mode 100644 cpp/include/cudf/dictionary/search.hpp create mode 100644 cpp/include/cudf/dictionary/update_keys.hpp create mode 100644 cpp/include/cudf/filling.hpp create mode 100644 cpp/include/cudf/fixed_point/fixed_point.hpp create mode 100644 cpp/include/cudf/fixed_point/temporary.hpp create mode 100644 cpp/include/cudf/groupby.hpp create mode 100644 cpp/include/cudf/hashing.hpp create mode 100644 cpp/include/cudf/hashing/detail/default_hash.cuh create mode 100644 cpp/include/cudf/hashing/detail/hash_allocator.cuh create mode 100644 cpp/include/cudf/hashing/detail/hash_functions.cuh create mode 100644 cpp/include/cudf/hashing/detail/hashing.hpp create mode 100644 cpp/include/cudf/hashing/detail/helper_functions.cuh create mode 100644 cpp/include/cudf/hashing/detail/murmurhash3_x64_128.cuh create mode 100644 cpp/include/cudf/hashing/detail/murmurhash3_x86_32.cuh create mode 100644 cpp/include/cudf/interop.hpp create mode 100644 cpp/include/cudf/io/arrow_io_source.hpp create mode 100644 cpp/include/cudf/io/avro.hpp create mode 100644 cpp/include/cudf/io/csv.hpp create mode 100644 cpp/include/cudf/io/data_sink.hpp create mode 100644 cpp/include/cudf/io/datasource.hpp create mode 100644 cpp/include/cudf/io/detail/avro.hpp create mode 100644 cpp/include/cudf/io/detail/csv.hpp create mode 100644 cpp/include/cudf/io/detail/json.hpp create mode 100644 cpp/include/cudf/io/detail/orc.hpp create 
mode 100644 cpp/include/cudf/io/detail/parquet.hpp create mode 100644 cpp/include/cudf/io/detail/tokenize_json.hpp create mode 100644 cpp/include/cudf/io/detail/utils.hpp create mode 100644 cpp/include/cudf/io/json.hpp create mode 100644 cpp/include/cudf/io/orc.hpp create mode 100644 cpp/include/cudf/io/orc_metadata.hpp create mode 100644 cpp/include/cudf/io/orc_types.hpp create mode 100644 cpp/include/cudf/io/parquet.hpp create mode 100644 cpp/include/cudf/io/parquet_metadata.hpp create mode 100644 cpp/include/cudf/io/text/byte_range_info.hpp create mode 100644 cpp/include/cudf/io/text/data_chunk_source.hpp create mode 100644 cpp/include/cudf/io/text/data_chunk_source_factories.hpp create mode 100644 cpp/include/cudf/io/text/detail/bgzip_utils.hpp create mode 100644 cpp/include/cudf/io/text/detail/multistate.hpp create mode 100644 cpp/include/cudf/io/text/detail/tile_state.hpp create mode 100644 cpp/include/cudf/io/text/detail/trie.hpp create mode 100644 cpp/include/cudf/io/text/multibyte_split.hpp create mode 100644 cpp/include/cudf/io/types.hpp create mode 100644 cpp/include/cudf/join.hpp create mode 100644 cpp/include/cudf/labeling/label_bins.hpp create mode 100644 cpp/include/cudf/lists/combine.hpp create mode 100644 cpp/include/cudf/lists/contains.hpp create mode 100644 cpp/include/cudf/lists/count_elements.hpp create mode 100644 cpp/include/cudf/lists/detail/combine.hpp create mode 100644 cpp/include/cudf/lists/detail/concatenate.hpp create mode 100644 cpp/include/cudf/lists/detail/contains.hpp create mode 100644 cpp/include/cudf/lists/detail/copying.hpp create mode 100644 cpp/include/cudf/lists/detail/dremel.hpp create mode 100644 cpp/include/cudf/lists/detail/extract.hpp create mode 100644 cpp/include/cudf/lists/detail/gather.cuh create mode 100644 cpp/include/cudf/lists/detail/interleave_columns.hpp create mode 100644 cpp/include/cudf/lists/detail/lists_column_factories.hpp create mode 100644 cpp/include/cudf/lists/detail/reverse.hpp create mode 100644 
cpp/include/cudf/lists/detail/scatter.cuh create mode 100644 cpp/include/cudf/lists/detail/scatter_helper.cuh create mode 100644 cpp/include/cudf/lists/detail/set_operations.hpp create mode 100644 cpp/include/cudf/lists/detail/sorting.hpp create mode 100644 cpp/include/cudf/lists/detail/stream_compaction.hpp create mode 100644 cpp/include/cudf/lists/explode.hpp create mode 100644 cpp/include/cudf/lists/extract.hpp create mode 100644 cpp/include/cudf/lists/filling.hpp create mode 100644 cpp/include/cudf/lists/gather.hpp create mode 100644 cpp/include/cudf/lists/list_device_view.cuh create mode 100644 cpp/include/cudf/lists/list_view.hpp create mode 100644 cpp/include/cudf/lists/lists_column_device_view.cuh create mode 100644 cpp/include/cudf/lists/lists_column_view.hpp create mode 100644 cpp/include/cudf/lists/reverse.hpp create mode 100644 cpp/include/cudf/lists/set_operations.hpp create mode 100644 cpp/include/cudf/lists/sorting.hpp create mode 100644 cpp/include/cudf/lists/stream_compaction.hpp create mode 100644 cpp/include/cudf/merge.hpp create mode 100644 cpp/include/cudf/null_mask.hpp create mode 100644 cpp/include/cudf/partitioning.hpp create mode 100644 cpp/include/cudf/quantiles.hpp create mode 100644 cpp/include/cudf/reduction.hpp create mode 100644 cpp/include/cudf/reduction/detail/histogram.hpp create mode 100644 cpp/include/cudf/reduction/detail/reduction.cuh create mode 100644 cpp/include/cudf/reduction/detail/reduction.hpp create mode 100644 cpp/include/cudf/reduction/detail/reduction_functions.hpp create mode 100644 cpp/include/cudf/reduction/detail/reduction_operators.cuh create mode 100644 cpp/include/cudf/reduction/detail/segmented_reduction.cuh create mode 100644 cpp/include/cudf/reduction/detail/segmented_reduction_functions.hpp create mode 100644 cpp/include/cudf/replace.hpp create mode 100644 cpp/include/cudf/reshape.hpp create mode 100644 cpp/include/cudf/rolling.hpp create mode 100644 cpp/include/cudf/rolling/range_window_bounds.hpp create 
mode 100644 cpp/include/cudf/round.hpp create mode 100644 cpp/include/cudf/scalar/scalar.hpp create mode 100644 cpp/include/cudf/scalar/scalar_device_view.cuh create mode 100644 cpp/include/cudf/scalar/scalar_factories.hpp create mode 100644 cpp/include/cudf/search.hpp create mode 100644 cpp/include/cudf/sorting.hpp create mode 100644 cpp/include/cudf/stream_compaction.hpp create mode 100644 cpp/include/cudf/strings/attributes.hpp create mode 100644 cpp/include/cudf/strings/capitalize.hpp create mode 100644 cpp/include/cudf/strings/case.hpp create mode 100644 cpp/include/cudf/strings/char_types/char_cases.hpp create mode 100644 cpp/include/cudf/strings/char_types/char_types.hpp create mode 100644 cpp/include/cudf/strings/char_types/char_types_enum.hpp create mode 100644 cpp/include/cudf/strings/combine.hpp create mode 100644 cpp/include/cudf/strings/contains.hpp create mode 100644 cpp/include/cudf/strings/convert/convert_booleans.hpp create mode 100644 cpp/include/cudf/strings/convert/convert_datetime.hpp create mode 100644 cpp/include/cudf/strings/convert/convert_durations.hpp create mode 100644 cpp/include/cudf/strings/convert/convert_fixed_point.hpp create mode 100644 cpp/include/cudf/strings/convert/convert_floats.hpp create mode 100644 cpp/include/cudf/strings/convert/convert_integers.hpp create mode 100644 cpp/include/cudf/strings/convert/convert_ipv4.hpp create mode 100644 cpp/include/cudf/strings/convert/convert_lists.hpp create mode 100644 cpp/include/cudf/strings/convert/convert_urls.hpp create mode 100644 cpp/include/cudf/strings/detail/char_tables.hpp create mode 100644 cpp/include/cudf/strings/detail/combine.hpp create mode 100644 cpp/include/cudf/strings/detail/concatenate.hpp create mode 100644 cpp/include/cudf/strings/detail/convert/fixed_point.cuh create mode 100644 cpp/include/cudf/strings/detail/convert/fixed_point_to_string.cuh create mode 100644 cpp/include/cudf/strings/detail/convert/int_to_string.cuh create mode 100644 
cpp/include/cudf/strings/detail/convert/is_float.cuh create mode 100644 cpp/include/cudf/strings/detail/convert/string_to_float.cuh create mode 100644 cpp/include/cudf/strings/detail/convert/string_to_int.cuh create mode 100644 cpp/include/cudf/strings/detail/converters.hpp create mode 100644 cpp/include/cudf/strings/detail/copy_if_else.cuh create mode 100644 cpp/include/cudf/strings/detail/copy_range.cuh create mode 100644 cpp/include/cudf/strings/detail/copying.hpp create mode 100644 cpp/include/cudf/strings/detail/fill.hpp create mode 100644 cpp/include/cudf/strings/detail/gather.cuh create mode 100644 cpp/include/cudf/strings/detail/json.hpp create mode 100644 cpp/include/cudf/strings/detail/merge.cuh create mode 100644 cpp/include/cudf/strings/detail/pad_impl.cuh create mode 100644 cpp/include/cudf/strings/detail/replace.hpp create mode 100644 cpp/include/cudf/strings/detail/scatter.cuh create mode 100644 cpp/include/cudf/strings/detail/split_utils.cuh create mode 100644 cpp/include/cudf/strings/detail/strings_children.cuh create mode 100644 cpp/include/cudf/strings/detail/strings_column_factories.cuh create mode 100644 cpp/include/cudf/strings/detail/strip.cuh create mode 100644 cpp/include/cudf/strings/detail/utf8.hpp create mode 100644 cpp/include/cudf/strings/detail/utilities.cuh create mode 100644 cpp/include/cudf/strings/detail/utilities.hpp create mode 100644 cpp/include/cudf/strings/extract.hpp create mode 100644 cpp/include/cudf/strings/find.hpp create mode 100644 cpp/include/cudf/strings/find_multiple.hpp create mode 100644 cpp/include/cudf/strings/findall.hpp create mode 100644 cpp/include/cudf/strings/json.hpp create mode 100644 cpp/include/cudf/strings/padding.hpp create mode 100644 cpp/include/cudf/strings/regex/flags.hpp create mode 100644 cpp/include/cudf/strings/regex/regex_program.hpp create mode 100644 cpp/include/cudf/strings/repeat_strings.hpp create mode 100644 cpp/include/cudf/strings/replace.hpp create mode 100644 
cpp/include/cudf/strings/replace_re.hpp create mode 100644 cpp/include/cudf/strings/reverse.hpp create mode 100644 cpp/include/cudf/strings/side_type.hpp create mode 100644 cpp/include/cudf/strings/slice.hpp create mode 100644 cpp/include/cudf/strings/split/partition.hpp create mode 100644 cpp/include/cudf/strings/split/split.hpp create mode 100644 cpp/include/cudf/strings/split/split_re.hpp create mode 100644 cpp/include/cudf/strings/string_view.cuh create mode 100644 cpp/include/cudf/strings/string_view.hpp create mode 100644 cpp/include/cudf/strings/strings_column_view.hpp create mode 100644 cpp/include/cudf/strings/strip.hpp create mode 100644 cpp/include/cudf/strings/translate.hpp create mode 100644 cpp/include/cudf/strings/wrap.hpp create mode 100644 cpp/include/cudf/structs/detail/concatenate.hpp create mode 100644 cpp/include/cudf/structs/struct_view.hpp create mode 100644 cpp/include/cudf/structs/structs_column_device_view.cuh create mode 100644 cpp/include/cudf/structs/structs_column_view.hpp create mode 100644 cpp/include/cudf/table/experimental/row_operators.cuh create mode 100644 cpp/include/cudf/table/row_operators.cuh create mode 100644 cpp/include/cudf/table/table.hpp create mode 100644 cpp/include/cudf/table/table_device_view.cuh create mode 100644 cpp/include/cudf/table/table_view.hpp create mode 100644 cpp/include/cudf/tdigest/tdigest_column_view.hpp create mode 100644 cpp/include/cudf/timezone.hpp create mode 100644 cpp/include/cudf/transform.hpp create mode 100644 cpp/include/cudf/transpose.hpp create mode 100644 cpp/include/cudf/types.hpp create mode 100644 cpp/include/cudf/unary.hpp create mode 100644 cpp/include/cudf/utilities/bit.hpp create mode 100644 cpp/include/cudf/utilities/default_stream.hpp create mode 100644 cpp/include/cudf/utilities/error.hpp create mode 100644 cpp/include/cudf/utilities/logger.hpp create mode 100644 cpp/include/cudf/utilities/span.hpp create mode 100644 cpp/include/cudf/utilities/traits.cuh create mode 100644 
cpp/include/cudf/utilities/traits.hpp create mode 100644 cpp/include/cudf/utilities/type_checks.hpp create mode 100644 cpp/include/cudf/utilities/type_dispatcher.hpp create mode 100644 cpp/include/cudf/wrappers/dictionary.hpp create mode 100644 cpp/include/cudf/wrappers/durations.hpp create mode 100644 cpp/include/cudf/wrappers/timestamps.hpp create mode 100644 cpp/include/cudf_test/base_fixture.hpp create mode 100644 cpp/include/cudf_test/column_utilities.hpp create mode 100644 cpp/include/cudf_test/column_wrapper.hpp create mode 100644 cpp/include/cudf_test/cudf_gtest.hpp create mode 100644 cpp/include/cudf_test/cxxopts.hpp create mode 100644 cpp/include/cudf_test/default_stream.hpp create mode 100644 cpp/include/cudf_test/detail/column_utilities.hpp create mode 100644 cpp/include/cudf_test/file_utilities.hpp create mode 100644 cpp/include/cudf_test/io_metadata_utilities.hpp create mode 100644 cpp/include/cudf_test/iterator_utilities.hpp create mode 100644 cpp/include/cudf_test/print_utilities.cuh create mode 100644 cpp/include/cudf_test/stream_checking_resource_adaptor.hpp create mode 100644 cpp/include/cudf_test/table_utilities.hpp create mode 100644 cpp/include/cudf_test/tdigest_utilities.cuh create mode 100644 cpp/include/cudf_test/timestamp_utilities.cuh create mode 100644 cpp/include/cudf_test/type_list_utilities.hpp create mode 100644 cpp/include/cudf_test/type_lists.hpp create mode 100644 cpp/include/doxygen_groups.h create mode 100644 cpp/include/nvtext/bpe_tokenize.hpp create mode 100644 cpp/include/nvtext/detail/generate_ngrams.hpp create mode 100644 cpp/include/nvtext/detail/load_hash_file.hpp create mode 100644 cpp/include/nvtext/detail/tokenize.hpp create mode 100644 cpp/include/nvtext/edit_distance.hpp create mode 100644 cpp/include/nvtext/generate_ngrams.hpp create mode 100644 cpp/include/nvtext/jaccard.hpp create mode 100644 cpp/include/nvtext/minhash.hpp create mode 100644 cpp/include/nvtext/ngrams_tokenize.hpp create mode 100644 
cpp/include/nvtext/normalize.hpp create mode 100644 cpp/include/nvtext/replace.hpp create mode 100644 cpp/include/nvtext/stemmer.hpp create mode 100644 cpp/include/nvtext/subword_tokenize.hpp create mode 100644 cpp/include/nvtext/tokenize.hpp create mode 100644 cpp/libcudf_kafka/CMakeLists.txt create mode 100644 cpp/libcudf_kafka/cmake/thirdparty/get_cudf.cmake create mode 100644 cpp/libcudf_kafka/cmake/thirdparty/get_rdkafka.cmake create mode 100644 cpp/libcudf_kafka/include/cudf_kafka/kafka_callback.hpp create mode 100644 cpp/libcudf_kafka/include/cudf_kafka/kafka_consumer.hpp create mode 100644 cpp/libcudf_kafka/src/kafka_callback.cpp create mode 100644 cpp/libcudf_kafka/src/kafka_consumer.cpp create mode 100644 cpp/libcudf_kafka/tests/CMakeLists.txt create mode 100644 cpp/libcudf_kafka/tests/kafka_consumer_tests.cpp create mode 100644 cpp/scripts/gdb-pretty-printers.py create mode 100644 cpp/scripts/load-pretty-printers.in create mode 100644 cpp/scripts/run-clang-tidy.py create mode 100755 cpp/scripts/run-cmake-format.sh create mode 100755 cpp/scripts/sort_ninja_log.py create mode 100644 cpp/src/aggregation/aggregation.cpp create mode 100644 cpp/src/aggregation/aggregation.cu create mode 100644 cpp/src/aggregation/result_cache.cpp create mode 100644 cpp/src/ast/expression_parser.cpp create mode 100644 cpp/src/ast/expressions.cpp create mode 100644 cpp/src/binaryop/binaryop.cpp create mode 100644 cpp/src/binaryop/compiled/ATan2.cu create mode 100644 cpp/src/binaryop/compiled/Add.cu create mode 100644 cpp/src/binaryop/compiled/BitwiseAnd.cu create mode 100644 cpp/src/binaryop/compiled/BitwiseOr.cu create mode 100644 cpp/src/binaryop/compiled/BitwiseXor.cu create mode 100644 cpp/src/binaryop/compiled/Div.cu create mode 100644 cpp/src/binaryop/compiled/FloorDiv.cu create mode 100644 cpp/src/binaryop/compiled/Greater.cu create mode 100644 cpp/src/binaryop/compiled/GreaterEqual.cu create mode 100644 cpp/src/binaryop/compiled/IntPow.cu create mode 100644 
cpp/src/binaryop/compiled/Less.cu create mode 100644 cpp/src/binaryop/compiled/LessEqual.cu create mode 100644 cpp/src/binaryop/compiled/LogBase.cu create mode 100644 cpp/src/binaryop/compiled/LogicalAnd.cu create mode 100644 cpp/src/binaryop/compiled/LogicalOr.cu create mode 100644 cpp/src/binaryop/compiled/Mod.cu create mode 100644 cpp/src/binaryop/compiled/Mul.cu create mode 100644 cpp/src/binaryop/compiled/NullEquals.cu create mode 100644 cpp/src/binaryop/compiled/NullLogicalAnd.cu create mode 100644 cpp/src/binaryop/compiled/NullLogicalOr.cu create mode 100644 cpp/src/binaryop/compiled/NullMax.cu create mode 100644 cpp/src/binaryop/compiled/NullMin.cu create mode 100644 cpp/src/binaryop/compiled/PMod.cu create mode 100644 cpp/src/binaryop/compiled/Pow.cu create mode 100644 cpp/src/binaryop/compiled/PyMod.cu create mode 100644 cpp/src/binaryop/compiled/ShiftLeft.cu create mode 100644 cpp/src/binaryop/compiled/ShiftRight.cu create mode 100644 cpp/src/binaryop/compiled/ShiftRightUnsigned.cu create mode 100644 cpp/src/binaryop/compiled/Sub.cu create mode 100644 cpp/src/binaryop/compiled/TrueDiv.cu create mode 100644 cpp/src/binaryop/compiled/binary_ops.cu create mode 100644 cpp/src/binaryop/compiled/binary_ops.cuh create mode 100644 cpp/src/binaryop/compiled/binary_ops.hpp create mode 100644 cpp/src/binaryop/compiled/equality_ops.cu create mode 100644 cpp/src/binaryop/compiled/operation.cuh create mode 100644 cpp/src/binaryop/compiled/struct_binary_ops.cuh create mode 100644 cpp/src/binaryop/compiled/util.cpp create mode 100644 cpp/src/binaryop/jit/kernel.cu create mode 100644 cpp/src/binaryop/jit/operation-udf.hpp create mode 100644 cpp/src/bitmask/is_element_valid.cpp create mode 100644 cpp/src/bitmask/null_mask.cu create mode 100644 cpp/src/column/column.cu create mode 100644 cpp/src/column/column_device_view.cu create mode 100644 cpp/src/column/column_factories.cpp create mode 100644 cpp/src/column/column_factories.cu create mode 100644 
cpp/src/column/column_view.cpp create mode 100644 cpp/src/copying/concatenate.cu create mode 100644 cpp/src/copying/contiguous_split.cu create mode 100644 cpp/src/copying/copy.cpp create mode 100644 cpp/src/copying/copy.cu create mode 100644 cpp/src/copying/copy_range.cu create mode 100644 cpp/src/copying/gather.cu create mode 100644 cpp/src/copying/get_element.cu create mode 100644 cpp/src/copying/pack.cpp create mode 100644 cpp/src/copying/purge_nonempty_nulls.cu create mode 100644 cpp/src/copying/reverse.cu create mode 100644 cpp/src/copying/sample.cu create mode 100644 cpp/src/copying/scatter.cu create mode 100644 cpp/src/copying/segmented_shift.cu create mode 100644 cpp/src/copying/shift.cu create mode 100644 cpp/src/copying/slice.cu create mode 100644 cpp/src/copying/split.cpp create mode 100644 cpp/src/datetime/datetime_ops.cu create mode 100644 cpp/src/datetime/timezone.cpp create mode 100644 cpp/src/dictionary/add_keys.cu create mode 100644 cpp/src/dictionary/decode.cu create mode 100644 cpp/src/dictionary/detail/concatenate.cu create mode 100644 cpp/src/dictionary/detail/merge.cu create mode 100644 cpp/src/dictionary/dictionary_column_view.cpp create mode 100644 cpp/src/dictionary/dictionary_factories.cu create mode 100644 cpp/src/dictionary/encode.cu create mode 100644 cpp/src/dictionary/remove_keys.cu create mode 100644 cpp/src/dictionary/replace.cu create mode 100644 cpp/src/dictionary/search.cu create mode 100644 cpp/src/dictionary/set_keys.cu create mode 100644 cpp/src/filling/calendrical_month_sequence.cu create mode 100644 cpp/src/filling/fill.cu create mode 100644 cpp/src/filling/repeat.cu create mode 100644 cpp/src/filling/sequence.cu create mode 100644 cpp/src/groupby/common/utils.hpp create mode 100644 cpp/src/groupby/groupby.cu create mode 100644 cpp/src/groupby/hash/groupby.cu create mode 100644 cpp/src/groupby/hash/groupby_kernels.cuh create mode 100644 cpp/src/groupby/hash/multi_pass_kernels.cuh create mode 100644 
cpp/src/groupby/sort/aggregate.cpp create mode 100644 cpp/src/groupby/sort/common_utils.cuh create mode 100644 cpp/src/groupby/sort/functors.hpp create mode 100644 cpp/src/groupby/sort/group_argmax.cu create mode 100644 cpp/src/groupby/sort/group_argmin.cu create mode 100644 cpp/src/groupby/sort/group_collect.cu create mode 100644 cpp/src/groupby/sort/group_correlation.cu create mode 100644 cpp/src/groupby/sort/group_count.cu create mode 100644 cpp/src/groupby/sort/group_count_scan.cu create mode 100644 cpp/src/groupby/sort/group_histogram.cu create mode 100644 cpp/src/groupby/sort/group_m2.cu create mode 100644 cpp/src/groupby/sort/group_max.cu create mode 100644 cpp/src/groupby/sort/group_max_scan.cu create mode 100644 cpp/src/groupby/sort/group_merge_lists.cu create mode 100644 cpp/src/groupby/sort/group_merge_m2.cu create mode 100644 cpp/src/groupby/sort/group_min.cu create mode 100644 cpp/src/groupby/sort/group_min_scan.cu create mode 100644 cpp/src/groupby/sort/group_nth_element.cu create mode 100644 cpp/src/groupby/sort/group_nunique.cu create mode 100644 cpp/src/groupby/sort/group_product.cu create mode 100644 cpp/src/groupby/sort/group_quantiles.cu create mode 100644 cpp/src/groupby/sort/group_rank_scan.cu create mode 100644 cpp/src/groupby/sort/group_reductions.hpp create mode 100644 cpp/src/groupby/sort/group_replace_nulls.cu create mode 100644 cpp/src/groupby/sort/group_scan.hpp create mode 100644 cpp/src/groupby/sort/group_scan_util.cuh create mode 100644 cpp/src/groupby/sort/group_single_pass_reduction_util.cuh create mode 100644 cpp/src/groupby/sort/group_std.cu create mode 100644 cpp/src/groupby/sort/group_sum.cu create mode 100644 cpp/src/groupby/sort/group_sum_scan.cu create mode 100644 cpp/src/groupby/sort/scan.cpp create mode 100644 cpp/src/groupby/sort/sort_helper.cu create mode 100644 cpp/src/hash/concurrent_unordered_map.cuh create mode 100644 cpp/src/hash/hashing.cu create mode 100644 cpp/src/hash/managed.cuh create mode 100644 
cpp/src/hash/md5_hash.cu create mode 100644 cpp/src/hash/murmurhash3_x64_128.cu create mode 100644 cpp/src/hash/murmurhash3_x86_32.cu create mode 100644 cpp/src/hash/spark_murmurhash3_x86_32.cu create mode 100644 cpp/src/hash/unordered_multiset.cuh create mode 100644 cpp/src/hash/xxhash_64.cu create mode 100644 cpp/src/interop/detail/arrow_allocator.cpp create mode 100644 cpp/src/interop/detail/arrow_allocator.hpp create mode 100644 cpp/src/interop/dlpack.cpp create mode 100644 cpp/src/interop/from_arrow.cu create mode 100644 cpp/src/interop/to_arrow.cu create mode 100644 cpp/src/io/avro/avro.cpp create mode 100644 cpp/src/io/avro/avro.hpp create mode 100644 cpp/src/io/avro/avro_common.hpp create mode 100644 cpp/src/io/avro/avro_gpu.cu create mode 100644 cpp/src/io/avro/avro_gpu.hpp create mode 100644 cpp/src/io/avro/reader_impl.cu create mode 100644 cpp/src/io/comp/brotli_dict.cpp create mode 100644 cpp/src/io/comp/brotli_dict.hpp create mode 100644 cpp/src/io/comp/brotli_tables.hpp create mode 100644 cpp/src/io/comp/cpu_unbz2.cpp create mode 100644 cpp/src/io/comp/debrotli.cu create mode 100644 cpp/src/io/comp/gpuinflate.cu create mode 100644 cpp/src/io/comp/gpuinflate.hpp create mode 100644 cpp/src/io/comp/io_uncomp.hpp create mode 100644 cpp/src/io/comp/nvcomp_adapter.cpp create mode 100644 cpp/src/io/comp/nvcomp_adapter.cu create mode 100644 cpp/src/io/comp/nvcomp_adapter.cuh create mode 100644 cpp/src/io/comp/nvcomp_adapter.hpp create mode 100644 cpp/src/io/comp/snap.cu create mode 100644 cpp/src/io/comp/statistics.cu create mode 100644 cpp/src/io/comp/unbz2.hpp create mode 100644 cpp/src/io/comp/uncomp.cpp create mode 100644 cpp/src/io/comp/unsnap.cu create mode 100644 cpp/src/io/csv/csv_common.hpp create mode 100644 cpp/src/io/csv/csv_gpu.cu create mode 100644 cpp/src/io/csv/csv_gpu.hpp create mode 100644 cpp/src/io/csv/datetime.cuh create mode 100644 cpp/src/io/csv/durations.cu create mode 100644 cpp/src/io/csv/durations.hpp create mode 100644 
cpp/src/io/csv/reader_impl.cu create mode 100644 cpp/src/io/csv/writer_impl.cu create mode 100644 cpp/src/io/fst/agent_dfa.cuh create mode 100644 cpp/src/io/fst/device_dfa.cuh create mode 100644 cpp/src/io/fst/dispatch_dfa.cuh create mode 100644 cpp/src/io/fst/in_reg_array.cuh create mode 100644 cpp/src/io/fst/logical_stack.cuh create mode 100644 cpp/src/io/fst/lookup_tables.cuh create mode 100644 cpp/src/io/functions.cpp create mode 100644 cpp/src/io/json/byte_range_info.cu create mode 100644 cpp/src/io/json/json_column.cu create mode 100644 cpp/src/io/json/json_tree.cu create mode 100644 cpp/src/io/json/legacy/json_gpu.cu create mode 100644 cpp/src/io/json/legacy/json_gpu.hpp create mode 100644 cpp/src/io/json/legacy/read_json.hpp create mode 100644 cpp/src/io/json/legacy/reader_impl.cu create mode 100644 cpp/src/io/json/nested_json.hpp create mode 100644 cpp/src/io/json/nested_json_gpu.cu create mode 100644 cpp/src/io/json/read_json.cu create mode 100644 cpp/src/io/json/read_json.hpp create mode 100644 cpp/src/io/json/write_json.cu create mode 100644 cpp/src/io/orc/aggregate_orc_metadata.cpp create mode 100644 cpp/src/io/orc/aggregate_orc_metadata.hpp create mode 100644 cpp/src/io/orc/dict_enc.cu create mode 100644 cpp/src/io/orc/orc.cpp create mode 100644 cpp/src/io/orc/orc.hpp create mode 100644 cpp/src/io/orc/orc_field_reader.hpp create mode 100644 cpp/src/io/orc/orc_field_writer.hpp create mode 100644 cpp/src/io/orc/orc_gpu.hpp create mode 100644 cpp/src/io/orc/reader_impl.cu create mode 100644 cpp/src/io/orc/reader_impl.hpp create mode 100644 cpp/src/io/orc/stats_enc.cu create mode 100644 cpp/src/io/orc/stripe_data.cu create mode 100644 cpp/src/io/orc/stripe_enc.cu create mode 100644 cpp/src/io/orc/stripe_init.cu create mode 100644 cpp/src/io/orc/writer_impl.cu create mode 100644 cpp/src/io/orc/writer_impl.hpp create mode 100644 cpp/src/io/parquet/chunk_dict.cu create mode 100644 cpp/src/io/parquet/compact_protocol_reader.cpp create mode 100644 
cpp/src/io/parquet/compact_protocol_reader.hpp create mode 100644 cpp/src/io/parquet/compact_protocol_writer.cpp create mode 100644 cpp/src/io/parquet/compact_protocol_writer.hpp create mode 100644 cpp/src/io/parquet/decode_preprocess.cu create mode 100644 cpp/src/io/parquet/delta_binary.cuh create mode 100644 cpp/src/io/parquet/page_data.cu create mode 100644 cpp/src/io/parquet/page_decode.cuh create mode 100644 cpp/src/io/parquet/page_delta_decode.cu create mode 100644 cpp/src/io/parquet/page_enc.cu create mode 100644 cpp/src/io/parquet/page_hdr.cu create mode 100644 cpp/src/io/parquet/page_string_decode.cu create mode 100644 cpp/src/io/parquet/page_string_utils.cuh create mode 100644 cpp/src/io/parquet/parquet.hpp create mode 100644 cpp/src/io/parquet/parquet_common.hpp create mode 100644 cpp/src/io/parquet/parquet_gpu.cuh create mode 100644 cpp/src/io/parquet/parquet_gpu.hpp create mode 100644 cpp/src/io/parquet/predicate_pushdown.cpp create mode 100644 cpp/src/io/parquet/reader.cpp create mode 100644 cpp/src/io/parquet/reader_impl.cpp create mode 100644 cpp/src/io/parquet/reader_impl.hpp create mode 100644 cpp/src/io/parquet/reader_impl_helpers.cpp create mode 100644 cpp/src/io/parquet/reader_impl_helpers.hpp create mode 100644 cpp/src/io/parquet/reader_impl_preprocess.cu create mode 100644 cpp/src/io/parquet/rle_stream.cuh create mode 100644 cpp/src/io/parquet/writer_impl.cu create mode 100644 cpp/src/io/parquet/writer_impl.hpp create mode 100644 cpp/src/io/statistics/byte_array_view.cuh create mode 100644 cpp/src/io/statistics/column_statistics.cuh create mode 100644 cpp/src/io/statistics/conversion_type_select.cuh create mode 100644 cpp/src/io/statistics/orc_column_statistics.cu create mode 100644 cpp/src/io/statistics/parquet_column_statistics.cu create mode 100644 cpp/src/io/statistics/statistics.cuh create mode 100644 cpp/src/io/statistics/statistics_type_identification.cuh create mode 100644 cpp/src/io/statistics/temp_storage_wrapper.cuh create mode 
100644 cpp/src/io/statistics/typed_statistics_chunk.cuh create mode 100644 cpp/src/io/text/bgzip_data_chunk_source.cu create mode 100644 cpp/src/io/text/bgzip_utils.cpp create mode 100644 cpp/src/io/text/byte_range_info.cpp create mode 100644 cpp/src/io/text/data_chunk_source_factories.cpp create mode 100644 cpp/src/io/text/device_data_chunks.hpp create mode 100644 cpp/src/io/text/multibyte_split.cu create mode 100644 cpp/src/io/utilities/arrow_io_source.cpp create mode 100644 cpp/src/io/utilities/block_utils.cuh create mode 100644 cpp/src/io/utilities/column_buffer.cpp create mode 100644 cpp/src/io/utilities/column_buffer.hpp create mode 100644 cpp/src/io/utilities/column_type_histogram.hpp create mode 100644 cpp/src/io/utilities/column_utils.cuh create mode 100644 cpp/src/io/utilities/config_utils.cpp create mode 100644 cpp/src/io/utilities/config_utils.hpp create mode 100644 cpp/src/io/utilities/data_casting.cu create mode 100644 cpp/src/io/utilities/data_sink.cpp create mode 100644 cpp/src/io/utilities/datasource.cpp create mode 100644 cpp/src/io/utilities/file_io_utilities.cpp create mode 100644 cpp/src/io/utilities/file_io_utilities.hpp create mode 100644 cpp/src/io/utilities/hostdevice_span.hpp create mode 100644 cpp/src/io/utilities/hostdevice_vector.hpp create mode 100644 cpp/src/io/utilities/output_builder.cuh create mode 100644 cpp/src/io/utilities/parsing_utils.cu create mode 100644 cpp/src/io/utilities/parsing_utils.cuh create mode 100644 cpp/src/io/utilities/row_selection.cpp create mode 100644 cpp/src/io/utilities/row_selection.hpp create mode 100644 cpp/src/io/utilities/string_parsing.hpp create mode 100644 cpp/src/io/utilities/thread_pool.hpp create mode 100644 cpp/src/io/utilities/time_utils.cuh create mode 100644 cpp/src/io/utilities/trie.cu create mode 100644 cpp/src/io/utilities/trie.cuh create mode 100644 cpp/src/io/utilities/type_inference.cu create mode 100644 cpp/src/jit/cache.cpp create mode 100644 cpp/src/jit/cache.hpp create mode 100644 
cpp/src/jit/parser.cpp create mode 100644 cpp/src/jit/parser.hpp create mode 100644 cpp/src/jit/util.cpp create mode 100644 cpp/src/jit/util.hpp create mode 100644 cpp/src/join/conditional_join.cu create mode 100644 cpp/src/join/conditional_join.hpp create mode 100644 cpp/src/join/conditional_join_kernels.cuh create mode 100644 cpp/src/join/cross_join.cu create mode 100644 cpp/src/join/hash_join.cu create mode 100644 cpp/src/join/join.cu create mode 100644 cpp/src/join/join_common_utils.cuh create mode 100644 cpp/src/join/join_common_utils.hpp create mode 100644 cpp/src/join/join_utils.cu create mode 100644 cpp/src/join/mixed_join.cu create mode 100644 cpp/src/join/mixed_join_common_utils.cuh create mode 100644 cpp/src/join/mixed_join_kernel.cu create mode 100644 cpp/src/join/mixed_join_kernel.cuh create mode 100644 cpp/src/join/mixed_join_kernel_nulls.cu create mode 100644 cpp/src/join/mixed_join_kernels.cuh create mode 100644 cpp/src/join/mixed_join_kernels_semi.cu create mode 100644 cpp/src/join/mixed_join_kernels_semi.cuh create mode 100644 cpp/src/join/mixed_join_semi.cu create mode 100644 cpp/src/join/mixed_join_size_kernel.cu create mode 100644 cpp/src/join/mixed_join_size_kernel.cuh create mode 100644 cpp/src/join/mixed_join_size_kernel_nulls.cu create mode 100644 cpp/src/join/mixed_join_size_kernels_semi.cu create mode 100644 cpp/src/join/semi_join.cu create mode 100644 cpp/src/labeling/label_bins.cu create mode 100644 cpp/src/lists/combine/concatenate_list_elements.cu create mode 100644 cpp/src/lists/combine/concatenate_rows.cu create mode 100644 cpp/src/lists/contains.cu create mode 100644 cpp/src/lists/copying/concatenate.cu create mode 100644 cpp/src/lists/copying/copying.cu create mode 100644 cpp/src/lists/copying/gather.cu create mode 100644 cpp/src/lists/copying/scatter_helper.cu create mode 100644 cpp/src/lists/copying/segmented_gather.cu create mode 100644 cpp/src/lists/count_elements.cu create mode 100644 cpp/src/lists/dremel.cu create mode 
100644 cpp/src/lists/explode.cu create mode 100644 cpp/src/lists/extract.cu create mode 100644 cpp/src/lists/interleave_columns.cu create mode 100644 cpp/src/lists/lists_column_factories.cu create mode 100644 cpp/src/lists/lists_column_view.cu create mode 100644 cpp/src/lists/reverse.cu create mode 100644 cpp/src/lists/segmented_sort.cu create mode 100644 cpp/src/lists/sequences.cu create mode 100644 cpp/src/lists/set_operations.cu create mode 100644 cpp/src/lists/stream_compaction/apply_boolean_mask.cu create mode 100644 cpp/src/lists/stream_compaction/distinct.cu create mode 100644 cpp/src/lists/utilities.cu create mode 100644 cpp/src/lists/utilities.hpp create mode 100644 cpp/src/merge/merge.cu create mode 100644 cpp/src/partitioning/partitioning.cu create mode 100644 cpp/src/partitioning/round_robin.cu create mode 100644 cpp/src/quantiles/quantile.cu create mode 100644 cpp/src/quantiles/quantiles.cu create mode 100644 cpp/src/quantiles/quantiles_util.hpp create mode 100644 cpp/src/quantiles/tdigest/tdigest.cu create mode 100644 cpp/src/quantiles/tdigest/tdigest_aggregation.cu create mode 100644 cpp/src/quantiles/tdigest/tdigest_column_view.cpp create mode 100644 cpp/src/quantiles/tdigest/tdigest_util.cuh create mode 100644 cpp/src/reductions/all.cu create mode 100644 cpp/src/reductions/any.cu create mode 100644 cpp/src/reductions/collect_ops.cu create mode 100644 cpp/src/reductions/compound.cuh create mode 100644 cpp/src/reductions/histogram.cu create mode 100644 cpp/src/reductions/max.cu create mode 100644 cpp/src/reductions/mean.cu create mode 100644 cpp/src/reductions/min.cu create mode 100644 cpp/src/reductions/minmax.cu create mode 100644 cpp/src/reductions/nested_type_minmax_util.cuh create mode 100644 cpp/src/reductions/nth_element.cu create mode 100644 cpp/src/reductions/product.cu create mode 100644 cpp/src/reductions/reductions.cpp create mode 100644 cpp/src/reductions/scan/rank_scan.cu create mode 100644 cpp/src/reductions/scan/scan.cpp create mode 
100644 cpp/src/reductions/scan/scan.cuh create mode 100644 cpp/src/reductions/scan/scan_exclusive.cu create mode 100644 cpp/src/reductions/scan/scan_inclusive.cu create mode 100644 cpp/src/reductions/segmented/all.cu create mode 100644 cpp/src/reductions/segmented/any.cu create mode 100644 cpp/src/reductions/segmented/compound.cuh create mode 100644 cpp/src/reductions/segmented/counts.cu create mode 100644 cpp/src/reductions/segmented/counts.hpp create mode 100644 cpp/src/reductions/segmented/max.cu create mode 100644 cpp/src/reductions/segmented/mean.cu create mode 100644 cpp/src/reductions/segmented/min.cu create mode 100644 cpp/src/reductions/segmented/nunique.cu create mode 100644 cpp/src/reductions/segmented/product.cu create mode 100644 cpp/src/reductions/segmented/reductions.cpp create mode 100644 cpp/src/reductions/segmented/simple.cuh create mode 100644 cpp/src/reductions/segmented/std.cu create mode 100644 cpp/src/reductions/segmented/sum.cu create mode 100644 cpp/src/reductions/segmented/sum_of_squares.cu create mode 100644 cpp/src/reductions/segmented/update_validity.cu create mode 100644 cpp/src/reductions/segmented/update_validity.hpp create mode 100644 cpp/src/reductions/segmented/var.cu create mode 100644 cpp/src/reductions/simple.cuh create mode 100644 cpp/src/reductions/std.cu create mode 100644 cpp/src/reductions/sum.cu create mode 100644 cpp/src/reductions/sum_of_squares.cu create mode 100644 cpp/src/reductions/var.cu create mode 100644 cpp/src/replace/clamp.cu create mode 100644 cpp/src/replace/nans.cu create mode 100644 cpp/src/replace/nulls.cu create mode 100644 cpp/src/replace/replace.cu create mode 100644 cpp/src/reshape/byte_cast.cu create mode 100644 cpp/src/reshape/interleave_columns.cu create mode 100644 cpp/src/reshape/tile.cu create mode 100644 cpp/src/rolling/detail/lead_lag_nested.cuh create mode 100644 cpp/src/rolling/detail/nth_element.cuh create mode 100644 cpp/src/rolling/detail/optimized_unbounded_window.cpp create mode 100644 
cpp/src/rolling/detail/optimized_unbounded_window.hpp create mode 100644 cpp/src/rolling/detail/range_comparator_utils.cuh create mode 100644 cpp/src/rolling/detail/range_window_bounds.hpp create mode 100644 cpp/src/rolling/detail/rolling.cuh create mode 100644 cpp/src/rolling/detail/rolling.hpp create mode 100644 cpp/src/rolling/detail/rolling_collect_list.cu create mode 100644 cpp/src/rolling/detail/rolling_collect_list.cuh create mode 100644 cpp/src/rolling/detail/rolling_fixed_window.cu create mode 100644 cpp/src/rolling/detail/rolling_jit.hpp create mode 100644 cpp/src/rolling/detail/rolling_variable_window.cu create mode 100644 cpp/src/rolling/grouped_rolling.cu create mode 100644 cpp/src/rolling/jit/kernel.cu create mode 100644 cpp/src/rolling/jit/operation-udf.hpp create mode 100644 cpp/src/rolling/jit/operation.hpp create mode 100644 cpp/src/rolling/range_window_bounds.cpp create mode 100644 cpp/src/rolling/rolling.cu create mode 100644 cpp/src/round/round.cu create mode 100644 cpp/src/scalar/scalar.cpp create mode 100644 cpp/src/scalar/scalar_factories.cpp create mode 100644 cpp/src/search/contains_column.cu create mode 100644 cpp/src/search/contains_scalar.cu create mode 100644 cpp/src/search/contains_table.cu create mode 100644 cpp/src/search/search_ordered.cu create mode 100644 cpp/src/sort/is_sorted.cu create mode 100644 cpp/src/sort/rank.cu create mode 100644 cpp/src/sort/segmented_sort.cu create mode 100644 cpp/src/sort/segmented_sort_impl.cuh create mode 100644 cpp/src/sort/sort.cu create mode 100644 cpp/src/sort/sort_column.cu create mode 100644 cpp/src/sort/sort_column_impl.cuh create mode 100644 cpp/src/sort/sort_impl.cuh create mode 100644 cpp/src/sort/stable_segmented_sort.cu create mode 100644 cpp/src/sort/stable_sort.cu create mode 100644 cpp/src/sort/stable_sort_column.cu create mode 100644 cpp/src/stream_compaction/apply_boolean_mask.cu create mode 100644 cpp/src/stream_compaction/distinct.cu create mode 100644 
cpp/src/stream_compaction/distinct_count.cu create mode 100644 cpp/src/stream_compaction/distinct_helpers.cu create mode 100644 cpp/src/stream_compaction/distinct_helpers.hpp create mode 100644 cpp/src/stream_compaction/drop_nans.cu create mode 100644 cpp/src/stream_compaction/drop_nulls.cu create mode 100644 cpp/src/stream_compaction/stable_distinct.cu create mode 100644 cpp/src/stream_compaction/stream_compaction_common.cuh create mode 100644 cpp/src/stream_compaction/stream_compaction_common.hpp create mode 100644 cpp/src/stream_compaction/unique.cu create mode 100644 cpp/src/stream_compaction/unique_count.cu create mode 100644 cpp/src/stream_compaction/unique_count_column.cu create mode 100644 cpp/src/strings/attributes.cu create mode 100644 cpp/src/strings/capitalize.cu create mode 100644 cpp/src/strings/case.cu create mode 100644 cpp/src/strings/char_types/char_cases.cu create mode 100644 cpp/src/strings/char_types/char_cases.h create mode 100644 cpp/src/strings/char_types/char_flags.h create mode 100644 cpp/src/strings/char_types/char_types.cu create mode 100644 cpp/src/strings/combine/concatenate.cu create mode 100644 cpp/src/strings/combine/join.cu create mode 100644 cpp/src/strings/combine/join_list_elements.cu create mode 100644 cpp/src/strings/contains.cu create mode 100644 cpp/src/strings/convert/convert_booleans.cu create mode 100644 cpp/src/strings/convert/convert_datetime.cu create mode 100644 cpp/src/strings/convert/convert_durations.cu create mode 100644 cpp/src/strings/convert/convert_fixed_point.cu create mode 100644 cpp/src/strings/convert/convert_floats.cu create mode 100644 cpp/src/strings/convert/convert_hex.cu create mode 100644 cpp/src/strings/convert/convert_integers.cu create mode 100644 cpp/src/strings/convert/convert_ipv4.cu create mode 100644 cpp/src/strings/convert/convert_lists.cu create mode 100644 cpp/src/strings/convert/convert_urls.cu create mode 100644 cpp/src/strings/copying/concatenate.cu create mode 100644 
cpp/src/strings/copying/copying.cu create mode 100644 cpp/src/strings/copying/shift.cu create mode 100644 cpp/src/strings/count_matches.cu create mode 100644 cpp/src/strings/count_matches.hpp create mode 100644 cpp/src/strings/extract/extract.cu create mode 100644 cpp/src/strings/extract/extract_all.cu create mode 100644 cpp/src/strings/filling/fill.cu create mode 100644 cpp/src/strings/filter_chars.cu create mode 100644 cpp/src/strings/json/json_path.cu create mode 100644 cpp/src/strings/like.cu create mode 100644 cpp/src/strings/padding.cu create mode 100644 cpp/src/strings/regex/regcomp.cpp create mode 100644 cpp/src/strings/regex/regcomp.h create mode 100644 cpp/src/strings/regex/regex.cuh create mode 100644 cpp/src/strings/regex/regex.inl create mode 100644 cpp/src/strings/regex/regex_program.cpp create mode 100644 cpp/src/strings/regex/regex_program_impl.h create mode 100644 cpp/src/strings/regex/regexec.cpp create mode 100644 cpp/src/strings/regex/utilities.cuh create mode 100644 cpp/src/strings/repeat_strings.cu create mode 100644 cpp/src/strings/replace/backref_re.cu create mode 100644 cpp/src/strings/replace/backref_re.cuh create mode 100644 cpp/src/strings/replace/multi.cu create mode 100644 cpp/src/strings/replace/multi_re.cu create mode 100644 cpp/src/strings/replace/replace.cu create mode 100644 cpp/src/strings/replace/replace_re.cu create mode 100644 cpp/src/strings/reverse.cu create mode 100644 cpp/src/strings/search/find.cu create mode 100644 cpp/src/strings/search/find_multiple.cu create mode 100644 cpp/src/strings/search/findall.cu create mode 100644 cpp/src/strings/slice.cu create mode 100644 cpp/src/strings/split/partition.cu create mode 100644 cpp/src/strings/split/split.cu create mode 100644 cpp/src/strings/split/split.cuh create mode 100644 cpp/src/strings/split/split_re.cu create mode 100644 cpp/src/strings/split/split_record.cu create mode 100644 cpp/src/strings/strings_column_factories.cu create mode 100644 
cpp/src/strings/strings_column_view.cpp create mode 100644 cpp/src/strings/strings_scalar_factories.cpp create mode 100644 cpp/src/strings/strip.cu create mode 100644 cpp/src/strings/translate.cu create mode 100644 cpp/src/strings/utilities.cu create mode 100644 cpp/src/strings/wrap.cu create mode 100644 cpp/src/structs/copying/concatenate.cu create mode 100644 cpp/src/structs/structs_column_factories.cu create mode 100644 cpp/src/structs/structs_column_view.cpp create mode 100644 cpp/src/structs/utilities.cpp create mode 100644 cpp/src/table/row_operators.cu create mode 100644 cpp/src/table/table.cpp create mode 100644 cpp/src/table/table_device_view.cu create mode 100644 cpp/src/table/table_view.cpp create mode 100644 cpp/src/text/detokenize.cu create mode 100644 cpp/src/text/edit_distance.cu create mode 100644 cpp/src/text/generate_ngrams.cu create mode 100644 cpp/src/text/jaccard.cu create mode 100644 cpp/src/text/minhash.cu create mode 100644 cpp/src/text/ngrams_tokenize.cu create mode 100644 cpp/src/text/normalize.cu create mode 100644 cpp/src/text/replace.cu create mode 100644 cpp/src/text/stemmer.cu create mode 100644 cpp/src/text/subword/bpe_tokenizer.cu create mode 100644 cpp/src/text/subword/bpe_tokenizer.cuh create mode 100644 cpp/src/text/subword/data_normalizer.cu create mode 100644 cpp/src/text/subword/detail/codepoint_metadata.ah create mode 100644 cpp/src/text/subword/detail/cp_data.h create mode 100644 cpp/src/text/subword/detail/data_normalizer.hpp create mode 100644 cpp/src/text/subword/detail/hash_utils.cuh create mode 100644 cpp/src/text/subword/detail/tokenizer_utils.cuh create mode 100644 cpp/src/text/subword/detail/wordpiece_tokenizer.hpp create mode 100644 cpp/src/text/subword/load_hash_file.cu create mode 100644 cpp/src/text/subword/load_merges_file.cu create mode 100644 cpp/src/text/subword/subword_tokenize.cu create mode 100644 cpp/src/text/subword/wordpiece_tokenizer.cu create mode 100644 cpp/src/text/tokenize.cu create mode 100644 
cpp/src/text/utilities/tokenize_ops.cuh create mode 100644 cpp/src/text/vocabulary_tokenize.cu create mode 100644 cpp/src/transform/bools_to_mask.cu create mode 100644 cpp/src/transform/compute_column.cu create mode 100644 cpp/src/transform/encode.cu create mode 100644 cpp/src/transform/jit/kernel.cu create mode 100644 cpp/src/transform/jit/operation-udf.hpp create mode 100644 cpp/src/transform/mask_to_bools.cu create mode 100644 cpp/src/transform/nans_to_nulls.cu create mode 100644 cpp/src/transform/one_hot_encode.cu create mode 100644 cpp/src/transform/row_bit_count.cu create mode 100644 cpp/src/transform/transform.cpp create mode 100644 cpp/src/transpose/transpose.cu create mode 100644 cpp/src/unary/cast_ops.cu create mode 100644 cpp/src/unary/math_ops.cu create mode 100644 cpp/src/unary/nan_ops.cu create mode 100644 cpp/src/unary/null_ops.cu create mode 100644 cpp/src/unary/unary_ops.cuh create mode 100644 cpp/src/utilities/default_stream.cpp create mode 100644 cpp/src/utilities/linked_column.cpp create mode 100644 cpp/src/utilities/logger.cpp create mode 100644 cpp/src/utilities/stacktrace.cpp create mode 100644 cpp/src/utilities/stream_pool.cpp create mode 100644 cpp/src/utilities/traits.cpp create mode 100644 cpp/src/utilities/type_checks.cpp create mode 100644 cpp/src/utilities/type_dispatcher.cpp create mode 100644 cpp/tests/CMakeLists.txt create mode 100644 cpp/tests/ast/transform_tests.cpp create mode 100644 cpp/tests/binaryop/assert-binops.h create mode 100644 cpp/tests/binaryop/binop-compiled-fixed_point-test.cpp create mode 100644 cpp/tests/binaryop/binop-compiled-test.cpp create mode 100644 cpp/tests/binaryop/binop-fixture.hpp create mode 100644 cpp/tests/binaryop/binop-generic-ptx-test.cpp create mode 100644 cpp/tests/binaryop/binop-null-test.cpp create mode 100644 cpp/tests/binaryop/binop-verify-input-test.cpp create mode 100644 cpp/tests/binaryop/util/operation.h create mode 100644 cpp/tests/binaryop/util/runtime_support.h create mode 100644 
cpp/tests/bitmask/bitmask_tests.cpp create mode 100644 cpp/tests/bitmask/is_element_valid_tests.cpp create mode 100644 cpp/tests/bitmask/set_nullmask_tests.cu create mode 100644 cpp/tests/bitmask/valid_if_tests.cu create mode 100644 cpp/tests/column/bit_cast_test.cpp create mode 100644 cpp/tests/column/column_device_view_test.cu create mode 100644 cpp/tests/column/column_test.cpp create mode 100644 cpp/tests/column/column_view_device_span_test.cpp create mode 100644 cpp/tests/column/column_view_shallow_test.cpp create mode 100644 cpp/tests/column/compound_test.cu create mode 100644 cpp/tests/column/factories_test.cpp create mode 100644 cpp/tests/copying/concatenate_tests.cpp create mode 100644 cpp/tests/copying/copy_if_else_nested_tests.cpp create mode 100644 cpp/tests/copying/copy_range_tests.cpp create mode 100644 cpp/tests/copying/copy_tests.cpp create mode 100644 cpp/tests/copying/detail_gather_tests.cu create mode 100644 cpp/tests/copying/gather_list_tests.cpp create mode 100644 cpp/tests/copying/gather_str_tests.cpp create mode 100644 cpp/tests/copying/gather_struct_tests.cpp create mode 100644 cpp/tests/copying/gather_tests.cpp create mode 100644 cpp/tests/copying/get_value_tests.cpp create mode 100644 cpp/tests/copying/pack_tests.cpp create mode 100644 cpp/tests/copying/purge_nonempty_nulls_tests.cpp create mode 100644 cpp/tests/copying/reverse_tests.cpp create mode 100644 cpp/tests/copying/sample_tests.cpp create mode 100644 cpp/tests/copying/scatter_list_scalar_tests.cpp create mode 100644 cpp/tests/copying/scatter_list_tests.cpp create mode 100644 cpp/tests/copying/scatter_struct_scalar_tests.cpp create mode 100644 cpp/tests/copying/scatter_struct_tests.cpp create mode 100644 cpp/tests/copying/scatter_tests.cpp create mode 100644 cpp/tests/copying/segmented_gather_list_tests.cpp create mode 100644 cpp/tests/copying/shift_tests.cpp create mode 100644 cpp/tests/copying/slice_tests.cpp create mode 100644 cpp/tests/copying/slice_tests.cuh create mode 100644 
cpp/tests/copying/split_tests.cpp create mode 100644 cpp/tests/copying/utility_tests.cpp create mode 100644 cpp/tests/datetime/datetime_ops_test.cpp create mode 100644 cpp/tests/device_atomics/device_atomics_test.cu create mode 100644 cpp/tests/dictionary/add_keys_test.cpp create mode 100644 cpp/tests/dictionary/decode_test.cpp create mode 100644 cpp/tests/dictionary/encode_test.cpp create mode 100644 cpp/tests/dictionary/factories_test.cpp create mode 100644 cpp/tests/dictionary/fill_test.cpp create mode 100644 cpp/tests/dictionary/gather_test.cpp create mode 100644 cpp/tests/dictionary/remove_keys_test.cpp create mode 100644 cpp/tests/dictionary/scatter_test.cpp create mode 100644 cpp/tests/dictionary/search_test.cpp create mode 100644 cpp/tests/dictionary/set_keys_test.cpp create mode 100644 cpp/tests/dictionary/slice_test.cpp create mode 100644 cpp/tests/encode/encode_tests.cpp create mode 100644 cpp/tests/error/error_handling_test.cu create mode 100644 cpp/tests/filling/fill_tests.cpp create mode 100644 cpp/tests/filling/repeat_tests.cpp create mode 100644 cpp/tests/filling/sequence_tests.cpp create mode 100644 cpp/tests/fixed_point/fixed_point_tests.cpp create mode 100644 cpp/tests/fixed_point/fixed_point_tests.cu create mode 100644 cpp/tests/groupby/argmax_tests.cpp create mode 100644 cpp/tests/groupby/argmin_tests.cpp create mode 100644 cpp/tests/groupby/collect_list_tests.cpp create mode 100644 cpp/tests/groupby/collect_set_tests.cpp create mode 100644 cpp/tests/groupby/correlation_tests.cpp create mode 100644 cpp/tests/groupby/count_scan_tests.cpp create mode 100644 cpp/tests/groupby/count_tests.cpp create mode 100644 cpp/tests/groupby/covariance_tests.cpp create mode 100644 cpp/tests/groupby/groupby_test_util.cpp create mode 100644 cpp/tests/groupby/groupby_test_util.hpp create mode 100644 cpp/tests/groupby/groups_tests.cpp create mode 100644 cpp/tests/groupby/histogram_tests.cpp create mode 100644 cpp/tests/groupby/keys_tests.cpp create mode 100644 
cpp/tests/groupby/lists_tests.cpp create mode 100644 cpp/tests/groupby/m2_tests.cpp create mode 100644 cpp/tests/groupby/max_scan_tests.cpp create mode 100644 cpp/tests/groupby/max_tests.cpp create mode 100644 cpp/tests/groupby/mean_tests.cpp create mode 100644 cpp/tests/groupby/median_tests.cpp create mode 100644 cpp/tests/groupby/merge_lists_tests.cpp create mode 100644 cpp/tests/groupby/merge_m2_tests.cpp create mode 100644 cpp/tests/groupby/merge_sets_tests.cpp create mode 100644 cpp/tests/groupby/min_scan_tests.cpp create mode 100644 cpp/tests/groupby/min_tests.cpp create mode 100644 cpp/tests/groupby/nth_element_tests.cpp create mode 100644 cpp/tests/groupby/nunique_tests.cpp create mode 100644 cpp/tests/groupby/product_tests.cpp create mode 100644 cpp/tests/groupby/quantile_tests.cpp create mode 100644 cpp/tests/groupby/rank_scan_tests.cpp create mode 100644 cpp/tests/groupby/replace_nulls_tests.cpp create mode 100644 cpp/tests/groupby/shift_tests.cpp create mode 100644 cpp/tests/groupby/std_tests.cpp create mode 100644 cpp/tests/groupby/structs_tests.cpp create mode 100644 cpp/tests/groupby/sum_of_squares_tests.cpp create mode 100644 cpp/tests/groupby/sum_scan_tests.cpp create mode 100644 cpp/tests/groupby/sum_tests.cpp create mode 100644 cpp/tests/groupby/tdigest_tests.cu create mode 100644 cpp/tests/groupby/var_tests.cpp create mode 100644 cpp/tests/hash_map/map_test.cu create mode 100644 cpp/tests/hashing/md5_test.cpp create mode 100644 cpp/tests/hashing/murmurhash3_x64_128_test.cpp create mode 100644 cpp/tests/hashing/murmurhash3_x86_32_test.cpp create mode 100644 cpp/tests/hashing/spark_murmurhash3_x86_32_test.cpp create mode 100644 cpp/tests/hashing/xxhash_64_test.cpp create mode 100644 cpp/tests/identify_stream_usage/test_default_stream_identification.cu create mode 100644 cpp/tests/interop/arrow_utils.hpp create mode 100644 cpp/tests/interop/dlpack_test.cpp create mode 100644 cpp/tests/interop/from_arrow_test.cpp create mode 100644 
cpp/tests/interop/to_arrow_test.cpp create mode 100644 cpp/tests/io/arrow_io_source_test.cpp create mode 100644 cpp/tests/io/comp/decomp_test.cpp create mode 100644 cpp/tests/io/csv_test.cpp create mode 100644 cpp/tests/io/file_io_test.cpp create mode 100644 cpp/tests/io/fst/common.hpp create mode 100644 cpp/tests/io/fst/fst_test.cu create mode 100644 cpp/tests/io/fst/logical_stack_test.cu create mode 100644 cpp/tests/io/json_chunked_reader.cpp create mode 100644 cpp/tests/io/json_test.cpp create mode 100644 cpp/tests/io/json_tree.cpp create mode 100644 cpp/tests/io/json_type_cast_test.cu create mode 100644 cpp/tests/io/json_writer.cpp create mode 100644 cpp/tests/io/metadata_utilities.cpp create mode 100644 cpp/tests/io/nested_json_test.cpp create mode 100644 cpp/tests/io/orc_test.cpp create mode 100644 cpp/tests/io/parquet_chunked_reader_test.cpp create mode 100644 cpp/tests/io/parquet_test.cpp create mode 100644 cpp/tests/io/row_selection_test.cpp create mode 100644 cpp/tests/io/text/data_chunk_source_test.cpp create mode 100644 cpp/tests/io/text/multibyte_split_test.cpp create mode 100644 cpp/tests/io/type_inference_test.cu create mode 100644 cpp/tests/iterator/README.md create mode 100644 cpp/tests/iterator/indexalator_test.cu create mode 100644 cpp/tests/iterator/iterator_tests.cuh create mode 100644 cpp/tests/iterator/optional_iterator_test.cuh create mode 100644 cpp/tests/iterator/optional_iterator_test_chrono.cu create mode 100644 cpp/tests/iterator/optional_iterator_test_numeric.cu create mode 100644 cpp/tests/iterator/pair_iterator_test.cuh create mode 100644 cpp/tests/iterator/pair_iterator_test_chrono.cu create mode 100644 cpp/tests/iterator/pair_iterator_test_numeric.cu create mode 100644 cpp/tests/iterator/scalar_iterator_test.cu create mode 100644 cpp/tests/iterator/sizes_to_offsets_iterator_test.cu create mode 100644 cpp/tests/iterator/value_iterator.cpp create mode 100644 cpp/tests/iterator/value_iterator_test.cuh create mode 100644 
cpp/tests/iterator/value_iterator_test_chrono.cu create mode 100644 cpp/tests/iterator/value_iterator_test_numeric.cu create mode 100644 cpp/tests/iterator/value_iterator_test_strings.cu create mode 100644 cpp/tests/iterator/value_iterator_test_transform.cu create mode 100644 cpp/tests/join/conditional_join_tests.cu create mode 100644 cpp/tests/join/cross_join_tests.cpp create mode 100644 cpp/tests/join/join_tests.cpp create mode 100644 cpp/tests/join/mixed_join_tests.cu create mode 100644 cpp/tests/join/semi_anti_join_tests.cpp create mode 100644 cpp/tests/labeling/label_bins_tests.cpp create mode 100644 cpp/tests/lists/combine/concatenate_list_elements_tests.cpp create mode 100644 cpp/tests/lists/combine/concatenate_rows_tests.cpp create mode 100644 cpp/tests/lists/contains_tests.cpp create mode 100644 cpp/tests/lists/count_elements_tests.cpp create mode 100644 cpp/tests/lists/explode_tests.cpp create mode 100644 cpp/tests/lists/extract_tests.cpp create mode 100644 cpp/tests/lists/reverse_tests.cpp create mode 100644 cpp/tests/lists/sequences_tests.cpp create mode 100644 cpp/tests/lists/set_operations/difference_distinct_tests.cpp create mode 100644 cpp/tests/lists/set_operations/have_overlap_tests.cpp create mode 100644 cpp/tests/lists/set_operations/intersect_distinct_tests.cpp create mode 100644 cpp/tests/lists/set_operations/union_distinct_tests.cpp create mode 100644 cpp/tests/lists/sort_lists_tests.cpp create mode 100644 cpp/tests/lists/stream_compaction/apply_boolean_mask_tests.cpp create mode 100644 cpp/tests/lists/stream_compaction/distinct_tests.cpp create mode 100644 cpp/tests/merge/merge_dictionary_test.cpp create mode 100644 cpp/tests/merge/merge_string_test.cpp create mode 100644 cpp/tests/merge/merge_test.cpp create mode 100644 cpp/tests/partitioning/hash_partition_test.cpp create mode 100644 cpp/tests/partitioning/partition_test.cpp create mode 100644 cpp/tests/partitioning/round_robin_test.cpp create mode 100644 
cpp/tests/quantiles/percentile_approx_test.cpp create mode 100644 cpp/tests/quantiles/quantile_test.cpp create mode 100644 cpp/tests/quantiles/quantiles_test.cpp create mode 100644 cpp/tests/reductions/collect_ops_tests.cpp create mode 100644 cpp/tests/reductions/list_rank_test.cpp create mode 100644 cpp/tests/reductions/rank_tests.cpp create mode 100644 cpp/tests/reductions/reduction_tests.cpp create mode 100644 cpp/tests/reductions/scan_tests.cpp create mode 100644 cpp/tests/reductions/scan_tests.hpp create mode 100644 cpp/tests/reductions/segmented_reduction_tests.cpp create mode 100644 cpp/tests/reductions/tdigest_tests.cu create mode 100644 cpp/tests/replace/clamp_test.cpp create mode 100644 cpp/tests/replace/normalize_replace_tests.cpp create mode 100644 cpp/tests/replace/replace_nans_tests.cpp create mode 100644 cpp/tests/replace/replace_nulls_tests.cpp create mode 100644 cpp/tests/replace/replace_tests.cpp create mode 100644 cpp/tests/reshape/byte_cast_tests.cpp create mode 100644 cpp/tests/reshape/interleave_columns_tests.cpp create mode 100644 cpp/tests/reshape/tile_tests.cpp create mode 100644 cpp/tests/rolling/collect_ops_test.cpp create mode 100644 cpp/tests/rolling/empty_input_test.cpp create mode 100644 cpp/tests/rolling/grouped_rolling_range_test.cpp create mode 100644 cpp/tests/rolling/grouped_rolling_test.cpp create mode 100644 cpp/tests/rolling/lead_lag_test.cpp create mode 100644 cpp/tests/rolling/nth_element_test.cpp create mode 100644 cpp/tests/rolling/offset_row_window_test.cpp create mode 100644 cpp/tests/rolling/range_comparator_test.cu create mode 100644 cpp/tests/rolling/range_rolling_window_test.cpp create mode 100644 cpp/tests/rolling/range_window_bounds_test.cpp create mode 100644 cpp/tests/rolling/rolling_test.cpp create mode 100644 cpp/tests/rolling/rolling_test.hpp create mode 100644 cpp/tests/round/round_tests.cpp create mode 100644 cpp/tests/scalar/factories_test.cpp create mode 100644 cpp/tests/scalar/scalar_device_view_test.cu 
create mode 100644 cpp/tests/scalar/scalar_test.cpp create mode 100644 cpp/tests/search/search_dictionary_test.cpp create mode 100644 cpp/tests/search/search_list_test.cpp create mode 100644 cpp/tests/search/search_struct_test.cpp create mode 100644 cpp/tests/search/search_test.cpp create mode 100644 cpp/tests/sort/is_sorted_tests.cpp create mode 100644 cpp/tests/sort/rank_test.cpp create mode 100644 cpp/tests/sort/segmented_sort_tests.cpp create mode 100644 cpp/tests/sort/sort_nested_types_tests.cpp create mode 100644 cpp/tests/sort/sort_test.cpp create mode 100644 cpp/tests/sort/stable_sort_tests.cpp create mode 100644 cpp/tests/stream_compaction/apply_boolean_mask_tests.cpp create mode 100644 cpp/tests/stream_compaction/distinct_count_tests.cpp create mode 100644 cpp/tests/stream_compaction/distinct_tests.cpp create mode 100644 cpp/tests/stream_compaction/drop_nans_tests.cpp create mode 100644 cpp/tests/stream_compaction/drop_nulls_tests.cpp create mode 100644 cpp/tests/stream_compaction/stable_distinct_tests.cpp create mode 100644 cpp/tests/stream_compaction/unique_count_tests.cpp create mode 100644 cpp/tests/stream_compaction/unique_tests.cpp create mode 100644 cpp/tests/streams/concatenate_test.cpp create mode 100644 cpp/tests/streams/copying_test.cpp create mode 100644 cpp/tests/streams/dictionary_test.cpp create mode 100644 cpp/tests/streams/filling_test.cpp create mode 100644 cpp/tests/streams/groupby_test.cpp create mode 100644 cpp/tests/streams/hash_test.cpp create mode 100644 cpp/tests/streams/interop_test.cpp create mode 100644 cpp/tests/streams/replace_test.cpp create mode 100644 cpp/tests/streams/search_test.cpp create mode 100644 cpp/tests/streams/sorting_test.cpp create mode 100644 cpp/tests/streams/strings/case_test.cpp create mode 100644 cpp/tests/streams/strings/find_test.cpp create mode 100644 cpp/tests/streams/text/ngrams_test.cpp create mode 100644 cpp/tests/strings/array_tests.cpp create mode 100644 cpp/tests/strings/attrs_tests.cpp create 
mode 100644 cpp/tests/strings/booleans_tests.cpp create mode 100644 cpp/tests/strings/case_tests.cpp create mode 100644 cpp/tests/strings/chars_types_tests.cpp create mode 100644 cpp/tests/strings/combine/concatenate_tests.cpp create mode 100644 cpp/tests/strings/combine/join_list_elements_tests.cpp create mode 100644 cpp/tests/strings/combine/join_strings_tests.cpp create mode 100644 cpp/tests/strings/concatenate_tests.cpp create mode 100644 cpp/tests/strings/contains_tests.cpp create mode 100644 cpp/tests/strings/datetime_tests.cpp create mode 100644 cpp/tests/strings/durations_tests.cpp create mode 100644 cpp/tests/strings/extract_tests.cpp create mode 100644 cpp/tests/strings/factories_test.cu create mode 100644 cpp/tests/strings/fill_tests.cpp create mode 100644 cpp/tests/strings/find_multiple_tests.cpp create mode 100644 cpp/tests/strings/find_tests.cpp create mode 100644 cpp/tests/strings/findall_tests.cpp create mode 100644 cpp/tests/strings/fixed_point_tests.cpp create mode 100644 cpp/tests/strings/floats_tests.cpp create mode 100644 cpp/tests/strings/format_lists_tests.cpp create mode 100644 cpp/tests/strings/integers_tests.cpp create mode 100644 cpp/tests/strings/ipv4_tests.cpp create mode 100644 cpp/tests/strings/json_tests.cpp create mode 100644 cpp/tests/strings/like_tests.cpp create mode 100644 cpp/tests/strings/pad_tests.cpp create mode 100644 cpp/tests/strings/repeat_strings_tests.cpp create mode 100644 cpp/tests/strings/replace_regex_tests.cpp create mode 100644 cpp/tests/strings/replace_tests.cpp create mode 100644 cpp/tests/strings/reverse_tests.cpp create mode 100644 cpp/tests/strings/slice_tests.cpp create mode 100644 cpp/tests/strings/split_tests.cpp create mode 100644 cpp/tests/strings/strip_tests.cpp create mode 100644 cpp/tests/strings/translate_tests.cpp create mode 100644 cpp/tests/strings/urls_tests.cpp create mode 100644 cpp/tests/structs/structs_column_tests.cpp create mode 100644 cpp/tests/structs/utilities_tests.cpp create mode 
100644 cpp/tests/table/experimental_row_operator_tests.cu create mode 100644 cpp/tests/table/row_operator_tests_utilities.cu create mode 100644 cpp/tests/table/row_operator_tests_utilities.hpp create mode 100644 cpp/tests/table/row_operators_tests.cpp create mode 100644 cpp/tests/table/table_tests.cpp create mode 100644 cpp/tests/table/table_view_tests.cu create mode 100644 cpp/tests/text/bpe_tests.cpp create mode 100644 cpp/tests/text/edit_distance_tests.cpp create mode 100644 cpp/tests/text/jaccard_tests.cpp create mode 100644 cpp/tests/text/minhash_tests.cpp create mode 100644 cpp/tests/text/ngrams_tests.cpp create mode 100644 cpp/tests/text/ngrams_tokenize_tests.cpp create mode 100644 cpp/tests/text/normalize_tests.cpp create mode 100644 cpp/tests/text/replace_tests.cpp create mode 100644 cpp/tests/text/stemmer_tests.cpp create mode 100644 cpp/tests/text/subword_tests.cpp create mode 100644 cpp/tests/text/tokenize_tests.cpp create mode 100644 cpp/tests/transform/bools_to_mask_test.cpp create mode 100644 cpp/tests/transform/integration/assert_unary.h create mode 100644 cpp/tests/transform/integration/unary_transform_test.cpp create mode 100644 cpp/tests/transform/mask_to_bools_test.cpp create mode 100644 cpp/tests/transform/nans_to_null_test.cpp create mode 100644 cpp/tests/transform/one_hot_encode_tests.cpp create mode 100644 cpp/tests/transform/row_bit_count_test.cu create mode 100644 cpp/tests/transpose/transpose_test.cpp create mode 100644 cpp/tests/types/traits_test.cpp create mode 100644 cpp/tests/types/type_dispatcher_test.cu create mode 100644 cpp/tests/unary/cast_tests.cpp create mode 100644 cpp/tests/unary/math_ops_test.cpp create mode 100644 cpp/tests/unary/unary_ops_test.cpp create mode 100644 cpp/tests/utilities/base_fixture.cpp create mode 100644 cpp/tests/utilities/column_utilities.cu create mode 100644 cpp/tests/utilities/default_stream.cpp create mode 100644 cpp/tests/utilities/identify_stream_usage.cpp create mode 100644 
cpp/tests/utilities/table_utilities.cu create mode 100644 cpp/tests/utilities/tdigest_utilities.cu create mode 100644 cpp/tests/utilities_tests/column_utilities_tests.cpp create mode 100644 cpp/tests/utilities_tests/column_wrapper_tests.cpp create mode 100644 cpp/tests/utilities_tests/default_stream_tests.cpp create mode 100644 cpp/tests/utilities_tests/lists_column_wrapper_tests.cpp create mode 100644 cpp/tests/utilities_tests/logger_tests.cpp create mode 100644 cpp/tests/utilities_tests/span_tests.cu create mode 100644 cpp/tests/utilities_tests/type_check_tests.cpp create mode 100644 cpp/tests/utilities_tests/type_list_tests.cpp create mode 100644 cpp/tests/wrappers/timestamps_test.cu create mode 100644 dependencies.yaml create mode 100644 docs/cudf/Makefile create mode 100644 docs/cudf/README.md create mode 100644 docs/cudf/make.bat create mode 100644 docs/cudf/source/_ext/PandasCompat.py create mode 100644 docs/cudf/source/_static/EMPTY create mode 100644 docs/cudf/source/_static/RAPIDS-logo-purple.png create mode 100644 docs/cudf/source/_static/colab.png create mode 100644 docs/cudf/source/_static/cudf-pandas-execution-flow.png create mode 100644 docs/cudf/source/_static/cudf-pandas-profile.png create mode 100644 docs/cudf/source/_static/cudf.pandas-duckdb.png create mode 100644 docs/cudf/source/_static/duckdb-benchmark-groupby-join.png create mode 100644 docs/cudf/source/_templates/autosummary/class.rst create mode 100644 docs/cudf/source/conf.py create mode 100644 docs/cudf/source/cudf_pandas/benchmarks.md create mode 100644 docs/cudf/source/cudf_pandas/faq.md create mode 100644 docs/cudf/source/cudf_pandas/how-it-works.md create mode 100644 docs/cudf/source/cudf_pandas/index.rst create mode 100644 docs/cudf/source/cudf_pandas/usage.md create mode 100644 docs/cudf/source/developer_guide/benchmarking.md create mode 100644 docs/cudf/source/developer_guide/contributing_guide.md create mode 100644 docs/cudf/source/developer_guide/documentation.md create mode 
100644 docs/cudf/source/developer_guide/frame_class_diagram.png create mode 100644 docs/cudf/source/developer_guide/index.md create mode 100644 docs/cudf/source/developer_guide/library_design.md create mode 100644 docs/cudf/source/developer_guide/options.md create mode 100644 docs/cudf/source/developer_guide/pylibcudf.md create mode 100644 docs/cudf/source/developer_guide/testing.md create mode 100644 docs/cudf/source/index.rst create mode 100644 docs/cudf/source/user_guide/10min.ipynb create mode 100644 docs/cudf/source/user_guide/PandasCompat.md create mode 100644 docs/cudf/source/user_guide/api_docs/dataframe.rst create mode 100644 docs/cudf/source/user_guide/api_docs/extension_dtypes.rst create mode 100644 docs/cudf/source/user_guide/api_docs/general_functions.rst create mode 100644 docs/cudf/source/user_guide/api_docs/general_utilities.rst create mode 100644 docs/cudf/source/user_guide/api_docs/groupby.rst create mode 100644 docs/cudf/source/user_guide/api_docs/index.rst create mode 100644 docs/cudf/source/user_guide/api_docs/index_objects.rst create mode 100644 docs/cudf/source/user_guide/api_docs/io.rst create mode 100644 docs/cudf/source/user_guide/api_docs/list_handling.rst create mode 100644 docs/cudf/source/user_guide/api_docs/options.rst create mode 100644 docs/cudf/source/user_guide/api_docs/series.rst create mode 100644 docs/cudf/source/user_guide/api_docs/string_handling.rst create mode 100644 docs/cudf/source/user_guide/api_docs/struct_handling.rst create mode 100644 docs/cudf/source/user_guide/api_docs/subword_tokenize.rst create mode 100644 docs/cudf/source/user_guide/api_docs/window.rst create mode 100644 docs/cudf/source/user_guide/copy-on-write.md create mode 100644 docs/cudf/source/user_guide/cupy-interop.ipynb create mode 100644 docs/cudf/source/user_guide/data-types.md create mode 100644 docs/cudf/source/user_guide/groupby.md create mode 100644 docs/cudf/source/user_guide/guide-to-udfs.ipynb create mode 100644 
docs/cudf/source/user_guide/index.md create mode 100644 docs/cudf/source/user_guide/io/index.md create mode 100644 docs/cudf/source/user_guide/io/io.md create mode 100644 docs/cudf/source/user_guide/io/read-json.md create mode 100644 docs/cudf/source/user_guide/missing-data.ipynb create mode 100644 docs/cudf/source/user_guide/options.md create mode 100644 docs/cudf/source/user_guide/pandas-comparison.md create mode 100644 docs/cudf/source/user_guide/performance-comparisons/index.md create mode 100644 docs/cudf/source/user_guide/performance-comparisons/performance-comparisons.ipynb create mode 100644 docs/dask_cudf/Makefile create mode 100644 docs/dask_cudf/make.bat create mode 100644 docs/dask_cudf/source/_static/RAPIDS-logo-purple.png create mode 100644 docs/dask_cudf/source/api.rst create mode 100644 docs/dask_cudf/source/conf.py create mode 100644 docs/dask_cudf/source/index.rst create mode 100644 fetch_rapids.cmake create mode 100644 img/GDF_community.png create mode 100644 img/goai_logo.png create mode 100644 img/rapids_arrow.png create mode 100644 img/rapids_logo.png create mode 100644 java/README.md create mode 100755 java/buildscripts/build-info create mode 100644 java/ci/Dockerfile.centos7 create mode 100644 java/ci/README.md create mode 100755 java/ci/build-in-docker.sh create mode 100644 java/dev/cudf_java_styles.xml create mode 100644 java/pom.xml create mode 100644 java/src/main/java/ai/rapids/cudf/Aggregation.java create mode 100644 java/src/main/java/ai/rapids/cudf/Aggregation128Utils.java create mode 100644 java/src/main/java/ai/rapids/cudf/AggregationOverWindow.java create mode 100644 java/src/main/java/ai/rapids/cudf/ArrowColumnBuilder.java create mode 100644 java/src/main/java/ai/rapids/cudf/ArrowIPCOptions.java create mode 100644 java/src/main/java/ai/rapids/cudf/ArrowIPCWriterOptions.java create mode 100644 java/src/main/java/ai/rapids/cudf/AssertEmptyNulls.java create mode 100644 java/src/main/java/ai/rapids/cudf/AvroOptions.java create mode 
100644 java/src/main/java/ai/rapids/cudf/BaseDeviceMemoryBuffer.java create mode 100644 java/src/main/java/ai/rapids/cudf/BinaryOp.java create mode 100644 java/src/main/java/ai/rapids/cudf/BinaryOperable.java create mode 100644 java/src/main/java/ai/rapids/cudf/BitVectorHelper.java create mode 100644 java/src/main/java/ai/rapids/cudf/BufferType.java create mode 100644 java/src/main/java/ai/rapids/cudf/CSVOptions.java create mode 100644 java/src/main/java/ai/rapids/cudf/CSVWriterOptions.java create mode 100644 java/src/main/java/ai/rapids/cudf/CaptureGroups.java create mode 100644 java/src/main/java/ai/rapids/cudf/ChunkedPack.java create mode 100644 java/src/main/java/ai/rapids/cudf/CloseableArray.java create mode 100644 java/src/main/java/ai/rapids/cudf/ColumnFilterOptions.java create mode 100644 java/src/main/java/ai/rapids/cudf/ColumnVector.java create mode 100644 java/src/main/java/ai/rapids/cudf/ColumnView.java create mode 100644 java/src/main/java/ai/rapids/cudf/ColumnWriterOptions.java create mode 100644 java/src/main/java/ai/rapids/cudf/CompressedMetadataWriterOptions.java create mode 100644 java/src/main/java/ai/rapids/cudf/CompressionMetadataWriterOptions.java create mode 100644 java/src/main/java/ai/rapids/cudf/CompressionType.java create mode 100644 java/src/main/java/ai/rapids/cudf/ContigSplitGroupByResult.java create mode 100644 java/src/main/java/ai/rapids/cudf/ContiguousTable.java create mode 100644 java/src/main/java/ai/rapids/cudf/CuFile.java create mode 100644 java/src/main/java/ai/rapids/cudf/CuFileBuffer.java create mode 100644 java/src/main/java/ai/rapids/cudf/CuFileDriver.java create mode 100644 java/src/main/java/ai/rapids/cudf/CuFileHandle.java create mode 100644 java/src/main/java/ai/rapids/cudf/CuFileReadHandle.java create mode 100644 java/src/main/java/ai/rapids/cudf/CuFileResourceCleaner.java create mode 100644 java/src/main/java/ai/rapids/cudf/CuFileResourceDestroyer.java create mode 100644 
java/src/main/java/ai/rapids/cudf/CuFileWriteHandle.java create mode 100755 java/src/main/java/ai/rapids/cudf/Cuda.java create mode 100644 java/src/main/java/ai/rapids/cudf/CudaComputeMode.java create mode 100755 java/src/main/java/ai/rapids/cudf/CudaException.java create mode 100644 java/src/main/java/ai/rapids/cudf/CudaFatalException.java create mode 100755 java/src/main/java/ai/rapids/cudf/CudaMemInfo.java create mode 100755 java/src/main/java/ai/rapids/cudf/CudaMemcpyKind.java create mode 100644 java/src/main/java/ai/rapids/cudf/CudaMemoryBuffer.java create mode 100755 java/src/main/java/ai/rapids/cudf/CudfColumnSizeOverflowException.java create mode 100755 java/src/main/java/ai/rapids/cudf/CudfException.java create mode 100644 java/src/main/java/ai/rapids/cudf/DType.java create mode 100644 java/src/main/java/ai/rapids/cudf/DecimalUtils.java create mode 100644 java/src/main/java/ai/rapids/cudf/DefaultHostMemoryAllocator.java create mode 100644 java/src/main/java/ai/rapids/cudf/DeviceMemoryBuffer.java create mode 100644 java/src/main/java/ai/rapids/cudf/DeviceMemoryBufferView.java create mode 100644 java/src/main/java/ai/rapids/cudf/GatherMap.java create mode 100644 java/src/main/java/ai/rapids/cudf/GroupByAggregation.java create mode 100644 java/src/main/java/ai/rapids/cudf/GroupByAggregationOnColumn.java create mode 100644 java/src/main/java/ai/rapids/cudf/GroupByOptions.java create mode 100644 java/src/main/java/ai/rapids/cudf/GroupByScanAggregation.java create mode 100644 java/src/main/java/ai/rapids/cudf/GroupByScanAggregationOnColumn.java create mode 100644 java/src/main/java/ai/rapids/cudf/HashJoin.java create mode 100644 java/src/main/java/ai/rapids/cudf/HashType.java create mode 100644 java/src/main/java/ai/rapids/cudf/HostBufferConsumer.java create mode 100644 java/src/main/java/ai/rapids/cudf/HostBufferProvider.java create mode 100644 java/src/main/java/ai/rapids/cudf/HostColumnVector.java create mode 100644 
java/src/main/java/ai/rapids/cudf/HostColumnVectorCore.java create mode 100644 java/src/main/java/ai/rapids/cudf/HostMemoryAllocator.java create mode 100644 java/src/main/java/ai/rapids/cudf/HostMemoryBuffer.java create mode 100644 java/src/main/java/ai/rapids/cudf/HostMemoryBufferNativeUtils.java create mode 100644 java/src/main/java/ai/rapids/cudf/HostMemoryReservation.java create mode 100644 java/src/main/java/ai/rapids/cudf/JCudfSerialization.java create mode 100644 java/src/main/java/ai/rapids/cudf/JSONOptions.java create mode 100644 java/src/main/java/ai/rapids/cudf/MaskState.java create mode 100644 java/src/main/java/ai/rapids/cudf/MemoryBuffer.java create mode 100644 java/src/main/java/ai/rapids/cudf/MemoryCleaner.java create mode 100644 java/src/main/java/ai/rapids/cudf/MixedJoinSize.java create mode 100644 java/src/main/java/ai/rapids/cudf/NaNEquality.java create mode 100755 java/src/main/java/ai/rapids/cudf/NativeDepsLoader.java create mode 100644 java/src/main/java/ai/rapids/cudf/NullEquality.java create mode 100644 java/src/main/java/ai/rapids/cudf/NullPolicy.java create mode 100644 java/src/main/java/ai/rapids/cudf/NvtxColor.java create mode 100644 java/src/main/java/ai/rapids/cudf/NvtxRange.java create mode 100644 java/src/main/java/ai/rapids/cudf/NvtxUniqueRange.java create mode 100644 java/src/main/java/ai/rapids/cudf/ORCOptions.java create mode 100644 java/src/main/java/ai/rapids/cudf/ORCWriterOptions.java create mode 100644 java/src/main/java/ai/rapids/cudf/OrderByArg.java create mode 100644 java/src/main/java/ai/rapids/cudf/OutOfBoundsPolicy.java create mode 100644 java/src/main/java/ai/rapids/cudf/PackedColumnMetadata.java create mode 100644 java/src/main/java/ai/rapids/cudf/PadSide.java create mode 100644 java/src/main/java/ai/rapids/cudf/ParquetChunkedReader.java create mode 100644 java/src/main/java/ai/rapids/cudf/ParquetOptions.java create mode 100644 java/src/main/java/ai/rapids/cudf/ParquetWriterOptions.java create mode 100644 
java/src/main/java/ai/rapids/cudf/PartitionedTable.java create mode 100644 java/src/main/java/ai/rapids/cudf/PinnedMemoryPool.java create mode 100644 java/src/main/java/ai/rapids/cudf/QuantileMethod.java create mode 100644 java/src/main/java/ai/rapids/cudf/QuoteStyle.java create mode 100644 java/src/main/java/ai/rapids/cudf/Range.java create mode 100644 java/src/main/java/ai/rapids/cudf/ReductionAggregation.java create mode 100644 java/src/main/java/ai/rapids/cudf/RegexFlag.java create mode 100644 java/src/main/java/ai/rapids/cudf/RegexProgram.java create mode 100644 java/src/main/java/ai/rapids/cudf/ReplacePolicy.java create mode 100644 java/src/main/java/ai/rapids/cudf/ReplacePolicyWithColumn.java create mode 100755 java/src/main/java/ai/rapids/cudf/Rmm.java create mode 100644 java/src/main/java/ai/rapids/cudf/RmmAllocationMode.java create mode 100644 java/src/main/java/ai/rapids/cudf/RmmArenaMemoryResource.java create mode 100644 java/src/main/java/ai/rapids/cudf/RmmCudaAsyncMemoryResource.java create mode 100644 java/src/main/java/ai/rapids/cudf/RmmCudaMemoryResource.java create mode 100644 java/src/main/java/ai/rapids/cudf/RmmDeviceMemoryResource.java create mode 100644 java/src/main/java/ai/rapids/cudf/RmmEventHandler.java create mode 100644 java/src/main/java/ai/rapids/cudf/RmmEventHandlerResourceAdaptor.java create mode 100755 java/src/main/java/ai/rapids/cudf/RmmException.java create mode 100644 java/src/main/java/ai/rapids/cudf/RmmLimitingResourceAdaptor.java create mode 100644 java/src/main/java/ai/rapids/cudf/RmmLoggingResourceAdaptor.java create mode 100644 java/src/main/java/ai/rapids/cudf/RmmManagedMemoryResource.java create mode 100644 java/src/main/java/ai/rapids/cudf/RmmPoolMemoryResource.java create mode 100644 java/src/main/java/ai/rapids/cudf/RmmTrackingResourceAdaptor.java create mode 100644 java/src/main/java/ai/rapids/cudf/RmmWrappingDeviceMemoryResource.java create mode 100644 java/src/main/java/ai/rapids/cudf/RollingAggregation.java create 
mode 100644 java/src/main/java/ai/rapids/cudf/RollingAggregationOnColumn.java create mode 100644 java/src/main/java/ai/rapids/cudf/RoundMode.java create mode 100644 java/src/main/java/ai/rapids/cudf/Scalar.java create mode 100644 java/src/main/java/ai/rapids/cudf/ScanAggregation.java create mode 100644 java/src/main/java/ai/rapids/cudf/ScanType.java create mode 100644 java/src/main/java/ai/rapids/cudf/Schema.java create mode 100644 java/src/main/java/ai/rapids/cudf/SegmentedReductionAggregation.java create mode 100644 java/src/main/java/ai/rapids/cudf/StreamedTableReader.java create mode 100644 java/src/main/java/ai/rapids/cudf/Table.java create mode 100644 java/src/main/java/ai/rapids/cudf/TableDebug.java create mode 100644 java/src/main/java/ai/rapids/cudf/TableWithMeta.java create mode 100644 java/src/main/java/ai/rapids/cudf/TableWriter.java create mode 100644 java/src/main/java/ai/rapids/cudf/UnaryOp.java create mode 100644 java/src/main/java/ai/rapids/cudf/UnsafeMemoryAccessor.java create mode 100644 java/src/main/java/ai/rapids/cudf/WindowOptions.java create mode 100644 java/src/main/java/ai/rapids/cudf/WriterOptions.java create mode 100644 java/src/main/java/ai/rapids/cudf/ast/AstExpression.java create mode 100644 java/src/main/java/ai/rapids/cudf/ast/BinaryOperation.java create mode 100644 java/src/main/java/ai/rapids/cudf/ast/BinaryOperator.java create mode 100644 java/src/main/java/ai/rapids/cudf/ast/ColumnReference.java create mode 100644 java/src/main/java/ai/rapids/cudf/ast/CompiledExpression.java create mode 100644 java/src/main/java/ai/rapids/cudf/ast/Literal.java create mode 100644 java/src/main/java/ai/rapids/cudf/ast/TableReference.java create mode 100644 java/src/main/java/ai/rapids/cudf/ast/UnaryOperation.java create mode 100644 java/src/main/java/ai/rapids/cudf/ast/UnaryOperator.java create mode 100644 java/src/main/java/ai/rapids/cudf/nvcomp/BatchedLZ4Compressor.java create mode 100644 
java/src/main/java/ai/rapids/cudf/nvcomp/BatchedLZ4Decompressor.java create mode 100644 java/src/main/java/ai/rapids/cudf/nvcomp/CompressionType.java create mode 100644 java/src/main/java/ai/rapids/cudf/nvcomp/NvcompCudaException.java create mode 100644 java/src/main/java/ai/rapids/cudf/nvcomp/NvcompException.java create mode 100644 java/src/main/java/ai/rapids/cudf/nvcomp/NvcompJni.java create mode 100644 java/src/main/native/.clang-format create mode 100644 java/src/main/native/CMakeLists.txt create mode 100644 java/src/main/native/clang-format.README create mode 100644 java/src/main/native/include/jni_utils.hpp create mode 100644 java/src/main/native/include/maps_column_view.hpp create mode 100644 java/src/main/native/src/Aggregation128UtilsJni.cpp create mode 100644 java/src/main/native/src/AggregationJni.cpp create mode 100644 java/src/main/native/src/ChunkedPackJni.cpp create mode 100644 java/src/main/native/src/ChunkedReaderJni.cpp create mode 100644 java/src/main/native/src/ColumnVectorJni.cpp create mode 100644 java/src/main/native/src/ColumnViewJni.cpp create mode 100644 java/src/main/native/src/ColumnViewJni.cu create mode 100644 java/src/main/native/src/ColumnViewJni.hpp create mode 100644 java/src/main/native/src/CompiledExpression.cpp create mode 100644 java/src/main/native/src/ContiguousTableJni.cpp create mode 100644 java/src/main/native/src/CuFileJni.cpp create mode 100644 java/src/main/native/src/CudaJni.cpp create mode 100644 java/src/main/native/src/CudfJni.cpp create mode 100644 java/src/main/native/src/HashJoinJni.cpp create mode 100644 java/src/main/native/src/HostMemoryBufferNativeUtilsJni.cpp create mode 100644 java/src/main/native/src/NvcompJni.cpp create mode 100644 java/src/main/native/src/NvtxRangeJni.cpp create mode 100644 java/src/main/native/src/NvtxUniqueRangeJni.cpp create mode 100644 java/src/main/native/src/PackedColumnMetadataJni.cpp create mode 100644 java/src/main/native/src/RmmJni.cpp create mode 100644 
java/src/main/native/src/ScalarJni.cpp create mode 100644 java/src/main/native/src/TableJni.cpp create mode 100644 java/src/main/native/src/aggregation128_utils.cu create mode 100644 java/src/main/native/src/aggregation128_utils.hpp create mode 100644 java/src/main/native/src/check_nvcomp_output_sizes.cu create mode 100644 java/src/main/native/src/check_nvcomp_output_sizes.hpp create mode 100644 java/src/main/native/src/csv_chunked_writer.hpp create mode 100644 java/src/main/native/src/cudf_jni_apis.hpp create mode 100644 java/src/main/native/src/dtype_utils.hpp create mode 100644 java/src/main/native/src/emptyfile.cpp create mode 100644 java/src/main/native/src/jni_compiled_expr.hpp create mode 100644 java/src/main/native/src/jni_writer_data_sink.hpp create mode 100644 java/src/main/native/src/maps_column_view.cu create mode 100644 java/src/main/native/src/nvtx_common.hpp create mode 100644 java/src/main/native/src/row_conversion.cu create mode 100644 java/src/main/native/src/row_conversion.hpp create mode 100644 java/src/test/java/ai/rapids/cudf/Aggregation128UtilsTest.java create mode 100644 java/src/test/java/ai/rapids/cudf/ArrowColumnVectorTest.java create mode 100644 java/src/test/java/ai/rapids/cudf/AssertUtils.java create mode 100644 java/src/test/java/ai/rapids/cudf/BinaryOpTest.java create mode 100644 java/src/test/java/ai/rapids/cudf/ByteColumnVectorTest.java create mode 100644 java/src/test/java/ai/rapids/cudf/ColumnBuilderHelper.java create mode 100644 java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java create mode 100644 java/src/test/java/ai/rapids/cudf/ColumnViewNonEmptyNullsTest.java create mode 100644 java/src/test/java/ai/rapids/cudf/CuFileTest.java create mode 100644 java/src/test/java/ai/rapids/cudf/CudaFatalTest.java create mode 100644 java/src/test/java/ai/rapids/cudf/CudaTest.java create mode 100644 java/src/test/java/ai/rapids/cudf/CudfTestBase.java create mode 100644 java/src/test/java/ai/rapids/cudf/Date32ColumnVectorTest.java create 
mode 100644 java/src/test/java/ai/rapids/cudf/Date64ColumnVectorTest.java create mode 100644 java/src/test/java/ai/rapids/cudf/DecimalColumnVectorTest.java create mode 100644 java/src/test/java/ai/rapids/cudf/DoubleColumnVectorTest.java create mode 100644 java/src/test/java/ai/rapids/cudf/FloatColumnVectorTest.java create mode 100644 java/src/test/java/ai/rapids/cudf/GatherMapTest.java create mode 100644 java/src/test/java/ai/rapids/cudf/HashJoinTest.java create mode 100644 java/src/test/java/ai/rapids/cudf/HostMemoryBufferTest.java create mode 100644 java/src/test/java/ai/rapids/cudf/IfElseTest.java create mode 100644 java/src/test/java/ai/rapids/cudf/IntColumnVectorTest.java create mode 100644 java/src/test/java/ai/rapids/cudf/LargeTableTest.java create mode 100644 java/src/test/java/ai/rapids/cudf/LongColumnVectorTest.java create mode 100644 java/src/test/java/ai/rapids/cudf/MemoryBufferTest.java create mode 100644 java/src/test/java/ai/rapids/cudf/NvtxTest.java create mode 100644 java/src/test/java/ai/rapids/cudf/PinnedMemoryPoolTest.java create mode 100644 java/src/test/java/ai/rapids/cudf/ReductionTest.java create mode 100644 java/src/test/java/ai/rapids/cudf/RmmMemoryAccessorTest.java create mode 100644 java/src/test/java/ai/rapids/cudf/RmmTest.java create mode 100644 java/src/test/java/ai/rapids/cudf/ScalarTest.java create mode 100644 java/src/test/java/ai/rapids/cudf/SegmentedReductionTest.java create mode 100644 java/src/test/java/ai/rapids/cudf/ShortColumnVectorTest.java create mode 100644 java/src/test/java/ai/rapids/cudf/TableTest.java create mode 100644 java/src/test/java/ai/rapids/cudf/TestUtils.java create mode 100644 java/src/test/java/ai/rapids/cudf/TimestampColumnVectorTest.java create mode 100644 java/src/test/java/ai/rapids/cudf/UnaryOpTest.java create mode 100644 java/src/test/java/ai/rapids/cudf/UnsafeMemoryAccessorTest.java create mode 100644 java/src/test/java/ai/rapids/cudf/ast/CompiledExpressionTest.java create mode 100644 
java/src/test/java/ai/rapids/cudf/nvcomp/NvcompTest.java create mode 100644 java/src/test/resources/TestOrcFile.orc create mode 100644 java/src/test/resources/acq.parquet create mode 100644 java/src/test/resources/alltypes_plain.avro create mode 100644 java/src/test/resources/binary.parquet create mode 100644 java/src/test/resources/decimal.parquet create mode 100644 java/src/test/resources/people.json create mode 100644 java/src/test/resources/people_with_invalid_lines.json create mode 100644 java/src/test/resources/simple.csv create mode 100644 java/src/test/resources/splittable.parquet create mode 100644 java/src/test/resources/timestamp-date-test.orc create mode 120000 notebooks/10min.ipynb create mode 100644 notebooks/README.md create mode 120000 notebooks/cupy-interop.ipynb create mode 120000 notebooks/guide-to-udfs.ipynb create mode 120000 notebooks/missing-data.ipynb create mode 120000 notebooks/performance-comparisons create mode 100755 print_env.sh create mode 100644 pyproject.toml create mode 100644 python/cudf/.coveragerc create mode 100644 python/cudf/CMakeLists.txt create mode 120000 python/cudf/LICENSE create mode 120000 python/cudf/README.md create mode 100644 python/cudf/benchmarks/API/bench_dataframe.py create mode 100644 python/cudf/benchmarks/API/bench_dataframe_cases.py create mode 100644 python/cudf/benchmarks/API/bench_frame_or_index.py create mode 100644 python/cudf/benchmarks/API/bench_functions.py create mode 100644 python/cudf/benchmarks/API/bench_functions_cases.py create mode 100644 python/cudf/benchmarks/API/bench_index.py create mode 100644 python/cudf/benchmarks/API/bench_indexed_frame.py create mode 100644 python/cudf/benchmarks/API/bench_multiindex.py create mode 100644 python/cudf/benchmarks/API/bench_rangeindex.py create mode 100644 python/cudf/benchmarks/API/bench_series.py create mode 100644 python/cudf/benchmarks/common/config.py create mode 100644 python/cudf/benchmarks/common/utils.py create mode 100644 
python/cudf/benchmarks/conftest.py create mode 100644 python/cudf/benchmarks/internal/bench_column.py create mode 100644 python/cudf/benchmarks/internal/bench_dataframe_internal.py create mode 100644 python/cudf/benchmarks/internal/bench_rangeindex_internal.py create mode 100644 python/cudf/benchmarks/internal/conftest.py create mode 100644 python/cudf/benchmarks/pytest.ini create mode 100644 python/cudf/cmake/Modules/ProtobufHelpers.cmake create mode 100644 python/cudf/cmake/Modules/WheelHelpers.cmake create mode 100644 python/cudf/cudf/__init__.py create mode 100644 python/cudf/cudf/_fuzz_testing/__init__.py create mode 100644 python/cudf/cudf/_fuzz_testing/avro.py create mode 100644 python/cudf/cudf/_fuzz_testing/csv.py create mode 100644 python/cudf/cudf/_fuzz_testing/fuzzer.py create mode 100644 python/cudf/cudf/_fuzz_testing/io.py create mode 100644 python/cudf/cudf/_fuzz_testing/json.py create mode 100644 python/cudf/cudf/_fuzz_testing/main.py create mode 100644 python/cudf/cudf/_fuzz_testing/orc.py create mode 100644 python/cudf/cudf/_fuzz_testing/parquet.py create mode 100644 python/cudf/cudf/_fuzz_testing/tests/fuzz_test_avro.py create mode 100644 python/cudf/cudf/_fuzz_testing/tests/fuzz_test_csv.py create mode 100644 python/cudf/cudf/_fuzz_testing/tests/fuzz_test_json.py create mode 100644 python/cudf/cudf/_fuzz_testing/tests/fuzz_test_orc.py create mode 100644 python/cudf/cudf/_fuzz_testing/tests/fuzz_test_parquet.py create mode 100644 python/cudf/cudf/_fuzz_testing/tests/readme.md create mode 100644 python/cudf/cudf/_fuzz_testing/utils.py create mode 100644 python/cudf/cudf/_lib/CMakeLists.txt create mode 100644 python/cudf/cudf/_lib/__init__.pxd create mode 100644 python/cudf/cudf/_lib/__init__.py create mode 100644 python/cudf/cudf/_lib/aggregation.pxd create mode 100644 python/cudf/cudf/_lib/aggregation.pyx create mode 100644 python/cudf/cudf/_lib/avro.pyx create mode 100644 python/cudf/cudf/_lib/binaryop.pxd create mode 100644 
python/cudf/cudf/_lib/binaryop.pyx create mode 100644 python/cudf/cudf/_lib/column.pxd create mode 100644 python/cudf/cudf/_lib/column.pyi create mode 100644 python/cudf/cudf/_lib/column.pyx create mode 100644 python/cudf/cudf/_lib/concat.pyx create mode 100644 python/cudf/cudf/_lib/copying.pxd create mode 100644 python/cudf/cudf/_lib/copying.pyx create mode 100644 python/cudf/cudf/_lib/cpp/CMakeLists.txt create mode 100644 python/cudf/cudf/_lib/cpp/__init__.pxd create mode 100644 python/cudf/cudf/_lib/cpp/__init__.py create mode 100644 python/cudf/cudf/_lib/cpp/aggregation.pxd create mode 100644 python/cudf/cudf/_lib/cpp/binaryop.pxd create mode 100644 python/cudf/cudf/_lib/cpp/column/__init__.pxd create mode 100644 python/cudf/cudf/_lib/cpp/column/__init__.py create mode 100644 python/cudf/cudf/_lib/cpp/column/column.pxd create mode 100644 python/cudf/cudf/_lib/cpp/column/column_factories.pxd create mode 100644 python/cudf/cudf/_lib/cpp/column/column_view.pxd create mode 100644 python/cudf/cudf/_lib/cpp/concatenate.pxd create mode 100644 python/cudf/cudf/_lib/cpp/contiguous_split.pxd create mode 100644 python/cudf/cudf/_lib/cpp/copying.pxd create mode 100644 python/cudf/cudf/_lib/cpp/copying.pyx create mode 100644 python/cudf/cudf/_lib/cpp/datetime.pxd create mode 100644 python/cudf/cudf/_lib/cpp/expressions.pxd create mode 100644 python/cudf/cudf/_lib/cpp/filling.pxd create mode 100644 python/cudf/cudf/_lib/cpp/groupby.pxd create mode 100644 python/cudf/cudf/_lib/cpp/hash.pxd create mode 100644 python/cudf/cudf/_lib/cpp/interop.pxd create mode 100644 python/cudf/cudf/_lib/cpp/io/__init__.pxd create mode 100644 python/cudf/cudf/_lib/cpp/io/__init__.py create mode 100644 python/cudf/cudf/_lib/cpp/io/arrow_io_source.pxd create mode 100644 python/cudf/cudf/_lib/cpp/io/avro.pxd create mode 100644 python/cudf/cudf/_lib/cpp/io/csv.pxd create mode 100644 python/cudf/cudf/_lib/cpp/io/data_sink.pxd create mode 100644 python/cudf/cudf/_lib/cpp/io/datasource.pxd create mode 
100644 python/cudf/cudf/_lib/cpp/io/json.pxd create mode 100644 python/cudf/cudf/_lib/cpp/io/orc.pxd create mode 100644 python/cudf/cudf/_lib/cpp/io/orc_metadata.pxd create mode 100644 python/cudf/cudf/_lib/cpp/io/parquet.pxd create mode 100644 python/cudf/cudf/_lib/cpp/io/text.pxd create mode 100644 python/cudf/cudf/_lib/cpp/io/timezone.pxd create mode 100644 python/cudf/cudf/_lib/cpp/io/types.pxd create mode 100644 python/cudf/cudf/_lib/cpp/join.pxd create mode 100644 python/cudf/cudf/_lib/cpp/labeling.pxd create mode 100644 python/cudf/cudf/_lib/cpp/libcpp/__init__.pxd create mode 100644 python/cudf/cudf/_lib/cpp/libcpp/__init__.py create mode 100644 python/cudf/cudf/_lib/cpp/libcpp/functional.pxd create mode 100644 python/cudf/cudf/_lib/cpp/libcpp/memory.pxd create mode 100644 python/cudf/cudf/_lib/cpp/libcpp/optional.pxd create mode 100644 python/cudf/cudf/_lib/cpp/lists/__init__.pxd create mode 100644 python/cudf/cudf/_lib/cpp/lists/__init__.py create mode 100644 python/cudf/cudf/_lib/cpp/lists/combine.pxd create mode 100644 python/cudf/cudf/_lib/cpp/lists/contains.pxd create mode 100644 python/cudf/cudf/_lib/cpp/lists/count_elements.pxd create mode 100644 python/cudf/cudf/_lib/cpp/lists/explode.pxd create mode 100644 python/cudf/cudf/_lib/cpp/lists/extract.pxd create mode 100644 python/cudf/cudf/_lib/cpp/lists/gather.pxd create mode 100644 python/cudf/cudf/_lib/cpp/lists/lists_column_view.pxd create mode 100644 python/cudf/cudf/_lib/cpp/lists/sorting.pxd create mode 100644 python/cudf/cudf/_lib/cpp/lists/stream_compaction.pxd create mode 100644 python/cudf/cudf/_lib/cpp/merge.pxd create mode 100644 python/cudf/cudf/_lib/cpp/null_mask.pxd create mode 100644 python/cudf/cudf/_lib/cpp/nvtext/__init__.pxd create mode 100644 python/cudf/cudf/_lib/cpp/nvtext/__init__.py create mode 100644 python/cudf/cudf/_lib/cpp/nvtext/edit_distance.pxd create mode 100644 python/cudf/cudf/_lib/cpp/nvtext/generate_ngrams.pxd create mode 100644 
python/cudf/cudf/_lib/cpp/nvtext/jaccard.pxd create mode 100644 python/cudf/cudf/_lib/cpp/nvtext/minhash.pxd create mode 100644 python/cudf/cudf/_lib/cpp/nvtext/ngrams_tokenize.pxd create mode 100644 python/cudf/cudf/_lib/cpp/nvtext/normalize.pxd create mode 100644 python/cudf/cudf/_lib/cpp/nvtext/replace.pxd create mode 100644 python/cudf/cudf/_lib/cpp/nvtext/stemmer.pxd create mode 100644 python/cudf/cudf/_lib/cpp/nvtext/subword_tokenize.pxd create mode 100644 python/cudf/cudf/_lib/cpp/nvtext/tokenize.pxd create mode 100644 python/cudf/cudf/_lib/cpp/partitioning.pxd create mode 100644 python/cudf/cudf/_lib/cpp/quantiles.pxd create mode 100644 python/cudf/cudf/_lib/cpp/reduce.pxd create mode 100644 python/cudf/cudf/_lib/cpp/replace.pxd create mode 100644 python/cudf/cudf/_lib/cpp/reshape.pxd create mode 100644 python/cudf/cudf/_lib/cpp/rolling.pxd create mode 100644 python/cudf/cudf/_lib/cpp/round.pxd create mode 100644 python/cudf/cudf/_lib/cpp/scalar/__init__.pxd create mode 100644 python/cudf/cudf/_lib/cpp/scalar/__init__.py create mode 100644 python/cudf/cudf/_lib/cpp/scalar/scalar.pxd create mode 100644 python/cudf/cudf/_lib/cpp/search.pxd create mode 100644 python/cudf/cudf/_lib/cpp/sorting.pxd create mode 100644 python/cudf/cudf/_lib/cpp/stream_compaction.pxd create mode 100644 python/cudf/cudf/_lib/cpp/strings/__init__.pxd create mode 100644 python/cudf/cudf/_lib/cpp/strings/__init__.py create mode 100644 python/cudf/cudf/_lib/cpp/strings/attributes.pxd create mode 100644 python/cudf/cudf/_lib/cpp/strings/capitalize.pxd create mode 100644 python/cudf/cudf/_lib/cpp/strings/case.pxd create mode 100644 python/cudf/cudf/_lib/cpp/strings/char_types.pxd create mode 100644 python/cudf/cudf/_lib/cpp/strings/combine.pxd create mode 100644 python/cudf/cudf/_lib/cpp/strings/contains.pxd create mode 100644 python/cudf/cudf/_lib/cpp/strings/convert/__init__.pxd create mode 100644 python/cudf/cudf/_lib/cpp/strings/convert/__init__.py create mode 100644 
python/cudf/cudf/_lib/cpp/strings/convert/convert_booleans.pxd create mode 100644 python/cudf/cudf/_lib/cpp/strings/convert/convert_datetime.pxd create mode 100644 python/cudf/cudf/_lib/cpp/strings/convert/convert_durations.pxd create mode 100644 python/cudf/cudf/_lib/cpp/strings/convert/convert_fixed_point.pxd create mode 100644 python/cudf/cudf/_lib/cpp/strings/convert/convert_floats.pxd create mode 100644 python/cudf/cudf/_lib/cpp/strings/convert/convert_integers.pxd create mode 100644 python/cudf/cudf/_lib/cpp/strings/convert/convert_ipv4.pxd create mode 100644 python/cudf/cudf/_lib/cpp/strings/convert/convert_lists.pxd create mode 100644 python/cudf/cudf/_lib/cpp/strings/convert/convert_urls.pxd create mode 100644 python/cudf/cudf/_lib/cpp/strings/extract.pxd create mode 100644 python/cudf/cudf/_lib/cpp/strings/find.pxd create mode 100644 python/cudf/cudf/_lib/cpp/strings/find_multiple.pxd create mode 100644 python/cudf/cudf/_lib/cpp/strings/findall.pxd create mode 100644 python/cudf/cudf/_lib/cpp/strings/json.pxd create mode 100644 python/cudf/cudf/_lib/cpp/strings/padding.pxd create mode 100644 python/cudf/cudf/_lib/cpp/strings/regex_flags.pxd create mode 100644 python/cudf/cudf/_lib/cpp/strings/regex_program.pxd create mode 100644 python/cudf/cudf/_lib/cpp/strings/repeat.pxd create mode 100644 python/cudf/cudf/_lib/cpp/strings/replace.pxd create mode 100644 python/cudf/cudf/_lib/cpp/strings/replace_re.pxd create mode 100644 python/cudf/cudf/_lib/cpp/strings/side_type.pxd create mode 100644 python/cudf/cudf/_lib/cpp/strings/split/__init__.pxd create mode 100644 python/cudf/cudf/_lib/cpp/strings/split/__init__.py create mode 100644 python/cudf/cudf/_lib/cpp/strings/split/partition.pxd create mode 100644 python/cudf/cudf/_lib/cpp/strings/split/split.pxd create mode 100644 python/cudf/cudf/_lib/cpp/strings/strip.pxd create mode 100644 python/cudf/cudf/_lib/cpp/strings/substring.pxd create mode 100644 python/cudf/cudf/_lib/cpp/strings/translate.pxd create mode 
100644 python/cudf/cudf/_lib/cpp/strings/wrap.pxd create mode 100644 python/cudf/cudf/_lib/cpp/strings_udf.pxd create mode 100644 python/cudf/cudf/_lib/cpp/table/__init__.pxd create mode 100644 python/cudf/cudf/_lib/cpp/table/__init__.py create mode 100644 python/cudf/cudf/_lib/cpp/table/table.pxd create mode 100644 python/cudf/cudf/_lib/cpp/table/table_view.pxd create mode 100644 python/cudf/cudf/_lib/cpp/transform.pxd create mode 100644 python/cudf/cudf/_lib/cpp/transpose.pxd create mode 100644 python/cudf/cudf/_lib/cpp/types.pxd create mode 100644 python/cudf/cudf/_lib/cpp/types.pyx create mode 100644 python/cudf/cudf/_lib/cpp/unary.pxd create mode 100644 python/cudf/cudf/_lib/cpp/utilities/__init__.pxd create mode 100644 python/cudf/cudf/_lib/cpp/utilities/__init__.py create mode 100644 python/cudf/cudf/_lib/cpp/utilities/host_span.pxd create mode 100644 python/cudf/cudf/_lib/cpp/wrappers/__init__.pxd create mode 100644 python/cudf/cudf/_lib/cpp/wrappers/__init__.py create mode 100644 python/cudf/cudf/_lib/cpp/wrappers/decimals.pxd create mode 100644 python/cudf/cudf/_lib/cpp/wrappers/durations.pxd create mode 100644 python/cudf/cudf/_lib/cpp/wrappers/timestamps.pxd create mode 100644 python/cudf/cudf/_lib/csv.pyx create mode 100644 python/cudf/cudf/_lib/datetime.pyx create mode 100644 python/cudf/cudf/_lib/exception_handler.pxd create mode 100644 python/cudf/cudf/_lib/expressions.pxd create mode 100644 python/cudf/cudf/_lib/expressions.pyx create mode 100644 python/cudf/cudf/_lib/filling.pyx create mode 100644 python/cudf/cudf/_lib/groupby.pyx create mode 100644 python/cudf/cudf/_lib/hash.pyx create mode 100644 python/cudf/cudf/_lib/interop.pyx create mode 100644 python/cudf/cudf/_lib/io/CMakeLists.txt create mode 100644 python/cudf/cudf/_lib/io/__init__.pxd create mode 100644 python/cudf/cudf/_lib/io/__init__.py create mode 100644 python/cudf/cudf/_lib/io/datasource.pxd create mode 100644 python/cudf/cudf/_lib/io/datasource.pyx create mode 100644 
python/cudf/cudf/_lib/io/utils.pxd create mode 100644 python/cudf/cudf/_lib/io/utils.pyx create mode 100644 python/cudf/cudf/_lib/join.pyx create mode 100644 python/cudf/cudf/_lib/json.pyx create mode 100644 python/cudf/cudf/_lib/labeling.pyx create mode 100644 python/cudf/cudf/_lib/lists.pyx create mode 100644 python/cudf/cudf/_lib/merge.pyx create mode 100644 python/cudf/cudf/_lib/null_mask.pyx create mode 100644 python/cudf/cudf/_lib/nvtext/CMakeLists.txt create mode 100644 python/cudf/cudf/_lib/nvtext/__init__.pxd create mode 100644 python/cudf/cudf/_lib/nvtext/__init__.py create mode 100644 python/cudf/cudf/_lib/nvtext/edit_distance.pyx create mode 100644 python/cudf/cudf/_lib/nvtext/generate_ngrams.pyx create mode 100644 python/cudf/cudf/_lib/nvtext/jaccard.pyx create mode 100644 python/cudf/cudf/_lib/nvtext/minhash.pyx create mode 100644 python/cudf/cudf/_lib/nvtext/ngrams_tokenize.pyx create mode 100644 python/cudf/cudf/_lib/nvtext/normalize.pyx create mode 100644 python/cudf/cudf/_lib/nvtext/replace.pyx create mode 100644 python/cudf/cudf/_lib/nvtext/stemmer.pyx create mode 100644 python/cudf/cudf/_lib/nvtext/subword_tokenize.pyx create mode 100644 python/cudf/cudf/_lib/nvtext/tokenize.pyx create mode 100644 python/cudf/cudf/_lib/orc.pyx create mode 100644 python/cudf/cudf/_lib/parquet.pyx create mode 100644 python/cudf/cudf/_lib/partitioning.pyx create mode 100644 python/cudf/cudf/_lib/pylibcudf/CMakeLists.txt create mode 100644 python/cudf/cudf/_lib/pylibcudf/__init__.pxd create mode 100644 python/cudf/cudf/_lib/pylibcudf/__init__.py create mode 100644 python/cudf/cudf/_lib/pylibcudf/column.pxd create mode 100644 python/cudf/cudf/_lib/pylibcudf/column.pyx create mode 100644 python/cudf/cudf/_lib/pylibcudf/copying.pxd create mode 100644 python/cudf/cudf/_lib/pylibcudf/copying.pyx create mode 100644 python/cudf/cudf/_lib/pylibcudf/gpumemoryview.pxd create mode 100644 python/cudf/cudf/_lib/pylibcudf/gpumemoryview.pyx create mode 100644 
python/cudf/cudf/_lib/pylibcudf/table.pxd create mode 100644 python/cudf/cudf/_lib/pylibcudf/table.pyx create mode 100644 python/cudf/cudf/_lib/pylibcudf/types.pxd create mode 100644 python/cudf/cudf/_lib/pylibcudf/types.pyx create mode 100644 python/cudf/cudf/_lib/pylibcudf/utils.pxd create mode 100644 python/cudf/cudf/_lib/pylibcudf/utils.pyx create mode 100644 python/cudf/cudf/_lib/quantiles.pyx create mode 100644 python/cudf/cudf/_lib/reduce.pyx create mode 100644 python/cudf/cudf/_lib/replace.pyx create mode 100644 python/cudf/cudf/_lib/reshape.pyx create mode 100644 python/cudf/cudf/_lib/rolling.pyx create mode 100644 python/cudf/cudf/_lib/round.pyx create mode 100644 python/cudf/cudf/_lib/scalar.pxd create mode 100644 python/cudf/cudf/_lib/scalar.pyx create mode 100644 python/cudf/cudf/_lib/search.pyx create mode 100644 python/cudf/cudf/_lib/sort.pyx create mode 100644 python/cudf/cudf/_lib/stream_compaction.pyx create mode 100644 python/cudf/cudf/_lib/string_casting.pyx create mode 100644 python/cudf/cudf/_lib/strings/CMakeLists.txt create mode 100644 python/cudf/cudf/_lib/strings/__init__.pxd create mode 100644 python/cudf/cudf/_lib/strings/__init__.py create mode 100644 python/cudf/cudf/_lib/strings/attributes.pyx create mode 100644 python/cudf/cudf/_lib/strings/capitalize.pyx create mode 100644 python/cudf/cudf/_lib/strings/case.pyx create mode 100644 python/cudf/cudf/_lib/strings/char_types.pyx create mode 100644 python/cudf/cudf/_lib/strings/combine.pyx create mode 100644 python/cudf/cudf/_lib/strings/contains.pyx create mode 100644 python/cudf/cudf/_lib/strings/convert/CMakeLists.txt create mode 100644 python/cudf/cudf/_lib/strings/convert/__init__.pxd create mode 100644 python/cudf/cudf/_lib/strings/convert/__init__.py create mode 100644 python/cudf/cudf/_lib/strings/convert/convert_fixed_point.pyx create mode 100644 python/cudf/cudf/_lib/strings/convert/convert_floats.pyx create mode 100644 python/cudf/cudf/_lib/strings/convert/convert_integers.pyx 
create mode 100644 python/cudf/cudf/_lib/strings/convert/convert_lists.pyx create mode 100644 python/cudf/cudf/_lib/strings/convert/convert_urls.pyx create mode 100644 python/cudf/cudf/_lib/strings/extract.pyx create mode 100644 python/cudf/cudf/_lib/strings/find.pyx create mode 100644 python/cudf/cudf/_lib/strings/find_multiple.pyx create mode 100644 python/cudf/cudf/_lib/strings/findall.pyx create mode 100644 python/cudf/cudf/_lib/strings/json.pyx create mode 100644 python/cudf/cudf/_lib/strings/padding.pyx create mode 100644 python/cudf/cudf/_lib/strings/repeat.pyx create mode 100644 python/cudf/cudf/_lib/strings/replace.pyx create mode 100644 python/cudf/cudf/_lib/strings/replace_re.pyx create mode 100644 python/cudf/cudf/_lib/strings/split/CMakeLists.txt create mode 100644 python/cudf/cudf/_lib/strings/split/__init__.pxd create mode 100644 python/cudf/cudf/_lib/strings/split/__init__.py create mode 100644 python/cudf/cudf/_lib/strings/split/partition.pyx create mode 100644 python/cudf/cudf/_lib/strings/split/split.pyx create mode 100644 python/cudf/cudf/_lib/strings/strip.pyx create mode 100644 python/cudf/cudf/_lib/strings/substring.pyx create mode 100644 python/cudf/cudf/_lib/strings/translate.pyx create mode 100644 python/cudf/cudf/_lib/strings/wrap.pyx create mode 100644 python/cudf/cudf/_lib/strings_udf.pyx create mode 100644 python/cudf/cudf/_lib/text.pyx create mode 100644 python/cudf/cudf/_lib/timezone.pyx create mode 100644 python/cudf/cudf/_lib/transform.pyx create mode 100644 python/cudf/cudf/_lib/transpose.pyx create mode 100644 python/cudf/cudf/_lib/types.pxd create mode 100644 python/cudf/cudf/_lib/types.pyx create mode 100644 python/cudf/cudf/_lib/unary.pyx create mode 100644 python/cudf/cudf/_lib/utils.pxd create mode 100644 python/cudf/cudf/_lib/utils.pyx create mode 100644 python/cudf/cudf/_typing.py create mode 100644 python/cudf/cudf/api/__init__.py create mode 100644 python/cudf/cudf/api/extensions/__init__.py create mode 100644 
python/cudf/cudf/api/extensions/accessor.py create mode 100644 python/cudf/cudf/api/types.py create mode 100644 python/cudf/cudf/benchmarks/README.md create mode 100644 python/cudf/cudf/benchmarks/bench_cudf_io.py create mode 100644 python/cudf/cudf/benchmarks/conftest.py create mode 100644 python/cudf/cudf/benchmarks/get_datasets.py create mode 100644 python/cudf/cudf/comm/__init__.py create mode 100644 python/cudf/cudf/comm/serialize.py create mode 100644 python/cudf/cudf/core/__init__.py create mode 100644 python/cudf/cudf/core/_base_index.py create mode 100644 python/cudf/cudf/core/_compat.py create mode 100644 python/cudf/cudf/core/_internals/__init__.py create mode 100644 python/cudf/cudf/core/_internals/expressions.py create mode 100644 python/cudf/cudf/core/_internals/timezones.py create mode 100644 python/cudf/cudf/core/_internals/where.py create mode 100644 python/cudf/cudf/core/abc.py create mode 100644 python/cudf/cudf/core/algorithms.py create mode 100644 python/cudf/cudf/core/buffer/__init__.py create mode 100644 python/cudf/cudf/core/buffer/buffer.py create mode 100644 python/cudf/cudf/core/buffer/exposure_tracked_buffer.py create mode 100644 python/cudf/cudf/core/buffer/spill_manager.py create mode 100644 python/cudf/cudf/core/buffer/spillable_buffer.py create mode 100644 python/cudf/cudf/core/buffer/utils.py create mode 100644 python/cudf/cudf/core/column/__init__.py create mode 100644 python/cudf/cudf/core/column/categorical.py create mode 100644 python/cudf/cudf/core/column/column.py create mode 100644 python/cudf/cudf/core/column/datetime.py create mode 100644 python/cudf/cudf/core/column/decimal.py create mode 100644 python/cudf/cudf/core/column/interval.py create mode 100644 python/cudf/cudf/core/column/lists.py create mode 100644 python/cudf/cudf/core/column/methods.py create mode 100644 python/cudf/cudf/core/column/numerical.py create mode 100644 python/cudf/cudf/core/column/numerical_base.py create mode 100644 
python/cudf/cudf/core/column/string.py create mode 100644 python/cudf/cudf/core/column/struct.py create mode 100644 python/cudf/cudf/core/column/timedelta.py create mode 100644 python/cudf/cudf/core/column_accessor.py create mode 100644 python/cudf/cudf/core/common.py create mode 100644 python/cudf/cudf/core/copy_types.py create mode 100644 python/cudf/cudf/core/cut.py create mode 100644 python/cudf/cudf/core/dataframe.py create mode 100644 python/cudf/cudf/core/df_protocol.py create mode 100644 python/cudf/cudf/core/dtypes.py create mode 100644 python/cudf/cudf/core/frame.py create mode 100644 python/cudf/cudf/core/groupby/__init__.py create mode 100644 python/cudf/cudf/core/groupby/groupby.py create mode 100644 python/cudf/cudf/core/index.py create mode 100644 python/cudf/cudf/core/indexed_frame.py create mode 100644 python/cudf/cudf/core/indexing_utils.py create mode 100644 python/cudf/cudf/core/join/__init__.py create mode 100644 python/cudf/cudf/core/join/_join_helpers.py create mode 100644 python/cudf/cudf/core/join/join.py create mode 100644 python/cudf/cudf/core/missing.py create mode 100644 python/cudf/cudf/core/mixins/__init__.py create mode 100644 python/cudf/cudf/core/mixins/binops.py create mode 100644 python/cudf/cudf/core/mixins/binops.pyi create mode 100644 python/cudf/cudf/core/mixins/mixin_factory.py create mode 100644 python/cudf/cudf/core/mixins/reductions.py create mode 100644 python/cudf/cudf/core/mixins/reductions.pyi create mode 100644 python/cudf/cudf/core/mixins/scans.py create mode 100644 python/cudf/cudf/core/mixins/scans.pyi create mode 100644 python/cudf/cudf/core/multiindex.py create mode 100644 python/cudf/cudf/core/resample.py create mode 100644 python/cudf/cudf/core/reshape.py create mode 100644 python/cudf/cudf/core/scalar.py create mode 100644 python/cudf/cudf/core/series.py create mode 100644 python/cudf/cudf/core/single_column_frame.py create mode 100644 python/cudf/cudf/core/subword_tokenizer.py create mode 100644 
python/cudf/cudf/core/tokenize_vocabulary.py create mode 100644 python/cudf/cudf/core/tools/__init__.py create mode 100644 python/cudf/cudf/core/tools/datetimes.py create mode 100644 python/cudf/cudf/core/tools/numeric.py create mode 100644 python/cudf/cudf/core/udf/__init__.py create mode 100644 python/cudf/cudf/core/udf/_ops.py create mode 100644 python/cudf/cudf/core/udf/api.py create mode 100644 python/cudf/cudf/core/udf/groupby_lowering.py create mode 100644 python/cudf/cudf/core/udf/groupby_typing.py create mode 100644 python/cudf/cudf/core/udf/groupby_utils.py create mode 100644 python/cudf/cudf/core/udf/masked_lowering.py create mode 100644 python/cudf/cudf/core/udf/masked_typing.py create mode 100644 python/cudf/cudf/core/udf/row_function.py create mode 100644 python/cudf/cudf/core/udf/scalar_function.py create mode 100644 python/cudf/cudf/core/udf/strings_lowering.py create mode 100644 python/cudf/cudf/core/udf/strings_typing.py create mode 100644 python/cudf/cudf/core/udf/strings_utils.py create mode 100644 python/cudf/cudf/core/udf/templates.py create mode 100644 python/cudf/cudf/core/udf/utils.py create mode 100644 python/cudf/cudf/core/window/__init__.py create mode 100644 python/cudf/cudf/core/window/rolling.py create mode 100644 python/cudf/cudf/datasets.py create mode 100644 python/cudf/cudf/errors.py create mode 100644 python/cudf/cudf/io/__init__.py create mode 100644 python/cudf/cudf/io/avro.py create mode 100644 python/cudf/cudf/io/csv.py create mode 100644 python/cudf/cudf/io/dlpack.py create mode 100644 python/cudf/cudf/io/feather.py create mode 100644 python/cudf/cudf/io/hdf.py create mode 100644 python/cudf/cudf/io/json.py create mode 100644 python/cudf/cudf/io/orc.py create mode 100644 python/cudf/cudf/io/parquet.py create mode 100644 python/cudf/cudf/io/text.py create mode 100644 python/cudf/cudf/options.py create mode 100644 python/cudf/cudf/pandas/__init__.py create mode 100644 python/cudf/cudf/pandas/__main__.py create mode 100644 
python/cudf/cudf/pandas/_wrappers/__init__.py create mode 100644 python/cudf/cudf/pandas/_wrappers/common.py create mode 100644 python/cudf/cudf/pandas/_wrappers/numpy.py create mode 100644 python/cudf/cudf/pandas/_wrappers/pandas.py create mode 100644 python/cudf/cudf/pandas/annotation.py create mode 100644 python/cudf/cudf/pandas/fast_slow_proxy.py create mode 100644 python/cudf/cudf/pandas/magics.py create mode 100644 python/cudf/cudf/pandas/module_accelerator.py create mode 100644 python/cudf/cudf/pandas/profiler.py create mode 100644 python/cudf/cudf/pandas/scripts/analyze-test-failures.py create mode 100644 python/cudf/cudf/pandas/scripts/conftest-patch.py create mode 100755 python/cudf/cudf/pandas/scripts/run-pandas-tests.sh create mode 100644 python/cudf/cudf/pandas/scripts/summarize-test-results.py create mode 100644 python/cudf/cudf/testing/__init__.py create mode 100644 python/cudf/cudf/testing/_utils.py create mode 100644 python/cudf/cudf/testing/dataset_generator.py create mode 100644 python/cudf/cudf/testing/testing.py create mode 100644 python/cudf/cudf/tests/conftest.py create mode 100644 python/cudf/cudf/tests/data/__init__.py create mode 100644 python/cudf/cudf/tests/data/avro/__init__.py create mode 100644 python/cudf/cudf/tests/data/avro/example.avro create mode 100644 python/cudf/cudf/tests/data/ipums.pkl create mode 100644 python/cudf/cudf/tests/data/orc/TestOrcFile.Hive.AllNulls.orc create mode 100644 python/cudf/cudf/tests/data/orc/TestOrcFile.Hive.EmptyListStripe.orc create mode 100644 python/cudf/cudf/tests/data/orc/TestOrcFile.Hive.NullStructStripe.orc create mode 100644 python/cudf/cudf/tests/data/orc/TestOrcFile.Hive.OneEmptyList.orc create mode 100644 python/cudf/cudf/tests/data/orc/TestOrcFile.Hive.OneEmptyMap.orc create mode 100644 python/cudf/cudf/tests/data/orc/TestOrcFile.Hive.OneNullStruct.orc create mode 100644 python/cudf/cudf/tests/data/orc/TestOrcFile.NestedStructDataFrame.orc create mode 100644 
python/cudf/cudf/tests/data/orc/TestOrcFile.NoIndStrm.IntWithNulls.orc create mode 100644 python/cudf/cudf/tests/data/orc/TestOrcFile.NoIndStrm.StructAndIntWithNulls.TwoStripes.orc create mode 100644 python/cudf/cudf/tests/data/orc/TestOrcFile.NoIndStrm.StructAndIntWithNulls.orc create mode 100644 python/cudf/cudf/tests/data/orc/TestOrcFile.NoIndStrm.StructWithNoNulls.orc create mode 100644 python/cudf/cudf/tests/data/orc/TestOrcFile.RLEv2.orc create mode 100644 python/cudf/cudf/tests/data/orc/TestOrcFile.Spark.EmptyDecompData.orc create mode 100644 python/cudf/cudf/tests/data/orc/TestOrcFile.Spark.NestedNotNullableStruct.orc create mode 100644 python/cudf/cudf/tests/data/orc/TestOrcFile.apache_timestamp.orc create mode 100644 python/cudf/cudf/tests/data/orc/TestOrcFile.boolean_corruption_PR_6636.orc create mode 100644 python/cudf/cudf/tests/data/orc/TestOrcFile.boolean_corruption_PR_6702.orc create mode 100644 python/cudf/cudf/tests/data/orc/TestOrcFile.decimal.multiple.values.orc create mode 100644 python/cudf/cudf/tests/data/orc/TestOrcFile.decimal.orc create mode 100644 python/cudf/cudf/tests/data/orc/TestOrcFile.decimal.runpos.issue.orc create mode 100644 python/cudf/cudf/tests/data/orc/TestOrcFile.decimal.same.values.orc create mode 100644 python/cudf/cudf/tests/data/orc/TestOrcFile.demo-12-zlib.orc create mode 100644 python/cudf/cudf/tests/data/orc/TestOrcFile.emptyFile.orc create mode 100644 python/cudf/cudf/tests/data/orc/TestOrcFile.gmt.orc create mode 100644 python/cudf/cudf/tests/data/orc/TestOrcFile.int16.rle.size.orc create mode 100644 python/cudf/cudf/tests/data/orc/TestOrcFile.int_decimal.precision_19.orc create mode 100644 python/cudf/cudf/tests/data/orc/TestOrcFile.largeTimestamps.orc create mode 100644 python/cudf/cudf/tests/data/orc/TestOrcFile.lima_timezone.orc create mode 100644 python/cudf/cudf/tests/data/orc/TestOrcFile.nulls-at-end-snappy.orc create mode 100644 python/cudf/cudf/tests/data/orc/TestOrcFile.test1.orc create mode 100644 
python/cudf/cudf/tests/data/orc/TestOrcFile.testDate1900.orc create mode 100644 python/cudf/cudf/tests/data/orc/TestOrcFile.testDate2038.orc create mode 100644 python/cudf/cudf/tests/data/orc/TestOrcFile.testPySparkStruct.orc create mode 100644 python/cudf/cudf/tests/data/orc/TestOrcFile.testSnappy.orc create mode 100644 python/cudf/cudf/tests/data/orc/TestOrcFile.testStringAndBinaryStatistics.orc create mode 100644 python/cudf/cudf/tests/data/orc/TestOrcFile.testStripeLevelStats.orc create mode 100644 python/cudf/cudf/tests/data/orc/TestOrcFile.timestamp.issue.orc create mode 100644 python/cudf/cudf/tests/data/orc/__init__.py create mode 100644 python/cudf/cudf/tests/data/orc/nodata.orc create mode 100644 python/cudf/cudf/tests/data/orc/uncompressed_snappy.orc create mode 100644 python/cudf/cudf/tests/data/parquet/__init__.py create mode 100644 python/cudf/cudf/tests/data/parquet/binary_decimal.parquet create mode 100644 python/cudf/cudf/tests/data/parquet/brotli_int16.parquet create mode 100644 python/cudf/cudf/tests/data/parquet/delta_encoding.parquet create mode 100644 python/cudf/cudf/tests/data/parquet/fixed_len_byte_array.parquet create mode 100644 python/cudf/cudf/tests/data/parquet/mixed_compression.parquet create mode 100644 python/cudf/cudf/tests/data/parquet/nested-unsigned-malformed.parquet create mode 100644 python/cudf/cudf/tests/data/parquet/nested_column_map.parquet create mode 100644 python/cudf/cudf/tests/data/parquet/nested_decimal128_file.parquet create mode 100644 python/cudf/cudf/tests/data/parquet/one_level_list.parquet create mode 100644 python/cudf/cudf/tests/data/parquet/one_level_list2.parquet create mode 100644 python/cudf/cudf/tests/data/parquet/one_level_list3.parquet create mode 100644 python/cudf/cudf/tests/data/parquet/rle_boolean_encoding.parquet create mode 100644 python/cudf/cudf/tests/data/parquet/spark_decimal.parquet create mode 100644 python/cudf/cudf/tests/data/parquet/spark_timestamp.snappy.parquet create mode 100644 
python/cudf/cudf/tests/data/parquet/spark_zstd.parquet create mode 100644 python/cudf/cudf/tests/data/parquet/trailing_nans.parquet create mode 100644 python/cudf/cudf/tests/data/parquet/usec_timestamp.parquet create mode 100644 python/cudf/cudf/tests/data/pkl/__init__.py create mode 100644 python/cudf/cudf/tests/data/pkl/stringColumnWithRangeIndex_cudf_0.16.pkl create mode 100644 python/cudf/cudf/tests/data/sas/cars.sas7bdat create mode 100644 python/cudf/cudf/tests/data/subword_tokenizer_data/__init__.py create mode 100644 python/cudf/cudf/tests/data/subword_tokenizer_data/bert_base_cased_sampled/__init__.py create mode 100644 python/cudf/cudf/tests/data/subword_tokenizer_data/bert_base_cased_sampled/vocab-hash.txt create mode 100644 python/cudf/cudf/tests/data/subword_tokenizer_data/bert_base_cased_sampled/vocab.txt create mode 100644 python/cudf/cudf/tests/data/subword_tokenizer_data/test_sentences.txt create mode 100644 python/cudf/cudf/tests/data/text/__init__.py create mode 100644 python/cudf/cudf/tests/data/text/chess.pgn create mode 100644 python/cudf/cudf/tests/data/text/chess.pgn.gz create mode 100644 python/cudf/cudf/tests/dataframe/__init__.py create mode 100644 python/cudf/cudf/tests/dataframe/test_attributes.py create mode 100644 python/cudf/cudf/tests/dataframe/test_binary_operations.py create mode 100644 python/cudf/cudf/tests/dataframe/test_combining.py create mode 100644 python/cudf/cudf/tests/dataframe/test_computation.py create mode 100644 python/cudf/cudf/tests/dataframe/test_constructing.py create mode 100644 python/cudf/cudf/tests/dataframe/test_conversion.py create mode 100644 python/cudf/cudf/tests/dataframe/test_function_application.py create mode 100644 python/cudf/cudf/tests/dataframe/test_indexing.py create mode 100644 python/cudf/cudf/tests/dataframe/test_io_serialization.py create mode 100644 python/cudf/cudf/tests/dataframe/test_missing.py create mode 100644 python/cudf/cudf/tests/dataframe/test_reindexing.py create mode 100644 
python/cudf/cudf/tests/dataframe/test_reshaping.py create mode 100644 python/cudf/cudf/tests/dataframe/test_selecting.py create mode 100644 python/cudf/cudf/tests/dataframe/test_sorting.py create mode 100644 python/cudf/cudf/tests/dataframe/test_timeseries.py create mode 100644 python/cudf/cudf/tests/general_functions/__init__.py create mode 100644 python/cudf/cudf/tests/general_functions/test_conversion.py create mode 100644 python/cudf/cudf/tests/general_functions/test_data_manipulation.py create mode 100644 python/cudf/cudf/tests/general_functions/test_datetimelike.py create mode 100644 python/cudf/cudf/tests/general_utilities/__init__.py create mode 100644 python/cudf/cudf/tests/general_utilities/test_testing.py create mode 100644 python/cudf/cudf/tests/groupby/__init__.py create mode 100644 python/cudf/cudf/tests/groupby/test_computation.py create mode 100644 python/cudf/cudf/tests/groupby/test_function_application.py create mode 100644 python/cudf/cudf/tests/groupby/test_indexing.py create mode 100644 python/cudf/cudf/tests/groupby/test_stats.py create mode 100644 python/cudf/cudf/tests/indexes/__init__.py create mode 100644 python/cudf/cudf/tests/indexes/datetime/__init__.py create mode 100644 python/cudf/cudf/tests/indexes/datetime/test_components.py create mode 100644 python/cudf/cudf/tests/indexes/datetime/test_constructing.py create mode 100644 python/cudf/cudf/tests/indexes/datetime/test_conversion.py create mode 100644 python/cudf/cudf/tests/indexes/datetime/test_indexing.py create mode 100644 python/cudf/cudf/tests/indexes/datetime/test_time_specific.py create mode 100644 python/cudf/cudf/tests/indexes/multiindex/__init__.py create mode 100644 python/cudf/cudf/tests/indexes/multiindex/test_constructing.py create mode 100644 python/cudf/cudf/tests/indexes/multiindex/test_properties.py create mode 100644 python/cudf/cudf/tests/indexes/multiindex/test_selecting.py create mode 100644 python/cudf/cudf/tests/indexes/test_categorical.py create mode 100644 
python/cudf/cudf/tests/indexes/test_combining.py create mode 100644 python/cudf/cudf/tests/indexes/test_computation.py create mode 100644 python/cudf/cudf/tests/indexes/test_constructing.py create mode 100644 python/cudf/cudf/tests/indexes/test_conversion.py create mode 100644 python/cudf/cudf/tests/indexes/test_interval.py create mode 100644 python/cudf/cudf/tests/indexes/test_memory_usage.py create mode 100644 python/cudf/cudf/tests/indexes/test_missing.py create mode 100644 python/cudf/cudf/tests/indexes/test_modifying.py create mode 100644 python/cudf/cudf/tests/indexes/test_multiindex_compat.py create mode 100644 python/cudf/cudf/tests/indexes/test_numeric.py create mode 100644 python/cudf/cudf/tests/indexes/test_properties.py create mode 100644 python/cudf/cudf/tests/indexes/test_selecting.py create mode 100644 python/cudf/cudf/tests/indexes/test_sorting.py create mode 100644 python/cudf/cudf/tests/indexes/test_time_specific.py create mode 100644 python/cudf/cudf/tests/indexes/timedelta/__init__.py create mode 100644 python/cudf/cudf/tests/indexes/timedelta/test_components.py create mode 100644 python/cudf/cudf/tests/indexes/timedelta/test_constructing.py create mode 100644 python/cudf/cudf/tests/indexes/timedelta/test_conversion.py create mode 100644 python/cudf/cudf/tests/input_output/__init__.py create mode 100644 python/cudf/cudf/tests/input_output/test_avro.py create mode 100644 python/cudf/cudf/tests/input_output/test_csv.py create mode 100644 python/cudf/cudf/tests/input_output/test_feather.py create mode 100644 python/cudf/cudf/tests/input_output/test_hdf5.py create mode 100644 python/cudf/cudf/tests/input_output/test_json.py create mode 100644 python/cudf/cudf/tests/input_output/test_orc.py create mode 100644 python/cudf/cudf/tests/input_output/test_parquet.py create mode 100644 python/cudf/cudf/tests/input_output/test_text.py create mode 100644 python/cudf/cudf/tests/lists/__init__.py create mode 100644 
python/cudf/cudf/tests/lists/test_list_methods.py create mode 100644 python/cudf/cudf/tests/options/__init__.py create mode 100644 python/cudf/cudf/tests/options/test_options.py create mode 100644 python/cudf/cudf/tests/pytest.ini create mode 100644 python/cudf/cudf/tests/series/__init__.py create mode 100644 python/cudf/cudf/tests/series/test_accessors.py create mode 100644 python/cudf/cudf/tests/series/test_attributes.py create mode 100644 python/cudf/cudf/tests/series/test_binary_operations.py create mode 100644 python/cudf/cudf/tests/series/test_categorial.py create mode 100644 python/cudf/cudf/tests/series/test_combining.py create mode 100644 python/cudf/cudf/tests/series/test_computation.py create mode 100644 python/cudf/cudf/tests/series/test_constructing.py create mode 100644 python/cudf/cudf/tests/series/test_conversion.py create mode 100644 python/cudf/cudf/tests/series/test_datetimelike.py create mode 100644 python/cudf/cudf/tests/series/test_function_application.py create mode 100644 python/cudf/cudf/tests/series/test_indexing.py create mode 100644 python/cudf/cudf/tests/series/test_io_serialization.py create mode 100644 python/cudf/cudf/tests/series/test_missing.py create mode 100644 python/cudf/cudf/tests/series/test_reshaping.py create mode 100644 python/cudf/cudf/tests/series/test_selecting.py create mode 100644 python/cudf/cudf/tests/series/test_sorting.py create mode 100644 python/cudf/cudf/tests/series/test_timeseries.py create mode 100644 python/cudf/cudf/tests/strings/__init__.py create mode 100644 python/cudf/cudf/tests/strings/test_string_methods.py create mode 100644 python/cudf/cudf/tests/structs/__init__.py create mode 100644 python/cudf/cudf/tests/structs/test_struct_methods.py create mode 100644 python/cudf/cudf/tests/test_api_types.py create mode 100644 python/cudf/cudf/tests/test_apply_rows.py create mode 100644 python/cudf/cudf/tests/test_applymap.py create mode 100644 python/cudf/cudf/tests/test_array_function.py create mode 100644 
python/cudf/cudf/tests/test_array_ufunc.py create mode 100644 python/cudf/cudf/tests/test_avro_reader_fastavro_integration.py create mode 100644 python/cudf/cudf/tests/test_binops.py create mode 100644 python/cudf/cudf/tests/test_buffer.py create mode 100644 python/cudf/cudf/tests/test_categorical.py create mode 100644 python/cudf/cudf/tests/test_column.py create mode 100644 python/cudf/cudf/tests/test_column_accessor.py create mode 100644 python/cudf/cudf/tests/test_compile_udf.py create mode 100644 python/cudf/cudf/tests/test_concat.py create mode 100644 python/cudf/cudf/tests/test_contains.py create mode 100644 python/cudf/cudf/tests/test_copying.py create mode 100644 python/cudf/cudf/tests/test_csv.py create mode 100644 python/cudf/cudf/tests/test_cuda_apply.py create mode 100644 python/cudf/cudf/tests/test_cuda_array_interface.py create mode 100644 python/cudf/cudf/tests/test_custom_accessor.py create mode 100644 python/cudf/cudf/tests/test_cut.py create mode 100644 python/cudf/cudf/tests/test_dask.py create mode 100644 python/cudf/cudf/tests/test_dataframe.py create mode 100644 python/cudf/cudf/tests/test_dataframe_copy.py create mode 100644 python/cudf/cudf/tests/test_datasets.py create mode 100644 python/cudf/cudf/tests/test_datetime.py create mode 100644 python/cudf/cudf/tests/test_decimal.py create mode 100644 python/cudf/cudf/tests/test_df_protocol.py create mode 100644 python/cudf/cudf/tests/test_dlpack.py create mode 100644 python/cudf/cudf/tests/test_doctests.py create mode 100644 python/cudf/cudf/tests/test_dropna.py create mode 100644 python/cudf/cudf/tests/test_dtypes.py create mode 100644 python/cudf/cudf/tests/test_duplicates.py create mode 100644 python/cudf/cudf/tests/test_extension_compilation.py create mode 100644 python/cudf/cudf/tests/test_factorize.py create mode 100644 python/cudf/cudf/tests/test_feather.py create mode 100644 python/cudf/cudf/tests/test_gcs.py create mode 100644 python/cudf/cudf/tests/test_groupby.py create mode 100644 
python/cudf/cudf/tests/test_hash_vocab.py create mode 100644 python/cudf/cudf/tests/test_hdf.py create mode 100644 python/cudf/cudf/tests/test_hdfs.py create mode 100644 python/cudf/cudf/tests/test_index.py create mode 100644 python/cudf/cudf/tests/test_indexing.py create mode 100644 python/cudf/cudf/tests/test_interpolate.py create mode 100644 python/cudf/cudf/tests/test_interval.py create mode 100644 python/cudf/cudf/tests/test_joining.py create mode 100644 python/cudf/cudf/tests/test_json.py create mode 100644 python/cudf/cudf/tests/test_list.py create mode 100644 python/cudf/cudf/tests/test_monotonic.py create mode 100644 python/cudf/cudf/tests/test_multiindex.py create mode 100644 python/cudf/cudf/tests/test_no_cuinit.py create mode 100644 python/cudf/cudf/tests/test_numba_import.py create mode 100644 python/cudf/cudf/tests/test_numerical.py create mode 100644 python/cudf/cudf/tests/test_numpy_interop.py create mode 100644 python/cudf/cudf/tests/test_offset.py create mode 100644 python/cudf/cudf/tests/test_onehot.py create mode 100644 python/cudf/cudf/tests/test_options.py create mode 100644 python/cudf/cudf/tests/test_orc.py create mode 100644 python/cudf/cudf/tests/test_pack.py create mode 100644 python/cudf/cudf/tests/test_pandas_interop.py create mode 100644 python/cudf/cudf/tests/test_parquet.py create mode 100644 python/cudf/cudf/tests/test_pickling.py create mode 100644 python/cudf/cudf/tests/test_quantiles.py create mode 100644 python/cudf/cudf/tests/test_query.py create mode 100644 python/cudf/cudf/tests/test_query_mask.py create mode 100644 python/cudf/cudf/tests/test_rank.py create mode 100644 python/cudf/cudf/tests/test_reductions.py create mode 100644 python/cudf/cudf/tests/test_replace.py create mode 100644 python/cudf/cudf/tests/test_repr.py create mode 100644 python/cudf/cudf/tests/test_resampling.py create mode 100644 python/cudf/cudf/tests/test_reshape.py create mode 100644 python/cudf/cudf/tests/test_rolling.py create mode 100644 
python/cudf/cudf/tests/test_s3.py create mode 100644 python/cudf/cudf/tests/test_scalar.py create mode 100644 python/cudf/cudf/tests/test_scan.py create mode 100644 python/cudf/cudf/tests/test_search.py create mode 100644 python/cudf/cudf/tests/test_serialize.py create mode 100644 python/cudf/cudf/tests/test_series.py create mode 100644 python/cudf/cudf/tests/test_seriesmap.py create mode 100644 python/cudf/cudf/tests/test_setitem.py create mode 100644 python/cudf/cudf/tests/test_sorting.py create mode 100644 python/cudf/cudf/tests/test_sparse_df.py create mode 100644 python/cudf/cudf/tests/test_spilling.py create mode 100644 python/cudf/cudf/tests/test_stats.py create mode 100644 python/cudf/cudf/tests/test_string.py create mode 100644 python/cudf/cudf/tests/test_string_udfs.py create mode 100644 python/cudf/cudf/tests/test_struct.py create mode 100644 python/cudf/cudf/tests/test_testing.py create mode 100644 python/cudf/cudf/tests/test_timedelta.py create mode 100644 python/cudf/cudf/tests/test_transform.py create mode 100644 python/cudf/cudf/tests/test_udf_binops.py create mode 100644 python/cudf/cudf/tests/test_udf_masked_ops.py create mode 100644 python/cudf/cudf/tests/test_unaops.py create mode 100644 python/cudf/cudf/tests/text/__init__.py create mode 100644 python/cudf/cudf/tests/text/test_subword_tokenizer.py create mode 100644 python/cudf/cudf/tests/text/test_text_methods.py create mode 100644 python/cudf/cudf/tests/window/__init__.py create mode 100644 python/cudf/cudf/tests/window/test_rolling.py create mode 100644 python/cudf/cudf/utils/__init__.py create mode 100644 python/cudf/cudf/utils/_numba.py create mode 100644 python/cudf/cudf/utils/_ptxcompiler.py create mode 100644 python/cudf/cudf/utils/applyutils.py create mode 100755 python/cudf/cudf/utils/cudautils.py create mode 100644 python/cudf/cudf/utils/docutils.py create mode 100644 python/cudf/cudf/utils/dtypes.py create mode 100644 python/cudf/cudf/utils/gpu_utils.py create mode 100644 
python/cudf/cudf/utils/hash_vocab_utils.py create mode 100644 python/cudf/cudf/utils/ioutils.py create mode 100644 python/cudf/cudf/utils/metadata/__init__.py create mode 100644 python/cudf/cudf/utils/metadata/orc_column_statistics.proto create mode 100644 python/cudf/cudf/utils/queryutils.py create mode 100644 python/cudf/cudf/utils/string.py create mode 100644 python/cudf/cudf/utils/utils.py create mode 100644 python/cudf/cudf_pandas_tests/_magics_cpu_test.py create mode 100755 python/cudf/cudf_pandas_tests/_magics_gpu_test.py create mode 100644 python/cudf/cudf_pandas_tests/test_array_function.py create mode 100644 python/cudf/cudf_pandas_tests/test_cudf_pandas.py create mode 100644 python/cudf/cudf_pandas_tests/test_cudf_pandas_cudf_interop.py create mode 100644 python/cudf/cudf_pandas_tests/test_fast_slow_proxy.py create mode 100644 python/cudf/cudf_pandas_tests/test_magics.py create mode 100644 python/cudf/cudf_pandas_tests/test_profiler.py create mode 100644 python/cudf/pyproject.toml create mode 100644 python/cudf/setup.py create mode 100644 python/cudf/udf_cpp/CMakeLists.txt create mode 100644 python/cudf/udf_cpp/shim.cu create mode 100644 python/cudf/udf_cpp/strings/include/cudf/strings/udf/case.cuh create mode 100644 python/cudf/udf_cpp/strings/include/cudf/strings/udf/char_types.cuh create mode 100644 python/cudf/udf_cpp/strings/include/cudf/strings/udf/numeric.cuh create mode 100644 python/cudf/udf_cpp/strings/include/cudf/strings/udf/pad.cuh create mode 100644 python/cudf/udf_cpp/strings/include/cudf/strings/udf/replace.cuh create mode 100644 python/cudf/udf_cpp/strings/include/cudf/strings/udf/search.cuh create mode 100644 python/cudf/udf_cpp/strings/include/cudf/strings/udf/split.cuh create mode 100644 python/cudf/udf_cpp/strings/include/cudf/strings/udf/starts_with.cuh create mode 100644 python/cudf/udf_cpp/strings/include/cudf/strings/udf/strip.cuh create mode 100644 python/cudf/udf_cpp/strings/include/cudf/strings/udf/udf_apis.hpp create mode 
100644 python/cudf/udf_cpp/strings/include/cudf/strings/udf/udf_string.cuh create mode 100644 python/cudf/udf_cpp/strings/include/cudf/strings/udf/udf_string.hpp create mode 100644 python/cudf/udf_cpp/strings/src/strings/udf/udf_apis.cu create mode 100644 python/cudf_kafka/cudf_kafka/__init__.py create mode 100644 python/cudf_kafka/cudf_kafka/_lib/__init__.pxd create mode 100644 python/cudf_kafka/cudf_kafka/_lib/kafka.pxd create mode 100644 python/cudf_kafka/cudf_kafka/_lib/kafka.pyx create mode 100644 python/cudf_kafka/pyproject.toml create mode 100644 python/cudf_kafka/setup.py create mode 100644 python/custreamz/.coveragerc create mode 120000 python/custreamz/LICENSE create mode 100644 python/custreamz/README.md create mode 100644 python/custreamz/custreamz/__init__.py create mode 100644 python/custreamz/custreamz/kafka.py create mode 100644 python/custreamz/custreamz/tests/__init__.py create mode 100644 python/custreamz/custreamz/tests/conftest.py create mode 100644 python/custreamz/custreamz/tests/test_dataframes.py create mode 100644 python/custreamz/custreamz/tests/test_kafka.py create mode 100644 python/custreamz/pyproject.toml create mode 100644 python/custreamz/setup.py create mode 100644 python/dask_cudf/.coveragerc create mode 120000 python/dask_cudf/LICENSE create mode 120000 python/dask_cudf/README.md create mode 100644 python/dask_cudf/dask_cudf/DASK_LICENSE.txt create mode 100644 python/dask_cudf/dask_cudf/__init__.py create mode 100644 python/dask_cudf/dask_cudf/accessors.py create mode 100644 python/dask_cudf/dask_cudf/backends.py create mode 100644 python/dask_cudf/dask_cudf/core.py create mode 100644 python/dask_cudf/dask_cudf/groupby.py create mode 100644 python/dask_cudf/dask_cudf/io/__init__.py create mode 100644 python/dask_cudf/dask_cudf/io/csv.py create mode 100644 python/dask_cudf/dask_cudf/io/json.py create mode 100644 python/dask_cudf/dask_cudf/io/orc.py create mode 100644 python/dask_cudf/dask_cudf/io/parquet.py create mode 100644 
python/dask_cudf/dask_cudf/io/tests/__init__.py create mode 100644 python/dask_cudf/dask_cudf/io/tests/data/orc/sample.orc create mode 100644 python/dask_cudf/dask_cudf/io/tests/data/text/sample.pgn create mode 100644 python/dask_cudf/dask_cudf/io/tests/test_csv.py create mode 100644 python/dask_cudf/dask_cudf/io/tests/test_json.py create mode 100644 python/dask_cudf/dask_cudf/io/tests/test_orc.py create mode 100644 python/dask_cudf/dask_cudf/io/tests/test_parquet.py create mode 100644 python/dask_cudf/dask_cudf/io/tests/test_s3.py create mode 100644 python/dask_cudf/dask_cudf/io/tests/test_text.py create mode 100644 python/dask_cudf/dask_cudf/io/text.py create mode 100644 python/dask_cudf/dask_cudf/sorting.py create mode 100644 python/dask_cudf/dask_cudf/tests/__init__.py create mode 100644 python/dask_cudf/dask_cudf/tests/test_accessor.py create mode 100644 python/dask_cudf/dask_cudf/tests/test_applymap.py create mode 100644 python/dask_cudf/dask_cudf/tests/test_binops.py create mode 100644 python/dask_cudf/dask_cudf/tests/test_core.py create mode 100644 python/dask_cudf/dask_cudf/tests/test_delayed_io.py create mode 100644 python/dask_cudf/dask_cudf/tests/test_dispatch.py create mode 100644 python/dask_cudf/dask_cudf/tests/test_distributed.py create mode 100644 python/dask_cudf/dask_cudf/tests/test_groupby.py create mode 100644 python/dask_cudf/dask_cudf/tests/test_join.py create mode 100644 python/dask_cudf/dask_cudf/tests/test_onehot.py create mode 100644 python/dask_cudf/dask_cudf/tests/test_reductions.py create mode 100644 python/dask_cudf/dask_cudf/tests/test_sort.py create mode 100644 python/dask_cudf/dask_cudf/tests/test_struct.py create mode 100644 python/dask_cudf/dask_cudf/tests/utils.py create mode 100644 python/dask_cudf/pyproject.toml create mode 100644 python/dask_cudf/setup.py diff --git a/.clang-format b/.clang-format new file mode 100644 index 0000000..26b9a5b --- /dev/null +++ b/.clang-format @@ -0,0 +1,155 @@ +--- +# Refer to the following 
link for the explanation of each params: +# http://releases.llvm.org/8.0.0/tools/clang/docs/ClangFormatStyleOptions.html +Language: Cpp +# BasedOnStyle: Google +AccessModifierOffset: -1 +AlignAfterOpenBracket: Align +AlignConsecutiveAssignments: true +AlignConsecutiveBitFields: true +AlignConsecutiveDeclarations: false +AlignConsecutiveMacros: true +AlignEscapedNewlines: Left +AlignOperands: true +AlignTrailingComments: true +AllowAllArgumentsOnNextLine: true +AllowAllConstructorInitializersOnNextLine: true +AllowAllParametersOfDeclarationOnNextLine: true +AllowShortBlocksOnASingleLine: true +AllowShortCaseLabelsOnASingleLine: true +AllowShortEnumsOnASingleLine: true +AllowShortFunctionsOnASingleLine: All +AllowShortIfStatementsOnASingleLine: true +AllowShortLambdasOnASingleLine: true +AllowShortLoopsOnASingleLine: false +# This is deprecated +AlwaysBreakAfterDefinitionReturnType: None +AlwaysBreakAfterReturnType: None +AlwaysBreakBeforeMultilineStrings: true +AlwaysBreakTemplateDeclarations: Yes +BinPackArguments: false +BinPackParameters: false +BraceWrapping: + AfterClass: false + AfterControlStatement: false + AfterEnum: false + AfterFunction: false + AfterNamespace: false + AfterObjCDeclaration: false + AfterStruct: false + AfterUnion: false + AfterExternBlock: false + BeforeCatch: false + BeforeElse: false + IndentBraces: false + # disabling the below splits, else, they'll just add to the vertical length of source files! 
+ SplitEmptyFunction: false + SplitEmptyRecord: false + SplitEmptyNamespace: false +BreakAfterJavaFieldAnnotations: false +BreakBeforeBinaryOperators: None +BreakBeforeBraces: WebKit +BreakBeforeInheritanceComma: false +BreakBeforeTernaryOperators: true +BreakConstructorInitializersBeforeComma: false +BreakConstructorInitializers: BeforeColon +BreakInheritanceList: BeforeColon +BreakStringLiterals: true +ColumnLimit: 100 +CommentPragmas: '^ IWYU pragma:' +CompactNamespaces: false +ConstructorInitializerAllOnOneLineOrOnePerLine: true +# Kept the below 2 to be the same as `IndentWidth` to keep everything uniform +ConstructorInitializerIndentWidth: 2 +ContinuationIndentWidth: 2 +Cpp11BracedListStyle: true +DerivePointerAlignment: false +DisableFormat: false +ExperimentalAutoDetectBinPacking: false +FixNamespaceComments: true +ForEachMacros: + - foreach + - Q_FOREACH + - BOOST_FOREACH +IncludeBlocks: Preserve +IncludeIsMainRegex: '([-_](test|unittest))?$' +IndentCaseLabels: true +IndentPPDirectives: None +IndentWidth: 2 +IndentWrappedFunctionNames: false +JavaScriptQuotes: Leave +JavaScriptWrapImports: true +KeepEmptyLinesAtTheStartOfBlocks: false +MacroBlockBegin: '' +MacroBlockEnd: '' +MaxEmptyLinesToKeep: 1 +NamespaceIndentation: None +ObjCBinPackProtocolList: Never +ObjCBlockIndentWidth: 2 +ObjCSpaceAfterProperty: false +ObjCSpaceBeforeProtocolList: true +PenaltyBreakAssignment: 2 +PenaltyBreakBeforeFirstCallParameter: 1 +PenaltyBreakComment: 300 +PenaltyBreakFirstLessLess: 120 +PenaltyBreakString: 1000 +PenaltyBreakTemplateDeclaration: 10 +PenaltyExcessCharacter: 1000000 +PenaltyReturnTypeOnItsOwnLine: 200 +PointerAlignment: Left +RawStringFormats: + - Language: Cpp + Delimiters: + - cc + - CC + - cpp + - Cpp + - CPP + - 'c++' + - 'C++' + CanonicalDelimiter: '' + - Language: TextProto + Delimiters: + - pb + - PB + - proto + - PROTO + EnclosingFunctions: + - EqualsProto + - EquivToProto + - PARSE_PARTIAL_TEXT_PROTO + - PARSE_TEST_PROTO + - PARSE_TEXT_PROTO + - 
ParseTextOrDie + - ParseTextProtoOrDie + CanonicalDelimiter: '' + BasedOnStyle: google +# Enabling comment reflow causes doxygen comments to be messed up in their formats! +ReflowComments: true +SortIncludes: true +SortUsingDeclarations: true +SpaceAfterCStyleCast: false +SpaceAfterTemplateKeyword: true +SpaceBeforeAssignmentOperators: true +SpaceBeforeCpp11BracedList: false +SpaceBeforeCtorInitializerColon: true +SpaceBeforeInheritanceColon: true +SpaceBeforeParens: ControlStatements +SpaceBeforeRangeBasedForLoopColon: true +SpaceBeforeSquareBrackets: false +SpaceInEmptyBlock: false +SpaceInEmptyParentheses: false +SpacesBeforeTrailingComments: 2 +SpacesInAngles: false +SpacesInConditionalStatement: false +SpacesInContainerLiterals: true +SpacesInCStyleCastParentheses: false +SpacesInParentheses: false +SpacesInSquareBrackets: false +Standard: c++17 +StatementMacros: + - Q_UNUSED + - QT_REQUIRE_VERSION +# Be consistent with indent-width, even for people who use tab for indentation! +TabWidth: 2 +UseTab: Never diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS new file mode 100644 index 0000000..9578d32 --- /dev/null +++ b/.github/CODEOWNERS @@ -0,0 +1,22 @@ +#cpp code owners +cpp/ @rapidsai/cudf-cpp-codeowners +python/cudf/udf_cpp/ @rapidsai/cudf-cpp-codeowners + +#python code owners +python/ @rapidsai/cudf-python-codeowners +notebooks/ @rapidsai/cudf-python-codeowners +python/dask_cudf/ @rapidsai/cudf-dask-codeowners + +#cmake code owners +cpp/CMakeLists.txt @rapidsai/cudf-cmake-codeowners +cpp/libcudf_kafka/CMakeLists.txt @rapidsai/cudf-cmake-codeowners +**/cmake/ @rapidsai/cudf-cmake-codeowners + +#java code owners +java/ @rapidsai/cudf-java-codeowners + +#build/ops code owners +.github/ @rapidsai/ops-codeowners +/ci/ @rapidsai/ops-codeowners +conda/ @rapidsai/ops-codeowners +dependencies.yaml @rapidsai/ops-codeowners diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 0000000..bab613a --- 
/dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,28 @@ +--- +name: Bug report +about: Create a bug report to help us improve cuDF +title: "[BUG]" +labels: "? - Needs Triage, bug" +assignees: '' + +--- + +**Describe the bug** +A clear and concise description of what the bug is. + +**Steps/Code to reproduce bug** +Follow this guide http://matthewrocklin.com/blog/work/2018/02/28/minimal-bug-reports to craft a minimal bug report. This helps us reproduce the issue you're having and resolve the issue more quickly. + +**Expected behavior** +A clear and concise description of what you expected to happen. + +**Environment overview (please complete the following information)** + - Environment location: [Bare-metal, Docker, Cloud(specify cloud provider)] + - Method of cuDF install: [conda, Docker, or from source] + - If method of install is [Docker], provide `docker pull` & `docker run` commands used + +**Environment details** +Please run and paste the output of the `cudf/print_env.sh` script here, to gather any other relevant environment details + +**Additional context** +Add any other context about the problem here. diff --git a/.github/ISSUE_TEMPLATE/documentation-request.md b/.github/ISSUE_TEMPLATE/documentation-request.md new file mode 100644 index 0000000..89a026f --- /dev/null +++ b/.github/ISSUE_TEMPLATE/documentation-request.md @@ -0,0 +1,35 @@ +--- +name: Documentation request +about: Report incorrect or needed documentation +title: "[DOC]" +labels: "? - Needs Triage, doc" +assignees: '' + +--- + +## Report incorrect documentation + +**Location of incorrect documentation** +Provide links and line numbers if applicable. + +**Describe the problems or issues found in the documentation** +A clear and concise description of what you found to be incorrect. + +**Steps taken to verify documentation is incorrect** +List any steps you have taken: + +**Suggested fix for documentation** +Detail proposed changes to fix the documentation if you have any. 
+ +--- + +## Report needed documentation + +**Report needed documentation** +A clear and concise description of what documentation you believe it is needed and why. + +**Describe the documentation you'd like** +A clear and concise description of what you want to happen. + +**Steps taken to search for needed documentation** +List any steps you have taken: diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md new file mode 100644 index 0000000..b5bf48e --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -0,0 +1,20 @@ +--- +name: Feature request +about: Suggest an idea for cuDF +title: "[FEA]" +labels: "? - Needs Triage, feature request" +assignees: '' + +--- + +**Is your feature request related to a problem? Please describe.** +A clear and concise description of what the problem is. Ex. I wish I could use cuDF to do [...] + +**Describe the solution you'd like** +A clear and concise description of what you want to happen. + +**Describe alternatives you've considered** +A clear and concise description of any alternative solutions or features you've considered. + +**Additional context** +Add any other context, code examples, or references to existing implementations about the feature request here. diff --git a/.github/ISSUE_TEMPLATE/pandas_function_request.md b/.github/ISSUE_TEMPLATE/pandas_function_request.md new file mode 100644 index 0000000..1cecca7 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/pandas_function_request.md @@ -0,0 +1,22 @@ +--- +name: Request a Missing Pandas Function +about: Request GPU support for a function executed on the CPU in pandas accelerator mode. +title: "[FEA]" +labels: "? - Needs Triage, feature request" +assignees: '' + +--- + +This issue template is intended to be used primarily for requests related to pandas accelerator mode. 
If you'd like to file a general cuDF feature request, please [click here](https://github.com/rapidsai/cudf/issues/new?assignees=&labels=%3F+-+Needs+Triage%2C+feature+request&projects=&template=feature_request.md&title=%5BFEA%5D). + + +**Missing Pandas Feature Request** +A clear and concise summary of the pandas function(s) you'd like to be able run with cuDF. + + +**Profiler Output** +If you used the profiler in pandas accelerator mode, please provide the full output of your profiling report. + + +**Additional context** +Add any other context, code examples, or references to existing implementations about the feature request here. diff --git a/.github/ISSUE_TEMPLATE/submit-question.md b/.github/ISSUE_TEMPLATE/submit-question.md new file mode 100644 index 0000000..ca1bfeb --- /dev/null +++ b/.github/ISSUE_TEMPLATE/submit-question.md @@ -0,0 +1,10 @@ +--- +name: Submit question +about: Ask a general question about cuDF +title: "[QST]" +labels: "? - Needs Triage, question" +assignees: '' + +--- + +**What is your question?** diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 0000000..301037c --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,9 @@ +## Description + + + + +## Checklist +- [ ] I am familiar with the [Contributing Guidelines](https://github.com/rapidsai/cudf/blob/HEAD/CONTRIBUTING.md). +- [ ] New or existing tests cover these changes. +- [ ] The documentation is up to date with these changes. 
diff --git a/.github/copy-pr-bot.yaml b/.github/copy-pr-bot.yaml new file mode 100644 index 0000000..895ba83 --- /dev/null +++ b/.github/copy-pr-bot.yaml @@ -0,0 +1,4 @@ +# Configuration file for `copy-pr-bot` GitHub App +# https://docs.gha-runners.nvidia.com/apps/copy-pr-bot/ + +enabled: true diff --git a/.github/labeler.yml b/.github/labeler.yml new file mode 100644 index 0000000..b0b0db9 --- /dev/null +++ b/.github/labeler.yml @@ -0,0 +1,21 @@ +# Documentation for config - https://github.com/actions/labeler#common-examples + +cuDF (Python): + - 'python/**' + - 'notebooks/**' + +libcudf: + - 'cpp/**' + +CMake: + - '**/CMakeLists.txt' + - '**/cmake/**' + +cuDF (Java): + - 'java/**' + +ci: + - 'ci/**' + +conda: + - 'conda/**' diff --git a/.github/ops-bot.yaml b/.github/ops-bot.yaml new file mode 100644 index 0000000..d2ca789 --- /dev/null +++ b/.github/ops-bot.yaml @@ -0,0 +1,7 @@ +# This file controls which features from the `ops-bot` repository below are enabled. +# - https://github.com/rapidsai/ops-bot + +auto_merger: true +branch_checker: true +label_checker: true +release_drafter: true diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml new file mode 100644 index 0000000..666d884 --- /dev/null +++ b/.github/workflows/build.yaml @@ -0,0 +1,109 @@ +name: build + +on: + push: + branches: + - "branch-*" + tags: + - v[0-9][0-9].[0-9][0-9].[0-9][0-9] + workflow_dispatch: + inputs: + branch: + required: true + type: string + date: + required: true + type: string + sha: + required: true + type: string + build_type: + type: string + default: nightly + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + cpp-build: + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-23.10 + with: + build_type: ${{ inputs.build_type || 'branch' }} + branch: ${{ inputs.branch }} + date: ${{ inputs.date }} + sha: ${{ inputs.sha }} + python-build: + needs: 
[cpp-build] + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-23.10 + with: + build_type: ${{ inputs.build_type || 'branch' }} + branch: ${{ inputs.branch }} + date: ${{ inputs.date }} + sha: ${{ inputs.sha }} + upload-conda: + needs: [cpp-build, python-build] + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@branch-23.10 + with: + build_type: ${{ inputs.build_type || 'branch' }} + branch: ${{ inputs.branch }} + date: ${{ inputs.date }} + sha: ${{ inputs.sha }} + skip_upload_pkgs: libcudf-example + docs-build: + if: github.ref_type == 'branch' + needs: python-build + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-23.10 + with: + arch: "amd64" + branch: ${{ inputs.branch }} + build_type: ${{ inputs.build_type || 'branch' }} + container_image: "rapidsai/ci-conda:latest" + date: ${{ inputs.date }} + node_type: "gpu-v100-latest-1" + run_script: "ci/build_docs.sh" + sha: ${{ inputs.sha }} + wheel-build-cudf: + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-23.10 + with: + build_type: ${{ inputs.build_type || 'branch' }} + branch: ${{ inputs.branch }} + sha: ${{ inputs.sha }} + date: ${{ inputs.date }} + script: ci/build_wheel_cudf.sh + wheel-publish-cudf: + needs: wheel-build-cudf + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-23.10 + with: + build_type: ${{ inputs.build_type || 'branch' }} + branch: ${{ inputs.branch }} + sha: ${{ inputs.sha }} + date: ${{ inputs.date }} + package-name: cudf + wheel-build-dask-cudf: + needs: wheel-publish-cudf + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-23.10 + with: + matrix_filter: map(select(.ARCH == "amd64" and .PY_VER == "3.10" and (.CUDA_VER == "11.8.0" or .CUDA_VER == "12.0.1"))) + build_type: ${{ inputs.build_type || 'branch' }} + branch: 
${{ inputs.branch }} + sha: ${{ inputs.sha }} + date: ${{ inputs.date }} + script: ci/build_wheel_dask_cudf.sh + wheel-publish-dask-cudf: + needs: wheel-build-dask-cudf + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-23.10 + with: + build_type: ${{ inputs.build_type || 'branch' }} + branch: ${{ inputs.branch }} + sha: ${{ inputs.sha }} + date: ${{ inputs.date }} + package-name: dask_cudf diff --git a/.github/workflows/jni-docker-build.yml b/.github/workflows/jni-docker-build.yml new file mode 100644 index 0000000..0bdc409 --- /dev/null +++ b/.github/workflows/jni-docker-build.yml @@ -0,0 +1,53 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +name: JNI Docker Build + +on: + workflow_dispatch: # manual trigger only + +concurrency: + group: jni-docker-build-${{ github.ref }} + cancel-in-progress: true + +jobs: + docker-build: + if: github.repository == 'rapidsai/cudf' + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + + - name: Set up QEMU + uses: docker/setup-qemu-action@v2 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v2 + + - name: Login to DockerHub + uses: docker/login-action@v2 + with: + username: ${{ secrets.GPUCIBOT_DOCKERHUB_USER }} + password: ${{ secrets.GPUCIBOT_DOCKERHUB_TOKEN }} + + - name: Set ENVs + run: | + echo "IMAGE_NAME=rapidsai/cudf-jni-build" >> $GITHUB_ENV + echo "IMAGE_REF=${GITHUB_REF_NAME}" >> $GITHUB_ENV + + - name: Build and Push + uses: docker/build-push-action@v3 + with: + push: true + file: java/ci/Dockerfile.centos7 + tags: "${{ env.IMAGE_NAME }}:${{ env.IMAGE_REF }}" diff --git a/.github/workflows/labeler.yml b/.github/workflows/labeler.yml new file mode 100644 index 0000000..23956a0 --- /dev/null +++ b/.github/workflows/labeler.yml @@ -0,0 +1,11 @@ +name: "Pull Request Labeler" +on: +- pull_request_target + +jobs: + triage: + runs-on: ubuntu-latest + steps: + - uses: actions/labeler@main + with: + repo-token: "${{ secrets.GITHUB_TOKEN }}" diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml new file mode 100644 index 0000000..abf5fcf --- /dev/null +++ b/.github/workflows/pr.yaml @@ -0,0 +1,180 @@ +name: pr + +on: + push: + branches: + - "pull-request/[0-9]+" + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + pr-builder: + needs: + - checks + - conda-cpp-build + - conda-cpp-tests + - conda-python-build + - conda-python-cudf-tests + - conda-python-other-tests + - conda-java-tests + - conda-notebook-tests + - docs-build + - wheel-build-cudf + - wheel-tests-cudf + - wheel-build-dask-cudf + - wheel-tests-dask-cudf + - unit-tests-cudf-pandas + - pandas-tests + #- 
pandas-tests-diff + #- pandas-tests-diff-comment + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-23.10 + checks: + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-23.10 + with: + enable_check_generated_files: false + conda-cpp-build: + needs: checks + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-23.10 + with: + build_type: pull-request + conda-cpp-tests: + needs: conda-cpp-build + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-23.10 + with: + build_type: pull-request + conda-python-build: + needs: conda-cpp-build + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-23.10 + with: + build_type: pull-request + conda-python-cudf-tests: + needs: conda-python-build + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-23.10 + with: + build_type: pull-request + test_script: "ci/test_python_cudf.sh" + conda-python-other-tests: + # Tests for dask_cudf, custreamz, cudf_kafka are separated for CI parallelism + needs: conda-python-build + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-23.10 + with: + build_type: pull-request + test_script: "ci/test_python_other.sh" + conda-java-tests: + needs: conda-cpp-build + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-23.10 + with: + build_type: pull-request + node_type: "gpu-v100-latest-1" + arch: "amd64" + container_image: "rapidsai/ci-conda:latest" + run_script: "ci/test_java.sh" + conda-notebook-tests: + needs: conda-python-build + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-23.10 + with: + build_type: pull-request + node_type: "gpu-v100-latest-1" + arch: "amd64" + container_image: "rapidsai/ci-conda:latest" + 
run_script: "ci/test_notebooks.sh" + docs-build: + needs: conda-python-build + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-23.10 + with: + build_type: pull-request + node_type: "gpu-v100-latest-1" + arch: "amd64" + container_image: "rapidsai/ci-conda:latest" + run_script: "ci/build_docs.sh" + wheel-build-cudf: + needs: checks + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-23.10 + with: + build_type: pull-request + script: "ci/build_wheel_cudf.sh" + wheel-tests-cudf: + needs: wheel-build-cudf + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-23.10 + with: + build_type: pull-request + script: ci/test_wheel_cudf.sh + wheel-build-dask-cudf: + needs: wheel-tests-cudf + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-23.10 + with: + matrix_filter: map(select(.ARCH == "amd64" and .PY_VER == "3.10" and (.CUDA_VER == "11.8.0" or .CUDA_VER == "12.0.1"))) + build_type: pull-request + script: "ci/build_wheel_dask_cudf.sh" + wheel-tests-dask-cudf: + needs: wheel-build-dask-cudf + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-23.10 + with: + matrix_filter: map(select(.ARCH == "amd64" and .PY_VER == "3.10" and (.CUDA_VER == "11.8.0" or .CUDA_VER == "12.0.1"))) + build_type: pull-request + script: ci/test_wheel_dask_cudf.sh + unit-tests-cudf-pandas: + needs: wheel-build-cudf + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-23.10 + with: + matrix_filter: map(select(.ARCH == "amd64" and .PY_VER == "3.10" and (.CUDA_VER == "11.8.0" or .CUDA_VER == "12.0.1"))) + build_type: pull-request + script: ci/cudf_pandas_scripts/run_tests.sh + pandas-tests: + # run the Pandas unit tests using PR branch + needs: wheel-build-cudf + secrets: inherit + uses: 
rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-23.10 + with: + matrix_filter: map(select(.ARCH == "amd64")) | max_by(.CUDA_VER) | [.] + build_type: pull-request + script: ci/cudf_pandas_scripts/pandas-tests/run.sh pr + #pandas-tests-diff: + # # diff the results of running the Pandas unit tests and publish a job summary + # needs: [pandas-tests-main, pandas-tests-pr] + # secrets: inherit + # # This branch exports a `job_output` output that the downstream job reads. + # uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@wence/fea/custom-job-output + # with: + # node_type: cpu4 + # build_type: pull-request + # run_script: ci/cudf_pandas_scripts/pandas-tests/diff.sh + #pandas-tests-diff-comment: + # # Post comment of pass/fail rate on PR + # runs-on: ubuntu-latest + # needs: pandas-tests-diff + # steps: + # - uses: actions/github-script@v6 + # with: + # script: | + # const branch = process.env.GITHUB_REF_NAME; + # const prBranchPattern = new RegExp("^pull-request/[0-9]+$"); + # if (!branch.match(prBranchPattern)) { + # throw new Error(`${branch} does not match PR branch pattern.`); + # } + # const summary_url = `${context.serverUrl}/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`; + # const prNumber = branch.split("/")[1]; + # const summary_comment = `${{ needs.pandas-tests-diff.outputs.job_output }}`; + # github.rest.issues.createComment({ + # issue_number: prNumber, + # owner: context.repo.owner, + # repo: context.repo.repo, + # body: `${summary_comment}\n\nHere is [a link to the full test summary](${summary_url}).\n` + # }) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml new file mode 100644 index 0000000..49a9c73 --- /dev/null +++ b/.github/workflows/test.yaml @@ -0,0 +1,120 @@ +name: test + +on: + workflow_dispatch: + inputs: + branch: + required: true + type: string + date: + required: true + type: string + sha: + required: true + type: string + +jobs: + conda-cpp-tests: + 
secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-23.10 + with: + build_type: nightly + branch: ${{ inputs.branch }} + date: ${{ inputs.date }} + sha: ${{ inputs.sha }} + conda-cpp-memcheck-tests: + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-23.10 + with: + build_type: nightly + branch: ${{ inputs.branch }} + date: ${{ inputs.date }} + sha: ${{ inputs.sha }} + node_type: "gpu-v100-latest-1" + arch: "amd64" + container_image: "rapidsai/ci-conda:latest" + run_script: "ci/test_cpp_memcheck.sh" + conda-python-cudf-tests: + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-23.10 + with: + build_type: nightly + branch: ${{ inputs.branch }} + date: ${{ inputs.date }} + sha: ${{ inputs.sha }} + test_script: "ci/test_python_cudf.sh" + conda-python-other-tests: + # Tests for dask_cudf, custreamz, cudf_kafka are separated for CI parallelism + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-23.10 + with: + build_type: nightly + branch: ${{ inputs.branch }} + date: ${{ inputs.date }} + sha: ${{ inputs.sha }} + test_script: "ci/test_python_other.sh" + conda-java-tests: + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-23.10 + with: + build_type: nightly + branch: ${{ inputs.branch }} + date: ${{ inputs.date }} + sha: ${{ inputs.sha }} + node_type: "gpu-v100-latest-1" + arch: "amd64" + container_image: "rapidsai/ci-conda:latest" + run_script: "ci/test_java.sh" + conda-notebook-tests: + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-23.10 + with: + build_type: nightly + branch: ${{ inputs.branch }} + date: ${{ inputs.date }} + sha: ${{ inputs.sha }} + node_type: "gpu-v100-latest-1" + arch: "amd64" + container_image: "rapidsai/ci-conda:latest" + run_script: "ci/test_notebooks.sh" + wheel-tests-cudf: 
+ secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-23.10 + with: + build_type: nightly + branch: ${{ inputs.branch }} + date: ${{ inputs.date }} + sha: ${{ inputs.sha }} + script: ci/test_wheel_cudf.sh + wheel-tests-dask-cudf: + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-23.10 + with: + matrix_filter: map(select(.ARCH == "amd64" and .PY_VER == "3.10" and (.CUDA_VER == "11.8.0" or .CUDA_VER == "12.0.1"))) + build_type: nightly + branch: ${{ inputs.branch }} + date: ${{ inputs.date }} + sha: ${{ inputs.sha }} + script: ci/test_wheel_dask_cudf.sh + unit-tests-cudf-pandas: + needs: wheel-build-cudf + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-23.10 + with: + build_type: nightly + branch: ${{ inputs.branch }} + date: ${{ inputs.date }} + sha: ${{ inputs.sha }} + script: ci/cudf_pandas_scripts/run_tests.sh + pandas-tests: + # run the Pandas unit tests + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-23.10 + with: + matrix_filter: map(select(.ARCH == "amd64")) | max_by(.CUDA_VER) | [.] 
+ build_type: nightly + branch: ${{ inputs.branch }} + date: ${{ inputs.date }} + sha: ${{ inputs.sha }} + # pr mode uses the HEAD of the branch, which is also correct for nightlies + script: ci/cudf_pandas_scripts/pandas-tests/run.sh pr diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..4a4a6a9 --- /dev/null +++ b/.gitignore @@ -0,0 +1,175 @@ +## Common +__pycache__ +*.py[cod] +*$py.class +*.a +*.o +*.so +*.dylib +.cache +.vscode +*.swp +*.pytest_cache +DartConfiguration.tcl +.DS_Store +*.manifest +*.spec +.nfs* +.clangd +compile_commands.json + +## Python build directories & artifacts +dask-worker-space/ +dist/ +cudf.egg-info/ +python/build +python/*/build +python/cudf/cudf-coverage.xml +python/cudf/*/_lib/**/*.cpp +python/cudf/*/_lib/**/*.h +python/cudf/*/_lib/.nfs* +python/cudf/*/_cuda/*.cpp +python/cudf/*.ipynb +python/cudf/.ipynb_checkpoints +python/*/record.txt +python/cudf/cudf/core/udf/*.ptx +python/cudf_kafka/*/_lib/**/*.cpp +python/cudf_kafka/*/_lib/**/*.h +python/custreamz/*/_lib/**/*.cpp +python/custreamz/*/_lib/**/*.h +.Python +env/ +develop-eggs/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +_skbuild/ +*.egg-info/ +.installed.cfg +*.egg +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +junit-cudf.xml +*.cover +.hypothesis/ +test-results + +## Patching +*.orig +*.rej + +## C++ build directories & artifacts +CMakeFiles/ +Debug +build/ +cpp/build/ +cpp/include/cudf/ipc_generated/*.h +cpp/thirdparty/googletest/ + +## Eclipse IDE +.project +.cproject +.settings + +## IntelliJ IDE +.idea/ +.idea_modules/ +*.iml +*.ipr +*.iws + +## Doxygen +cpp/doxygen/html + +#Java +target + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter 
Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# dotenv +.env + +# virtualenv +.venv +venv/ +ENV/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ + +## VSCode IDE +.vscode + +# Dask +dask-worker-space/ + +# protobuf +**/*_pb2.py + +# Sphinx docs & build artifacts +docs/cudf/source/api_docs/generated/* +docs/cudf/source/user_guide/api_docs/api/* +docs/cudf/source/user_guide/example_output/* +docs/cudf/source/user_guide/cudf.*Dtype.*.rst +_html +_text +jupyter_execute + +# cibuildwheel +/wheelhouse diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..a621047 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,160 @@ +# Copyright (c) 2019-2022, NVIDIA CORPORATION. + +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.3.0 + hooks: + - id: trailing-whitespace + exclude: | + (?x)^( + ^python/cudf/cudf/tests/data/subword_tokenizer_data/.* + ) + - id: end-of-file-fixer + exclude: | + (?x)^( + ^python/cudf/cudf/tests/data/subword_tokenizer_data/.* + ) + - repo: https://github.com/PyCQA/isort + rev: 5.12.0 + hooks: + - id: isort + # Use the config file specific to each subproject so that each + # project can specify its own first/third-party packages. + args: ["--config-root=python/", "--resolve-all-configs"] + files: python/.* + types_or: [python, cython, pyi] + - repo: https://github.com/psf/black + rev: 22.3.0 + hooks: + - id: black + files: python/.* + # Explicitly specify the pyproject.toml at the repo root, not per-project. 
+ args: ["--config", "pyproject.toml"] + - repo: https://github.com/MarcoGorelli/cython-lint + rev: v0.15.0 + hooks: + - id: cython-lint + - repo: https://github.com/pre-commit/mirrors-mypy + rev: 'v1.3.0' + hooks: + - id: mypy + additional_dependencies: [types-cachetools] + args: ["--config-file=pyproject.toml", + "python/cudf/cudf", + "python/custreamz/custreamz", + "python/cudf_kafka/cudf_kafka", + "python/dask_cudf/dask_cudf"] + pass_filenames: false + - repo: https://github.com/PyCQA/pydocstyle + rev: 6.1.1 + hooks: + - id: pydocstyle + # https://github.com/PyCQA/pydocstyle/issues/603 + additional_dependencies: [toml] + args: ["--config=pyproject.toml"] + exclude: | + (?x)^( + ^python/cudf/cudf/pandas/scripts/.*| + ^python/cudf/cudf_pandas_tests/.* + ) + - repo: https://github.com/nbQA-dev/nbQA + rev: 1.6.3 + hooks: + - id: nbqa-isort + # Use the cudf_kafka isort orderings in notebooks so that dask + # and RAPIDS packages have their own sections. + args: ["--settings-file=python/cudf_kafka/pyproject.toml"] + - id: nbqa-black + # Explicitly specify the pyproject.toml at the repo root, not per-project. 
+ args: ["--config=pyproject.toml"] + - repo: https://github.com/pre-commit/mirrors-clang-format + rev: v16.0.6 + hooks: + - id: clang-format + types_or: [c, c++, cuda] + args: ["-fallback-style=none", "-style=file", "-i"] + - repo: https://github.com/sirosen/texthooks + rev: 0.4.0 + hooks: + - id: fix-smartquotes + exclude: | + (?x)^( + ^cpp/include/cudf_test/cxxopts.hpp| + ^python/cudf/cudf/tests/data/subword_tokenizer_data/.*| + ^python/cudf/cudf/tests/text/test_text_methods.py + ) + - repo: local + hooks: + - id: no-deprecationwarning + name: no-deprecationwarning + description: 'Enforce that DeprecationWarning is not introduced (use FutureWarning instead)' + entry: '(category=|\s)DeprecationWarning[,)]' + language: pygrep + types_or: [python, cython] + - id: no-programmatic-xfail + name: no-programmatic-xfail + description: 'Enforce that pytest.xfail is not introduced (see dev docs for details)' + entry: 'pytest\.xfail' + language: pygrep + types: [python] + - id: cmake-format + name: cmake-format + entry: ./cpp/scripts/run-cmake-format.sh cmake-format + language: python + types: [cmake] + # Note that pre-commit autoupdate does not update the versions + # of dependencies, so we'll have to update this manually. + additional_dependencies: + - cmakelang==0.6.13 + verbose: true + require_serial: true + - id: cmake-lint + name: cmake-lint + entry: ./cpp/scripts/run-cmake-format.sh cmake-lint + language: python + types: [cmake] + # Note that pre-commit autoupdate does not update the versions + # of dependencies, so we'll have to update this manually. 
+ additional_dependencies: + - cmakelang==0.6.13 + verbose: true + require_serial: true + - id: copyright-check + name: copyright-check + entry: python ./ci/checks/copyright.py --git-modified-only --update-current-year + language: python + pass_filenames: false + additional_dependencies: [gitpython] + - id: doxygen-check + name: doxygen-check + entry: ./ci/checks/doxygen.sh + files: ^cpp/include/ + types_or: [file] + language: system + pass_filenames: false + verbose: true + - repo: https://github.com/codespell-project/codespell + rev: v2.2.2 + hooks: + - id: codespell + additional_dependencies: [tomli] + args: ["--toml", "pyproject.toml"] + exclude: | + (?x)^( + .*test.*| + ^CHANGELOG.md$ + ) + - repo: https://github.com/rapidsai/dependency-file-generator + rev: v1.5.1 + hooks: + - id: rapids-dependency-file-generator + args: ["--clean"] + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.0.278 + hooks: + - id: ruff + files: python/.*$ + + +default_language_version: + python: python3 diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..ecd547a --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,6900 @@ +# cuDF 23.10.00 (11 Oct 2023) + +## 🚨 Breaking Changes + +- Expose stream parameter in public nvtext ngram APIs ([#14061](https://github.com/rapidsai/cudf/pull/14061)) [@davidwendt](https://github.com/davidwendt) +- Raise `MixedTypeError` when a column of mixed-dtype is being constructed ([#14050](https://github.com/rapidsai/cudf/pull/14050)) [@galipremsagar](https://github.com/galipremsagar) +- Raise `NotImplementedError` for `MultiIndex.to_series` ([#14049](https://github.com/rapidsai/cudf/pull/14049)) [@galipremsagar](https://github.com/galipremsagar) +- Create table_input_metadata from a table_metadata ([#13920](https://github.com/rapidsai/cudf/pull/13920)) [@etseidl](https://github.com/etseidl) +- Enable RLE boolean encoding for v2 Parquet files 
([#13886](https://github.com/rapidsai/cudf/pull/13886)) [@etseidl](https://github.com/etseidl) +- Change `NA` to `NaT` for `datetime` and `timedelta` types ([#13868](https://github.com/rapidsai/cudf/pull/13868)) [@galipremsagar](https://github.com/galipremsagar) +- Fix `any`, `all` reduction behavior for `axis=None` and warn for other reductions ([#13831](https://github.com/rapidsai/cudf/pull/13831)) [@galipremsagar](https://github.com/galipremsagar) +- Add minhash support for MurmurHash3_x64_128 ([#13796](https://github.com/rapidsai/cudf/pull/13796)) [@davidwendt](https://github.com/davidwendt) +- Remove the libcudf cudf::offset_type type ([#13788](https://github.com/rapidsai/cudf/pull/13788)) [@davidwendt](https://github.com/davidwendt) +- Raise error when trying to join `datetime` and `timedelta` types with other types ([#13786](https://github.com/rapidsai/cudf/pull/13786)) [@galipremsagar](https://github.com/galipremsagar) +- Update to Cython 3.0.0 ([#13777](https://github.com/rapidsai/cudf/pull/13777)) [@vyasr](https://github.com/vyasr) +- Raise error on constructing an array from mixed type inputs ([#13768](https://github.com/rapidsai/cudf/pull/13768)) [@galipremsagar](https://github.com/galipremsagar) +- Enforce deprecations in `23.10` ([#13732](https://github.com/rapidsai/cudf/pull/13732)) [@galipremsagar](https://github.com/galipremsagar) +- Upgrade to arrow 12 ([#13728](https://github.com/rapidsai/cudf/pull/13728)) [@galipremsagar](https://github.com/galipremsagar) +- Remove Arrow dependency from the `datasource.hpp` public header ([#13698](https://github.com/rapidsai/cudf/pull/13698)) [@vuule](https://github.com/vuule) + +## 🐛 Bug Fixes + +- Fix inaccurate ceil/floor and inaccurate rescaling casts of fixed-point values. 
([#14242](https://github.com/rapidsai/cudf/pull/14242)) [@bdice](https://github.com/bdice) +- Fix inaccuracy in decimal128 rounding. ([#14233](https://github.com/rapidsai/cudf/pull/14233)) [@bdice](https://github.com/bdice) +- Workaround for illegal instruction error in sm90 for warp instrinsics with mask ([#14201](https://github.com/rapidsai/cudf/pull/14201)) [@karthikeyann](https://github.com/karthikeyann) +- Fix pytorch related pytest ([#14198](https://github.com/rapidsai/cudf/pull/14198)) [@galipremsagar](https://github.com/galipremsagar) +- Pin to `aws-sdk-cpp<1.11` ([#14173](https://github.com/rapidsai/cudf/pull/14173)) [@pentschev](https://github.com/pentschev) +- Fix assert failure for range window functions ([#14168](https://github.com/rapidsai/cudf/pull/14168)) [@mythrocks](https://github.com/mythrocks) +- Fix Memcheck error found in JSON_TEST JsonReaderTest.ErrorStrings ([#14164](https://github.com/rapidsai/cudf/pull/14164)) [@karthikeyann](https://github.com/karthikeyann) +- Fix calls to copy_bitmask to pass stream parameter ([#14158](https://github.com/rapidsai/cudf/pull/14158)) [@davidwendt](https://github.com/davidwendt) +- Fix DataFrame from Series with different CategoricalIndexes ([#14157](https://github.com/rapidsai/cudf/pull/14157)) [@mroeschke](https://github.com/mroeschke) +- Pin to numpy<1.25 and numba<0.58 to avoid errors and deprecation warnings-as-errors. 
([#14156](https://github.com/rapidsai/cudf/pull/14156)) [@bdice](https://github.com/bdice) +- Fix kernel launch error for cudf::io::orc::gpu::rowgroup_char_counts_kernel ([#14139](https://github.com/rapidsai/cudf/pull/14139)) [@davidwendt](https://github.com/davidwendt) +- Don't sort columns for DataFrame init from list of Series ([#14136](https://github.com/rapidsai/cudf/pull/14136)) [@mroeschke](https://github.com/mroeschke) +- Fix DataFrame.values with no columns but index ([#14134](https://github.com/rapidsai/cudf/pull/14134)) [@mroeschke](https://github.com/mroeschke) +- Avoid circular cimports in _lib/cpp/reduce.pxd ([#14125](https://github.com/rapidsai/cudf/pull/14125)) [@vyasr](https://github.com/vyasr) +- Add support for nested dict in `DataFrame` constructor ([#14119](https://github.com/rapidsai/cudf/pull/14119)) [@galipremsagar](https://github.com/galipremsagar) +- Restrict iterables of `DataFrame`'s as input to `DataFrame` constructor ([#14118](https://github.com/rapidsai/cudf/pull/14118)) [@galipremsagar](https://github.com/galipremsagar) +- Allow `numeric_only=True` for reduction operations on numeric types ([#14111](https://github.com/rapidsai/cudf/pull/14111)) [@galipremsagar](https://github.com/galipremsagar) +- Preserve name of the column while initializing a `DataFrame` ([#14110](https://github.com/rapidsai/cudf/pull/14110)) [@galipremsagar](https://github.com/galipremsagar) +- Correct numerous 20054-D: dynamic initialization errors found on arm+12.2 ([#14108](https://github.com/rapidsai/cudf/pull/14108)) [@robertmaynard](https://github.com/robertmaynard) +- Drop `kwargs` from `Series.count` ([#14106](https://github.com/rapidsai/cudf/pull/14106)) [@galipremsagar](https://github.com/galipremsagar) +- Fix naming issues with `Index.to_frame` and `MultiIndex.to_frame` APIs 
([#14105](https://github.com/rapidsai/cudf/pull/14105)) [@galipremsagar](https://github.com/galipremsagar) +- Only use memory resources that haven't been freed ([#14103](https://github.com/rapidsai/cudf/pull/14103)) [@robertmaynard](https://github.com/robertmaynard) +- Add support for `__round__` in `Series` and `DataFrame` ([#14099](https://github.com/rapidsai/cudf/pull/14099)) [@galipremsagar](https://github.com/galipremsagar) +- Validate ignore_index type in drop_duplicates ([#14098](https://github.com/rapidsai/cudf/pull/14098)) [@mroeschke](https://github.com/mroeschke) +- Fix renaming `Series` and `Index` ([#14080](https://github.com/rapidsai/cudf/pull/14080)) [@galipremsagar](https://github.com/galipremsagar) +- Raise NotImplementedError in to_datetime if Z (or tz component) in string ([#14074](https://github.com/rapidsai/cudf/pull/14074)) [@mroeschke](https://github.com/mroeschke) +- Raise NotImplementedError for datetime strings with UTC offset ([#14070](https://github.com/rapidsai/cudf/pull/14070)) [@mroeschke](https://github.com/mroeschke) +- Update pyarrow-related dispatch logic in dask_cudf ([#14069](https://github.com/rapidsai/cudf/pull/14069)) [@rjzamora](https://github.com/rjzamora) +- Use `conda mambabuild` rather than `mamba mambabuild` ([#14067](https://github.com/rapidsai/cudf/pull/14067)) [@wence-](https://github.com/wence-) +- Raise NotImplementedError in to_datetime with dayfirst without infer_format ([#14058](https://github.com/rapidsai/cudf/pull/14058)) [@mroeschke](https://github.com/mroeschke) +- Fix various issues in `Index.intersection` ([#14054](https://github.com/rapidsai/cudf/pull/14054)) [@galipremsagar](https://github.com/galipremsagar) +- Fix `Index.difference` to match with pandas ([#14053](https://github.com/rapidsai/cudf/pull/14053)) 
[@galipremsagar](https://github.com/galipremsagar) +- Fix empty string column construction ([#14052](https://github.com/rapidsai/cudf/pull/14052)) [@galipremsagar](https://github.com/galipremsagar) +- Fix `IntervalIndex.union` to preserve type-metadata ([#14051](https://github.com/rapidsai/cudf/pull/14051)) [@galipremsagar](https://github.com/galipremsagar) +- Raise `MixedTypeError` when a column of mixed-dtype is being constructed ([#14050](https://github.com/rapidsai/cudf/pull/14050)) [@galipremsagar](https://github.com/galipremsagar) +- Raise `NotImplementedError` for `MultiIndex.to_series` ([#14049](https://github.com/rapidsai/cudf/pull/14049)) [@galipremsagar](https://github.com/galipremsagar) +- Ignore compile_commands.json ([#14048](https://github.com/rapidsai/cudf/pull/14048)) [@harrism](https://github.com/harrism) +- Raise TypeError for any non-parseable argument in to_datetime ([#14044](https://github.com/rapidsai/cudf/pull/14044)) [@mroeschke](https://github.com/mroeschke) +- Raise NotImplementedError for to_datetime with z format ([#14037](https://github.com/rapidsai/cudf/pull/14037)) [@mroeschke](https://github.com/mroeschke) +- Implement `sort_remaining` for `sort_index` ([#14033](https://github.com/rapidsai/cudf/pull/14033)) [@wence-](https://github.com/wence-) +- Raise NotImplementedError for Categoricals with timezones ([#14032](https://github.com/rapidsai/cudf/pull/14032)) [@mroeschke](https://github.com/mroeschke) +- Temporary fix Parquet metadata with empty value string being ignored from writing ([#14026](https://github.com/rapidsai/cudf/pull/14026)) [@ttnghia](https://github.com/ttnghia) +- Preserve types of scalar being returned when possible in `quantile` ([#14014](https://github.com/rapidsai/cudf/pull/14014)) [@galipremsagar](https://github.com/galipremsagar) 
+- Fix return type of `MultiIndex.difference` ([#14009](https://github.com/rapidsai/cudf/pull/14009)) [@galipremsagar](https://github.com/galipremsagar) +- Raise an error when timezone subtypes are encountered in `pd.IntervalDtype` ([#14006](https://github.com/rapidsai/cudf/pull/14006)) [@galipremsagar](https://github.com/galipremsagar) +- Fix map column can not be non-nullable for java ([#14003](https://github.com/rapidsai/cudf/pull/14003)) [@res-life](https://github.com/res-life) +- Fix `name` selection in `Index.difference` and `Index.intersection` ([#13986](https://github.com/rapidsai/cudf/pull/13986)) [@galipremsagar](https://github.com/galipremsagar) +- Restore column type metadata with `dropna` to fix `factorize` API ([#13980](https://github.com/rapidsai/cudf/pull/13980)) [@galipremsagar](https://github.com/galipremsagar) +- Use thread_index_type to avoid out of bounds accesses in conditional joins ([#13971](https://github.com/rapidsai/cudf/pull/13971)) [@vyasr](https://github.com/vyasr) +- Fix `MultiIndex.to_numpy` to return numpy array with tuples ([#13966](https://github.com/rapidsai/cudf/pull/13966)) [@galipremsagar](https://github.com/galipremsagar) +- Use cudf::thread_index_type in get_json_object and tdigest kernels ([#13962](https://github.com/rapidsai/cudf/pull/13962)) [@nvdbaranec](https://github.com/nvdbaranec) +- Fix an issue with `IntervalIndex.repr` when null values are present ([#13958](https://github.com/rapidsai/cudf/pull/13958)) [@galipremsagar](https://github.com/galipremsagar) +- Fix type metadata issue preservation with `Column.unique` ([#13957](https://github.com/rapidsai/cudf/pull/13957)) [@galipremsagar](https://github.com/galipremsagar) +- Handle `Interval` scalars when passed in list-like inputs to `cudf.Index` ([#13956](https://github.com/rapidsai/cudf/pull/13956)) 
[@galipremsagar](https://github.com/galipremsagar) +- Fix setting of categories order when `dtype` is passed to a `CategoricalColumn` ([#13955](https://github.com/rapidsai/cudf/pull/13955)) [@galipremsagar](https://github.com/galipremsagar) +- Handle `as_index` in `GroupBy.apply` ([#13951](https://github.com/rapidsai/cudf/pull/13951)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Raise error for string types in `nsmallest` and `nlargest` ([#13946](https://github.com/rapidsai/cudf/pull/13946)) [@galipremsagar](https://github.com/galipremsagar) +- Fix `index` of `Groupby.apply` results when it is performed on empty objects ([#13944](https://github.com/rapidsai/cudf/pull/13944)) [@galipremsagar](https://github.com/galipremsagar) +- Fix integer overflow in shim `device_sum` functions ([#13943](https://github.com/rapidsai/cudf/pull/13943)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Fix type mismatch in groupby reduction for empty objects ([#13942](https://github.com/rapidsai/cudf/pull/13942)) [@galipremsagar](https://github.com/galipremsagar) +- Fixed processed bytes calculation in APPLY_BOOLEAN_MASK benchmark. 
([#13937](https://github.com/rapidsai/cudf/pull/13937)) [@Blonck](https://github.com/Blonck) +- Fix construction of `Grouping` objects ([#13932](https://github.com/rapidsai/cudf/pull/13932)) [@galipremsagar](https://github.com/galipremsagar) +- Fix an issue with `loc` when column names is `MultiIndex` ([#13929](https://github.com/rapidsai/cudf/pull/13929)) [@galipremsagar](https://github.com/galipremsagar) +- Fix handling of typecasting in `searchsorted` ([#13925](https://github.com/rapidsai/cudf/pull/13925)) [@galipremsagar](https://github.com/galipremsagar) +- Preserve index `name` in `reindex` ([#13917](https://github.com/rapidsai/cudf/pull/13917)) [@galipremsagar](https://github.com/galipremsagar) +- Use `cudf::thread_index_type` in cuIO to prevent overflow in row indexing ([#13910](https://github.com/rapidsai/cudf/pull/13910)) [@vuule](https://github.com/vuule) +- Fix for encodings listed in the Parquet column chunk metadata ([#13907](https://github.com/rapidsai/cudf/pull/13907)) [@etseidl](https://github.com/etseidl) +- Use cudf::thread_index_type in concatenate.cu. ([#13906](https://github.com/rapidsai/cudf/pull/13906)) [@bdice](https://github.com/bdice) +- Use cudf::thread_index_type in replace.cu. ([#13905](https://github.com/rapidsai/cudf/pull/13905)) [@bdice](https://github.com/bdice) +- Add noSanitizer tag to Java reduction tests failing with sanitizer in CUDA 12 ([#13904](https://github.com/rapidsai/cudf/pull/13904)) [@jlowe](https://github.com/jlowe) +- Remove the internal use of the cudf's default stream in cuIO ([#13903](https://github.com/rapidsai/cudf/pull/13903)) [@vuule](https://github.com/vuule) +- Use cuda-nvtx-dev CUDA 12 package. 
([#13901](https://github.com/rapidsai/cudf/pull/13901)) [@bdice](https://github.com/bdice) +- Use `thread_index_type` to avoid index overflow in grid-stride loops ([#13895](https://github.com/rapidsai/cudf/pull/13895)) [@PointKernel](https://github.com/PointKernel) +- Fix memory access error in cudf::shift for sliced strings ([#13894](https://github.com/rapidsai/cudf/pull/13894)) [@davidwendt](https://github.com/davidwendt) +- Raise error when trying to construct a `DataFrame` with mixed types ([#13889](https://github.com/rapidsai/cudf/pull/13889)) [@galipremsagar](https://github.com/galipremsagar) +- Return `nan` when one variable to be correlated has zero variance in JIT GroupBy Apply ([#13884](https://github.com/rapidsai/cudf/pull/13884)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Correctly detect the BOM mark in `read_csv` with compressed input ([#13881](https://github.com/rapidsai/cudf/pull/13881)) [@vuule](https://github.com/vuule) +- Check for the presence of all values in `MultiIndex.isin` ([#13879](https://github.com/rapidsai/cudf/pull/13879)) [@galipremsagar](https://github.com/galipremsagar) +- Fix nvtext::generate_character_ngrams performance regression for longer strings ([#13874](https://github.com/rapidsai/cudf/pull/13874)) [@davidwendt](https://github.com/davidwendt) +- Fix return type of `MultiIndex.levels` ([#13870](https://github.com/rapidsai/cudf/pull/13870)) [@galipremsagar](https://github.com/galipremsagar) +- Fix List's missing children metadata in JSON writer ([#13869](https://github.com/rapidsai/cudf/pull/13869)) [@karthikeyann](https://github.com/karthikeyann) +- Disable construction of Index when `freq` is set in pandas-compatibility mode ([#13857](https://github.com/rapidsai/cudf/pull/13857)) [@galipremsagar](https://github.com/galipremsagar) +- 
Fix an issue with fetching `NA` from a `TimedeltaColumn` ([#13853](https://github.com/rapidsai/cudf/pull/13853)) [@galipremsagar](https://github.com/galipremsagar) +- Simplify implementation of interval_range() and fix behaviour for floating `freq` ([#13844](https://github.com/rapidsai/cudf/pull/13844)) [@shwina](https://github.com/shwina) +- Fix binary operations between `Series` and `Index` ([#13842](https://github.com/rapidsai/cudf/pull/13842)) [@galipremsagar](https://github.com/galipremsagar) +- Update make_lists_column_from_scalar to use make_offsets_child_column utility ([#13841](https://github.com/rapidsai/cudf/pull/13841)) [@davidwendt](https://github.com/davidwendt) +- Fix read out of bounds in string concatenate ([#13838](https://github.com/rapidsai/cudf/pull/13838)) [@pentschev](https://github.com/pentschev) +- Raise error for more cases when `timezone-aware` data is passed to `as_column` ([#13835](https://github.com/rapidsai/cudf/pull/13835)) [@galipremsagar](https://github.com/galipremsagar) +- Fix `any`, `all` reduction behavior for `axis=None` and warn for other reductions ([#13831](https://github.com/rapidsai/cudf/pull/13831)) [@galipremsagar](https://github.com/galipremsagar) +- Raise error when trying to construct time-zone aware timestamps ([#13830](https://github.com/rapidsai/cudf/pull/13830)) [@galipremsagar](https://github.com/galipremsagar) +- Fix cuFile I/O factories ([#13829](https://github.com/rapidsai/cudf/pull/13829)) [@vuule](https://github.com/vuule) +- DataFrame with namedtuples uses ._field as column names ([#13824](https://github.com/rapidsai/cudf/pull/13824)) [@mroeschke](https://github.com/mroeschke) +- Branch 23.10 merge 23.08 ([#13822](https://github.com/rapidsai/cudf/pull/13822)) [@vyasr](https://github.com/vyasr) +- Return a Series from JIT GroupBy 
apply, rather than a DataFrame ([#13820](https://github.com/rapidsai/cudf/pull/13820)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- No need to dlsym EnsureS3Finalized we can call it directly ([#13819](https://github.com/rapidsai/cudf/pull/13819)) [@robertmaynard](https://github.com/robertmaynard) +- Raise error when mixed types are being constructed ([#13816](https://github.com/rapidsai/cudf/pull/13816)) [@galipremsagar](https://github.com/galipremsagar) +- Fix unbounded sequence issue in `DataFrame` constructor ([#13811](https://github.com/rapidsai/cudf/pull/13811)) [@galipremsagar](https://github.com/galipremsagar) +- Fix Byte-Pair-Encoding usage of cuco static-map for storing merge-pairs ([#13807](https://github.com/rapidsai/cudf/pull/13807)) [@davidwendt](https://github.com/davidwendt) +- Fix for Parquet writer when requested pages per row is smaller than fragment size ([#13806](https://github.com/rapidsai/cudf/pull/13806)) [@etseidl](https://github.com/etseidl) +- Remove hangs from trying to construct un-bounded sequences ([#13799](https://github.com/rapidsai/cudf/pull/13799)) [@galipremsagar](https://github.com/galipremsagar) +- Bug/update libcudf to handle arrow12 changes ([#13794](https://github.com/rapidsai/cudf/pull/13794)) [@robertmaynard](https://github.com/robertmaynard) +- Update get_arrow to arrows 12 CMake target name of arrow::xsimd ([#13790](https://github.com/rapidsai/cudf/pull/13790)) [@robertmaynard](https://github.com/robertmaynard) +- Raise error when trying to join `datetime` and `timedelta` types with other types ([#13786](https://github.com/rapidsai/cudf/pull/13786)) [@galipremsagar](https://github.com/galipremsagar) +- Fix negative unary operation for boolean type ([#13780](https://github.com/rapidsai/cudf/pull/13780)) 
[@galipremsagar](https://github.com/galipremsagar) +- Fix contains(`in`) method for `Series` ([#13779](https://github.com/rapidsai/cudf/pull/13779)) [@galipremsagar](https://github.com/galipremsagar) +- Fix binary operation column ordering and missing column issues ([#13778](https://github.com/rapidsai/cudf/pull/13778)) [@galipremsagar](https://github.com/galipremsagar) +- Cast only time of day to nanos to avoid an overflow in Parquet INT96 write ([#13776](https://github.com/rapidsai/cudf/pull/13776)) [@gerashegalov](https://github.com/gerashegalov) +- Preserve names of column object in various APIs ([#13772](https://github.com/rapidsai/cudf/pull/13772)) [@galipremsagar](https://github.com/galipremsagar) +- Raise error on constructing an array from mixed type inputs ([#13768](https://github.com/rapidsai/cudf/pull/13768)) [@galipremsagar](https://github.com/galipremsagar) +- Fix construction of DataFrames from dict when columns are provided ([#13766](https://github.com/rapidsai/cudf/pull/13766)) [@wence-](https://github.com/wence-) +- Provide our own Cython declaration for make_unique ([#13746](https://github.com/rapidsai/cudf/pull/13746)) [@wence-](https://github.com/wence-) + +## 📖 Documentation + +- Fix typo in docstring: metadata. 
([#14025](https://github.com/rapidsai/cudf/pull/14025)) [@bdice](https://github.com/bdice) +- Fix typo in parquet/page_decode.cuh ([#13849](https://github.com/rapidsai/cudf/pull/13849)) [@XinyuZeng](https://github.com/XinyuZeng) +- Simplify Python doc configuration ([#13826](https://github.com/rapidsai/cudf/pull/13826)) [@vyasr](https://github.com/vyasr) +- Update documentation to reflect recent changes in JSON reader and writer ([#13791](https://github.com/rapidsai/cudf/pull/13791)) [@vuule](https://github.com/vuule) +- Fix all warnings in Python docs ([#13789](https://github.com/rapidsai/cudf/pull/13789)) [@vyasr](https://github.com/vyasr) + +## 🚀 New Features + +- [Java] Add JNI bindings for `integers_to_hex` ([#14205](https://github.com/rapidsai/cudf/pull/14205)) [@razajafri](https://github.com/razajafri) +- Propagate errors from Parquet reader kernels back to host ([#14167](https://github.com/rapidsai/cudf/pull/14167)) [@vuule](https://github.com/vuule) +- JNI for `HISTOGRAM` and `MERGE_HISTOGRAM` aggregations ([#14154](https://github.com/rapidsai/cudf/pull/14154)) [@ttnghia](https://github.com/ttnghia) +- Expose streams in all public sorting APIs ([#14146](https://github.com/rapidsai/cudf/pull/14146)) [@vyasr](https://github.com/vyasr) +- Enable direct ingestion and production of Arrow scalars ([#14121](https://github.com/rapidsai/cudf/pull/14121)) [@vyasr](https://github.com/vyasr) +- Implement `GroupBy.value_counts` to match pandas API ([#14114](https://github.com/rapidsai/cudf/pull/14114)) [@stmio](https://github.com/stmio) +- Refactor parquet thrift reader ([#14097](https://github.com/rapidsai/cudf/pull/14097)) [@etseidl](https://github.com/etseidl) +- Refactor `hash_reduce_by_row` ([#14095](https://github.com/rapidsai/cudf/pull/14095)) 
[@ttnghia](https://github.com/ttnghia) +- Support negative preceding/following for ROW window functions ([#14093](https://github.com/rapidsai/cudf/pull/14093)) [@mythrocks](https://github.com/mythrocks) +- Support for progressive parquet chunked reading. ([#14079](https://github.com/rapidsai/cudf/pull/14079)) [@nvdbaranec](https://github.com/nvdbaranec) +- Implement `HISTOGRAM` and `MERGE_HISTOGRAM` aggregations ([#14045](https://github.com/rapidsai/cudf/pull/14045)) [@ttnghia](https://github.com/ttnghia) +- Expose streams in public search APIs ([#14034](https://github.com/rapidsai/cudf/pull/14034)) [@vyasr](https://github.com/vyasr) +- Expose streams in public replace APIs ([#14010](https://github.com/rapidsai/cudf/pull/14010)) [@vyasr](https://github.com/vyasr) +- Add stream parameter to public cudf::strings::split APIs ([#13997](https://github.com/rapidsai/cudf/pull/13997)) [@davidwendt](https://github.com/davidwendt) +- Expose streams in public filling APIs ([#13990](https://github.com/rapidsai/cudf/pull/13990)) [@vyasr](https://github.com/vyasr) +- Expose streams in public concatenate APIs ([#13987](https://github.com/rapidsai/cudf/pull/13987)) [@vyasr](https://github.com/vyasr) +- Use HostMemoryAllocator in jni::allocate_host_buffer ([#13975](https://github.com/rapidsai/cudf/pull/13975)) [@gerashegalov](https://github.com/gerashegalov) +- Enable fractional null probability for hashing benchmark ([#13967](https://github.com/rapidsai/cudf/pull/13967)) [@Blonck](https://github.com/Blonck) +- Switch pylibcudf-enabled types to use enum class in Cython ([#13931](https://github.com/rapidsai/cudf/pull/13931)) [@vyasr](https://github.com/vyasr) +- Add nvtext::tokenize_with_vocabulary API ([#13930](https://github.com/rapidsai/cudf/pull/13930)) 
[@davidwendt](https://github.com/davidwendt) +- Rewrite `DataFrame.stack` to support multi level column names ([#13927](https://github.com/rapidsai/cudf/pull/13927)) [@isVoid](https://github.com/isVoid) +- Add HostMemoryAllocator interface ([#13924](https://github.com/rapidsai/cudf/pull/13924)) [@gerashegalov](https://github.com/gerashegalov) +- Global stream pool ([#13922](https://github.com/rapidsai/cudf/pull/13922)) [@etseidl](https://github.com/etseidl) +- Create table_input_metadata from a table_metadata ([#13920](https://github.com/rapidsai/cudf/pull/13920)) [@etseidl](https://github.com/etseidl) +- Translate column size overflow exception to JNI ([#13911](https://github.com/rapidsai/cudf/pull/13911)) [@mythrocks](https://github.com/mythrocks) +- Enable RLE boolean encoding for v2 Parquet files ([#13886](https://github.com/rapidsai/cudf/pull/13886)) [@etseidl](https://github.com/etseidl) +- Exclude some tests from running with the compute sanitizer ([#13872](https://github.com/rapidsai/cudf/pull/13872)) [@firestarman](https://github.com/firestarman) +- Expand statistics support in ORC writer ([#13848](https://github.com/rapidsai/cudf/pull/13848)) [@vuule](https://github.com/vuule) +- Register the memory mapped buffer in `datasource` to improve H2D throughput ([#13814](https://github.com/rapidsai/cudf/pull/13814)) [@vuule](https://github.com/vuule) +- Add cudf::strings::find function with target per row ([#13808](https://github.com/rapidsai/cudf/pull/13808)) [@davidwendt](https://github.com/davidwendt) +- Add minhash support for MurmurHash3_x64_128 ([#13796](https://github.com/rapidsai/cudf/pull/13796)) [@davidwendt](https://github.com/davidwendt) +- Remove unnecessary pointer copying in JIT GroupBy Apply ([#13792](https://github.com/rapidsai/cudf/pull/13792)) 
[@brandon-b-miller](https://github.com/brandon-b-miller) +- Add 'poll' function to custreamz kafka consumer ([#13782](https://github.com/rapidsai/cudf/pull/13782)) [@jdye64](https://github.com/jdye64) +- Support `corr` in `GroupBy.apply` through the jit engine ([#13767](https://github.com/rapidsai/cudf/pull/13767)) [@shwina](https://github.com/shwina) +- Optionally write version 2 page headers in Parquet writer ([#13751](https://github.com/rapidsai/cudf/pull/13751)) [@etseidl](https://github.com/etseidl) +- Support more numeric types in `Groupby.apply` with `engine='jit'` ([#13729](https://github.com/rapidsai/cudf/pull/13729)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- [FEA] Add DELTA_BINARY_PACKED decoding support to Parquet reader ([#13637](https://github.com/rapidsai/cudf/pull/13637)) [@etseidl](https://github.com/etseidl) +- Read FIXED_LEN_BYTE_ARRAY as binary in parquet reader ([#13437](https://github.com/rapidsai/cudf/pull/13437)) [@PointKernel](https://github.com/PointKernel) + +## 🛠️ Improvements + +- Pin `dask` and `distributed` for `23.10` release ([#14225](https://github.com/rapidsai/cudf/pull/14225)) [@galipremsagar](https://github.com/galipremsagar) +- update rmm tag path ([#14195](https://github.com/rapidsai/cudf/pull/14195)) [@AyodeAwe](https://github.com/AyodeAwe) +- Disable `Recently Updated` Check ([#14193](https://github.com/rapidsai/cudf/pull/14193)) [@ajschmidt8](https://github.com/ajschmidt8) +- Move cpp/src/hash/hash_allocator.cuh to include/cudf/hashing/detail ([#14163](https://github.com/rapidsai/cudf/pull/14163)) [@davidwendt](https://github.com/davidwendt) +- Add Parquet reader benchmarks for row selection ([#14147](https://github.com/rapidsai/cudf/pull/14147)) [@vuule](https://github.com/vuule) +- Update image names 
([#14145](https://github.com/rapidsai/cudf/pull/14145)) [@AyodeAwe](https://github.com/AyodeAwe) +- Support callables in DataFrame.assign ([#14142](https://github.com/rapidsai/cudf/pull/14142)) [@wence-](https://github.com/wence-) +- Reduce memory usage of as_categorical_column ([#14138](https://github.com/rapidsai/cudf/pull/14138)) [@wence-](https://github.com/wence-) +- Replace Python scalar conversions with libcudf ([#14124](https://github.com/rapidsai/cudf/pull/14124)) [@vyasr](https://github.com/vyasr) +- Update to clang 16.0.6. ([#14120](https://github.com/rapidsai/cudf/pull/14120)) [@bdice](https://github.com/bdice) +- Fix type of empty `Index` and raise warning in `Series` constructor ([#14116](https://github.com/rapidsai/cudf/pull/14116)) [@galipremsagar](https://github.com/galipremsagar) +- Add stream parameter to external dict APIs ([#14115](https://github.com/rapidsai/cudf/pull/14115)) [@SurajAralihalli](https://github.com/SurajAralihalli) +- Add fallback matrix for nvcomp. 
([#14082](https://github.com/rapidsai/cudf/pull/14082)) [@bdice](https://github.com/bdice) +- [Java] Add recoverWithNull to JSONOptions and pass to Table.readJSON ([#14078](https://github.com/rapidsai/cudf/pull/14078)) [@andygrove](https://github.com/andygrove) +- Remove header tests ([#14072](https://github.com/rapidsai/cudf/pull/14072)) [@ajschmidt8](https://github.com/ajschmidt8) +- Refactor `contains_table` with cuco::static_set ([#14064](https://github.com/rapidsai/cudf/pull/14064)) [@PointKernel](https://github.com/PointKernel) +- Remove debug print in a Parquet test ([#14063](https://github.com/rapidsai/cudf/pull/14063)) [@vuule](https://github.com/vuule) +- Expose stream parameter in public nvtext ngram APIs ([#14061](https://github.com/rapidsai/cudf/pull/14061)) [@davidwendt](https://github.com/davidwendt) +- Expose stream parameter in public strings find APIs ([#14060](https://github.com/rapidsai/cudf/pull/14060)) [@davidwendt](https://github.com/davidwendt) +- Update doxygen to 1.9.1 ([#14059](https://github.com/rapidsai/cudf/pull/14059)) [@vyasr](https://github.com/vyasr) +- Remove the mr from the base fixture ([#14057](https://github.com/rapidsai/cudf/pull/14057)) [@vyasr](https://github.com/vyasr) +- Expose streams in public strings case APIs ([#14056](https://github.com/rapidsai/cudf/pull/14056)) [@davidwendt](https://github.com/davidwendt) +- Refactor libcudf indexalator to typed normalator ([#14043](https://github.com/rapidsai/cudf/pull/14043)) [@davidwendt](https://github.com/davidwendt) +- Use cudf::make_empty_column instead of column_view constructor ([#14030](https://github.com/rapidsai/cudf/pull/14030)) [@davidwendt](https://github.com/davidwendt) +- Remove quadratic runtime due to accessing Frame._dtypes in loop 
([#14028](https://github.com/rapidsai/cudf/pull/14028)) [@wence-](https://github.com/wence-) +- Explicitly depend on zlib in conda recipes ([#14018](https://github.com/rapidsai/cudf/pull/14018)) [@wence-](https://github.com/wence-) +- Use grid_stride for stride computations. ([#13996](https://github.com/rapidsai/cudf/pull/13996)) [@bdice](https://github.com/bdice) +- Fix an issue where casting null-array to `object` dtype will result in a failure ([#13994](https://github.com/rapidsai/cudf/pull/13994)) [@galipremsagar](https://github.com/galipremsagar) +- Add tab as literal to cudf::test::to_string output ([#13993](https://github.com/rapidsai/cudf/pull/13993)) [@davidwendt](https://github.com/davidwendt) +- Enable `codes` dtype parity in pandas-compatibility mode for `factorize` API ([#13982](https://github.com/rapidsai/cudf/pull/13982)) [@galipremsagar](https://github.com/galipremsagar) +- Fix `CategoricalIndex` ordering in `Groupby.agg` when pandas-compatibility mode is enabled ([#13978](https://github.com/rapidsai/cudf/pull/13978)) [@galipremsagar](https://github.com/galipremsagar) +- Produce a fatal error if cudf is unable to find pyarrow include directory ([#13976](https://github.com/rapidsai/cudf/pull/13976)) [@cwharris](https://github.com/cwharris) +- Use `thread_index_type` in `partitioning.cu` ([#13973](https://github.com/rapidsai/cudf/pull/13973)) [@divyegala](https://github.com/divyegala) +- Use `cudf::thread_index_type` in `merge.cu` ([#13972](https://github.com/rapidsai/cudf/pull/13972)) [@divyegala](https://github.com/divyegala) +- Use `copy-pr-bot` ([#13970](https://github.com/rapidsai/cudf/pull/13970)) [@ajschmidt8](https://github.com/ajschmidt8) +- Use cudf::thread_index_type in strings custom kernels ([#13968](https://github.com/rapidsai/cudf/pull/13968)) 
[@davidwendt](https://github.com/davidwendt) +- Add `bytes_per_second` to hash_partition benchmark ([#13965](https://github.com/rapidsai/cudf/pull/13965)) [@Blonck](https://github.com/Blonck) +- Added pinned pool reservation API for java ([#13964](https://github.com/rapidsai/cudf/pull/13964)) [@revans2](https://github.com/revans2) +- Simplify wheel build scripts and allow alphas of RAPIDS dependencies ([#13963](https://github.com/rapidsai/cudf/pull/13963)) [@vyasr](https://github.com/vyasr) +- Add `bytes_per_second` to copy_if_else benchmark ([#13960](https://github.com/rapidsai/cudf/pull/13960)) [@Blonck](https://github.com/Blonck) +- Add pandas compatible output to `Series.unique` ([#13959](https://github.com/rapidsai/cudf/pull/13959)) [@galipremsagar](https://github.com/galipremsagar) +- Add `bytes_per_second` to compiled binaryop benchmark ([#13938](https://github.com/rapidsai/cudf/pull/13938)) [@Blonck](https://github.com/Blonck) +- Unpin `dask` and `distributed` for `23.10` development ([#13935](https://github.com/rapidsai/cudf/pull/13935)) [@galipremsagar](https://github.com/galipremsagar) +- Make HostColumnVector.getRefCount public ([#13934](https://github.com/rapidsai/cudf/pull/13934)) [@abellina](https://github.com/abellina) +- Use cuco::static_set in JSON tree algorithm ([#13928](https://github.com/rapidsai/cudf/pull/13928)) [@karthikeyann](https://github.com/karthikeyann) +- Add java API to get size of host memory needed to copy column view ([#13919](https://github.com/rapidsai/cudf/pull/13919)) [@revans2](https://github.com/revans2) +- Use cudf::size_type instead of int32 where appropriate in nvtext functions ([#13915](https://github.com/rapidsai/cudf/pull/13915)) [@davidwendt](https://github.com/davidwendt) +- Enable hugepage for arrow host allocations 
([#13914](https://github.com/rapidsai/cudf/pull/13914)) [@madsbk](https://github.com/madsbk) +- Improve performance of nvtext::edit_distance ([#13912](https://github.com/rapidsai/cudf/pull/13912)) [@davidwendt](https://github.com/davidwendt) +- Ensure cudf internals use pylibcudf in pure Python mode ([#13909](https://github.com/rapidsai/cudf/pull/13909)) [@vyasr](https://github.com/vyasr) +- Use `empty()` instead of `size()` where possible ([#13908](https://github.com/rapidsai/cudf/pull/13908)) [@vuule](https://github.com/vuule) +- [JNI] Adds HostColumnVector.EventHandler for spillability checks ([#13898](https://github.com/rapidsai/cudf/pull/13898)) [@abellina](https://github.com/abellina) +- Return `Timestamp` & `Timedelta` for fetching scalars in `DatetimeIndex` & `TimedeltaIndex` ([#13896](https://github.com/rapidsai/cudf/pull/13896)) [@galipremsagar](https://github.com/galipremsagar) +- Allow explicit `shuffle="p2p"` within dask-cudf API ([#13893](https://github.com/rapidsai/cudf/pull/13893)) [@rjzamora](https://github.com/rjzamora) +- Disable creation of `DatetimeIndex` when `freq` is passed to `cudf.date_range` ([#13890](https://github.com/rapidsai/cudf/pull/13890)) [@galipremsagar](https://github.com/galipremsagar) +- Bring parity with pandas for `datetime` & `timedelta` comparison operations ([#13877](https://github.com/rapidsai/cudf/pull/13877)) [@galipremsagar](https://github.com/galipremsagar) +- Change `NA` to `NaT` for `datetime` and `timedelta` types ([#13868](https://github.com/rapidsai/cudf/pull/13868)) [@galipremsagar](https://github.com/galipremsagar) +- Raise error when `astype(object)` is called in pandas compatibility mode ([#13862](https://github.com/rapidsai/cudf/pull/13862)) [@galipremsagar](https://github.com/galipremsagar) +- Fixes a performance regression in FST 
([#13850](https://github.com/rapidsai/cudf/pull/13850)) [@elstehle](https://github.com/elstehle) +- Set native handles to null on close in Java wrapper classes ([#13818](https://github.com/rapidsai/cudf/pull/13818)) [@jlowe](https://github.com/jlowe) +- Avoid use of CUDF_EXPECTS in libcudf unit tests outside of helper functions with return values ([#13812](https://github.com/rapidsai/cudf/pull/13812)) [@vuule](https://github.com/vuule) +- Update `lists::contains` to experimental row comparator ([#13810](https://github.com/rapidsai/cudf/pull/13810)) [@divyegala](https://github.com/divyegala) +- Reduce `lists::contains` dispatches for scalars ([#13805](https://github.com/rapidsai/cudf/pull/13805)) [@divyegala](https://github.com/divyegala) +- Long string optimization for string column parsing in JSON reader ([#13803](https://github.com/rapidsai/cudf/pull/13803)) [@karthikeyann](https://github.com/karthikeyann) +- Raise NotImplementedError for pd.SparseDtype ([#13798](https://github.com/rapidsai/cudf/pull/13798)) [@mroeschke](https://github.com/mroeschke) +- Remove the libcudf cudf::offset_type type ([#13788](https://github.com/rapidsai/cudf/pull/13788)) [@davidwendt](https://github.com/davidwendt) +- Move Spark-indpendent Table debug to cudf Java ([#13783](https://github.com/rapidsai/cudf/pull/13783)) [@gerashegalov](https://github.com/gerashegalov) +- Update to Cython 3.0.0 ([#13777](https://github.com/rapidsai/cudf/pull/13777)) [@vyasr](https://github.com/vyasr) +- Refactor Parquet reader handling of V2 page header info ([#13775](https://github.com/rapidsai/cudf/pull/13775)) [@etseidl](https://github.com/etseidl) +- Branch 23.10 merge 23.08 ([#13773](https://github.com/rapidsai/cudf/pull/13773)) [@vyasr](https://github.com/vyasr) +- Restructure JSON code to correctly reflect 
legacy/experimental status ([#13757](https://github.com/rapidsai/cudf/pull/13757)) [@vuule](https://github.com/vuule) +- Branch 23.10 merge 23.08 ([#13753](https://github.com/rapidsai/cudf/pull/13753)) [@vyasr](https://github.com/vyasr) +- Enforce deprecations in `23.10` ([#13732](https://github.com/rapidsai/cudf/pull/13732)) [@galipremsagar](https://github.com/galipremsagar) +- Upgrade to arrow 12 ([#13728](https://github.com/rapidsai/cudf/pull/13728)) [@galipremsagar](https://github.com/galipremsagar) +- Refactors JSON reader's pushdown automaton ([#13716](https://github.com/rapidsai/cudf/pull/13716)) [@elstehle](https://github.com/elstehle) +- Remove Arrow dependency from the `datasource.hpp` public header ([#13698](https://github.com/rapidsai/cudf/pull/13698)) [@vuule](https://github.com/vuule) + +# cuDF 23.08.00 (9 Aug 2023) + +## 🚨 Breaking Changes + +- Enforce deprecations and add clarifications around existing deprecations ([#13710](https://github.com/rapidsai/cudf/pull/13710)) [@galipremsagar](https://github.com/galipremsagar) +- Separate MurmurHash32 from hash_functions.cuh ([#13681](https://github.com/rapidsai/cudf/pull/13681)) [@davidwendt](https://github.com/davidwendt) +- Avoid storing metadata in pointers in ORC and Parquet writers ([#13648](https://github.com/rapidsai/cudf/pull/13648)) [@vuule](https://github.com/vuule) +- Expose streams in all public copying APIs ([#13629](https://github.com/rapidsai/cudf/pull/13629)) [@vyasr](https://github.com/vyasr) +- Remove deprecated cudf::strings::slice_strings (by delimiter) functions ([#13628](https://github.com/rapidsai/cudf/pull/13628)) [@davidwendt](https://github.com/davidwendt) +- Remove deprecated cudf.set_allocator. 
([#13591](https://github.com/rapidsai/cudf/pull/13591)) [@bdice](https://github.com/bdice) +- Change build.sh to use pip install instead of setup.py ([#13507](https://github.com/rapidsai/cudf/pull/13507)) [@vyasr](https://github.com/vyasr) +- Remove unused max_rows_tensor parameter from subword tokenizer ([#13463](https://github.com/rapidsai/cudf/pull/13463)) [@davidwendt](https://github.com/davidwendt) +- Fix decimal scale reductions in `_get_decimal_type` ([#13224](https://github.com/rapidsai/cudf/pull/13224)) [@charlesbluca](https://github.com/charlesbluca) + +## 🐛 Bug Fixes + +- Add CUDA version to cudf_kafka and libcudf-example build strings. ([#13769](https://github.com/rapidsai/cudf/pull/13769)) [@bdice](https://github.com/bdice) +- Fix typo in wheels-test.yaml. ([#13763](https://github.com/rapidsai/cudf/pull/13763)) [@bdice](https://github.com/bdice) +- Don't test strings shorter than the requested ngram size ([#13758](https://github.com/rapidsai/cudf/pull/13758)) [@vyasr](https://github.com/vyasr) +- Add CUDA version to custreamz build string. 
([#13754](https://github.com/rapidsai/cudf/pull/13754)) [@bdice](https://github.com/bdice) +- Fix writing of ORC files with empty child string columns ([#13745](https://github.com/rapidsai/cudf/pull/13745)) [@vuule](https://github.com/vuule) +- Remove the erroneous "empty level" short-circuit from ORC reader ([#13722](https://github.com/rapidsai/cudf/pull/13722)) [@vuule](https://github.com/vuule) +- Fix character counting when writing sliced tables into ORC ([#13721](https://github.com/rapidsai/cudf/pull/13721)) [@vuule](https://github.com/vuule) +- Parquet uses row group row count if missing from header ([#13712](https://github.com/rapidsai/cudf/pull/13712)) [@hyperbolic2346](https://github.com/hyperbolic2346) +- Fix reading of RLE encoded boolean data from parquet files with V2 page headers ([#13707](https://github.com/rapidsai/cudf/pull/13707)) [@etseidl](https://github.com/etseidl) +- Fix a corner case of list lexicographic comparator ([#13701](https://github.com/rapidsai/cudf/pull/13701)) [@ttnghia](https://github.com/ttnghia) +- Fix combined filtering and column projection in `dask_cudf.read_parquet` ([#13697](https://github.com/rapidsai/cudf/pull/13697)) [@rjzamora](https://github.com/rjzamora) +- Revert fetch-rapids changes ([#13696](https://github.com/rapidsai/cudf/pull/13696)) [@vyasr](https://github.com/vyasr) +- Data generator - include offsets in the size estimate of list elments ([#13688](https://github.com/rapidsai/cudf/pull/13688)) [@vuule](https://github.com/vuule) +- Add `cuda-nvcc-impl` to `cudf` for `numba` CUDA 12 ([#13673](https://github.com/rapidsai/cudf/pull/13673)) [@jakirkham](https://github.com/jakirkham) +- Fix combined filtering and column projection in `read_parquet` ([#13666](https://github.com/rapidsai/cudf/pull/13666)) 
[@rjzamora](https://github.com/rjzamora) +- Use `thrust::identity` as hash functions for byte pair encoding ([#13665](https://github.com/rapidsai/cudf/pull/13665)) [@PointKernel](https://github.com/PointKernel) +- Fix loc-getitem ordering when index contains duplicate labels ([#13659](https://github.com/rapidsai/cudf/pull/13659)) [@wence-](https://github.com/wence-) +- [REVIEW] Introduce parity with pandas for `MultiIndex.loc` ordering & fix a bug in `Groupby` with `as_index` ([#13657](https://github.com/rapidsai/cudf/pull/13657)) [@galipremsagar](https://github.com/galipremsagar) +- Fix memcheck error found in nvtext tokenize functions ([#13649](https://github.com/rapidsai/cudf/pull/13649)) [@davidwendt](https://github.com/davidwendt) +- Fix `has_nonempty_nulls` ignoring column offset ([#13647](https://github.com/rapidsai/cudf/pull/13647)) [@ttnghia](https://github.com/ttnghia) +- [Java] Avoid double-free corruption in case of an Exception while creating a ColumnView ([#13645](https://github.com/rapidsai/cudf/pull/13645)) [@razajafri](https://github.com/razajafri) +- Fix memcheck error in ORC reader call to cudf::io::copy_uncompressed_kernel ([#13643](https://github.com/rapidsai/cudf/pull/13643)) [@davidwendt](https://github.com/davidwendt) +- Fix CUDA 12 conda environment to remove cubinlinker and ptxcompiler. 
([#13636](https://github.com/rapidsai/cudf/pull/13636)) [@bdice](https://github.com/bdice) +- Fix inf/NaN comparisons for FLOAT orderby in window functions ([#13635](https://github.com/rapidsai/cudf/pull/13635)) [@mythrocks](https://github.com/mythrocks) +- Refactor `Index` search to simplify code and increase correctness ([#13625](https://github.com/rapidsai/cudf/pull/13625)) [@wence-](https://github.com/wence-) +- Fix compile warning for unused variable in split_re.cu ([#13621](https://github.com/rapidsai/cudf/pull/13621)) [@davidwendt](https://github.com/davidwendt) +- Fix tz_localize for dask_cudf Series ([#13610](https://github.com/rapidsai/cudf/pull/13610)) [@shwina](https://github.com/shwina) +- Fix issue with no decompressed data in ORC reader ([#13609](https://github.com/rapidsai/cudf/pull/13609)) [@vuule](https://github.com/vuule) +- Fix floating point window range extents. ([#13606](https://github.com/rapidsai/cudf/pull/13606)) [@mythrocks](https://github.com/mythrocks) +- Fix `localize(None)` for timezone-naive columns ([#13603](https://github.com/rapidsai/cudf/pull/13603)) [@shwina](https://github.com/shwina) +- Fixed a memory leak caused by Exception thrown while constructing a ColumnView ([#13597](https://github.com/rapidsai/cudf/pull/13597)) [@razajafri](https://github.com/razajafri) +- Handle nullptr return value from bitmask_or in distinct_count ([#13590](https://github.com/rapidsai/cudf/pull/13590)) [@wence-](https://github.com/wence-) +- Bring parity with pandas in Index.join ([#13589](https://github.com/rapidsai/cudf/pull/13589)) [@galipremsagar](https://github.com/galipremsagar) +- Fix cudf.melt when there are more than 255 columns ([#13588](https://github.com/rapidsai/cudf/pull/13588)) [@hcho3](https://github.com/hcho3) +- Fix memory issues in cuIO due 
to removal of memory padding ([#13586](https://github.com/rapidsai/cudf/pull/13586)) [@ttnghia](https://github.com/ttnghia) +- Fix Parquet multi-file reading ([#13584](https://github.com/rapidsai/cudf/pull/13584)) [@etseidl](https://github.com/etseidl) +- Fix memcheck error found in LISTS_TEST ([#13579](https://github.com/rapidsai/cudf/pull/13579)) [@davidwendt](https://github.com/davidwendt) +- Fix memcheck error found in STRINGS_TEST ([#13578](https://github.com/rapidsai/cudf/pull/13578)) [@davidwendt](https://github.com/davidwendt) +- Fix memcheck error found in INTEROP_TEST ([#13577](https://github.com/rapidsai/cudf/pull/13577)) [@davidwendt](https://github.com/davidwendt) +- Fix memcheck errors found in REDUCTION_TEST ([#13574](https://github.com/rapidsai/cudf/pull/13574)) [@davidwendt](https://github.com/davidwendt) +- Preemptive fix for hive-partitioning change in dask ([#13564](https://github.com/rapidsai/cudf/pull/13564)) [@rjzamora](https://github.com/rjzamora) +- Fix an issue with `dask_cudf.read_csv` when lines are needed to be skipped ([#13555](https://github.com/rapidsai/cudf/pull/13555)) [@galipremsagar](https://github.com/galipremsagar) +- Fix out-of-bounds memory write in cudf::dictionary::detail::concatenate ([#13554](https://github.com/rapidsai/cudf/pull/13554)) [@davidwendt](https://github.com/davidwendt) +- Fix the null mask size in json reader ([#13537](https://github.com/rapidsai/cudf/pull/13537)) [@karthikeyann](https://github.com/karthikeyann) +- Fix cudf::strings::strip for all-empty input column ([#13533](https://github.com/rapidsai/cudf/pull/13533)) [@davidwendt](https://github.com/davidwendt) +- Make sure to build without isolation or installing dependencies ([#13524](https://github.com/rapidsai/cudf/pull/13524)) [@vyasr](https://github.com/vyasr) 
+- Remove preload lib from CMake for now ([#13519](https://github.com/rapidsai/cudf/pull/13519)) [@vyasr](https://github.com/vyasr) +- Fix missing separator after null values in JSON writer ([#13503](https://github.com/rapidsai/cudf/pull/13503)) [@karthikeyann](https://github.com/karthikeyann) +- Ensure `single_lane_block_sum_reduce` is safe to call in a loop ([#13488](https://github.com/rapidsai/cudf/pull/13488)) [@wence-](https://github.com/wence-) +- Update all versions in pyproject.toml files. ([#13486](https://github.com/rapidsai/cudf/pull/13486)) [@bdice](https://github.com/bdice) +- Remove applying nvbench that doesn't exist in 23.08 ([#13484](https://github.com/rapidsai/cudf/pull/13484)) [@robertmaynard](https://github.com/robertmaynard) +- Fix chunked Parquet reader benchmark ([#13482](https://github.com/rapidsai/cudf/pull/13482)) [@vuule](https://github.com/vuule) +- Update JNI JSON reader column compatability for Spark ([#13477](https://github.com/rapidsai/cudf/pull/13477)) [@revans2](https://github.com/revans2) +- Fix unsanitized output of scan with strings ([#13455](https://github.com/rapidsai/cudf/pull/13455)) [@davidwendt](https://github.com/davidwendt) +- Reject functions without bytecode from `_can_be_jitted` in GroupBy Apply ([#13429](https://github.com/rapidsai/cudf/pull/13429)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Fix decimal scale reductions in `_get_decimal_type` ([#13224](https://github.com/rapidsai/cudf/pull/13224)) [@charlesbluca](https://github.com/charlesbluca) + +## 📖 Documentation + +- Fix doxygen groups for io data sources and sinks ([#13718](https://github.com/rapidsai/cudf/pull/13718)) [@davidwendt](https://github.com/davidwendt) +- Add pandas compatibility note to DataFrame.query docstring 
([#13693](https://github.com/rapidsai/cudf/pull/13693)) [@beckernick](https://github.com/beckernick) +- Add pylibcudf to developer guide ([#13639](https://github.com/rapidsai/cudf/pull/13639)) [@vyasr](https://github.com/vyasr) +- Fix repeated words in doxygen text ([#13598](https://github.com/rapidsai/cudf/pull/13598)) [@karthikeyann](https://github.com/karthikeyann) +- Update docs for top-level API. ([#13592](https://github.com/rapidsai/cudf/pull/13592)) [@bdice](https://github.com/bdice) +- Fix the the doxygen text for cudf::concatenate and other places ([#13561](https://github.com/rapidsai/cudf/pull/13561)) [@davidwendt](https://github.com/davidwendt) +- Document stream validation approach used in testing ([#13556](https://github.com/rapidsai/cudf/pull/13556)) [@vyasr](https://github.com/vyasr) +- Cleanup doc repetitions in libcudf ([#13470](https://github.com/rapidsai/cudf/pull/13470)) [@karthikeyann](https://github.com/karthikeyann) + +## 🚀 New Features + +- Support `min` and `max` aggregations for list type in groupby and reduction ([#13676](https://github.com/rapidsai/cudf/pull/13676)) [@ttnghia](https://github.com/ttnghia) +- Add nvtext::jaccard_index API for strings columns ([#13669](https://github.com/rapidsai/cudf/pull/13669)) [@davidwendt](https://github.com/davidwendt) +- Add read_parquet_metadata libcudf API ([#13663](https://github.com/rapidsai/cudf/pull/13663)) [@karthikeyann](https://github.com/karthikeyann) +- Expose streams in all public copying APIs ([#13629](https://github.com/rapidsai/cudf/pull/13629)) [@vyasr](https://github.com/vyasr) +- Add XXHash_64 hash function to cudf ([#13612](https://github.com/rapidsai/cudf/pull/13612)) [@davidwendt](https://github.com/davidwendt) +- Java support: Floating point order-by columns for RANGE window functions 
([#13595](https://github.com/rapidsai/cudf/pull/13595)) [@mythrocks](https://github.com/mythrocks) +- Use `cuco::static_map` to build string dictionaries in ORC writer ([#13580](https://github.com/rapidsai/cudf/pull/13580)) [@vuule](https://github.com/vuule) +- Add pylibcudf subpackage with gather implementation ([#13562](https://github.com/rapidsai/cudf/pull/13562)) [@vyasr](https://github.com/vyasr) +- Add JNI for `lists::concatenate_list_elements` ([#13547](https://github.com/rapidsai/cudf/pull/13547)) [@ttnghia](https://github.com/ttnghia) +- Enable nested types for `lists::concatenate_list_elements` ([#13545](https://github.com/rapidsai/cudf/pull/13545)) [@ttnghia](https://github.com/ttnghia) +- Add unicode encoding for string columns in JSON writer ([#13539](https://github.com/rapidsai/cudf/pull/13539)) [@karthikeyann](https://github.com/karthikeyann) +- Remove numba kernels from `find_index_of_val` ([#13517](https://github.com/rapidsai/cudf/pull/13517)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Floating point order-by columns for RANGE window functions ([#13512](https://github.com/rapidsai/cudf/pull/13512)) [@mythrocks](https://github.com/mythrocks) +- Parse column chunk metadata statistics in parquet reader ([#13472](https://github.com/rapidsai/cudf/pull/13472)) [@karthikeyann](https://github.com/karthikeyann) +- Add `abs` function to apply ([#13408](https://github.com/rapidsai/cudf/pull/13408)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- [FEA] AST filtering in parquet reader ([#13348](https://github.com/rapidsai/cudf/pull/13348)) [@karthikeyann](https://github.com/karthikeyann) +- [FEA] Adds option to recover from invalid JSON lines in JSON tokenizer ([#13344](https://github.com/rapidsai/cudf/pull/13344)) 
[@elstehle](https://github.com/elstehle) +- Ensure cccl packages don't clash with upstream version ([#13235](https://github.com/rapidsai/cudf/pull/13235)) [@robertmaynard](https://github.com/robertmaynard) +- Update `struct_minmax_util` to experimental row comparator ([#13069](https://github.com/rapidsai/cudf/pull/13069)) [@divyegala](https://github.com/divyegala) +- Add stream parameter to hashing APIs ([#12090](https://github.com/rapidsai/cudf/pull/12090)) [@vyasr](https://github.com/vyasr) + +## 🛠️ Improvements + +- Pin `dask` and `distributed` for `23.08` release ([#13802](https://github.com/rapidsai/cudf/pull/13802)) [@galipremsagar](https://github.com/galipremsagar) +- Relax protobuf pinnings. ([#13770](https://github.com/rapidsai/cudf/pull/13770)) [@bdice](https://github.com/bdice) +- Switch fully unbounded window functions to use aggregations ([#13727](https://github.com/rapidsai/cudf/pull/13727)) [@mythrocks](https://github.com/mythrocks) +- Switch to new wheel building pipeline ([#13723](https://github.com/rapidsai/cudf/pull/13723)) [@vyasr](https://github.com/vyasr) +- Revert CUDA 12.0 CI workflows to branch-23.08. 
([#13719](https://github.com/rapidsai/cudf/pull/13719)) [@bdice](https://github.com/bdice) +- Adding identify minimum version requirement ([#13713](https://github.com/rapidsai/cudf/pull/13713)) [@hyperbolic2346](https://github.com/hyperbolic2346) +- Enforce deprecations and add clarifications around existing deprecations ([#13710](https://github.com/rapidsai/cudf/pull/13710)) [@galipremsagar](https://github.com/galipremsagar) +- Optimize ORC reader performance for list data ([#13708](https://github.com/rapidsai/cudf/pull/13708)) [@vyasr](https://github.com/vyasr) +- fix limit overflow message in a docstring ([#13703](https://github.com/rapidsai/cudf/pull/13703)) [@ahmet-uyar](https://github.com/ahmet-uyar) +- Alleviates JSON parser's need for multi-file sources to end with a newline ([#13702](https://github.com/rapidsai/cudf/pull/13702)) [@elstehle](https://github.com/elstehle) +- Update cython-lint and replace flake8 with ruff ([#13699](https://github.com/rapidsai/cudf/pull/13699)) [@vyasr](https://github.com/vyasr) +- Add `__dask_tokenize__` definitions to cudf classes ([#13695](https://github.com/rapidsai/cudf/pull/13695)) [@rjzamora](https://github.com/rjzamora) +- Convert libcudf hashing benchmarks to nvbench ([#13694](https://github.com/rapidsai/cudf/pull/13694)) [@davidwendt](https://github.com/davidwendt) +- Separate MurmurHash32 from hash_functions.cuh ([#13681](https://github.com/rapidsai/cudf/pull/13681)) [@davidwendt](https://github.com/davidwendt) +- Improve performance of cudf::strings::split on whitespace ([#13680](https://github.com/rapidsai/cudf/pull/13680)) [@davidwendt](https://github.com/davidwendt) +- Allow ORC and Parquet writers to write nullable columns without nulls as non-nullable ([#13675](https://github.com/rapidsai/cudf/pull/13675)) 
[@vuule](https://github.com/vuule) +- Raise a NotImplementedError in to_datetime when utc is passed ([#13670](https://github.com/rapidsai/cudf/pull/13670)) [@shwina](https://github.com/shwina) +- Add rmm_mode parameter to nvbench base fixture ([#13668](https://github.com/rapidsai/cudf/pull/13668)) [@davidwendt](https://github.com/davidwendt) +- Fix multiindex loc ordering in pandas-compat mode ([#13660](https://github.com/rapidsai/cudf/pull/13660)) [@wence-](https://github.com/wence-) +- Add nvtext hash_character_ngrams function ([#13654](https://github.com/rapidsai/cudf/pull/13654)) [@davidwendt](https://github.com/davidwendt) +- Avoid storing metadata in pointers in ORC and Parquet writers ([#13648](https://github.com/rapidsai/cudf/pull/13648)) [@vuule](https://github.com/vuule) +- Acquire spill lock in to/from_arrow ([#13646](https://github.com/rapidsai/cudf/pull/13646)) [@shwina](https://github.com/shwina) +- Expose stable versions of libcudf sort routines ([#13634](https://github.com/rapidsai/cudf/pull/13634)) [@wence-](https://github.com/wence-) +- Separate out hash_test.cpp source for each hash API ([#13633](https://github.com/rapidsai/cudf/pull/13633)) [@davidwendt](https://github.com/davidwendt) +- Remove deprecated cudf::strings::slice_strings (by delimiter) functions ([#13628](https://github.com/rapidsai/cudf/pull/13628)) [@davidwendt](https://github.com/davidwendt) +- Create separate libcudf hash APIs for each supported hash function ([#13626](https://github.com/rapidsai/cudf/pull/13626)) [@davidwendt](https://github.com/davidwendt) +- Add convert_dtypes API ([#13623](https://github.com/rapidsai/cudf/pull/13623)) [@shwina](https://github.com/shwina) +- Clean up cupy in dependencies.yaml. 
([#13617](https://github.com/rapidsai/cudf/pull/13617)) [@bdice](https://github.com/bdice) +- Use cuda-version to constrain cudatoolkit. ([#13615](https://github.com/rapidsai/cudf/pull/13615)) [@bdice](https://github.com/bdice) +- Add murmurhash3_x64_128 function to libcudf ([#13604](https://github.com/rapidsai/cudf/pull/13604)) [@davidwendt](https://github.com/davidwendt) +- Performance improvement for cudf::strings::like ([#13594](https://github.com/rapidsai/cudf/pull/13594)) [@davidwendt](https://github.com/davidwendt) +- Remove deprecated cudf.set_allocator. ([#13591](https://github.com/rapidsai/cudf/pull/13591)) [@bdice](https://github.com/bdice) +- Clean up cudf device atomic with `cuda::atomic_ref` ([#13583](https://github.com/rapidsai/cudf/pull/13583)) [@PointKernel](https://github.com/PointKernel) +- Add java bindings for distinct count ([#13573](https://github.com/rapidsai/cudf/pull/13573)) [@revans2](https://github.com/revans2) +- Use nvcomp conda package. 
([#13566](https://github.com/rapidsai/cudf/pull/13566)) [@bdice](https://github.com/bdice) +- Add exception to string_scalar if input string exceeds size_type ([#13560](https://github.com/rapidsai/cudf/pull/13560)) [@davidwendt](https://github.com/davidwendt) +- Add dispatch for `cudf.Dataframe` to/from `pyarrow.Table` conversion ([#13558](https://github.com/rapidsai/cudf/pull/13558)) [@rjzamora](https://github.com/rjzamora) +- Get rid of `cuco::pair_type` aliases ([#13553](https://github.com/rapidsai/cudf/pull/13553)) [@PointKernel](https://github.com/PointKernel) +- Introduce parity with pandas when `sort=False` in `Groupby` ([#13551](https://github.com/rapidsai/cudf/pull/13551)) [@galipremsagar](https://github.com/galipremsagar) +- Update CMake in docker to 3.26.4 ([#13550](https://github.com/rapidsai/cudf/pull/13550)) [@NvTimLiu](https://github.com/NvTimLiu) +- Clarify source of error message in stream testing. 
([#13541](https://github.com/rapidsai/cudf/pull/13541)) [@bdice](https://github.com/bdice) +- Deprecate `strings_to_categorical` in `cudf.read_parquet` ([#13540](https://github.com/rapidsai/cudf/pull/13540)) [@galipremsagar](https://github.com/galipremsagar) +- Update to CMake 3.26.4 ([#13538](https://github.com/rapidsai/cudf/pull/13538)) [@vyasr](https://github.com/vyasr) +- s3 folder naming fix ([#13536](https://github.com/rapidsai/cudf/pull/13536)) [@AyodeAwe](https://github.com/AyodeAwe) +- Implement iloc-getitem using parse-don't-validate approach ([#13534](https://github.com/rapidsai/cudf/pull/13534)) [@wence-](https://github.com/wence-) +- Make synchronization explicit in the names of `hostdevice_*` copying APIs ([#13530](https://github.com/rapidsai/cudf/pull/13530)) [@ttnghia](https://github.com/ttnghia) +- Add benchmark (Google Benchmark) dependency to conda packages. ([#13528](https://github.com/rapidsai/cudf/pull/13528)) [@bdice](https://github.com/bdice) +- Add libcufile to dependencies.yaml. 
([#13523](https://github.com/rapidsai/cudf/pull/13523)) [@bdice](https://github.com/bdice) +- Fix some memoization logic in groupby/sort/sort_helper.cu ([#13521](https://github.com/rapidsai/cudf/pull/13521)) [@davidwendt](https://github.com/davidwendt) +- Use sizes_to_offsets_iterator in cudf::gather for strings ([#13520](https://github.com/rapidsai/cudf/pull/13520)) [@davidwendt](https://github.com/davidwendt) +- use rapids-upload-docs script ([#13518](https://github.com/rapidsai/cudf/pull/13518)) [@AyodeAwe](https://github.com/AyodeAwe) +- Support UTF-8 BOM in CSV reader ([#13516](https://github.com/rapidsai/cudf/pull/13516)) [@davidwendt](https://github.com/davidwendt) +- Move stream-related test configuration to CMake ([#13513](https://github.com/rapidsai/cudf/pull/13513)) [@vyasr](https://github.com/vyasr) +- Implement `cudf.option_context` ([#13511](https://github.com/rapidsai/cudf/pull/13511)) [@galipremsagar](https://github.com/galipremsagar) +- Unpin `dask` and `distributed` for development ([#13508](https://github.com/rapidsai/cudf/pull/13508)) [@galipremsagar](https://github.com/galipremsagar) +- Change build.sh to use pip install instead of setup.py ([#13507](https://github.com/rapidsai/cudf/pull/13507)) [@vyasr](https://github.com/vyasr) +- Use test default stream ([#13506](https://github.com/rapidsai/cudf/pull/13506)) [@vyasr](https://github.com/vyasr) +- Remove documentation build scripts for Jenkins ([#13495](https://github.com/rapidsai/cudf/pull/13495)) [@ajschmidt8](https://github.com/ajschmidt8) +- Use east const in include files ([#13494](https://github.com/rapidsai/cudf/pull/13494)) [@karthikeyann](https://github.com/karthikeyann) +- Use east const in src files ([#13493](https://github.com/rapidsai/cudf/pull/13493)) 
[@karthikeyann](https://github.com/karthikeyann) +- Use east const in tests files ([#13492](https://github.com/rapidsai/cudf/pull/13492)) [@karthikeyann](https://github.com/karthikeyann) +- Use east const in benchmarks files ([#13491](https://github.com/rapidsai/cudf/pull/13491)) [@karthikeyann](https://github.com/karthikeyann) +- Performance improvement for nvtext tokenize/token functions ([#13480](https://github.com/rapidsai/cudf/pull/13480)) [@davidwendt](https://github.com/davidwendt) +- Add pd.Float*Dtype to Avro and ORC mappings ([#13475](https://github.com/rapidsai/cudf/pull/13475)) [@mroeschke](https://github.com/mroeschke) +- Use pandas public APIs where available ([#13467](https://github.com/rapidsai/cudf/pull/13467)) [@mroeschke](https://github.com/mroeschke) +- Allow pd.ArrowDtype in cudf.from_pandas ([#13465](https://github.com/rapidsai/cudf/pull/13465)) [@mroeschke](https://github.com/mroeschke) +- Rework libcudf regex benchmarks with nvbench ([#13464](https://github.com/rapidsai/cudf/pull/13464)) [@davidwendt](https://github.com/davidwendt) +- Remove unused max_rows_tensor parameter from subword tokenizer ([#13463](https://github.com/rapidsai/cudf/pull/13463)) [@davidwendt](https://github.com/davidwendt) +- Separate io-text and nvtext pytests into different files ([#13435](https://github.com/rapidsai/cudf/pull/13435)) [@davidwendt](https://github.com/davidwendt) +- Add a move_to function to cudf::string_view::const_iterator ([#13428](https://github.com/rapidsai/cudf/pull/13428)) [@davidwendt](https://github.com/davidwendt) +- Allow newer scikit-build ([#13424](https://github.com/rapidsai/cudf/pull/13424)) [@vyasr](https://github.com/vyasr) +- Refactor sort_by_values to sort_values, drop indices from return values. 
([#13419](https://github.com/rapidsai/cudf/pull/13419)) [@bdice](https://github.com/bdice) +- Inline Cython exception handler ([#13411](https://github.com/rapidsai/cudf/pull/13411)) [@vyasr](https://github.com/vyasr) +- Init JNI version 23.08.0-SNAPSHOT ([#13401](https://github.com/rapidsai/cudf/pull/13401)) [@pxLi](https://github.com/pxLi) +- Refactor ORC reader ([#13396](https://github.com/rapidsai/cudf/pull/13396)) [@ttnghia](https://github.com/ttnghia) +- JNI: Remove cleaned objects in memory cleaner ([#13378](https://github.com/rapidsai/cudf/pull/13378)) [@res-life](https://github.com/res-life) +- Add tests of currently unsupported indexing ([#13338](https://github.com/rapidsai/cudf/pull/13338)) [@wence-](https://github.com/wence-) +- Performance improvement for some libcudf regex functions for long strings ([#13322](https://github.com/rapidsai/cudf/pull/13322)) [@davidwendt](https://github.com/davidwendt) +- Exposure Tracked Buffer (first step towards unifying copy-on-write and spilling) ([#13307](https://github.com/rapidsai/cudf/pull/13307)) [@madsbk](https://github.com/madsbk) +- Write string data directly to column_buffer in Parquet reader ([#13302](https://github.com/rapidsai/cudf/pull/13302)) [@etseidl](https://github.com/etseidl) +- Add stacktrace into cudf exception types ([#13298](https://github.com/rapidsai/cudf/pull/13298)) [@ttnghia](https://github.com/ttnghia) +- cuDF: Build CUDA 12 packages ([#12922](https://github.com/rapidsai/cudf/pull/12922)) [@bdice](https://github.com/bdice) + +# cuDF 23.06.00 (7 Jun 2023) + +## 🚨 Breaking Changes + +- Fix batch processing for parquet writer ([#13438](https://github.com/rapidsai/cudf/pull/13438)) [@ttnghia](https://github.com/ttnghia) +- Use <NA> instead of null to match pandas. 
([#13415](https://github.com/rapidsai/cudf/pull/13415)) [@bdice](https://github.com/bdice) +- Remove UNKNOWN_NULL_COUNT ([#13372](https://github.com/rapidsai/cudf/pull/13372)) [@vyasr](https://github.com/vyasr) +- Remove default UNKNOWN_NULL_COUNT from cudf::column member functions ([#13341](https://github.com/rapidsai/cudf/pull/13341)) [@davidwendt](https://github.com/davidwendt) +- Use std::overflow_error when output would exceed column size limit ([#13323](https://github.com/rapidsai/cudf/pull/13323)) [@davidwendt](https://github.com/davidwendt) +- Remove null mask and null count from column_view constructors ([#13311](https://github.com/rapidsai/cudf/pull/13311)) [@vyasr](https://github.com/vyasr) +- Change default value of the `observed=` argument in groupby to `True` to reflect the actual behaviour ([#13296](https://github.com/rapidsai/cudf/pull/13296)) [@shwina](https://github.com/shwina) +- Throw error if UNINITIALIZED is passed to cudf::state_null_count ([#13292](https://github.com/rapidsai/cudf/pull/13292)) [@davidwendt](https://github.com/davidwendt) +- Remove default null-count parameter from cudf::make_strings_column factory ([#13227](https://github.com/rapidsai/cudf/pull/13227)) [@davidwendt](https://github.com/davidwendt) +- Remove UNKNOWN_NULL_COUNT where it can be easily computed ([#13205](https://github.com/rapidsai/cudf/pull/13205)) [@vyasr](https://github.com/vyasr) +- Update minimum Python version to Python 3.9 ([#13196](https://github.com/rapidsai/cudf/pull/13196)) [@shwina](https://github.com/shwina) +- Refactor contiguous_split API into contiguous_split.hpp ([#13186](https://github.com/rapidsai/cudf/pull/13186)) [@abellina](https://github.com/abellina) +- Cleanup Parquet chunked writer ([#13094](https://github.com/rapidsai/cudf/pull/13094)) 
[@ttnghia](https://github.com/ttnghia) +- Cleanup ORC chunked writer ([#13091](https://github.com/rapidsai/cudf/pull/13091)) [@ttnghia](https://github.com/ttnghia) +- Raise `NotImplementedError` when attempting to construct cuDF objects from timezone-aware datetimes ([#13086](https://github.com/rapidsai/cudf/pull/13086)) [@shwina](https://github.com/shwina) +- Remove deprecated regex functions from libcudf ([#13067](https://github.com/rapidsai/cudf/pull/13067)) [@davidwendt](https://github.com/davidwendt) +- [REVIEW] Upgrade to `arrow-11` ([#12757](https://github.com/rapidsai/cudf/pull/12757)) [@galipremsagar](https://github.com/galipremsagar) +- Implement Python drop_duplicates with cudf::stable_distinct. ([#11656](https://github.com/rapidsai/cudf/pull/11656)) [@brandon-b-miller](https://github.com/brandon-b-miller) + +## 🐛 Bug Fixes + +- Fix valid count computation in offset_bitmask_binop kernel ([#13489](https://github.com/rapidsai/cudf/pull/13489)) [@davidwendt](https://github.com/davidwendt) +- Fix writing of ORC files with empty rowgroups ([#13466](https://github.com/rapidsai/cudf/pull/13466)) [@vuule](https://github.com/vuule) +- Fix cudf::repeat logic when count is zero ([#13459](https://github.com/rapidsai/cudf/pull/13459)) [@davidwendt](https://github.com/davidwendt) +- Fix batch processing for parquet writer ([#13438](https://github.com/rapidsai/cudf/pull/13438)) [@ttnghia](https://github.com/ttnghia) +- Fix invalid use of std::exclusive_scan in Parquet writer ([#13434](https://github.com/rapidsai/cudf/pull/13434)) [@etseidl](https://github.com/etseidl) +- Patch numba if it is imported first to ensure minor version compatibility works. 
([#13433](https://github.com/rapidsai/cudf/pull/13433)) [@bdice](https://github.com/bdice) +- Fix cudf::strings::replace_with_backrefs hang on empty match result ([#13418](https://github.com/rapidsai/cudf/pull/13418)) [@davidwendt](https://github.com/davidwendt) +- Use <NA> instead of null to match pandas. ([#13415](https://github.com/rapidsai/cudf/pull/13415)) [@bdice](https://github.com/bdice) +- Fix tokenize with non-space delimiter ([#13403](https://github.com/rapidsai/cudf/pull/13403)) [@shwina](https://github.com/shwina) +- Fix groupby head/tail for empty dataframe ([#13398](https://github.com/rapidsai/cudf/pull/13398)) [@shwina](https://github.com/shwina) +- Default to closed="right" in `IntervalIndex` constructor ([#13394](https://github.com/rapidsai/cudf/pull/13394)) [@shwina](https://github.com/shwina) +- Correctly reorder and reindex scan groupbys with null keys ([#13389](https://github.com/rapidsai/cudf/pull/13389)) [@wence-](https://github.com/wence-) +- Fix unused argument errors in nvcc 11.5 ([#13387](https://github.com/rapidsai/cudf/pull/13387)) [@abellina](https://github.com/abellina) +- Updates needed to work with jitify that leverages libcudacxx ([#13383](https://github.com/rapidsai/cudf/pull/13383)) [@robertmaynard](https://github.com/robertmaynard) +- Fix unused parameter warning/error in parquet/page_data.cu ([#13367](https://github.com/rapidsai/cudf/pull/13367)) [@davidwendt](https://github.com/davidwendt) +- Fix page size estimation in Parquet writer ([#13364](https://github.com/rapidsai/cudf/pull/13364)) [@etseidl](https://github.com/etseidl) +- Fix subword_tokenize error when input contains no tokens ([#13320](https://github.com/rapidsai/cudf/pull/13320)) [@davidwendt](https://github.com/davidwendt) +- Support gcc 12 as the C++ compiler 
([#13316](https://github.com/rapidsai/cudf/pull/13316)) [@robertmaynard](https://github.com/robertmaynard) +- Correctly set bitmask size in `from_column_view` ([#13315](https://github.com/rapidsai/cudf/pull/13315)) [@wence-](https://github.com/wence-) +- Fix approach to detecting assignment for gte/lte operators ([#13285](https://github.com/rapidsai/cudf/pull/13285)) [@vyasr](https://github.com/vyasr) +- Fix parquet schema interpretation issue ([#13277](https://github.com/rapidsai/cudf/pull/13277)) [@hyperbolic2346](https://github.com/hyperbolic2346) +- Fix 64bit shift bug in avro reader ([#13276](https://github.com/rapidsai/cudf/pull/13276)) [@karthikeyann](https://github.com/karthikeyann) +- Fix unused variables/parameters in parquet/writer_impl.cu ([#13263](https://github.com/rapidsai/cudf/pull/13263)) [@davidwendt](https://github.com/davidwendt) +- Clean up buffers in case AssertionError ([#13262](https://github.com/rapidsai/cudf/pull/13262)) [@razajafri](https://github.com/razajafri) +- Allow empty input table in ast `compute_column` ([#13245](https://github.com/rapidsai/cudf/pull/13245)) [@wence-](https://github.com/wence-) +- Fix structs_column_wrapper constructors to copy input column wrappers ([#13243](https://github.com/rapidsai/cudf/pull/13243)) [@davidwendt](https://github.com/davidwendt) +- Fix the row index stream order in ORC reader ([#13242](https://github.com/rapidsai/cudf/pull/13242)) [@vuule](https://github.com/vuule) +- Make `is_decompression_disabled` and `is_compression_disabled` thread-safe ([#13240](https://github.com/rapidsai/cudf/pull/13240)) [@vuule](https://github.com/vuule) +- Add [[maybe_unused]] to nvbench environment. 
([#13219](https://github.com/rapidsai/cudf/pull/13219)) [@bdice](https://github.com/bdice) +- Fix race in ORC string dictionary creation ([#13214](https://github.com/rapidsai/cudf/pull/13214)) [@revans2](https://github.com/revans2) +- Add scalar argtypes to udf cache keys ([#13194](https://github.com/rapidsai/cudf/pull/13194)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Fix unused parameter warning/error in grouped_rolling.cu ([#13192](https://github.com/rapidsai/cudf/pull/13192)) [@davidwendt](https://github.com/davidwendt) +- Avoid skbuild 0.17.2 which affected the cmake -DPython_LIBRARY string ([#13188](https://github.com/rapidsai/cudf/pull/13188)) [@sevagh](https://github.com/sevagh) +- Fix `hostdevice_vector::subspan` ([#13187](https://github.com/rapidsai/cudf/pull/13187)) [@ttnghia](https://github.com/ttnghia) +- Use custom nvbench entry point to ensure `cudf::nvbench_base_fixture` usage ([#13183](https://github.com/rapidsai/cudf/pull/13183)) [@robertmaynard](https://github.com/robertmaynard) +- Fix slice_strings to return empty strings for stop < start indices ([#13178](https://github.com/rapidsai/cudf/pull/13178)) [@davidwendt](https://github.com/davidwendt) +- Allow compilation with any GTest version 1.11+ ([#13153](https://github.com/rapidsai/cudf/pull/13153)) [@robertmaynard](https://github.com/robertmaynard) +- Fix a few clang-format style check errors ([#13146](https://github.com/rapidsai/cudf/pull/13146)) [@davidwendt](https://github.com/davidwendt) +- [REVIEW] Fix `Series` and `DataFrame` constructors to validate index lengths ([#13122](https://github.com/rapidsai/cudf/pull/13122)) [@galipremsagar](https://github.com/galipremsagar) +- Fix hash join when the input tables have nulls on only one side ([#13120](https://github.com/rapidsai/cudf/pull/13120)) 
[@ttnghia](https://github.com/ttnghia) +- Fix GPU_ARCHS setting in Java CMake build and CMAKE_CUDA_ARCHITECTURES in Python package build. ([#13117](https://github.com/rapidsai/cudf/pull/13117)) [@davidwendt](https://github.com/davidwendt) +- Adds checks to make sure json reader won't overflow ([#13115](https://github.com/rapidsai/cudf/pull/13115)) [@elstehle](https://github.com/elstehle) +- Fix `null_count` of columns returned by `chunked_parquet_reader` ([#13111](https://github.com/rapidsai/cudf/pull/13111)) [@vuule](https://github.com/vuule) +- Fixes sliced list and struct column bug in JSON chunked writer ([#13108](https://github.com/rapidsai/cudf/pull/13108)) [@karthikeyann](https://github.com/karthikeyann) +- [REVIEW] Fix missing confluent kafka version ([#13101](https://github.com/rapidsai/cudf/pull/13101)) [@galipremsagar](https://github.com/galipremsagar) +- Use make_empty_lists_column instead of make_empty_column(type_id::LIST) ([#13099](https://github.com/rapidsai/cudf/pull/13099)) [@davidwendt](https://github.com/davidwendt) +- Raise `NotImplementedError` when attempting to construct cuDF objects from timezone-aware datetimes ([#13086](https://github.com/rapidsai/cudf/pull/13086)) [@shwina](https://github.com/shwina) +- Fix column selection `read_parquet` benchmarks ([#13082](https://github.com/rapidsai/cudf/pull/13082)) [@vuule](https://github.com/vuule) +- Fix bugs in iterative groupby apply algorithm ([#13078](https://github.com/rapidsai/cudf/pull/13078)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Add algorithm include in data_sink.hpp ([#13068](https://github.com/rapidsai/cudf/pull/13068)) [@ahendriksen](https://github.com/ahendriksen) +- Fix tests/identify_stream_usage.cpp ([#13066](https://github.com/rapidsai/cudf/pull/13066)) 
[@ahendriksen](https://github.com/ahendriksen) +- Prevent overflow with `skip_rows` in ORC and Parquet readers ([#13063](https://github.com/rapidsai/cudf/pull/13063)) [@vuule](https://github.com/vuule) +- Add except declaration in Cython interface for regex_program::create ([#13054](https://github.com/rapidsai/cudf/pull/13054)) [@davidwendt](https://github.com/davidwendt) +- [REVIEW] Fix branch version in CI scripts ([#13029](https://github.com/rapidsai/cudf/pull/13029)) [@galipremsagar](https://github.com/galipremsagar) +- Fix OOB memory access in CSV reader when reading without NA values ([#13011](https://github.com/rapidsai/cudf/pull/13011)) [@vuule](https://github.com/vuule) +- Fix read_avro() skip_rows and num_rows. ([#12912](https://github.com/rapidsai/cudf/pull/12912)) [@tpn](https://github.com/tpn) +- Purge nonempty nulls from byte_cast list outputs. ([#11971](https://github.com/rapidsai/cudf/pull/11971)) [@bdice](https://github.com/bdice) +- Fix consumption of CPU-backed interchange protocol dataframes ([#11392](https://github.com/rapidsai/cudf/pull/11392)) [@shwina](https://github.com/shwina) + +## 🚀 New Features + +- Remove numba JIT kernel usage from dataframe copy tests ([#13385](https://github.com/rapidsai/cudf/pull/13385)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Add JNI for ORC/Parquet writer compression statistics ([#13376](https://github.com/rapidsai/cudf/pull/13376)) [@ttnghia](https://github.com/ttnghia) +- Use _compile_or_get in JIT groupby apply ([#13350](https://github.com/rapidsai/cudf/pull/13350)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- cuDF numba cuda 12 updates ([#13337](https://github.com/rapidsai/cudf/pull/13337)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Add tz_convert method to convert between 
timestamps ([#13328](https://github.com/rapidsai/cudf/pull/13328)) [@shwina](https://github.com/shwina) +- Optionally return compression statistics from ORC and Parquet writers ([#13294](https://github.com/rapidsai/cudf/pull/13294)) [@vuule](https://github.com/vuule) +- Support the case=False argument to str.contains ([#13290](https://github.com/rapidsai/cudf/pull/13290)) [@shwina](https://github.com/shwina) +- Add an event handler for ColumnVector.close ([#13279](https://github.com/rapidsai/cudf/pull/13279)) [@abellina](https://github.com/abellina) +- JNI api for cudf::chunked_pack ([#13278](https://github.com/rapidsai/cudf/pull/13278)) [@abellina](https://github.com/abellina) +- Implement a chunked_pack API ([#13260](https://github.com/rapidsai/cudf/pull/13260)) [@abellina](https://github.com/abellina) +- Update cudf recipes to use GTest version to >=1.13 ([#13207](https://github.com/rapidsai/cudf/pull/13207)) [@robertmaynard](https://github.com/robertmaynard) +- JNI changes for range-extents in window functions. 
([#13199](https://github.com/rapidsai/cudf/pull/13199)) [@mythrocks](https://github.com/mythrocks) +- Add support for DatetimeTZDtype and tz_localize ([#13163](https://github.com/rapidsai/cudf/pull/13163)) [@shwina](https://github.com/shwina) +- Add IS_NULL operator to AST ([#13145](https://github.com/rapidsai/cudf/pull/13145)) [@karthikeyann](https://github.com/karthikeyann) +- STRING order-by column for RANGE window functions ([#13143](https://github.com/rapidsai/cudf/pull/13143)) [@mythrocks](https://github.com/mythrocks) +- Update `contains_table` to experimental row hasher and equality comparator ([#13119](https://github.com/rapidsai/cudf/pull/13119)) [@divyegala](https://github.com/divyegala) +- Automatically select `GroupBy.apply` algorithm based on if the UDF is jittable ([#13113](https://github.com/rapidsai/cudf/pull/13113)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Refactor Parquet chunked writer ([#13076](https://github.com/rapidsai/cudf/pull/13076)) [@ttnghia](https://github.com/ttnghia) +- Add Python bindings for string literal support in AST ([#13073](https://github.com/rapidsai/cudf/pull/13073)) [@karthikeyann](https://github.com/karthikeyann) +- Add Java bindings for string literal support in AST ([#13072](https://github.com/rapidsai/cudf/pull/13072)) [@karthikeyann](https://github.com/karthikeyann) +- Add string scalar support in AST ([#13061](https://github.com/rapidsai/cudf/pull/13061)) [@karthikeyann](https://github.com/karthikeyann) +- Log cuIO warnings using the libcudf logger ([#13043](https://github.com/rapidsai/cudf/pull/13043)) [@vuule](https://github.com/vuule) +- Update `mixed_join` to use experimental row hasher and comparator ([#13028](https://github.com/rapidsai/cudf/pull/13028)) [@divyegala](https://github.com/divyegala) +- 
Support structs of lists in row lexicographic comparator ([#13005](https://github.com/rapidsai/cudf/pull/13005)) [@ttnghia](https://github.com/ttnghia) +- Adding `hostdevice_span` that is a span creatable from `hostdevice_vector` ([#12981](https://github.com/rapidsai/cudf/pull/12981)) [@hyperbolic2346](https://github.com/hyperbolic2346) +- Add nvtext::minhash function ([#12961](https://github.com/rapidsai/cudf/pull/12961)) [@davidwendt](https://github.com/davidwendt) +- Support lists of structs in row lexicographic comparator ([#12953](https://github.com/rapidsai/cudf/pull/12953)) [@ttnghia](https://github.com/ttnghia) +- Update `join` to use experimental row hasher and comparator ([#12787](https://github.com/rapidsai/cudf/pull/12787)) [@divyegala](https://github.com/divyegala) +- Implement Python drop_duplicates with cudf::stable_distinct. ([#11656](https://github.com/rapidsai/cudf/pull/11656)) [@brandon-b-miller](https://github.com/brandon-b-miller) + +## 🛠️ Improvements + +- Drop extraneous dependencies from cudf conda recipe. ([#13406](https://github.com/rapidsai/cudf/pull/13406)) [@bdice](https://github.com/bdice) +- Handle some corner-cases in indexing with boolean masks ([#13402](https://github.com/rapidsai/cudf/pull/13402)) [@wence-](https://github.com/wence-) +- Add cudf::stable_distinct public API, tests, and benchmarks. 
([#13392](https://github.com/rapidsai/cudf/pull/13392)) [@bdice](https://github.com/bdice) +- [JNI] Pass this ColumnVector to the onClosed event handler ([#13386](https://github.com/rapidsai/cudf/pull/13386)) [@abellina](https://github.com/abellina) +- Fix JNI method with mismatched parameter list ([#13384](https://github.com/rapidsai/cudf/pull/13384)) [@ttnghia](https://github.com/ttnghia) +- Split up experimental_row_operator_tests.cu to improve its compile time ([#13382](https://github.com/rapidsai/cudf/pull/13382)) [@davidwendt](https://github.com/davidwendt) +- Deprecate cudf::strings::slice_strings APIs that accept delimiters ([#13373](https://github.com/rapidsai/cudf/pull/13373)) [@davidwendt](https://github.com/davidwendt) +- Remove UNKNOWN_NULL_COUNT ([#13372](https://github.com/rapidsai/cudf/pull/13372)) [@vyasr](https://github.com/vyasr) +- Move some nvtext benchmarks to nvbench ([#13368](https://github.com/rapidsai/cudf/pull/13368)) [@davidwendt](https://github.com/davidwendt) +- run docs nightly too ([#13366](https://github.com/rapidsai/cudf/pull/13366)) [@AyodeAwe](https://github.com/AyodeAwe) +- Add warning for default `dtype` parameter in `get_dummies` ([#13365](https://github.com/rapidsai/cudf/pull/13365)) [@galipremsagar](https://github.com/galipremsagar) +- Add log messages about kvikIO compatibility mode ([#13363](https://github.com/rapidsai/cudf/pull/13363)) [@vuule](https://github.com/vuule) +- Switch back to using primary shared-action-workflows branch ([#13362](https://github.com/rapidsai/cudf/pull/13362)) [@vyasr](https://github.com/vyasr) +- Deprecate `StringIndex` and use `Index` instead ([#13361](https://github.com/rapidsai/cudf/pull/13361)) [@galipremsagar](https://github.com/galipremsagar) +- Ensure columns have valid null counts in CUDF JNI. 
([#13355](https://github.com/rapidsai/cudf/pull/13355)) [@mythrocks](https://github.com/mythrocks) +- Expunge most uses of `TypeVar(bound="Foo")` ([#13346](https://github.com/rapidsai/cudf/pull/13346)) [@wence-](https://github.com/wence-) +- Remove all references to UNKNOWN_NULL_COUNT in Python ([#13345](https://github.com/rapidsai/cudf/pull/13345)) [@vyasr](https://github.com/vyasr) +- Improve `distinct_count` with `cuco::static_set` ([#13343](https://github.com/rapidsai/cudf/pull/13343)) [@PointKernel](https://github.com/PointKernel) +- Fix `contiguous_split` performance ([#13342](https://github.com/rapidsai/cudf/pull/13342)) [@ttnghia](https://github.com/ttnghia) +- Remove default UNKNOWN_NULL_COUNT from cudf::column member functions ([#13341](https://github.com/rapidsai/cudf/pull/13341)) [@davidwendt](https://github.com/davidwendt) +- Update mypy to 1.3 ([#13340](https://github.com/rapidsai/cudf/pull/13340)) [@wence-](https://github.com/wence-) +- [Java] Purge non-empty nulls when setting validity ([#13335](https://github.com/rapidsai/cudf/pull/13335)) [@razajafri](https://github.com/razajafri) +- Add row-wise filtering step to `read_parquet` ([#13334](https://github.com/rapidsai/cudf/pull/13334)) [@rjzamora](https://github.com/rjzamora) +- Performance improvement for nvtext::minhash ([#13333](https://github.com/rapidsai/cudf/pull/13333)) [@davidwendt](https://github.com/davidwendt) +- Fix some libcudf functions to set the null count on returning columns ([#13331](https://github.com/rapidsai/cudf/pull/13331)) [@davidwendt](https://github.com/davidwendt) +- Change cudf::detail::concatenate_masks to return null-count ([#13330](https://github.com/rapidsai/cudf/pull/13330)) [@davidwendt](https://github.com/davidwendt) +- Move `meta` calculation in `dask_cudf.read_parquet` 
([#13327](https://github.com/rapidsai/cudf/pull/13327)) [@rjzamora](https://github.com/rjzamora) +- Changes to support Numpy >= 1.24 ([#13325](https://github.com/rapidsai/cudf/pull/13325)) [@shwina](https://github.com/shwina) +- Use std::overflow_error when output would exceed column size limit ([#13323](https://github.com/rapidsai/cudf/pull/13323)) [@davidwendt](https://github.com/davidwendt) +- Clean up `distinct_count` benchmark ([#13321](https://github.com/rapidsai/cudf/pull/13321)) [@PointKernel](https://github.com/PointKernel) +- Fix gtest pinning to 1.13.0. ([#13319](https://github.com/rapidsai/cudf/pull/13319)) [@bdice](https://github.com/bdice) +- Remove null mask and null count from column_view constructors ([#13311](https://github.com/rapidsai/cudf/pull/13311)) [@vyasr](https://github.com/vyasr) +- Address feedback from 13289 ([#13306](https://github.com/rapidsai/cudf/pull/13306)) [@vyasr](https://github.com/vyasr) +- Change default value of the `observed=` argument in groupby to `True` to reflect the actual behaviour ([#13296](https://github.com/rapidsai/cudf/pull/13296)) [@shwina](https://github.com/shwina) +- First check for `BaseDtype` when inferring the data type of an arbitrary object ([#13295](https://github.com/rapidsai/cudf/pull/13295)) [@shwina](https://github.com/shwina) +- Throw error if UNINITIALIZED is passed to cudf::state_null_count ([#13292](https://github.com/rapidsai/cudf/pull/13292)) [@davidwendt](https://github.com/davidwendt) +- Support CUDA 12.0 for pip wheels ([#13289](https://github.com/rapidsai/cudf/pull/13289)) [@divyegala](https://github.com/divyegala) +- Refactor `transform_lists_of_structs` in `row_operators.cu` ([#13288](https://github.com/rapidsai/cudf/pull/13288)) [@ttnghia](https://github.com/ttnghia) +- Branch 23.06 merge 23.04 
([#13286](https://github.com/rapidsai/cudf/pull/13286)) [@vyasr](https://github.com/vyasr) +- Update cupy dependency ([#13284](https://github.com/rapidsai/cudf/pull/13284)) [@vyasr](https://github.com/vyasr) +- Performance improvement in cudf::strings::join_strings for long strings ([#13283](https://github.com/rapidsai/cudf/pull/13283)) [@davidwendt](https://github.com/davidwendt) +- Fix unused variables and functions ([#13275](https://github.com/rapidsai/cudf/pull/13275)) [@karthikeyann](https://github.com/karthikeyann) +- Fix integer overflow in `partition` `scatter_map` construction ([#13272](https://github.com/rapidsai/cudf/pull/13272)) [@wence-](https://github.com/wence-) +- Numba 0.57 compatibility fixes ([#13271](https://github.com/rapidsai/cudf/pull/13271)) [@gmarkall](https://github.com/gmarkall) +- Performance improvement in cudf::strings::all_characters_of_type ([#13259](https://github.com/rapidsai/cudf/pull/13259)) [@davidwendt](https://github.com/davidwendt) +- Remove default null-count parameter from some libcudf factory functions ([#13258](https://github.com/rapidsai/cudf/pull/13258)) [@davidwendt](https://github.com/davidwendt) +- Roll our own generate_string() because mimesis' has gone away ([#13257](https://github.com/rapidsai/cudf/pull/13257)) [@shwina](https://github.com/shwina) +- Build wheels using new single image workflow ([#13249](https://github.com/rapidsai/cudf/pull/13249)) [@vyasr](https://github.com/vyasr) +- Enable sccache hits from local builds ([#13248](https://github.com/rapidsai/cudf/pull/13248)) [@AyodeAwe](https://github.com/AyodeAwe) +- Revert to branch-23.06 for shared-action-workflows ([#13247](https://github.com/rapidsai/cudf/pull/13247)) [@shwina](https://github.com/shwina) +- Introduce `pandas_compatible` option in `cudf` 
([#13241](https://github.com/rapidsai/cudf/pull/13241)) [@galipremsagar](https://github.com/galipremsagar) +- Add metadata_builder helper class ([#13232](https://github.com/rapidsai/cudf/pull/13232)) [@abellina](https://github.com/abellina) +- Use libkvikio conda packages in libcudf, add explicit libcufile dependency. ([#13231](https://github.com/rapidsai/cudf/pull/13231)) [@bdice](https://github.com/bdice) +- Remove default null-count parameter from cudf::make_strings_column factory ([#13227](https://github.com/rapidsai/cudf/pull/13227)) [@davidwendt](https://github.com/davidwendt) +- Performance improvement in cudf::strings::find/rfind for long strings ([#13226](https://github.com/rapidsai/cudf/pull/13226)) [@davidwendt](https://github.com/davidwendt) +- Add chunked reader benchmark ([#13223](https://github.com/rapidsai/cudf/pull/13223)) [@SrikarVanavasam](https://github.com/SrikarVanavasam) +- Set the null count in output columns in the CSV reader ([#13221](https://github.com/rapidsai/cudf/pull/13221)) [@vuule](https://github.com/vuule) +- Skip Non-Empty nulls tests for the nightly build just like we skip CuFileTest and CudaFatalTest ([#13213](https://github.com/rapidsai/cudf/pull/13213)) [@razajafri](https://github.com/razajafri) +- Fix string_scalar stream usage in write_json.cu ([#13212](https://github.com/rapidsai/cudf/pull/13212)) [@davidwendt](https://github.com/davidwendt) +- Use canonicalized name for dlopen'd libraries (libcufile) ([#13210](https://github.com/rapidsai/cudf/pull/13210)) [@shwina](https://github.com/shwina) +- Refactor pinned memory vector and ORC+Parquet writers ([#13206](https://github.com/rapidsai/cudf/pull/13206)) [@ttnghia](https://github.com/ttnghia) +- Remove UNKNOWN_NULL_COUNT where it can be easily computed 
([#13205](https://github.com/rapidsai/cudf/pull/13205)) [@vyasr](https://github.com/vyasr) +- Optimization to decoding of parquet level streams ([#13203](https://github.com/rapidsai/cudf/pull/13203)) [@nvdbaranec](https://github.com/nvdbaranec) +- Clean up and simplify `gpuDecideCompression` ([#13202](https://github.com/rapidsai/cudf/pull/13202)) [@vuule](https://github.com/vuule) +- Use std::array for a statically sized vector in `create_serialized_trie` ([#13201](https://github.com/rapidsai/cudf/pull/13201)) [@vuule](https://github.com/vuule) +- Update minimum Python version to Python 3.9 ([#13196](https://github.com/rapidsai/cudf/pull/13196)) [@shwina](https://github.com/shwina) +- Refactor contiguous_split API into contiguous_split.hpp ([#13186](https://github.com/rapidsai/cudf/pull/13186)) [@abellina](https://github.com/abellina) +- Remove usage of rapids-get-rapids-version-from-git ([#13184](https://github.com/rapidsai/cudf/pull/13184)) [@jjacobelli](https://github.com/jjacobelli) +- Enable mixed-dtype decimal/scalar binary operations ([#13171](https://github.com/rapidsai/cudf/pull/13171)) [@shwina](https://github.com/shwina) +- Split up unique_count.cu to improve build time ([#13169](https://github.com/rapidsai/cudf/pull/13169)) [@davidwendt](https://github.com/davidwendt) +- Use nvtx3 includes in string examples. 
([#13165](https://github.com/rapidsai/cudf/pull/13165)) [@bdice](https://github.com/bdice) +- Change some .cu gtest files to .cpp ([#13155](https://github.com/rapidsai/cudf/pull/13155)) [@davidwendt](https://github.com/davidwendt) +- Remove wheel pytest verbosity ([#13151](https://github.com/rapidsai/cudf/pull/13151)) [@sevagh](https://github.com/sevagh) +- Fix libcudf to always pass null-count to set_null_mask ([#13149](https://github.com/rapidsai/cudf/pull/13149)) [@davidwendt](https://github.com/davidwendt) +- Fix gtests to always pass null-count to set_null_mask calls ([#13148](https://github.com/rapidsai/cudf/pull/13148)) [@davidwendt](https://github.com/davidwendt) +- Optimize JSON writer ([#13144](https://github.com/rapidsai/cudf/pull/13144)) [@karthikeyann](https://github.com/karthikeyann) +- Performance improvement for libcudf upper/lower conversion for long strings ([#13142](https://github.com/rapidsai/cudf/pull/13142)) [@davidwendt](https://github.com/davidwendt) +- [REVIEW] Deprecate `pad` and `backfill` methods ([#13140](https://github.com/rapidsai/cudf/pull/13140)) [@galipremsagar](https://github.com/galipremsagar) +- Use CTAD instead of functions in ProtobufReader ([#13135](https://github.com/rapidsai/cudf/pull/13135)) [@vuule](https://github.com/vuule) +- Remove more instances of `UNKNOWN_NULL_COUNT` ([#13134](https://github.com/rapidsai/cudf/pull/13134)) [@vyasr](https://github.com/vyasr) +- Update clang-format to 16.0.1. 
([#13133](https://github.com/rapidsai/cudf/pull/13133)) [@bdice](https://github.com/bdice) +- Add log messages about cuIO's nvCOMP and cuFile use ([#13132](https://github.com/rapidsai/cudf/pull/13132)) [@vuule](https://github.com/vuule) +- Branch 23.06 merge 23.04 ([#13131](https://github.com/rapidsai/cudf/pull/13131)) [@vyasr](https://github.com/vyasr) +- Compute null-count in cudf::detail::slice ([#13124](https://github.com/rapidsai/cudf/pull/13124)) [@davidwendt](https://github.com/davidwendt) +- Use ARC V2 self-hosted runners for GPU jobs ([#13123](https://github.com/rapidsai/cudf/pull/13123)) [@jjacobelli](https://github.com/jjacobelli) +- Set null-count in linked_column_view conversion operator ([#13121](https://github.com/rapidsai/cudf/pull/13121)) [@davidwendt](https://github.com/davidwendt) +- Adding ifdefs around nvcc-specific pragmas ([#13110](https://github.com/rapidsai/cudf/pull/13110)) [@hyperbolic2346](https://github.com/hyperbolic2346) +- Add null-count parameter to json experimental parse_data utility ([#13107](https://github.com/rapidsai/cudf/pull/13107)) [@davidwendt](https://github.com/davidwendt) +- Remove uses-setup-env-vars ([#13105](https://github.com/rapidsai/cudf/pull/13105)) [@vyasr](https://github.com/vyasr) +- Explicitly compute null count in concatenate APIs ([#13104](https://github.com/rapidsai/cudf/pull/13104)) [@vyasr](https://github.com/vyasr) +- Replace unnecessary uses of `UNKNOWN_NULL_COUNT` ([#13102](https://github.com/rapidsai/cudf/pull/13102)) [@vyasr](https://github.com/vyasr) +- Performance improvement for cudf::string_view::find functions ([#13100](https://github.com/rapidsai/cudf/pull/13100)) [@davidwendt](https://github.com/davidwendt) +- Use `.element()` instead of `.data()` for window range calculations 
([#13095](https://github.com/rapidsai/cudf/pull/13095)) [@mythrocks](https://github.com/mythrocks) +- Cleanup Parquet chunked writer ([#13094](https://github.com/rapidsai/cudf/pull/13094)) [@ttnghia](https://github.com/ttnghia) +- Fix unused variable error/warning in page_data.cu ([#13093](https://github.com/rapidsai/cudf/pull/13093)) [@davidwendt](https://github.com/davidwendt) +- Cleanup ORC chunked writer ([#13091](https://github.com/rapidsai/cudf/pull/13091)) [@ttnghia](https://github.com/ttnghia) +- Remove using namespace cudf; from libcudf gtests source ([#13089](https://github.com/rapidsai/cudf/pull/13089)) [@davidwendt](https://github.com/davidwendt) +- Change cudf::test::make_null_mask to also return null-count ([#13081](https://github.com/rapidsai/cudf/pull/13081)) [@davidwendt](https://github.com/davidwendt) +- Resolved automerger from `branch-23.04` to `branch-23.06` ([#13080](https://github.com/rapidsai/cudf/pull/13080)) [@galipremsagar](https://github.com/galipremsagar) +- Assert for non-empty nulls ([#13071](https://github.com/rapidsai/cudf/pull/13071)) [@razajafri](https://github.com/razajafri) +- Remove deprecated regex functions from libcudf ([#13067](https://github.com/rapidsai/cudf/pull/13067)) [@davidwendt](https://github.com/davidwendt) +- Refactor `cudf::detail::sorted_order` ([#13062](https://github.com/rapidsai/cudf/pull/13062)) [@ttnghia](https://github.com/ttnghia) +- Improve performance of slice_strings for long strings ([#13057](https://github.com/rapidsai/cudf/pull/13057)) [@davidwendt](https://github.com/davidwendt) +- Reduce shared memory usage in gpuComputePageSizes by 50% ([#13047](https://github.com/rapidsai/cudf/pull/13047)) [@nvdbaranec](https://github.com/nvdbaranec) +- [REVIEW] Add notes to performance comparisons notebook 
([#13044](https://github.com/rapidsai/cudf/pull/13044)) [@galipremsagar](https://github.com/galipremsagar) +- Enable binary operations between scalars and columns of differing decimal types ([#13034](https://github.com/rapidsai/cudf/pull/13034)) [@shwina](https://github.com/shwina) +- Remove console output from some libcudf gtests ([#13027](https://github.com/rapidsai/cudf/pull/13027)) [@davidwendt](https://github.com/davidwendt) +- Remove underscore in build string. ([#13025](https://github.com/rapidsai/cudf/pull/13025)) [@bdice](https://github.com/bdice) +- Bump up JNI version 23.06.0-SNAPSHOT ([#13021](https://github.com/rapidsai/cudf/pull/13021)) [@pxLi](https://github.com/pxLi) +- Fix auto merger from `branch-23.04` to `branch-23.06` ([#13009](https://github.com/rapidsai/cudf/pull/13009)) [@galipremsagar](https://github.com/galipremsagar) +- Reduce peak memory use when writing compressed ORC files. 
([#12963](https://github.com/rapidsai/cudf/pull/12963)) [@vuule](https://github.com/vuule) +- Add nvtx annotations to groupby methods ([#12941](https://github.com/rapidsai/cudf/pull/12941)) [@wence-](https://github.com/wence-) +- Compute column sizes in Parquet preprocess with single kernel ([#12931](https://github.com/rapidsai/cudf/pull/12931)) [@SrikarVanavasam](https://github.com/SrikarVanavasam) +- Add Python bindings for time zone data (TZiF) reader ([#12826](https://github.com/rapidsai/cudf/pull/12826)) [@shwina](https://github.com/shwina) +- Optimize set-like operations ([#12769](https://github.com/rapidsai/cudf/pull/12769)) [@ttnghia](https://github.com/ttnghia) +- [REVIEW] Upgrade to `arrow-11` ([#12757](https://github.com/rapidsai/cudf/pull/12757)) [@galipremsagar](https://github.com/galipremsagar) +- Add empty test files for test reorganization ([#12288](https://github.com/rapidsai/cudf/pull/12288)) [@shwina](https://github.com/shwina) + +# cuDF 23.04.00 (6 Apr 2023) + +## 🚨 Breaking Changes + +- Pin `dask` and `distributed` for release ([#13070](https://github.com/rapidsai/cudf/pull/13070)) [@galipremsagar](https://github.com/galipremsagar) +- Declare a different name for nan_equality.UNEQUAL to prevent Cython warnings. 
([#12947](https://github.com/rapidsai/cudf/pull/12947)) [@bdice](https://github.com/bdice) +- Update minimum `pandas` and `numpy` pinnings ([#12887](https://github.com/rapidsai/cudf/pull/12887)) [@galipremsagar](https://github.com/galipremsagar) +- Deprecate `names` & `dtype` in `Index.copy` ([#12825](https://github.com/rapidsai/cudf/pull/12825)) [@galipremsagar](https://github.com/galipremsagar) +- Deprecate `Index.is_*` methods ([#12820](https://github.com/rapidsai/cudf/pull/12820)) [@galipremsagar](https://github.com/galipremsagar) +- Deprecate `datetime_is_numeric` from `describe` ([#12818](https://github.com/rapidsai/cudf/pull/12818)) [@galipremsagar](https://github.com/galipremsagar) +- Deprecate `na_sentinel` in `factorize` ([#12817](https://github.com/rapidsai/cudf/pull/12817)) [@galipremsagar](https://github.com/galipremsagar) +- Make string methods return a Series with a useful Index ([#12814](https://github.com/rapidsai/cudf/pull/12814)) [@shwina](https://github.com/shwina) +- Produce useful guidance on overflow error in `to_csv` ([#12705](https://github.com/rapidsai/cudf/pull/12705)) [@wence-](https://github.com/wence-) +- Move `strings_udf` code into cuDF ([#12669](https://github.com/rapidsai/cudf/pull/12669)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Remove cudf::strings::repeat_strings_output_sizes and optional parameter from cudf::strings::repeat_strings ([#12609](https://github.com/rapidsai/cudf/pull/12609)) [@davidwendt](https://github.com/davidwendt) +- Replace message parsing with throwing more specific exceptions ([#12426](https://github.com/rapidsai/cudf/pull/12426)) [@vyasr](https://github.com/vyasr) + +## 🐛 Bug Fixes + +- Fix memcheck script to execute only _TEST files found in bin/gtests/libcudf 
([#13006](https://github.com/rapidsai/cudf/pull/13006)) [@davidwendt](https://github.com/davidwendt) +- Fix `DataFrame` constructor to broadcast scalar inputs properly ([#12997](https://github.com/rapidsai/cudf/pull/12997)) [@galipremsagar](https://github.com/galipremsagar) +- Drop `force_nullable_schema` from chunked parquet writer ([#12996](https://github.com/rapidsai/cudf/pull/12996)) [@galipremsagar](https://github.com/galipremsagar) +- Fix gtest column utility comparator diff reporting ([#12995](https://github.com/rapidsai/cudf/pull/12995)) [@davidwendt](https://github.com/davidwendt) +- Handle index names while performing `groupby` ([#12992](https://github.com/rapidsai/cudf/pull/12992)) [@galipremsagar](https://github.com/galipremsagar) +- Fix `__setitem__` on string columns when the scalar value ends in a null byte ([#12991](https://github.com/rapidsai/cudf/pull/12991)) [@wence-](https://github.com/wence-) +- Fix `sort_values` when column is all empty strings ([#12988](https://github.com/rapidsai/cudf/pull/12988)) [@eriknw](https://github.com/eriknw) +- Remove unused variable and fix memory issue in ORC writer ([#12984](https://github.com/rapidsai/cudf/pull/12984)) [@ttnghia](https://github.com/ttnghia) +- Pre-emptive fix for upstream `dask.dataframe.read_parquet` changes ([#12983](https://github.com/rapidsai/cudf/pull/12983)) [@rjzamora](https://github.com/rjzamora) +- Remove MANIFEST.in use auto-generated one for sdists and package_data for wheels ([#12960](https://github.com/rapidsai/cudf/pull/12960)) [@vyasr](https://github.com/vyasr) +- Update to use rapids-export(COMPONENTS) feature. 
([#12959](https://github.com/rapidsai/cudf/pull/12959)) [@robertmaynard](https://github.com/robertmaynard) +- cudftestutil supports static gtest dependencies ([#12957](https://github.com/rapidsai/cudf/pull/12957)) [@robertmaynard](https://github.com/robertmaynard) +- Include gtest in build environment. ([#12956](https://github.com/rapidsai/cudf/pull/12956)) [@vyasr](https://github.com/vyasr) +- Correctly handle scalar indices in `Index.__getitem__` ([#12955](https://github.com/rapidsai/cudf/pull/12955)) [@wence-](https://github.com/wence-) +- Avoid building cython twice ([#12945](https://github.com/rapidsai/cudf/pull/12945)) [@galipremsagar](https://github.com/galipremsagar) +- Fix set index error for Series rolling window operations ([#12942](https://github.com/rapidsai/cudf/pull/12942)) [@galipremsagar](https://github.com/galipremsagar) +- Fix calculation of null counts for Parquet statistics ([#12938](https://github.com/rapidsai/cudf/pull/12938)) [@etseidl](https://github.com/etseidl) +- Preserve integer dtype of hive-partitioned column containing nulls ([#12930](https://github.com/rapidsai/cudf/pull/12930)) [@rjzamora](https://github.com/rjzamora) +- Use get_current_device_resource for intermediate allocations in COLLECT_LIST window code ([#12927](https://github.com/rapidsai/cudf/pull/12927)) [@karthikeyann](https://github.com/karthikeyann) +- Mark dlpack tensor deleter as noexcept to match PyCapsule_Destructor signature. 
([#12921](https://github.com/rapidsai/cudf/pull/12921)) [@bdice](https://github.com/bdice) +- Fix conda recipe post-link.sh typo ([#12916](https://github.com/rapidsai/cudf/pull/12916)) [@pentschev](https://github.com/pentschev) +- min_rows and num_rows are swapped in ComputePageSizes declaration in Parquet reader ([#12886](https://github.com/rapidsai/cudf/pull/12886)) [@etseidl](https://github.com/etseidl) +- Expect cupy to now support bool arrays for dlpack. ([#12883](https://github.com/rapidsai/cudf/pull/12883)) [@vyasr](https://github.com/vyasr) +- Use python -m pytest for nightly wheel tests ([#12871](https://github.com/rapidsai/cudf/pull/12871)) [@bdice](https://github.com/bdice) +- Parquet writer column_size() should return a size_t ([#12870](https://github.com/rapidsai/cudf/pull/12870)) [@etseidl](https://github.com/etseidl) +- Fix cudf::hash_partition kernel launch error with decimal128 types ([#12863](https://github.com/rapidsai/cudf/pull/12863)) [@davidwendt](https://github.com/davidwendt) +- Fix an issue with parquet chunked reader undercounting string lengths. ([#12859](https://github.com/rapidsai/cudf/pull/12859)) [@nvdbaranec](https://github.com/nvdbaranec) +- Remove tokenizers pre-install pinning. 
([#12854](https://github.com/rapidsai/cudf/pull/12854)) [@vyasr](https://github.com/vyasr) +- Fix parquet `RangeIndex` bug ([#12838](https://github.com/rapidsai/cudf/pull/12838)) [@rjzamora](https://github.com/rjzamora) +- Remove KAFKA_HOST_TEST from compute-sanitizer check ([#12831](https://github.com/rapidsai/cudf/pull/12831)) [@davidwendt](https://github.com/davidwendt) +- Make string methods return a Series with a useful Index ([#12814](https://github.com/rapidsai/cudf/pull/12814)) [@shwina](https://github.com/shwina) +- Tell cudf_kafka to use header-only fmt ([#12796](https://github.com/rapidsai/cudf/pull/12796)) [@vyasr](https://github.com/vyasr) +- Add `GroupBy.dtypes` ([#12783](https://github.com/rapidsai/cudf/pull/12783)) [@galipremsagar](https://github.com/galipremsagar) +- Fix a leak in a test and clarify some test names ([#12781](https://github.com/rapidsai/cudf/pull/12781)) [@revans2](https://github.com/revans2) +- Fix bug in all-null list due to join_list_elements special handling ([#12767](https://github.com/rapidsai/cudf/pull/12767)) [@karthikeyann](https://github.com/karthikeyann) +- Add try/except for expected null-schema error in read_parquet ([#12756](https://github.com/rapidsai/cudf/pull/12756)) [@rjzamora](https://github.com/rjzamora) +- Throw an exception if an unsupported page encoding is detected in Parquet reader ([#12754](https://github.com/rapidsai/cudf/pull/12754)) [@etseidl](https://github.com/etseidl) +- Fix a bug with `num_keys` in `_scatter_by_slice` ([#12749](https://github.com/rapidsai/cudf/pull/12749)) [@thomcom](https://github.com/thomcom) +- Bump pinned rapids wheel deps to 23.4 ([#12735](https://github.com/rapidsai/cudf/pull/12735)) [@sevagh](https://github.com/sevagh) +- Rework logic in cudf::strings::split_record to improve performance 
([#12729](https://github.com/rapidsai/cudf/pull/12729)) [@davidwendt](https://github.com/davidwendt) +- Add `always_nullable` flag to Dremel encoding ([#12727](https://github.com/rapidsai/cudf/pull/12727)) [@divyegala](https://github.com/divyegala) +- Fix memcheck read error in compound segmented reduce ([#12722](https://github.com/rapidsai/cudf/pull/12722)) [@davidwendt](https://github.com/davidwendt) +- Fix faulty conditional logic in JIT `GroupBy.apply` ([#12706](https://github.com/rapidsai/cudf/pull/12706)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Produce useful guidance on overflow error in `to_csv` ([#12705](https://github.com/rapidsai/cudf/pull/12705)) [@wence-](https://github.com/wence-) +- Handle parquet list data corner case ([#12698](https://github.com/rapidsai/cudf/pull/12698)) [@nvdbaranec](https://github.com/nvdbaranec) +- Fix missing trailing comma in json writer ([#12688](https://github.com/rapidsai/cudf/pull/12688)) [@karthikeyann](https://github.com/karthikeyann) +- Remove child from newCudaAsyncMemoryResource ([#12681](https://github.com/rapidsai/cudf/pull/12681)) [@abellina](https://github.com/abellina) +- Handle bool types in `round` API ([#12670](https://github.com/rapidsai/cudf/pull/12670)) [@galipremsagar](https://github.com/galipremsagar) +- Ensure all of device bitmask is initialized in from_arrow ([#12668](https://github.com/rapidsai/cudf/pull/12668)) [@wence-](https://github.com/wence-) +- Fix `from_arrow` to load a sliced arrow table ([#12665](https://github.com/rapidsai/cudf/pull/12665)) [@galipremsagar](https://github.com/galipremsagar) +- Fix dask-cudf read_parquet bug for multi-file aggregation ([#12663](https://github.com/rapidsai/cudf/pull/12663)) [@rjzamora](https://github.com/rjzamora) +- Fix AllocateLikeTest gtests 
reading uninitialized null-mask ([#12643](https://github.com/rapidsai/cudf/pull/12643)) [@davidwendt](https://github.com/davidwendt) +- Fix `find_common_dtype` and `values` to handle complex dtypes ([#12537](https://github.com/rapidsai/cudf/pull/12537)) [@galipremsagar](https://github.com/galipremsagar) +- Fix fetching of MultiIndex values when a label is passed ([#12521](https://github.com/rapidsai/cudf/pull/12521)) [@galipremsagar](https://github.com/galipremsagar) +- Fix `Series` comparison vs scalars ([#12519](https://github.com/rapidsai/cudf/pull/12519)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Allow casting from `UDFString` back to `StringView` to call methods in `strings_udf` ([#12363](https://github.com/rapidsai/cudf/pull/12363)) [@brandon-b-miller](https://github.com/brandon-b-miller) + +## 📖 Documentation + +- Fix `GroupBy.apply` doc examples rendering ([#12994](https://github.com/rapidsai/cudf/pull/12994)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- add sphinx building and s3 uploading for dask-cudf docs ([#12982](https://github.com/rapidsai/cudf/pull/12982)) [@quasiben](https://github.com/quasiben) +- Add developer documentation forbidding default parameters in detail APIs ([#12978](https://github.com/rapidsai/cudf/pull/12978)) [@vyasr](https://github.com/vyasr) +- Add README symlink for dask-cudf. 
([#12946](https://github.com/rapidsai/cudf/pull/12946)) [@bdice](https://github.com/bdice) +- Remove return type from `@return` doxygen tags ([#12908](https://github.com/rapidsai/cudf/pull/12908)) [@davidwendt](https://github.com/davidwendt) +- Fix docs build to be `pydata-sphinx-theme=0.13.0` compatible ([#12874](https://github.com/rapidsai/cudf/pull/12874)) [@galipremsagar](https://github.com/galipremsagar) +- Add skeleton API and prose documentation for dask-cudf ([#12725](https://github.com/rapidsai/cudf/pull/12725)) [@wence-](https://github.com/wence-) +- Enable doctests for GroupBy methods ([#12658](https://github.com/rapidsai/cudf/pull/12658)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Add comment about CUB patch for SegmentedSortInt.Bool gtest ([#12611](https://github.com/rapidsai/cudf/pull/12611)) [@davidwendt](https://github.com/davidwendt) + +## 🚀 New Features + +- Add JNI method for strings::replace multi variety ([#12979](https://github.com/rapidsai/cudf/pull/12979)) [@NVnavkumar](https://github.com/NVnavkumar) +- Add nunique aggregation support for cudf::segmented_reduce ([#12972](https://github.com/rapidsai/cudf/pull/12972)) [@davidwendt](https://github.com/davidwendt) +- Refactor orc chunked writer ([#12949](https://github.com/rapidsai/cudf/pull/12949)) [@ttnghia](https://github.com/ttnghia) +- Make Parquet writer `nullable` option applicable to single table writes ([#12933](https://github.com/rapidsai/cudf/pull/12933)) [@vuule](https://github.com/vuule) +- Refactor `io::orc::ProtobufWriter` ([#12877](https://github.com/rapidsai/cudf/pull/12877)) [@ttnghia](https://github.com/ttnghia) +- Make timezone table independent from ORC 
([#12805](https://github.com/rapidsai/cudf/pull/12805)) [@vuule](https://github.com/vuule) +- Cache JIT `GroupBy.apply` functions ([#12802](https://github.com/rapidsai/cudf/pull/12802)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Implement initial support for avro logical types ([#6482](https://github.com/rapidsai/cudf/pull/6482)) ([#12788](https://github.com/rapidsai/cudf/pull/12788)) [@tpn](https://github.com/tpn) +- Update `tests/column_utilities` to use `experimental::equality` row comparator ([#12777](https://github.com/rapidsai/cudf/pull/12777)) [@divyegala](https://github.com/divyegala) +- Update `distinct/unique_count` to `experimental::row` hasher/comparator ([#12776](https://github.com/rapidsai/cudf/pull/12776)) [@divyegala](https://github.com/divyegala) +- Update `hash_partition` to use `experimental::row::row_hasher` ([#12761](https://github.com/rapidsai/cudf/pull/12761)) [@divyegala](https://github.com/divyegala) +- Update `is_sorted` to use `experimental::row::lexicographic` ([#12752](https://github.com/rapidsai/cudf/pull/12752)) [@divyegala](https://github.com/divyegala) +- Update default data source in cuio reader benchmarks ([#12740](https://github.com/rapidsai/cudf/pull/12740)) [@PointKernel](https://github.com/PointKernel) +- Reenable stream identification library in CI ([#12714](https://github.com/rapidsai/cudf/pull/12714)) [@vyasr](https://github.com/vyasr) +- Add `regex_program` strings splitting java APIs and tests ([#12713](https://github.com/rapidsai/cudf/pull/12713)) [@cindyyuanjiang](https://github.com/cindyyuanjiang) +- Add `regex_program` strings replacing java APIs and tests ([#12701](https://github.com/rapidsai/cudf/pull/12701)) [@cindyyuanjiang](https://github.com/cindyyuanjiang) +- Add `regex_program` strings extract java APIs and tests 
([#12699](https://github.com/rapidsai/cudf/pull/12699)) [@cindyyuanjiang](https://github.com/cindyyuanjiang) +- Variable fragment sizes for Parquet writer ([#12685](https://github.com/rapidsai/cudf/pull/12685)) [@etseidl](https://github.com/etseidl) +- Add segmented reduction support for fixed-point types ([#12680](https://github.com/rapidsai/cudf/pull/12680)) [@davidwendt](https://github.com/davidwendt) +- Move `strings_udf` code into cuDF ([#12669](https://github.com/rapidsai/cudf/pull/12669)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Add `regex_program` searching APIs and related java classes ([#12666](https://github.com/rapidsai/cudf/pull/12666)) [@cindyyuanjiang](https://github.com/cindyyuanjiang) +- Add logging to libcudf ([#12637](https://github.com/rapidsai/cudf/pull/12637)) [@vuule](https://github.com/vuule) +- Add compound aggregations to cudf::segmented_reduce ([#12573](https://github.com/rapidsai/cudf/pull/12573)) [@davidwendt](https://github.com/davidwendt) +- Convert `rank` to use to experimental row comparators ([#12481](https://github.com/rapidsai/cudf/pull/12481)) [@divyegala](https://github.com/divyegala) +- Use rapids-cmake parallel testing feature ([#12451](https://github.com/rapidsai/cudf/pull/12451)) [@robertmaynard](https://github.com/robertmaynard) +- Enable detection of undesired stream usage ([#12089](https://github.com/rapidsai/cudf/pull/12089)) [@vyasr](https://github.com/vyasr) + +## 🛠️ Improvements + +- Pin `dask` and `distributed` for release ([#13070](https://github.com/rapidsai/cudf/pull/13070)) [@galipremsagar](https://github.com/galipremsagar) +- Pin cupy in wheel tests to supported versions ([#13041](https://github.com/rapidsai/cudf/pull/13041)) [@vyasr](https://github.com/vyasr) +- Pin numba version 
([#13001](https://github.com/rapidsai/cudf/pull/13001)) [@vyasr](https://github.com/vyasr) +- Rework gtests SequenceTest to remove using namespace cudf ([#12985](https://github.com/rapidsai/cudf/pull/12985)) [@davidwendt](https://github.com/davidwendt) +- Stop setting package version attribute in wheels ([#12977](https://github.com/rapidsai/cudf/pull/12977)) [@vyasr](https://github.com/vyasr) +- Move detail reduction functions to cudf::reduction::detail namespace ([#12971](https://github.com/rapidsai/cudf/pull/12971)) [@davidwendt](https://github.com/davidwendt) +- Remove default detail mrs: part7 ([#12970](https://github.com/rapidsai/cudf/pull/12970)) [@vyasr](https://github.com/vyasr) +- Remove default detail mrs: part6 ([#12969](https://github.com/rapidsai/cudf/pull/12969)) [@vyasr](https://github.com/vyasr) +- Remove default detail mrs: part5 ([#12968](https://github.com/rapidsai/cudf/pull/12968)) [@vyasr](https://github.com/vyasr) +- Remove default detail mrs: part4 ([#12967](https://github.com/rapidsai/cudf/pull/12967)) [@vyasr](https://github.com/vyasr) +- Remove default detail mrs: part3 ([#12966](https://github.com/rapidsai/cudf/pull/12966)) [@vyasr](https://github.com/vyasr) +- Remove default detail mrs: part2 ([#12965](https://github.com/rapidsai/cudf/pull/12965)) [@vyasr](https://github.com/vyasr) +- Remove default detail mrs: part1 ([#12964](https://github.com/rapidsai/cudf/pull/12964)) [@vyasr](https://github.com/vyasr) +- Add `force_nullable_schema` parameter to Parquet writer. ([#12952](https://github.com/rapidsai/cudf/pull/12952)) [@galipremsagar](https://github.com/galipremsagar) +- Declare a different name for nan_equality.UNEQUAL to prevent Cython warnings. 
([#12947](https://github.com/rapidsai/cudf/pull/12947)) [@bdice](https://github.com/bdice) +- Remove remaining default stream parameters ([#12943](https://github.com/rapidsai/cudf/pull/12943)) [@vyasr](https://github.com/vyasr) +- Fix cudf::segmented_reduce gtest for ANY aggregation ([#12940](https://github.com/rapidsai/cudf/pull/12940)) [@davidwendt](https://github.com/davidwendt) +- Implement `groupby.head` and `groupby.tail` ([#12939](https://github.com/rapidsai/cudf/pull/12939)) [@wence-](https://github.com/wence-) +- Fix libcudf gtests to pass null-count=0 for empty validity masks ([#12923](https://github.com/rapidsai/cudf/pull/12923)) [@davidwendt](https://github.com/davidwendt) +- Migrate parquet encoding to use experimental row operators ([#12918](https://github.com/rapidsai/cudf/pull/12918)) [@PointKernel](https://github.com/PointKernel) +- Fix benchmarks coded in namespace cudf and using namespace cudf ([#12915](https://github.com/rapidsai/cudf/pull/12915)) [@karthikeyann](https://github.com/karthikeyann) +- Fix io/text gtests coded in namespace cudf::test ([#12914](https://github.com/rapidsai/cudf/pull/12914)) [@karthikeyann](https://github.com/karthikeyann) +- Pass `SCCACHE_S3_USE_SSL` to conda builds ([#12910](https://github.com/rapidsai/cudf/pull/12910)) [@ajschmidt8](https://github.com/ajschmidt8) +- Fix FST, JSON gtests & benchmarks coded in namespace cudf::test ([#12907](https://github.com/rapidsai/cudf/pull/12907)) [@karthikeyann](https://github.com/karthikeyann) +- Generate pyproject dependencies using dfg ([#12906](https://github.com/rapidsai/cudf/pull/12906)) [@vyasr](https://github.com/vyasr) +- Update libcudf counting functions to specify cudf::size_type ([#12904](https://github.com/rapidsai/cudf/pull/12904)) [@davidwendt](https://github.com/davidwendt) 
+- Fix `moto` env vars & pass `AWS_SESSION_TOKEN` to conda builds ([#12902](https://github.com/rapidsai/cudf/pull/12902)) [@ajschmidt8](https://github.com/ajschmidt8) +- Rewrite CSV writer benchmark with nvbench ([#12901](https://github.com/rapidsai/cudf/pull/12901)) [@PointKernel](https://github.com/PointKernel) +- Rework some code logic to reduce iterator and comparator inlining to improve compile time ([#12900](https://github.com/rapidsai/cudf/pull/12900)) [@davidwendt](https://github.com/davidwendt) +- Deprecate `line_terminator` in favor of `lineterminator` in `to_csv` ([#12896](https://github.com/rapidsai/cudf/pull/12896)) [@wence-](https://github.com/wence-) +- Add `stream` and `mr` parameters for `structs::detail::flatten_nested_columns` ([#12892](https://github.com/rapidsai/cudf/pull/12892)) [@ttnghia](https://github.com/ttnghia) +- Deprecate libcudf regex APIs accepting pattern strings directly ([#12891](https://github.com/rapidsai/cudf/pull/12891)) [@davidwendt](https://github.com/davidwendt) +- Remove default parameters from detail headers in include ([#12888](https://github.com/rapidsai/cudf/pull/12888)) [@vyasr](https://github.com/vyasr) +- Update minimum `pandas` and `numpy` pinnings ([#12887](https://github.com/rapidsai/cudf/pull/12887)) [@galipremsagar](https://github.com/galipremsagar) +- Implement `groupby.sample` ([#12882](https://github.com/rapidsai/cudf/pull/12882)) [@wence-](https://github.com/wence-) +- Update JNI build ENV default to gcc 11 ([#12881](https://github.com/rapidsai/cudf/pull/12881)) [@pxLi](https://github.com/pxLi) +- Change return type of `cudf::structs::detail::flatten_nested_columns` to smart pointer ([#12878](https://github.com/rapidsai/cudf/pull/12878)) [@ttnghia](https://github.com/ttnghia) +- Fix passing seed parameter to MurmurHash3_32 in 
cudf::hash() function ([#12875](https://github.com/rapidsai/cudf/pull/12875)) [@davidwendt](https://github.com/davidwendt) +- Remove manual artifact upload step in CI ([#12869](https://github.com/rapidsai/cudf/pull/12869)) [@ajschmidt8](https://github.com/ajschmidt8) +- Update to GCC 11 ([#12868](https://github.com/rapidsai/cudf/pull/12868)) [@bdice](https://github.com/bdice) +- Fix null hive-partition behavior in dask-cudf parquet ([#12866](https://github.com/rapidsai/cudf/pull/12866)) [@rjzamora](https://github.com/rjzamora) +- Update to protobuf>=4.21.6,<4.22. ([#12864](https://github.com/rapidsai/cudf/pull/12864)) [@bdice](https://github.com/bdice) +- Update RMM allocators ([#12861](https://github.com/rapidsai/cudf/pull/12861)) [@pentschev](https://github.com/pentschev) +- Improve performance for replace-multi for long strings ([#12858](https://github.com/rapidsai/cudf/pull/12858)) [@davidwendt](https://github.com/davidwendt) +- Drop Python 3.7 handling for pickle protocol 4 ([#12857](https://github.com/rapidsai/cudf/pull/12857)) [@jakirkham](https://github.com/jakirkham) +- Migrate as much as possible to pyproject.toml ([#12850](https://github.com/rapidsai/cudf/pull/12850)) [@vyasr](https://github.com/vyasr) +- Enable nbqa pre-commit hooks for isort and black. 
([#12848](https://github.com/rapidsai/cudf/pull/12848)) [@bdice](https://github.com/bdice) +- Setting a threshold for KvikIO IO ([#12841](https://github.com/rapidsai/cudf/pull/12841)) [@madsbk](https://github.com/madsbk) +- Update datasets download URL ([#12840](https://github.com/rapidsai/cudf/pull/12840)) [@jjacobelli](https://github.com/jjacobelli) +- Make docs builds less verbose ([#12836](https://github.com/rapidsai/cudf/pull/12836)) [@AyodeAwe](https://github.com/AyodeAwe) +- Consolidate linter configs into pyproject.toml ([#12834](https://github.com/rapidsai/cudf/pull/12834)) [@vyasr](https://github.com/vyasr) +- Deprecate `names` & `dtype` in `Index.copy` ([#12825](https://github.com/rapidsai/cudf/pull/12825)) [@galipremsagar](https://github.com/galipremsagar) +- Deprecate `inplace` parameters in categorical methods ([#12824](https://github.com/rapidsai/cudf/pull/12824)) [@galipremsagar](https://github.com/galipremsagar) +- Add optional text file support to ninja-log utility ([#12823](https://github.com/rapidsai/cudf/pull/12823)) [@davidwendt](https://github.com/davidwendt) +- Deprecate `Index.is_*` methods ([#12820](https://github.com/rapidsai/cudf/pull/12820)) [@galipremsagar](https://github.com/galipremsagar) +- Add dfg as a pre-commit hook ([#12819](https://github.com/rapidsai/cudf/pull/12819)) [@vyasr](https://github.com/vyasr) +- Deprecate `datetime_is_numeric` from `describe` ([#12818](https://github.com/rapidsai/cudf/pull/12818)) [@galipremsagar](https://github.com/galipremsagar) +- Deprecate `na_sentinel` in `factorize` ([#12817](https://github.com/rapidsai/cudf/pull/12817)) [@galipremsagar](https://github.com/galipremsagar) +- Shuffling read into a sub function in parquet read ([#12809](https://github.com/rapidsai/cudf/pull/12809)) 
[@hyperbolic2346](https://github.com/hyperbolic2346) +- Fixing parquet coalescing of reads ([#12808](https://github.com/rapidsai/cudf/pull/12808)) [@hyperbolic2346](https://github.com/hyperbolic2346) +- CI: Remove specification of manual stage for check_style.sh script. ([#12803](https://github.com/rapidsai/cudf/pull/12803)) [@csadorf](https://github.com/csadorf) +- Add compute-sanitizer github workflow action to nightly tests ([#12800](https://github.com/rapidsai/cudf/pull/12800)) [@davidwendt](https://github.com/davidwendt) +- Enable groupby std and variance aggregation types in libcudf Debug build ([#12799](https://github.com/rapidsai/cudf/pull/12799)) [@davidwendt](https://github.com/davidwendt) +- Expose seed argument to hash_values ([#12795](https://github.com/rapidsai/cudf/pull/12795)) [@ayushdg](https://github.com/ayushdg) +- Fix groupby gtests coded in namespace cudf::test ([#12784](https://github.com/rapidsai/cudf/pull/12784)) [@davidwendt](https://github.com/davidwendt) +- Improve performance for cudf::strings::count_characters for long strings ([#12779](https://github.com/rapidsai/cudf/pull/12779)) [@davidwendt](https://github.com/davidwendt) +- Deallocate encoded data in ORC writer immediately after compression ([#12770](https://github.com/rapidsai/cudf/pull/12770)) [@vuule](https://github.com/vuule) +- Stop force pulling fmt in nvbench. ([#12768](https://github.com/rapidsai/cudf/pull/12768)) [@vyasr](https://github.com/vyasr) +- Remove now redundant cuda initialization ([#12758](https://github.com/rapidsai/cudf/pull/12758)) [@vyasr](https://github.com/vyasr) +- Adds JSON reader, writer io benchmark ([#12753](https://github.com/rapidsai/cudf/pull/12753)) [@karthikeyann](https://github.com/karthikeyann) +- Use test paths relative to package directory. 
([#12751](https://github.com/rapidsai/cudf/pull/12751)) [@bdice](https://github.com/bdice) +- Add build metrics report as artifact to cpp-build workflow ([#12750](https://github.com/rapidsai/cudf/pull/12750)) [@davidwendt](https://github.com/davidwendt) +- Add JNI methods for detecting and purging non-empty nulls from LIST and STRUCT ([#12742](https://github.com/rapidsai/cudf/pull/12742)) [@razajafri](https://github.com/razajafri) +- Stop using versioneer to manage versions ([#12741](https://github.com/rapidsai/cudf/pull/12741)) [@vyasr](https://github.com/vyasr) +- Reduce error handling verbosity in CI tests scripts ([#12738](https://github.com/rapidsai/cudf/pull/12738)) [@AjayThorve](https://github.com/AjayThorve) +- Reduce the number of test cases in multibyte_split benchmark ([#12737](https://github.com/rapidsai/cudf/pull/12737)) [@PointKernel](https://github.com/PointKernel) +- Update shared workflow branches ([#12733](https://github.com/rapidsai/cudf/pull/12733)) [@ajschmidt8](https://github.com/ajschmidt8) +- JNI switches to nested JSON reader ([#12732](https://github.com/rapidsai/cudf/pull/12732)) [@res-life](https://github.com/res-life) +- Changing `cudf::io::source_info` to use `cudf::host_span<std::byte>` in a non-breaking form ([#12730](https://github.com/rapidsai/cudf/pull/12730)) [@hyperbolic2346](https://github.com/hyperbolic2346) +- Add nvbench environment class for initializing RMM in benchmarks ([#12728](https://github.com/rapidsai/cudf/pull/12728)) [@davidwendt](https://github.com/davidwendt) +- Split C++ and Python build dependencies into separate lists. ([#12724](https://github.com/rapidsai/cudf/pull/12724)) [@bdice](https://github.com/bdice) +- Add build dependencies to Java tests. 
([#12723](https://github.com/rapidsai/cudf/pull/12723)) [@bdice](https://github.com/bdice) +- Allow setting the seed argument for hash partition ([#12715](https://github.com/rapidsai/cudf/pull/12715)) [@firestarman](https://github.com/firestarman) +- Remove gpuCI scripts. ([#12712](https://github.com/rapidsai/cudf/pull/12712)) [@bdice](https://github.com/bdice) +- Unpin `dask` and `distributed` for development ([#12710](https://github.com/rapidsai/cudf/pull/12710)) [@galipremsagar](https://github.com/galipremsagar) +- `partition_by_hash()`: use `_split()` ([#12704](https://github.com/rapidsai/cudf/pull/12704)) [@madsbk](https://github.com/madsbk) +- Remove DataFrame.quantiles from docs. ([#12684](https://github.com/rapidsai/cudf/pull/12684)) [@bdice](https://github.com/bdice) +- Fast path for `experimental::row::equality` ([#12676](https://github.com/rapidsai/cudf/pull/12676)) [@divyegala](https://github.com/divyegala) +- Move date to build string in `conda` recipe ([#12661](https://github.com/rapidsai/cudf/pull/12661)) [@ajschmidt8](https://github.com/ajschmidt8) +- Refactor reduction logic for fixed-point types ([#12652](https://github.com/rapidsai/cudf/pull/12652)) [@davidwendt](https://github.com/davidwendt) +- Pay off some JNI RMM API tech debt ([#12632](https://github.com/rapidsai/cudf/pull/12632)) [@revans2](https://github.com/revans2) +- Merge `copy-on-write` feature branch into `branch-23.04` ([#12619](https://github.com/rapidsai/cudf/pull/12619)) [@galipremsagar](https://github.com/galipremsagar) +- Remove cudf::strings::repeat_strings_output_sizes and optional parameter from cudf::strings::repeat_strings ([#12609](https://github.com/rapidsai/cudf/pull/12609)) [@davidwendt](https://github.com/davidwendt) +- Pin cuda-nvrtc. 
([#12606](https://github.com/rapidsai/cudf/pull/12606)) [@bdice](https://github.com/bdice) +- Remove cudf::test::print calls from libcudf gtests ([#12604](https://github.com/rapidsai/cudf/pull/12604)) [@davidwendt](https://github.com/davidwendt) +- Init JNI version 23.04.0-SNAPSHOT ([#12599](https://github.com/rapidsai/cudf/pull/12599)) [@pxLi](https://github.com/pxLi) +- Add performance benchmarks to user facing docs ([#12595](https://github.com/rapidsai/cudf/pull/12595)) [@galipremsagar](https://github.com/galipremsagar) +- Add docs build job ([#12592](https://github.com/rapidsai/cudf/pull/12592)) [@AyodeAwe](https://github.com/AyodeAwe) +- Replace message parsing with throwing more specific exceptions ([#12426](https://github.com/rapidsai/cudf/pull/12426)) [@vyasr](https://github.com/vyasr) +- Support conversion to/from cudf in dask.dataframe.core.to_backend ([#12380](https://github.com/rapidsai/cudf/pull/12380)) [@rjzamora](https://github.com/rjzamora) + +# cuDF 23.02.00 (9 Feb 2023) + +## 🚨 Breaking Changes + +- Pin `dask` and `distributed` for release ([#12695](https://github.com/rapidsai/cudf/pull/12695)) [@galipremsagar](https://github.com/galipremsagar) +- Change ways to access `ptr` in `Buffer` ([#12587](https://github.com/rapidsai/cudf/pull/12587)) [@galipremsagar](https://github.com/galipremsagar) +- Remove column names ([#12578](https://github.com/rapidsai/cudf/pull/12578)) [@vuule](https://github.com/vuule) +- Default `cudf::io::read_json` to nested JSON parser ([#12544](https://github.com/rapidsai/cudf/pull/12544)) [@vuule](https://github.com/vuule) +- Switch `engine=cudf` to the new `JSON` reader ([#12509](https://github.com/rapidsai/cudf/pull/12509)) [@galipremsagar](https://github.com/galipremsagar) +- Add trailing comma support for nested JSON reader 
([#12448](https://github.com/rapidsai/cudf/pull/12448)) [@karthikeyann](https://github.com/karthikeyann) +- Upgrade to `arrow-10.0.1` ([#12327](https://github.com/rapidsai/cudf/pull/12327)) [@galipremsagar](https://github.com/galipremsagar) +- Fail loudly to avoid data corruption with unsupported input in `read_orc` ([#12325](https://github.com/rapidsai/cudf/pull/12325)) [@vuule](https://github.com/vuule) +- CSV, JSON reader to infer integer column with nulls as int64 instead of float64 ([#12309](https://github.com/rapidsai/cudf/pull/12309)) [@karthikeyann](https://github.com/karthikeyann) +- Remove deprecated code for 23.02 ([#12281](https://github.com/rapidsai/cudf/pull/12281)) [@vyasr](https://github.com/vyasr) +- Null element for parsing error in numeric types in JSON, CSV reader ([#12272](https://github.com/rapidsai/cudf/pull/12272)) [@karthikeyann](https://github.com/karthikeyann) +- Purge non-empty nulls for `superimpose_nulls` and `push_down_nulls` ([#12239](https://github.com/rapidsai/cudf/pull/12239)) [@ttnghia](https://github.com/ttnghia) +- Rename `cudf::structs::detail::superimpose_parent_nulls` APIs ([#12230](https://github.com/rapidsai/cudf/pull/12230)) [@ttnghia](https://github.com/ttnghia) +- Remove JIT type names, refactor id_to_type. 
([#12158](https://github.com/rapidsai/cudf/pull/12158)) [@bdice](https://github.com/bdice) +- Floor division uses integer division for integral arguments ([#12131](https://github.com/rapidsai/cudf/pull/12131)) [@wence-](https://github.com/wence-) + +## 🐛 Bug Fixes + +- Fix a mask data corruption in UDF ([#12647](https://github.com/rapidsai/cudf/pull/12647)) [@galipremsagar](https://github.com/galipremsagar) +- pre-commit: Update isort version to 5.12.0 ([#12645](https://github.com/rapidsai/cudf/pull/12645)) [@wence-](https://github.com/wence-) +- tests: Skip cuInit tests if cuda-gdb is not found or not working ([#12644](https://github.com/rapidsai/cudf/pull/12644)) [@wence-](https://github.com/wence-) +- Revert regex program java APIs and tests ([#12639](https://github.com/rapidsai/cudf/pull/12639)) [@cindyyuanjiang](https://github.com/cindyyuanjiang) +- Fix leaks in ColumnVectorTest ([#12625](https://github.com/rapidsai/cudf/pull/12625)) [@jlowe](https://github.com/jlowe) +- Handle when spillable buffers own each other ([#12607](https://github.com/rapidsai/cudf/pull/12607)) [@madsbk](https://github.com/madsbk) +- Fix incorrect null counts for sliced columns in JCudfSerialization ([#12589](https://github.com/rapidsai/cudf/pull/12589)) [@jlowe](https://github.com/jlowe) +- lists: Transfer dtypes correctly through list.get ([#12586](https://github.com/rapidsai/cudf/pull/12586)) [@wence-](https://github.com/wence-) +- timedelta: Don't go via float intermediates for floordiv ([#12585](https://github.com/rapidsai/cudf/pull/12585)) [@wence-](https://github.com/wence-) +- Fixing BUG, `get_next_chunk()` should use the blocking function `device_read()` ([#12584](https://github.com/rapidsai/cudf/pull/12584)) [@madsbk](https://github.com/madsbk) +- Make JNI QuoteStyle accessible outside 
ai.rapids.cudf ([#12572](https://github.com/rapidsai/cudf/pull/12572)) [@mythrocks](https://github.com/mythrocks) +- `partition_by_hash()`: support index ([#12554](https://github.com/rapidsai/cudf/pull/12554)) [@madsbk](https://github.com/madsbk) +- Mixed Join benchmark bug due to wrong conditional column ([#12553](https://github.com/rapidsai/cudf/pull/12553)) [@divyegala](https://github.com/divyegala) +- Update List Lexicographical Comparator ([#12538](https://github.com/rapidsai/cudf/pull/12538)) [@divyegala](https://github.com/divyegala) +- Dynamically read PTX version ([#12534](https://github.com/rapidsai/cudf/pull/12534)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- build.sh switch to use `RAPIDS` magic value ([#12525](https://github.com/rapidsai/cudf/pull/12525)) [@robertmaynard](https://github.com/robertmaynard) +- Loosen runtime arrow pinning ([#12522](https://github.com/rapidsai/cudf/pull/12522)) [@vyasr](https://github.com/vyasr) +- Enable metadata transfer for complex types in transpose ([#12491](https://github.com/rapidsai/cudf/pull/12491)) [@galipremsagar](https://github.com/galipremsagar) +- Fix issues with parquet chunked reader ([#12488](https://github.com/rapidsai/cudf/pull/12488)) [@nvdbaranec](https://github.com/nvdbaranec) +- Fix missing metadata transfer in concat for `ListColumn` ([#12487](https://github.com/rapidsai/cudf/pull/12487)) [@galipremsagar](https://github.com/galipremsagar) +- Rename libcudf substring source files to slice ([#12484](https://github.com/rapidsai/cudf/pull/12484)) [@davidwendt](https://github.com/davidwendt) +- Fix compile issue with arrow 10 ([#12465](https://github.com/rapidsai/cudf/pull/12465)) [@ttnghia](https://github.com/ttnghia) +- Fix List offsets bug in mixed type list column in nested JSON reader 
([#12447](https://github.com/rapidsai/cudf/pull/12447)) [@karthikeyann](https://github.com/karthikeyann) +- Fix xfail incompatibilities ([#12423](https://github.com/rapidsai/cudf/pull/12423)) [@vyasr](https://github.com/vyasr) +- Fix bug in Parquet column index encoding ([#12404](https://github.com/rapidsai/cudf/pull/12404)) [@etseidl](https://github.com/etseidl) +- When building Arrow shared look for a shared OpenSSL ([#12396](https://github.com/rapidsai/cudf/pull/12396)) [@robertmaynard](https://github.com/robertmaynard) +- Fix get_json_object to return empty column on empty input ([#12384](https://github.com/rapidsai/cudf/pull/12384)) [@davidwendt](https://github.com/davidwendt) +- Pin arrow 9 in testing dependencies to prevent conda solve issues ([#12377](https://github.com/rapidsai/cudf/pull/12377)) [@vyasr](https://github.com/vyasr) +- Fix reductions any/all return value for empty input ([#12374](https://github.com/rapidsai/cudf/pull/12374)) [@davidwendt](https://github.com/davidwendt) +- Fix debug compile errors in parquet.hpp ([#12372](https://github.com/rapidsai/cudf/pull/12372)) [@davidwendt](https://github.com/davidwendt) +- Purge non-empty nulls in `cudf::make_lists_column` ([#12370](https://github.com/rapidsai/cudf/pull/12370)) [@ttnghia](https://github.com/ttnghia) +- Use correct memory resource in io::make_column ([#12364](https://github.com/rapidsai/cudf/pull/12364)) [@vyasr](https://github.com/vyasr) +- Add code to detect possible malformed page data in parquet files. 
([#12360](https://github.com/rapidsai/cudf/pull/12360)) [@nvdbaranec](https://github.com/nvdbaranec) +- Fail loudly to avoid data corruption with unsupported input in `read_orc` ([#12325](https://github.com/rapidsai/cudf/pull/12325)) [@vuule](https://github.com/vuule) +- Fix NumericPairIteratorTest for float values ([#12306](https://github.com/rapidsai/cudf/pull/12306)) [@davidwendt](https://github.com/davidwendt) +- Fixes memory allocation in nested JSON tokenizer ([#12300](https://github.com/rapidsai/cudf/pull/12300)) [@elstehle](https://github.com/elstehle) +- Reconstruct dtypes correctly for list aggs of struct columns ([#12290](https://github.com/rapidsai/cudf/pull/12290)) [@wence-](https://github.com/wence-) +- Fix regex \A and \Z to strictly match string begin/end ([#12282](https://github.com/rapidsai/cudf/pull/12282)) [@davidwendt](https://github.com/davidwendt) +- Fix compile issue in `json_chunked_reader.cpp` ([#12280](https://github.com/rapidsai/cudf/pull/12280)) [@ttnghia](https://github.com/ttnghia) +- Change reductions any/all to return valid values for empty input ([#12279](https://github.com/rapidsai/cudf/pull/12279)) [@davidwendt](https://github.com/davidwendt) +- Only exclude join keys that are indices from key columns ([#12271](https://github.com/rapidsai/cudf/pull/12271)) [@wence-](https://github.com/wence-) +- Fix spill to device limit ([#12252](https://github.com/rapidsai/cudf/pull/12252)) [@madsbk](https://github.com/madsbk) +- Correct behaviour of sort in `concat` for singleton concatenations ([#12247](https://github.com/rapidsai/cudf/pull/12247)) [@wence-](https://github.com/wence-) +- Purge non-empty nulls for `superimpose_nulls` and `push_down_nulls` ([#12239](https://github.com/rapidsai/cudf/pull/12239)) [@ttnghia](https://github.com/ttnghia) +- 
Patch CUB DeviceSegmentedSort and remove workaround ([#12234](https://github.com/rapidsai/cudf/pull/12234)) [@davidwendt](https://github.com/davidwendt) +- Fix memory leak in udf_string::assign(&&) function ([#12206](https://github.com/rapidsai/cudf/pull/12206)) [@davidwendt](https://github.com/davidwendt) +- Workaround thrust-copy-if limit in json get_tree_representation ([#12190](https://github.com/rapidsai/cudf/pull/12190)) [@davidwendt](https://github.com/davidwendt) +- Fix page size calculation in Parquet writer ([#12182](https://github.com/rapidsai/cudf/pull/12182)) [@etseidl](https://github.com/etseidl) +- Add cudf::detail::sizes_to_offsets_iterator to allow checking overflow in offsets ([#12180](https://github.com/rapidsai/cudf/pull/12180)) [@davidwendt](https://github.com/davidwendt) +- Workaround thrust-copy-if limit in wordpiece-tokenizer ([#12168](https://github.com/rapidsai/cudf/pull/12168)) [@davidwendt](https://github.com/davidwendt) +- Floor division uses integer division for integral arguments ([#12131](https://github.com/rapidsai/cudf/pull/12131)) [@wence-](https://github.com/wence-) + +## 📖 Documentation + +- Fix link to NVTX ([#12598](https://github.com/rapidsai/cudf/pull/12598)) [@sameerz](https://github.com/sameerz) +- Include missing groupby functions in documentation ([#12580](https://github.com/rapidsai/cudf/pull/12580)) [@quasiben](https://github.com/quasiben) +- Fix documentation author ([#12527](https://github.com/rapidsai/cudf/pull/12527)) [@bdice](https://github.com/bdice) +- Update libcudf reduction docs for casting output types ([#12526](https://github.com/rapidsai/cudf/pull/12526)) [@davidwendt](https://github.com/davidwendt) +- Add JSON reader page in user guide ([#12499](https://github.com/rapidsai/cudf/pull/12499)) 
[@GregoryKimball](https://github.com/GregoryKimball) +- Link unsupported iteration API docstrings ([#12482](https://github.com/rapidsai/cudf/pull/12482)) [@galipremsagar](https://github.com/galipremsagar) +- `strings_udf` doc update ([#12469](https://github.com/rapidsai/cudf/pull/12469)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Update cudf_assert docs with correct NDEBUG behavior ([#12464](https://github.com/rapidsai/cudf/pull/12464)) [@robertmaynard](https://github.com/robertmaynard) +- Update pre-commit hooks guide ([#12395](https://github.com/rapidsai/cudf/pull/12395)) [@bdice](https://github.com/bdice) +- Update test docs to not use detail comparison utilities ([#12332](https://github.com/rapidsai/cudf/pull/12332)) [@PointKernel](https://github.com/PointKernel) +- Fix doxygen description for regex_program::compute_working_memory_size ([#12329](https://github.com/rapidsai/cudf/pull/12329)) [@davidwendt](https://github.com/davidwendt) +- Add eval to docs. 
([#12322](https://github.com/rapidsai/cudf/pull/12322)) [@vyasr](https://github.com/vyasr) +- Turn on xfail_strict=true ([#12244](https://github.com/rapidsai/cudf/pull/12244)) [@wence-](https://github.com/wence-) +- Update 10 minutes to cuDF ([#12114](https://github.com/rapidsai/cudf/pull/12114)) [@wence-](https://github.com/wence-) + +## 🚀 New Features + +- Use kvikIO as the default IO backend ([#12574](https://github.com/rapidsai/cudf/pull/12574)) [@vuule](https://github.com/vuule) +- Use `has_nonempty_nulls` instead of `may_contain_non_empty_nulls` in `superimpose_nulls` and `push_down_nulls` ([#12560](https://github.com/rapidsai/cudf/pull/12560)) [@ttnghia](https://github.com/ttnghia) +- Add strings methods removeprefix and removesuffix ([#12557](https://github.com/rapidsai/cudf/pull/12557)) [@davidwendt](https://github.com/davidwendt) +- Add `regex_program` java APIs and unit tests ([#12548](https://github.com/rapidsai/cudf/pull/12548)) [@cindyyuanjiang](https://github.com/cindyyuanjiang) +- Default `cudf::io::read_json` to nested JSON parser ([#12544](https://github.com/rapidsai/cudf/pull/12544)) [@vuule](https://github.com/vuule) +- Make string quoting optional on CSV write ([#12539](https://github.com/rapidsai/cudf/pull/12539)) [@mythrocks](https://github.com/mythrocks) +- Use new nvCOMP API to optimize the compression temp memory size ([#12533](https://github.com/rapidsai/cudf/pull/12533)) [@vuule](https://github.com/vuule) +- Support "values" orient (array of arrays) in Nested JSON reader ([#12498](https://github.com/rapidsai/cudf/pull/12498)) [@karthikeyann](https://github.com/karthikeyann) +- `one_hot_encode` to use experimental row comparators ([#12478](https://github.com/rapidsai/cudf/pull/12478)) [@divyegala](https://github.com/divyegala) +- Support %W and %w 
format specifiers in cudf::strings::to_timestamps ([#12475](https://github.com/rapidsai/cudf/pull/12475)) [@davidwendt](https://github.com/davidwendt) +- Add JSON Writer ([#12474](https://github.com/rapidsai/cudf/pull/12474)) [@karthikeyann](https://github.com/karthikeyann) +- Refactor `thrust_copy_if` into `cudf::detail::copy_if_safe` ([#12455](https://github.com/rapidsai/cudf/pull/12455)) [@ttnghia](https://github.com/ttnghia) +- Add trailing comma support for nested JSON reader ([#12448](https://github.com/rapidsai/cudf/pull/12448)) [@karthikeyann](https://github.com/karthikeyann) +- Extract `tokenize_json.hpp` detail header from `src/io/json/nested_json.hpp` ([#12432](https://github.com/rapidsai/cudf/pull/12432)) [@ttnghia](https://github.com/ttnghia) +- JNI bindings to write CSV ([#12425](https://github.com/rapidsai/cudf/pull/12425)) [@mythrocks](https://github.com/mythrocks) +- Nested JSON depth benchmark ([#12371](https://github.com/rapidsai/cudf/pull/12371)) [@karthikeyann](https://github.com/karthikeyann) +- Implement `lists::reverse` ([#12336](https://github.com/rapidsai/cudf/pull/12336)) [@ttnghia](https://github.com/ttnghia) +- Use `device_read` in experimental `read_json` ([#12314](https://github.com/rapidsai/cudf/pull/12314)) [@vuule](https://github.com/vuule) +- Implement JNI for `strings::reverse` ([#12283](https://github.com/rapidsai/cudf/pull/12283)) [@ttnghia](https://github.com/ttnghia) +- Null element for parsing error in numeric types in JSON, CSV reader ([#12272](https://github.com/rapidsai/cudf/pull/12272)) [@karthikeyann](https://github.com/karthikeyann) +- Add cudf::strings:like function with multiple patterns ([#12269](https://github.com/rapidsai/cudf/pull/12269)) [@davidwendt](https://github.com/davidwendt) +- Add environment variable to control 
host memory allocation in `hostdevice_vector` ([#12251](https://github.com/rapidsai/cudf/pull/12251)) [@vuule](https://github.com/vuule) +- Add cudf::strings::reverse function ([#12227](https://github.com/rapidsai/cudf/pull/12227)) [@davidwendt](https://github.com/davidwendt) +- Selectively use dictionary encoding in Parquet writer ([#12211](https://github.com/rapidsai/cudf/pull/12211)) [@etseidl](https://github.com/etseidl) +- Support `replace` in `strings_udf` ([#12207](https://github.com/rapidsai/cudf/pull/12207)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Add support to read binary encoded decimals in parquet ([#12205](https://github.com/rapidsai/cudf/pull/12205)) [@PointKernel](https://github.com/PointKernel) +- Support regex EOL where the string ends with a new-line character ([#12181](https://github.com/rapidsai/cudf/pull/12181)) [@davidwendt](https://github.com/davidwendt) +- Updating `stream_compaction/unique` to use new row comparators ([#12159](https://github.com/rapidsai/cudf/pull/12159)) [@divyegala](https://github.com/divyegala) +- Add device buffer datasource ([#12024](https://github.com/rapidsai/cudf/pull/12024)) [@PointKernel](https://github.com/PointKernel) +- Implement groupby apply with JIT ([#11452](https://github.com/rapidsai/cudf/pull/11452)) [@bwyogatama](https://github.com/bwyogatama) + +## 🛠️ Improvements + +- Update shared workflow branches ([#12696](https://github.com/rapidsai/cudf/pull/12696)) [@ajschmidt8](https://github.com/ajschmidt8) +- Pin `dask` and `distributed` for release ([#12695](https://github.com/rapidsai/cudf/pull/12695)) [@galipremsagar](https://github.com/galipremsagar) +- Don't upload `libcudf-example` to Anaconda.org ([#12671](https://github.com/rapidsai/cudf/pull/12671)) 
[@ajschmidt8](https://github.com/ajschmidt8) +- Pin wheel dependencies to same RAPIDS release ([#12659](https://github.com/rapidsai/cudf/pull/12659)) [@sevagh](https://github.com/sevagh) +- Use CTK 118/cp310 branch of wheel workflows ([#12602](https://github.com/rapidsai/cudf/pull/12602)) [@sevagh](https://github.com/sevagh) +- Change ways to access `ptr` in `Buffer` ([#12587](https://github.com/rapidsai/cudf/pull/12587)) [@galipremsagar](https://github.com/galipremsagar) +- Version a parquet writer xfail ([#12579](https://github.com/rapidsai/cudf/pull/12579)) [@galipremsagar](https://github.com/galipremsagar) +- Remove column names ([#12578](https://github.com/rapidsai/cudf/pull/12578)) [@vuule](https://github.com/vuule) +- Parquet reader optimization to address V100 regression. ([#12577](https://github.com/rapidsai/cudf/pull/12577)) [@nvdbaranec](https://github.com/nvdbaranec) +- Add support for `category` dtypes in CSV reader ([#12571](https://github.com/rapidsai/cudf/pull/12571)) [@galipremsagar](https://github.com/galipremsagar) +- Remove `spill_lock` parameter from `SpillableBuffer.get_ptr()` ([#12564](https://github.com/rapidsai/cudf/pull/12564)) [@madsbk](https://github.com/madsbk) +- Optimize `cudf::make_lists_column` ([#12547](https://github.com/rapidsai/cudf/pull/12547)) [@ttnghia](https://github.com/ttnghia) +- Remove `cudf::strings::repeat_strings_output_sizes` from Java and JNI ([#12546](https://github.com/rapidsai/cudf/pull/12546)) [@ttnghia](https://github.com/ttnghia) +- Test that cuInit is not called when RAPIDS_NO_INITIALIZE is set ([#12545](https://github.com/rapidsai/cudf/pull/12545)) [@wence-](https://github.com/wence-) +- Rework repeat_strings to use sizes-to-offsets utility ([#12543](https://github.com/rapidsai/cudf/pull/12543)) 
[@davidwendt](https://github.com/davidwendt) +- Replace exclusive_scan with sizes_to_offsets in cudf::lists::sequences ([#12541](https://github.com/rapidsai/cudf/pull/12541)) [@davidwendt](https://github.com/davidwendt) +- Rework nvtext::ngrams_tokenize to use sizes-to-offsets utility ([#12540](https://github.com/rapidsai/cudf/pull/12540)) [@davidwendt](https://github.com/davidwendt) +- Fix binary-ops gtests coded in namespace cudf::test ([#12536](https://github.com/rapidsai/cudf/pull/12536)) [@davidwendt](https://github.com/davidwendt) +- More `@acquire_spill_lock()` and `as_buffer(..., exposed=False)` ([#12535](https://github.com/rapidsai/cudf/pull/12535)) [@madsbk](https://github.com/madsbk) +- Guard CUDA runtime APIs with error checking ([#12531](https://github.com/rapidsai/cudf/pull/12531)) [@PointKernel](https://github.com/PointKernel) +- Update TODOs from issue 10432. ([#12528](https://github.com/rapidsai/cudf/pull/12528)) [@bdice](https://github.com/bdice) +- Update rapids-cmake definitions version in GitHub Actions style checks. ([#12511](https://github.com/rapidsai/cudf/pull/12511)) [@bdice](https://github.com/bdice) +- Switch `engine=cudf` to the new `JSON` reader ([#12509](https://github.com/rapidsai/cudf/pull/12509)) [@galipremsagar](https://github.com/galipremsagar) +- Fix SUM/MEAN aggregation type support. 
([#12503](https://github.com/rapidsai/cudf/pull/12503)) [@bdice](https://github.com/bdice) +- Stop using pandas._testing ([#12492](https://github.com/rapidsai/cudf/pull/12492)) [@vyasr](https://github.com/vyasr) +- Fix ROLLING_TEST gtests coded in namespace cudf::test ([#12490](https://github.com/rapidsai/cudf/pull/12490)) [@davidwendt](https://github.com/davidwendt) +- Fix erroneously skipped ORC ZSTD test ([#12486](https://github.com/rapidsai/cudf/pull/12486)) [@vuule](https://github.com/vuule) +- Rework nvtext::generate_character_ngrams to use make_strings_children ([#12480](https://github.com/rapidsai/cudf/pull/12480)) [@davidwendt](https://github.com/davidwendt) +- Raise warnings as errors in the test suite ([#12468](https://github.com/rapidsai/cudf/pull/12468)) [@vyasr](https://github.com/vyasr) +- Remove `int32` hard-coding in python ([#12467](https://github.com/rapidsai/cudf/pull/12467)) [@galipremsagar](https://github.com/galipremsagar) +- Use cudaMemcpyDefault. 
([#12466](https://github.com/rapidsai/cudf/pull/12466)) [@bdice](https://github.com/bdice) +- Update workflows for nightly tests ([#12462](https://github.com/rapidsai/cudf/pull/12462)) [@ajschmidt8](https://github.com/ajschmidt8) +- Build CUDA `11.8` and Python `3.10` Packages ([#12457](https://github.com/rapidsai/cudf/pull/12457)) [@ajschmidt8](https://github.com/ajschmidt8) +- JNI build image default as cuda11.8 ([#12441](https://github.com/rapidsai/cudf/pull/12441)) [@pxLi](https://github.com/pxLi) +- Re-enable `Recently Updated` Check ([#12435](https://github.com/rapidsai/cudf/pull/12435)) [@ajschmidt8](https://github.com/ajschmidt8) +- Rework remaining cudf::strings::from_xyz functions to use make_strings_children ([#12434](https://github.com/rapidsai/cudf/pull/12434)) [@vuule](https://github.com/vuule) +- Build wheels alongside conda CI ([#12427](https://github.com/rapidsai/cudf/pull/12427)) [@sevagh](https://github.com/sevagh) +- Remove arguments for checking exception messages in Python ([#12424](https://github.com/rapidsai/cudf/pull/12424)) [@vyasr](https://github.com/vyasr) +- Clean up cuco usage ([#12421](https://github.com/rapidsai/cudf/pull/12421)) [@PointKernel](https://github.com/PointKernel) +- Fix warnings in remaining modules ([#12406](https://github.com/rapidsai/cudf/pull/12406)) [@vyasr](https://github.com/vyasr) +- Update `ops-bot.yaml` ([#12402](https://github.com/rapidsai/cudf/pull/12402)) [@ajschmidt8](https://github.com/ajschmidt8) +- Rework cudf::strings::integers_to_ipv4 to use make_strings_children utility ([#12401](https://github.com/rapidsai/cudf/pull/12401)) [@davidwendt](https://github.com/davidwendt) +- Use `numpy.empty()` instead of `bytearray` to allocate host memory for spilling ([#12399](https://github.com/rapidsai/cudf/pull/12399)) 
[@madsbk](https://github.com/madsbk) +- Deprecate chunksize from dask_cudf.read_csv ([#12394](https://github.com/rapidsai/cudf/pull/12394)) [@rjzamora](https://github.com/rjzamora) +- Expose the RMM pool size in JNI ([#12390](https://github.com/rapidsai/cudf/pull/12390)) [@revans2](https://github.com/revans2) +- Fix COPYING_TEST: gtests coded in namespace cudf::test ([#12387](https://github.com/rapidsai/cudf/pull/12387)) [@davidwendt](https://github.com/davidwendt) +- Rework cudf::strings::url_encode to use make_strings_children utility ([#12385](https://github.com/rapidsai/cudf/pull/12385)) [@davidwendt](https://github.com/davidwendt) +- Use make_strings_children in parse_data nested json reader ([#12382](https://github.com/rapidsai/cudf/pull/12382)) [@karthikeyann](https://github.com/karthikeyann) +- Fix warnings in test_datetime.py ([#12381](https://github.com/rapidsai/cudf/pull/12381)) [@vyasr](https://github.com/vyasr) +- Mixed Join Benchmarks ([#12375](https://github.com/rapidsai/cudf/pull/12375)) [@divyegala](https://github.com/divyegala) +- Fix warnings in dataframe.py ([#12369](https://github.com/rapidsai/cudf/pull/12369)) [@vyasr](https://github.com/vyasr) +- Update conda recipes. 
([#12368](https://github.com/rapidsai/cudf/pull/12368)) [@bdice](https://github.com/bdice) +- Use gpu-latest-1 runner tag ([#12366](https://github.com/rapidsai/cudf/pull/12366)) [@bdice](https://github.com/bdice) +- Rework cudf::strings::from_booleans to use make_strings_children ([#12365](https://github.com/rapidsai/cudf/pull/12365)) [@vuule](https://github.com/vuule) +- Fix warnings in test modules up to test_dataframe.py ([#12355](https://github.com/rapidsai/cudf/pull/12355)) [@vyasr](https://github.com/vyasr) +- JSON column performance optimization - struct column nulls ([#12354](https://github.com/rapidsai/cudf/pull/12354)) [@karthikeyann](https://github.com/karthikeyann) +- Accelerate stable-segmented-sort with CUB segmented sort ([#12347](https://github.com/rapidsai/cudf/pull/12347)) [@davidwendt](https://github.com/davidwendt) +- Add size check to make_offsets_child_column utility ([#12345](https://github.com/rapidsai/cudf/pull/12345)) [@davidwendt](https://github.com/davidwendt) +- Enable max compression ratio small block optimization for ZSTD ([#12338](https://github.com/rapidsai/cudf/pull/12338)) [@vuule](https://github.com/vuule) +- Fix warnings in test_monotonic.py ([#12334](https://github.com/rapidsai/cudf/pull/12334)) [@vyasr](https://github.com/vyasr) +- Improve JSON column creation performance (list offsets) ([#12330](https://github.com/rapidsai/cudf/pull/12330)) [@karthikeyann](https://github.com/karthikeyann) +- Upgrade to `arrow-10.0.1` ([#12327](https://github.com/rapidsai/cudf/pull/12327)) [@galipremsagar](https://github.com/galipremsagar) +- Fix warnings in test_orc.py ([#12326](https://github.com/rapidsai/cudf/pull/12326)) [@vyasr](https://github.com/vyasr) +- Fix warnings in test_groupby.py 
([#12324](https://github.com/rapidsai/cudf/pull/12324)) [@vyasr](https://github.com/vyasr) +- Fix `test_notebooks.sh` ([#12323](https://github.com/rapidsai/cudf/pull/12323)) [@ajschmidt8](https://github.com/ajschmidt8) +- Fix transform gtests coded in namespace cudf::test ([#12321](https://github.com/rapidsai/cudf/pull/12321)) [@davidwendt](https://github.com/davidwendt) +- Fix `check_style.sh` script ([#12320](https://github.com/rapidsai/cudf/pull/12320)) [@ajschmidt8](https://github.com/ajschmidt8) +- Rework cudf::strings::from_timestamps to use make_strings_children ([#12317](https://github.com/rapidsai/cudf/pull/12317)) [@davidwendt](https://github.com/davidwendt) +- Fix warnings in test_index.py ([#12313](https://github.com/rapidsai/cudf/pull/12313)) [@vyasr](https://github.com/vyasr) +- Fix warnings in test_multiindex.py ([#12310](https://github.com/rapidsai/cudf/pull/12310)) [@vyasr](https://github.com/vyasr) +- CSV, JSON reader to infer integer column with nulls as int64 instead of float64 ([#12309](https://github.com/rapidsai/cudf/pull/12309)) [@karthikeyann](https://github.com/karthikeyann) +- Fix warnings in test_indexing.py ([#12305](https://github.com/rapidsai/cudf/pull/12305)) [@vyasr](https://github.com/vyasr) +- Fix warnings in test_joining.py ([#12304](https://github.com/rapidsai/cudf/pull/12304)) [@vyasr](https://github.com/vyasr) +- Unpin `dask` and `distributed` for development ([#12302](https://github.com/rapidsai/cudf/pull/12302)) [@galipremsagar](https://github.com/galipremsagar) +- Re-enable `sccache` for Jenkins builds ([#12297](https://github.com/rapidsai/cudf/pull/12297)) [@ajschmidt8](https://github.com/ajschmidt8) +- Define needs for pr-builder workflow. 
([#12296](https://github.com/rapidsai/cudf/pull/12296)) [@bdice](https://github.com/bdice) +- Forward merge 22.12 into 23.02 ([#12294](https://github.com/rapidsai/cudf/pull/12294)) [@vyasr](https://github.com/vyasr) +- Fix warnings in test_stats.py ([#12293](https://github.com/rapidsai/cudf/pull/12293)) [@vyasr](https://github.com/vyasr) +- Fix table gtests coded in namespace cudf::test ([#12292](https://github.com/rapidsai/cudf/pull/12292)) [@davidwendt](https://github.com/davidwendt) +- Change cython for regex calls to use cudf::strings::regex_program ([#12289](https://github.com/rapidsai/cudf/pull/12289)) [@davidwendt](https://github.com/davidwendt) +- Improved error reporting when reading multiple JSON files ([#12285](https://github.com/rapidsai/cudf/pull/12285)) [@vuule](https://github.com/vuule) +- Deprecate Frame.sum_of_squares ([#12284](https://github.com/rapidsai/cudf/pull/12284)) [@vyasr](https://github.com/vyasr) +- Remove deprecated code for 23.02 ([#12281](https://github.com/rapidsai/cudf/pull/12281)) [@vyasr](https://github.com/vyasr) +- Clean up handling of max_page_size_bytes in Parquet writer ([#12277](https://github.com/rapidsai/cudf/pull/12277)) [@etseidl](https://github.com/etseidl) +- Fix replace gtests coded in namespace cudf::test ([#12270](https://github.com/rapidsai/cudf/pull/12270)) [@davidwendt](https://github.com/davidwendt) +- Add pandas nullable type support in `Index.to_pandas` ([#12268](https://github.com/rapidsai/cudf/pull/12268)) [@galipremsagar](https://github.com/galipremsagar) +- Rework nvtext::detokenize to use indexalator for row indices ([#12267](https://github.com/rapidsai/cudf/pull/12267)) [@davidwendt](https://github.com/davidwendt) +- Fix reduction gtests coded in namespace cudf::test 
([#12257](https://github.com/rapidsai/cudf/pull/12257)) [@davidwendt](https://github.com/davidwendt) +- Remove default parameters from cudf::detail::sort function declarations ([#12254](https://github.com/rapidsai/cudf/pull/12254)) [@davidwendt](https://github.com/davidwendt) +- Add `duplicated` support for `Series`, `DataFrame` and `Index` ([#12246](https://github.com/rapidsai/cudf/pull/12246)) [@galipremsagar](https://github.com/galipremsagar) +- Replace column/table test utilities with macros ([#12242](https://github.com/rapidsai/cudf/pull/12242)) [@PointKernel](https://github.com/PointKernel) +- Rework cudf::strings::pad and zfill to use make_strings_children ([#12238](https://github.com/rapidsai/cudf/pull/12238)) [@davidwendt](https://github.com/davidwendt) +- Fix sort gtests coded in namespace cudf::test ([#12237](https://github.com/rapidsai/cudf/pull/12237)) [@davidwendt](https://github.com/davidwendt) +- Wrapping concat and file writes in `@acquire_spill_lock()` ([#12232](https://github.com/rapidsai/cudf/pull/12232)) [@madsbk](https://github.com/madsbk) +- Rename `cudf::structs::detail::superimpose_parent_nulls` APIs ([#12230](https://github.com/rapidsai/cudf/pull/12230)) [@ttnghia](https://github.com/ttnghia) +- Cover parsing to decimal types in `read_json` tests ([#12229](https://github.com/rapidsai/cudf/pull/12229)) [@vuule](https://github.com/vuule) +- Spill Statistics ([#12223](https://github.com/rapidsai/cudf/pull/12223)) [@madsbk](https://github.com/madsbk) +- Use CUDF_JNI_ENABLE_PROFILING to conditionally enable profiling support. 
([#12221](https://github.com/rapidsai/cudf/pull/12221)) [@bdice](https://github.com/bdice) +- Clean up of `test_spilling.py` ([#12220](https://github.com/rapidsai/cudf/pull/12220)) [@madsbk](https://github.com/madsbk) +- Simplify repetitive boolean logic ([#12218](https://github.com/rapidsai/cudf/pull/12218)) [@vuule](https://github.com/vuule) +- Add `Series.hasnans` and `Index.hasnans` ([#12214](https://github.com/rapidsai/cudf/pull/12214)) [@galipremsagar](https://github.com/galipremsagar) +- Add cudf::strings:udf::replace function ([#12210](https://github.com/rapidsai/cudf/pull/12210)) [@davidwendt](https://github.com/davidwendt) +- Adds in new java APIs for appending byte arrays to host columnar data ([#12208](https://github.com/rapidsai/cudf/pull/12208)) [@revans2](https://github.com/revans2) +- Remove Python dependencies from Java CI. ([#12193](https://github.com/rapidsai/cudf/pull/12193)) [@bdice](https://github.com/bdice) +- Fix null order in sort-based groupby and improve groupby tests ([#12191](https://github.com/rapidsai/cudf/pull/12191)) [@divyegala](https://github.com/divyegala) +- Move strings children functions from cudf/strings/detail/utilities.cuh to new header ([#12185](https://github.com/rapidsai/cudf/pull/12185)) [@davidwendt](https://github.com/davidwendt) +- Clean up existing JNI scalar to column code ([#12173](https://github.com/rapidsai/cudf/pull/12173)) [@revans2](https://github.com/revans2) +- Remove JIT type names, refactor id_to_type. 
([#12158](https://github.com/rapidsai/cudf/pull/12158)) [@bdice](https://github.com/bdice) +- Update JNI version to 23.02.0-SNAPSHOT ([#12129](https://github.com/rapidsai/cudf/pull/12129)) [@pxLi](https://github.com/pxLi) +- Minor refactor of cpp/src/io/parquet/page_data.cu ([#12126](https://github.com/rapidsai/cudf/pull/12126)) [@etseidl](https://github.com/etseidl) +- Add codespell as a linter ([#12097](https://github.com/rapidsai/cudf/pull/12097)) [@benfred](https://github.com/benfred) +- Enable specifying exceptions in error macros ([#12078](https://github.com/rapidsai/cudf/pull/12078)) [@vyasr](https://github.com/vyasr) +- Move `_label_encoding` from Series to Column ([#12040](https://github.com/rapidsai/cudf/pull/12040)) [@shwina](https://github.com/shwina) +- Add GitHub Actions Workflows ([#12002](https://github.com/rapidsai/cudf/pull/12002)) [@ajschmidt8](https://github.com/ajschmidt8) +- Consolidate dask-cudf `groupby_agg` calls in one place ([#10835](https://github.com/rapidsai/cudf/pull/10835)) [@charlesbluca](https://github.com/charlesbluca) + +# cuDF 22.12.00 (8 Dec 2022) + +## 🚨 Breaking Changes + +- Add JNI for `substring` without 'end' parameter. 
([#12113](https://github.com/rapidsai/cudf/pull/12113)) [@firestarman](https://github.com/firestarman) +- Refactor `purge_nonempty_nulls` ([#12111](https://github.com/rapidsai/cudf/pull/12111)) [@ttnghia](https://github.com/ttnghia) +- Create an `int8` column in `read_csv` when all elements are missing ([#12110](https://github.com/rapidsai/cudf/pull/12110)) [@vuule](https://github.com/vuule) +- Throw an error when libcudf is built without cuFile and `LIBCUDF_CUFILE_POLICY` is set to `"ALWAYS"` ([#12080](https://github.com/rapidsai/cudf/pull/12080)) [@vuule](https://github.com/vuule) +- Fix type promotion edge cases in numerical binops ([#12074](https://github.com/rapidsai/cudf/pull/12074)) [@wence-](https://github.com/wence-) +- Reduce/Remove reliance on `**kwargs` and `*args` in `IO` readers & writers ([#12025](https://github.com/rapidsai/cudf/pull/12025)) [@galipremsagar](https://github.com/galipremsagar) +- Rollback of `DeviceBufferLike` ([#12009](https://github.com/rapidsai/cudf/pull/12009)) [@madsbk](https://github.com/madsbk) +- Remove unused `managed_allocator` ([#12005](https://github.com/rapidsai/cudf/pull/12005)) [@vyasr](https://github.com/vyasr) +- Pass column names to `write_csv` instead of `table_metadata` pointer ([#11972](https://github.com/rapidsai/cudf/pull/11972)) [@vuule](https://github.com/vuule) +- Accept const refs instead of const unique_ptr refs in reduce and scan APIs. ([#11960](https://github.com/rapidsai/cudf/pull/11960)) [@vyasr](https://github.com/vyasr) +- Default to equal NaNs in make_merge_sets_aggregation. 
([#11952](https://github.com/rapidsai/cudf/pull/11952)) [@bdice](https://github.com/bdice) +- Remove validation that requires introspection ([#11938](https://github.com/rapidsai/cudf/pull/11938)) [@vyasr](https://github.com/vyasr) +- Trim quotes for non-string values in nested json parsing ([#11898](https://github.com/rapidsai/cudf/pull/11898)) [@karthikeyann](https://github.com/karthikeyann) +- Add tests ensuring that cudf's default stream is always used ([#11875](https://github.com/rapidsai/cudf/pull/11875)) [@vyasr](https://github.com/vyasr) +- Support nested types as groupby keys in libcudf ([#11792](https://github.com/rapidsai/cudf/pull/11792)) [@PointKernel](https://github.com/PointKernel) +- Default to equal NaNs in make_collect_set_aggregation. ([#11621](https://github.com/rapidsai/cudf/pull/11621)) [@bdice](https://github.com/bdice) +- Removing int8 column option from parquet byte_array writing ([#11539](https://github.com/rapidsai/cudf/pull/11539)) [@hyperbolic2346](https://github.com/hyperbolic2346) +- part1: Simplify BaseIndex to an abstract class ([#10389](https://github.com/rapidsai/cudf/pull/10389)) [@skirui-source](https://github.com/skirui-source) + +## 🐛 Bug Fixes + +- Fix include line for IO Cython modules ([#12250](https://github.com/rapidsai/cudf/pull/12250)) [@vyasr](https://github.com/vyasr) +- Make dask pinning looser ([#12231](https://github.com/rapidsai/cudf/pull/12231)) [@vyasr](https://github.com/vyasr) +- Workaround for CUB segmented-sort bug with boolean keys ([#12217](https://github.com/rapidsai/cudf/pull/12217)) [@davidwendt](https://github.com/davidwendt) +- Fix `from_dict` backend dispatch to match upstream `dask` ([#12203](https://github.com/rapidsai/cudf/pull/12203)) [@galipremsagar](https://github.com/galipremsagar) +- Merge branch-22.10 
into branch-22.12 ([#12198](https://github.com/rapidsai/cudf/pull/12198)) [@davidwendt](https://github.com/davidwendt) +- Fix compression in ORC writer ([#12194](https://github.com/rapidsai/cudf/pull/12194)) [@vuule](https://github.com/vuule) +- Don't use CMake 3.25.0 as it has a show stopping FindCUDAToolkit bug ([#12188](https://github.com/rapidsai/cudf/pull/12188)) [@robertmaynard](https://github.com/robertmaynard) +- Fix data corruption when reading ORC files with empty stripes ([#12160](https://github.com/rapidsai/cudf/pull/12160)) [@vuule](https://github.com/vuule) +- Fix decimal binary operations ([#12142](https://github.com/rapidsai/cudf/pull/12142)) [@galipremsagar](https://github.com/galipremsagar) +- Ensure dlpack include is provided to cudf interop lib ([#12139](https://github.com/rapidsai/cudf/pull/12139)) [@robertmaynard](https://github.com/robertmaynard) +- Safely allocate `udf_string` pointers in `strings_udf` ([#12138](https://github.com/rapidsai/cudf/pull/12138)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Fix/disable jitify lto ([#12122](https://github.com/rapidsai/cudf/pull/12122)) [@robertmaynard](https://github.com/robertmaynard) +- Fix conditional_full_join benchmark ([#12121](https://github.com/rapidsai/cudf/pull/12121)) [@GregoryKimball](https://github.com/GregoryKimball) +- Fix regex working-memory-size refactor error ([#12119](https://github.com/rapidsai/cudf/pull/12119)) [@davidwendt](https://github.com/davidwendt) +- Add in negative size checks for columns ([#12118](https://github.com/rapidsai/cudf/pull/12118)) [@revans2](https://github.com/revans2) +- Add JNI for `substring` without 'end' parameter. 
([#12113](https://github.com/rapidsai/cudf/pull/12113)) [@firestarman](https://github.com/firestarman) +- Fix reading of CSV files with blank second row ([#12098](https://github.com/rapidsai/cudf/pull/12098)) [@vuule](https://github.com/vuule) +- Fix an error in IO with `GzipFile` type ([#12085](https://github.com/rapidsai/cudf/pull/12085)) [@galipremsagar](https://github.com/galipremsagar) +- Workaround groupby aggregate thrust::copy_if overflow ([#12079](https://github.com/rapidsai/cudf/pull/12079)) [@davidwendt](https://github.com/davidwendt) +- Fix alignment of compressed blocks in ORC writer ([#12077](https://github.com/rapidsai/cudf/pull/12077)) [@vuule](https://github.com/vuule) +- Fix singleton-range `__setitem__` edge case ([#12075](https://github.com/rapidsai/cudf/pull/12075)) [@wence-](https://github.com/wence-) +- Fix type promotion edge cases in numerical binops ([#12074](https://github.com/rapidsai/cudf/pull/12074)) [@wence-](https://github.com/wence-) +- Force using old fmt in nvbench. ([#12067](https://github.com/rapidsai/cudf/pull/12067)) [@vyasr](https://github.com/vyasr) +- Fixes List offset bug in Nested JSON reader ([#12060](https://github.com/rapidsai/cudf/pull/12060)) [@karthikeyann](https://github.com/karthikeyann) +- Allow falling back to `shim_60.ptx` by default in `strings_udf` ([#12056](https://github.com/rapidsai/cudf/pull/12056)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Force black exclusions for pre-commit. 
([#12036](https://github.com/rapidsai/cudf/pull/12036)) [@bdice](https://github.com/bdice) +- Add `memory_usage` & `items` implementation for `Struct` column & dtype ([#12033](https://github.com/rapidsai/cudf/pull/12033)) [@galipremsagar](https://github.com/galipremsagar) +- Reduce/Remove reliance on `**kwargs` and `*args` in `IO` readers & writers ([#12025](https://github.com/rapidsai/cudf/pull/12025)) [@galipremsagar](https://github.com/galipremsagar) +- Fixes bug in csv_reader_options construction in cython ([#12021](https://github.com/rapidsai/cudf/pull/12021)) [@karthikeyann](https://github.com/karthikeyann) +- Fix issues when both `usecols` and `names` options are used in `read_csv` ([#12018](https://github.com/rapidsai/cudf/pull/12018)) [@vuule](https://github.com/vuule) +- Port thrust's pinned_allocator to cudf, since Thrust 1.17 removes the type ([#12004](https://github.com/rapidsai/cudf/pull/12004)) [@robertmaynard](https://github.com/robertmaynard) +- Revert "Replace most of preprocessor usage in nvcomp adapter with `constexpr`" ([#11999](https://github.com/rapidsai/cudf/pull/11999)) [@vuule](https://github.com/vuule) +- Fix bug where `df.loc` resulting in single row could give wrong index ([#11998](https://github.com/rapidsai/cudf/pull/11998)) [@eriknw](https://github.com/eriknw) +- Switch to DISABLE_DEPRECATION_WARNINGS to match other RAPIDS projects ([#11989](https://github.com/rapidsai/cudf/pull/11989)) [@robertmaynard](https://github.com/robertmaynard) +- Fix maximum page size estimate in Parquet writer ([#11962](https://github.com/rapidsai/cudf/pull/11962)) [@vuule](https://github.com/vuule) +- Fix local offset handling in bgzip reader ([#11918](https://github.com/rapidsai/cudf/pull/11918)) [@upsj](https://github.com/upsj) +- Fix an issue reading struct-of-list types in 
Parquet. ([#11910](https://github.com/rapidsai/cudf/pull/11910)) [@nvdbaranec](https://github.com/nvdbaranec) +- Fix memcheck error in TypeInference.Timestamp gtest ([#11905](https://github.com/rapidsai/cudf/pull/11905)) [@davidwendt](https://github.com/davidwendt) +- Fix type casting in Series.__setitem__ ([#11904](https://github.com/rapidsai/cudf/pull/11904)) [@wence-](https://github.com/wence-) +- Fix memcheck error in get_dremel_data ([#11903](https://github.com/rapidsai/cudf/pull/11903)) [@davidwendt](https://github.com/davidwendt) +- Fixes Unsupported column type error due to empty list columns in Nested JSON reader ([#11897](https://github.com/rapidsai/cudf/pull/11897)) [@karthikeyann](https://github.com/karthikeyann) +- Fix segmented-sort to ignore indices outside the offsets ([#11888](https://github.com/rapidsai/cudf/pull/11888)) [@davidwendt](https://github.com/davidwendt) +- Fix cudf::stable_sorted_order for NaN and -NaN in FLOAT64 columns ([#11874](https://github.com/rapidsai/cudf/pull/11874)) [@davidwendt](https://github.com/davidwendt) +- Fix writing of Parquet files with many fragments ([#11869](https://github.com/rapidsai/cudf/pull/11869)) [@etseidl](https://github.com/etseidl) +- Fix RangeIndex unary operators. 
([#11868](https://github.com/rapidsai/cudf/pull/11868)) [@vyasr](https://github.com/vyasr) +- JNI Avoid NPE for reading host binary data ([#11865](https://github.com/rapidsai/cudf/pull/11865)) [@revans2](https://github.com/revans2) +- Fix decimal benchmark input data generation ([#11863](https://github.com/rapidsai/cudf/pull/11863)) [@karthikeyann](https://github.com/karthikeyann) +- Fix pre-commit copyright check ([#11860](https://github.com/rapidsai/cudf/pull/11860)) [@galipremsagar](https://github.com/galipremsagar) +- Fix Parquet support for seconds and milliseconds duration types ([#11854](https://github.com/rapidsai/cudf/pull/11854)) [@vuule](https://github.com/vuule) +- Ensure better compiler cache results between cudf cal-ver branches ([#11835](https://github.com/rapidsai/cudf/pull/11835)) [@robertmaynard](https://github.com/robertmaynard) +- Fix make_column_from_scalar for all-null strings column ([#11807](https://github.com/rapidsai/cudf/pull/11807)) [@davidwendt](https://github.com/davidwendt) +- Tell jitify_preprocess where to search for libnvrtc ([#11787](https://github.com/rapidsai/cudf/pull/11787)) [@robertmaynard](https://github.com/robertmaynard) +- add V2 page header support to parquet reader ([#11778](https://github.com/rapidsai/cudf/pull/11778)) [@etseidl](https://github.com/etseidl) +- Parquet reader: bug fix for a num_rows/skip_rows corner case, w/optimization for nested preprocessing ([#11752](https://github.com/rapidsai/cudf/pull/11752)) [@nvdbaranec](https://github.com/nvdbaranec) +- Determine if Arrow has S3 support at runtime in unit test. 
([#11560](https://github.com/rapidsai/cudf/pull/11560)) [@bdice](https://github.com/bdice) + +## 📖 Documentation + +- Use rapidsai CODE_OF_CONDUCT.md ([#12166](https://github.com/rapidsai/cudf/pull/12166)) [@bdice](https://github.com/bdice) +- Add symlinks to notebooks. ([#12128](https://github.com/rapidsai/cudf/pull/12128)) [@bdice](https://github.com/bdice) +- Add `truncate` API to python doc pages ([#12109](https://github.com/rapidsai/cudf/pull/12109)) [@galipremsagar](https://github.com/galipremsagar) +- Update Numba docs links. ([#12107](https://github.com/rapidsai/cudf/pull/12107)) [@bdice](https://github.com/bdice) +- Remove "Multi-GPU with Dask-cuDF" notebook. ([#12095](https://github.com/rapidsai/cudf/pull/12095)) [@bdice](https://github.com/bdice) +- Fix link to c++ developer guide from `CONTRIBUTING.md` ([#12084](https://github.com/rapidsai/cudf/pull/12084)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Add pivot_table and crosstab to docs. ([#12014](https://github.com/rapidsai/cudf/pull/12014)) [@bdice](https://github.com/bdice) +- Fix doxygen text for cudf::dictionary::encode ([#11991](https://github.com/rapidsai/cudf/pull/11991)) [@davidwendt](https://github.com/davidwendt) +- Replace default_stream_value with get_default_stream in docs. ([#11985](https://github.com/rapidsai/cudf/pull/11985)) [@vyasr](https://github.com/vyasr) +- Add dtype docs pages and docstrings for `cudf` specific dtypes ([#11974](https://github.com/rapidsai/cudf/pull/11974)) [@galipremsagar](https://github.com/galipremsagar) +- Update Unit Testing in libcudf guidelines to code tests outside the cudf::test namespace ([#11959](https://github.com/rapidsai/cudf/pull/11959)) [@davidwendt](https://github.com/davidwendt) +- Rename libcudf++ to libcudf. 
([#11953](https://github.com/rapidsai/cudf/pull/11953)) [@bdice](https://github.com/bdice) +- Fix documentation referring to removed as_gpu_matrix method. ([#11937](https://github.com/rapidsai/cudf/pull/11937)) [@bdice](https://github.com/bdice) +- Remove "experimental" warning for struct columns in ORC reader and writer ([#11880](https://github.com/rapidsai/cudf/pull/11880)) [@vuule](https://github.com/vuule) +- Initial draft of policies and guidelines for libcudf usage. ([#11853](https://github.com/rapidsai/cudf/pull/11853)) [@vyasr](https://github.com/vyasr) +- Add clear indication of non-GPU accelerated parameters in read_json docstring ([#11825](https://github.com/rapidsai/cudf/pull/11825)) [@GregoryKimball](https://github.com/GregoryKimball) +- Add developer docs for writing tests ([#11199](https://github.com/rapidsai/cudf/pull/11199)) [@vyasr](https://github.com/vyasr) + +## 🚀 New Features + +- Adds an EventHandler to Java MemoryBuffer to be invoked on close ([#12125](https://github.com/rapidsai/cudf/pull/12125)) [@abellina](https://github.com/abellina) +- Support `+` in `strings_udf` ([#12117](https://github.com/rapidsai/cudf/pull/12117)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Support `upper` and `lower` in `strings_udf` ([#12099](https://github.com/rapidsai/cudf/pull/12099)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Add wheel builds ([#12096](https://github.com/rapidsai/cudf/pull/12096)) [@vyasr](https://github.com/vyasr) +- Allow setting malloc heap size in string udfs ([#12094](https://github.com/rapidsai/cudf/pull/12094)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Support `strip`, `lstrip`, and `rstrip` in `strings_udf` ([#12091](https://github.com/rapidsai/cudf/pull/12091)) 
[@brandon-b-miller](https://github.com/brandon-b-miller) +- Mark nvcomp zstd compression stable ([#12059](https://github.com/rapidsai/cudf/pull/12059)) [@jbrennan333](https://github.com/jbrennan333) +- Add debug-only onAllocated/onDeallocated to RmmEventHandler ([#12054](https://github.com/rapidsai/cudf/pull/12054)) [@abellina](https://github.com/abellina) +- Enable building against the libarrow contained in pyarrow ([#12034](https://github.com/rapidsai/cudf/pull/12034)) [@vyasr](https://github.com/vyasr) +- Add strings `like` jni and native method ([#12032](https://github.com/rapidsai/cudf/pull/12032)) [@cindyyuanjiang](https://github.com/cindyyuanjiang) +- Cleanup common parsing code in JSON, CSV reader ([#12022](https://github.com/rapidsai/cudf/pull/12022)) [@karthikeyann](https://github.com/karthikeyann) +- byte_range support for JSON Lines format ([#12017](https://github.com/rapidsai/cudf/pull/12017)) [@karthikeyann](https://github.com/karthikeyann) +- Minor cleanup of root CMakeLists.txt for better organization ([#11988](https://github.com/rapidsai/cudf/pull/11988)) [@robertmaynard](https://github.com/robertmaynard) +- Add inplace arithmetic operators to `MaskedType` ([#11987](https://github.com/rapidsai/cudf/pull/11987)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Implement JNI for chunked Parquet reader ([#11961](https://github.com/rapidsai/cudf/pull/11961)) [@ttnghia](https://github.com/ttnghia) +- Add method argument to DataFrame.quantile ([#11957](https://github.com/rapidsai/cudf/pull/11957)) [@rjzamora](https://github.com/rjzamora) +- Add gpu memory watermark apis to JNI ([#11950](https://github.com/rapidsai/cudf/pull/11950)) [@abellina](https://github.com/abellina) +- Adds retryCount to RmmEventHandler.onAllocFailure 
([#11940](https://github.com/rapidsai/cudf/pull/11940)) [@abellina](https://github.com/abellina) +- Enable returning string data from UDFs used through `apply` ([#11933](https://github.com/rapidsai/cudf/pull/11933)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Switch over to rapids-cmake patches for thrust ([#11921](https://github.com/rapidsai/cudf/pull/11921)) [@robertmaynard](https://github.com/robertmaynard) +- Add strings udf C++ classes and functions for phase II ([#11912](https://github.com/rapidsai/cudf/pull/11912)) [@davidwendt](https://github.com/davidwendt) +- Trim quotes for non-string values in nested json parsing ([#11898](https://github.com/rapidsai/cudf/pull/11898)) [@karthikeyann](https://github.com/karthikeyann) +- Enable CEC for `strings_udf` ([#11884](https://github.com/rapidsai/cudf/pull/11884)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- ArrowIPCTableWriter writes an empty batch in the case of an empty table. 
([#11883](https://github.com/rapidsai/cudf/pull/11883)) [@firestarman](https://github.com/firestarman) +- Implement chunked Parquet reader ([#11867](https://github.com/rapidsai/cudf/pull/11867)) [@ttnghia](https://github.com/ttnghia) +- Add `read_orc_metadata` to libcudf ([#11815](https://github.com/rapidsai/cudf/pull/11815)) [@vuule](https://github.com/vuule) +- Support nested types as groupby keys in libcudf ([#11792](https://github.com/rapidsai/cudf/pull/11792)) [@PointKernel](https://github.com/PointKernel) +- Adding feature Truncate to DataFrame and Series ([#11435](https://github.com/rapidsai/cudf/pull/11435)) [@VamsiTallam95](https://github.com/VamsiTallam95) + +## 🛠️ Improvements + +- Reduce number of tests marked `spilling` ([#12197](https://github.com/rapidsai/cudf/pull/12197)) [@madsbk](https://github.com/madsbk) +- Pin `dask` and `distributed` for release ([#12165](https://github.com/rapidsai/cudf/pull/12165)) [@galipremsagar](https://github.com/galipremsagar) +- Don't rely on GNU find in headers_test.sh ([#12164](https://github.com/rapidsai/cudf/pull/12164)) [@wence-](https://github.com/wence-) +- Update cp.clip call ([#12148](https://github.com/rapidsai/cudf/pull/12148)) [@quasiben](https://github.com/quasiben) +- Enable automatic column projection in groupby().agg ([#12124](https://github.com/rapidsai/cudf/pull/12124)) [@rjzamora](https://github.com/rjzamora) +- Refactor `purge_nonempty_nulls` ([#12111](https://github.com/rapidsai/cudf/pull/12111)) [@ttnghia](https://github.com/ttnghia) +- Create an `int8` column in `read_csv` when all elements are missing ([#12110](https://github.com/rapidsai/cudf/pull/12110)) [@vuule](https://github.com/vuule) +- Spilling to host memory ([#12106](https://github.com/rapidsai/cudf/pull/12106)) 
[@madsbk](https://github.com/madsbk) +- First pass of `pd.read_orc` changes in tests ([#12103](https://github.com/rapidsai/cudf/pull/12103)) [@galipremsagar](https://github.com/galipremsagar) +- Expose engine argument in dask_cudf.read_json ([#12101](https://github.com/rapidsai/cudf/pull/12101)) [@rjzamora](https://github.com/rjzamora) +- Remove CUDA 10 compatibility code. ([#12088](https://github.com/rapidsai/cudf/pull/12088)) [@bdice](https://github.com/bdice) +- Move and update `dask` nightly install in CI ([#12082](https://github.com/rapidsai/cudf/pull/12082)) [@galipremsagar](https://github.com/galipremsagar) +- Throw an error when libcudf is built without cuFile and `LIBCUDF_CUFILE_POLICY` is set to `"ALWAYS"` ([#12080](https://github.com/rapidsai/cudf/pull/12080)) [@vuule](https://github.com/vuule) +- Remove macros that inspect the contents of exceptions ([#12076](https://github.com/rapidsai/cudf/pull/12076)) [@vyasr](https://github.com/vyasr) +- Fix ingest_raw_data performance issue in Nested JSON reader due to RVO ([#12070](https://github.com/rapidsai/cudf/pull/12070)) [@karthikeyann](https://github.com/karthikeyann) +- Remove overflow error during decimal binops ([#12063](https://github.com/rapidsai/cudf/pull/12063)) [@galipremsagar](https://github.com/galipremsagar) +- Change cudf::detail::tdigest to cudf::tdigest::detail ([#12050](https://github.com/rapidsai/cudf/pull/12050)) [@davidwendt](https://github.com/davidwendt) +- Fix quantile gtests coded in namespace cudf::test ([#12049](https://github.com/rapidsai/cudf/pull/12049)) [@davidwendt](https://github.com/davidwendt) +- Add support for `DataFrame.from_dict`/`to_dict` and `Series.to_dict` ([#12048](https://github.com/rapidsai/cudf/pull/12048)) [@galipremsagar](https://github.com/galipremsagar) +- Refactor Parquet reader 
([#12046](https://github.com/rapidsai/cudf/pull/12046)) [@ttnghia](https://github.com/ttnghia) +- Forward merge 22.10 into 22.12 ([#12045](https://github.com/rapidsai/cudf/pull/12045)) [@vyasr](https://github.com/vyasr) +- Standardize newlines at ends of files. ([#12042](https://github.com/rapidsai/cudf/pull/12042)) [@bdice](https://github.com/bdice) +- Trim trailing whitespace from all files. ([#12041](https://github.com/rapidsai/cudf/pull/12041)) [@bdice](https://github.com/bdice) +- Use nosync policy in gather and scatter implementations. ([#12038](https://github.com/rapidsai/cudf/pull/12038)) [@bdice](https://github.com/bdice) +- Remove smart quotes from all docstrings. ([#12035](https://github.com/rapidsai/cudf/pull/12035)) [@bdice](https://github.com/bdice) +- Update cuda-python dependency to 11.7.1 ([#12030](https://github.com/rapidsai/cudf/pull/12030)) [@galipremsagar](https://github.com/galipremsagar) +- Add cython-lint to pre-commit checks. ([#12020](https://github.com/rapidsai/cudf/pull/12020)) [@bdice](https://github.com/bdice) +- Use pragma once ([#12019](https://github.com/rapidsai/cudf/pull/12019)) [@bdice](https://github.com/bdice) +- New GHA to add issues/prs to project board ([#12016](https://github.com/rapidsai/cudf/pull/12016)) [@jarmak-nv](https://github.com/jarmak-nv) +- Add DataFrame.pivot_table. 
([#12015](https://github.com/rapidsai/cudf/pull/12015)) [@bdice](https://github.com/bdice) +- Rollback of `DeviceBufferLike` ([#12009](https://github.com/rapidsai/cudf/pull/12009)) [@madsbk](https://github.com/madsbk) +- Remove default parameters for nvtext::detail functions ([#12007](https://github.com/rapidsai/cudf/pull/12007)) [@davidwendt](https://github.com/davidwendt) +- Remove default parameters for cudf::dictionary::detail functions ([#12006](https://github.com/rapidsai/cudf/pull/12006)) [@davidwendt](https://github.com/davidwendt) +- Remove unused `managed_allocator` ([#12005](https://github.com/rapidsai/cudf/pull/12005)) [@vyasr](https://github.com/vyasr) +- Remove default parameters for cudf::strings::detail functions ([#12003](https://github.com/rapidsai/cudf/pull/12003)) [@davidwendt](https://github.com/davidwendt) +- Remove unnecessary code from dask-cudf _Frame ([#12001](https://github.com/rapidsai/cudf/pull/12001)) [@rjzamora](https://github.com/rjzamora) +- Ignore python docs build artifacts ([#12000](https://github.com/rapidsai/cudf/pull/12000)) [@galipremsagar](https://github.com/galipremsagar) +- Use rapids-cmake for google benchmark. 
([#11997](https://github.com/rapidsai/cudf/pull/11997)) [@vyasr](https://github.com/vyasr) +- Leverage rapids_cython for more automated RPATH handling ([#11996](https://github.com/rapidsai/cudf/pull/11996)) [@vyasr](https://github.com/vyasr) +- Remove stale labeler ([#11995](https://github.com/rapidsai/cudf/pull/11995)) [@raydouglass](https://github.com/raydouglass) +- Move protobuf compilation to CMake ([#11986](https://github.com/rapidsai/cudf/pull/11986)) [@vyasr](https://github.com/vyasr) +- Replace most of preprocessor usage in nvcomp adapter with `constexpr` ([#11980](https://github.com/rapidsai/cudf/pull/11980)) [@vuule](https://github.com/vuule) +- Add missing noexcepts to column_in_metadata methods ([#11973](https://github.com/rapidsai/cudf/pull/11973)) [@vyasr](https://github.com/vyasr) +- Pass column names to `write_csv` instead of `table_metadata` pointer ([#11972](https://github.com/rapidsai/cudf/pull/11972)) [@vuule](https://github.com/vuule) +- Accelerate libcudf segmented sort with CUB segmented sort ([#11969](https://github.com/rapidsai/cudf/pull/11969)) [@davidwendt](https://github.com/davidwendt) +- Feature/remove default streams ([#11967](https://github.com/rapidsai/cudf/pull/11967)) [@vyasr](https://github.com/vyasr) +- Add pool memory resource to libcudf basic example ([#11966](https://github.com/rapidsai/cudf/pull/11966)) [@davidwendt](https://github.com/davidwendt) +- Fix some libcudf calls to cudf::detail::gather ([#11963](https://github.com/rapidsai/cudf/pull/11963)) [@davidwendt](https://github.com/davidwendt) +- Accept const refs instead of const unique_ptr refs in reduce and scan APIs. ([#11960](https://github.com/rapidsai/cudf/pull/11960)) [@vyasr](https://github.com/vyasr) +- Add deprecation warning for set_allocator. 
([#11958](https://github.com/rapidsai/cudf/pull/11958)) [@vyasr](https://github.com/vyasr) +- Fix lists and structs gtests coded in namespace cudf::test ([#11956](https://github.com/rapidsai/cudf/pull/11956)) [@davidwendt](https://github.com/davidwendt) +- Add full page indexes to Parquet writer benchmarks ([#11955](https://github.com/rapidsai/cudf/pull/11955)) [@etseidl](https://github.com/etseidl) +- Use gather-based strings factory in cudf::strings::strip ([#11954](https://github.com/rapidsai/cudf/pull/11954)) [@davidwendt](https://github.com/davidwendt) +- Default to equal NaNs in make_merge_sets_aggregation. ([#11952](https://github.com/rapidsai/cudf/pull/11952)) [@bdice](https://github.com/bdice) +- Add `strip_delimiters` option to `read_text` ([#11946](https://github.com/rapidsai/cudf/pull/11946)) [@upsj](https://github.com/upsj) +- Refactor multibyte_split `output_builder` ([#11945](https://github.com/rapidsai/cudf/pull/11945)) [@upsj](https://github.com/upsj) +- Remove validation that requires introspection ([#11938](https://github.com/rapidsai/cudf/pull/11938)) [@vyasr](https://github.com/vyasr) +- Add `.str.find_multiple` API ([#11928](https://github.com/rapidsai/cudf/pull/11928)) [@galipremsagar](https://github.com/galipremsagar) +- Add regex_program class for use with all regex APIs ([#11927](https://github.com/rapidsai/cudf/pull/11927)) [@davidwendt](https://github.com/davidwendt) +- Enable backend dispatching for Dask-DataFrame creation ([#11920](https://github.com/rapidsai/cudf/pull/11920)) [@rjzamora](https://github.com/rjzamora) +- Performance improvement in JSON Tree traversal ([#11919](https://github.com/rapidsai/cudf/pull/11919)) [@karthikeyann](https://github.com/karthikeyann) +- Fix some gtests incorrectly coded in namespace cudf::test (part I) 
([#11917](https://github.com/rapidsai/cudf/pull/11917)) [@davidwendt](https://github.com/davidwendt) +- Refactor pad/zfill functions for reuse with strings udf ([#11914](https://github.com/rapidsai/cudf/pull/11914)) [@davidwendt](https://github.com/davidwendt) +- Add `nanosecond` & `microsecond` to `DatetimeProperties` ([#11911](https://github.com/rapidsai/cudf/pull/11911)) [@galipremsagar](https://github.com/galipremsagar) +- Pin mimesis version in setup.py. ([#11906](https://github.com/rapidsai/cudf/pull/11906)) [@bdice](https://github.com/bdice) +- Error on `ListColumn` or any new unsupported column in `cudf.Index` ([#11902](https://github.com/rapidsai/cudf/pull/11902)) [@galipremsagar](https://github.com/galipremsagar) +- Add thrust output iterator fix (1805) to thrust.patch ([#11900](https://github.com/rapidsai/cudf/pull/11900)) [@davidwendt](https://github.com/davidwendt) +- Relax `codecov` threshold diff ([#11899](https://github.com/rapidsai/cudf/pull/11899)) [@galipremsagar](https://github.com/galipremsagar) +- Use public APIs in STREAM_COMPACTION_NVBENCH ([#11892](https://github.com/rapidsai/cudf/pull/11892)) [@GregoryKimball](https://github.com/GregoryKimball) +- Add coverage for string UDF tests. 
([#11891](https://github.com/rapidsai/cudf/pull/11891)) [@vyasr](https://github.com/vyasr) +- Provide `data_chunk_source` wrapper for `datasource` ([#11886](https://github.com/rapidsai/cudf/pull/11886)) [@upsj](https://github.com/upsj) +- Handle `multibyte_split` byte_range out-of-bounds offsets on host ([#11885](https://github.com/rapidsai/cudf/pull/11885)) [@upsj](https://github.com/upsj) +- Add tests ensuring that cudf's default stream is always used ([#11875](https://github.com/rapidsai/cudf/pull/11875)) [@vyasr](https://github.com/vyasr) +- Change expect_strings_empty into expect_column_empty libcudf test utility ([#11873](https://github.com/rapidsai/cudf/pull/11873)) [@davidwendt](https://github.com/davidwendt) +- Add ngroup ([#11871](https://github.com/rapidsai/cudf/pull/11871)) [@shwina](https://github.com/shwina) +- Reduce memory usage in nested JSON parser - tree generation ([#11864](https://github.com/rapidsai/cudf/pull/11864)) [@karthikeyann](https://github.com/karthikeyann) +- Unpin `dask` and `distributed` for development ([#11859](https://github.com/rapidsai/cudf/pull/11859)) [@galipremsagar](https://github.com/galipremsagar) +- Remove unused includes for table/row_operators ([#11857](https://github.com/rapidsai/cudf/pull/11857)) [@GregoryKimball](https://github.com/GregoryKimball) +- Use conda-forge's `pyorc` ([#11855](https://github.com/rapidsai/cudf/pull/11855)) [@jakirkham](https://github.com/jakirkham) +- Add libcudf strings examples ([#11849](https://github.com/rapidsai/cudf/pull/11849)) [@davidwendt](https://github.com/davidwendt) +- Remove `cudf_io` namespace alias ([#11827](https://github.com/rapidsai/cudf/pull/11827)) [@vuule](https://github.com/vuule) +- Test/remove thrust vector usage ([#11813](https://github.com/rapidsai/cudf/pull/11813)) 
[@vyasr](https://github.com/vyasr) +- Add BGZIP reader to python `read_text` ([#11802](https://github.com/rapidsai/cudf/pull/11802)) [@upsj](https://github.com/upsj) +- Merge branch-22.10 into branch-22.12 ([#11801](https://github.com/rapidsai/cudf/pull/11801)) [@davidwendt](https://github.com/davidwendt) +- Fix compile warning from CUDF_FUNC_RANGE in a member function ([#11798](https://github.com/rapidsai/cudf/pull/11798)) [@davidwendt](https://github.com/davidwendt) +- Update cudf JNI version to 22.12.0-SNAPSHOT ([#11764](https://github.com/rapidsai/cudf/pull/11764)) [@pxLi](https://github.com/pxLi) +- Update flake8 to 5.0.4 and use flake8-force to check Cython. ([#11736](https://github.com/rapidsai/cudf/pull/11736)) [@bdice](https://github.com/bdice) +- Add BGZIP multibyte_split benchmark ([#11723](https://github.com/rapidsai/cudf/pull/11723)) [@upsj](https://github.com/upsj) +- Bifurcate Dependency Lists ([#11674](https://github.com/rapidsai/cudf/pull/11674)) [@bdice](https://github.com/bdice) +- Default to equal NaNs in make_collect_set_aggregation. 
([#11621](https://github.com/rapidsai/cudf/pull/11621)) [@bdice](https://github.com/bdice) +- Conform "bench_isin" to match generator column names ([#11549](https://github.com/rapidsai/cudf/pull/11549)) [@GregoryKimball](https://github.com/GregoryKimball) +- Removing int8 column option from parquet byte_array writing ([#11539](https://github.com/rapidsai/cudf/pull/11539)) [@hyperbolic2346](https://github.com/hyperbolic2346) +- Add checks for HLG layers in dask-cudf groupby tests ([#10853](https://github.com/rapidsai/cudf/pull/10853)) [@charlesbluca](https://github.com/charlesbluca) +- part1: Simplify BaseIndex to an abstract class ([#10389](https://github.com/rapidsai/cudf/pull/10389)) [@skirui-source](https://github.com/skirui-source) +- Make all `nvcc` warnings into errors ([#8916](https://github.com/rapidsai/cudf/pull/8916)) [@trxcllnt](https://github.com/trxcllnt) + +# cuDF 22.10.00 (12 Oct 2022) + +## 🚨 Breaking Changes + +- Disable Zstandard decompression on nvCOMP 2.4 and Pascal GPUs ([#11856](https://github.com/rapidsai/cudf/pull/11856)) [@vuule](https://github.com/vuule) +- Disable nvCOMP DEFLATE integration ([#11811](https://github.com/rapidsai/cudf/pull/11811)) [@vuule](https://github.com/vuule) +- Fix return type of `Index.isna` & `Index.notna` ([#11769](https://github.com/rapidsai/cudf/pull/11769)) [@galipremsagar](https://github.com/galipremsagar) +- Remove `kwargs` in `read_csv` & `to_csv` ([#11762](https://github.com/rapidsai/cudf/pull/11762)) [@galipremsagar](https://github.com/galipremsagar) +- Fix `cudf::partition*` APIs that do not return offsets for empty output table ([#11709](https://github.com/rapidsai/cudf/pull/11709)) [@ttnghia](https://github.com/ttnghia) +- Fix regex negated classes to not automatically include new-lines 
([#11644](https://github.com/rapidsai/cudf/pull/11644)) [@davidwendt](https://github.com/davidwendt) +- Update zfill to match Python output ([#11634](https://github.com/rapidsai/cudf/pull/11634)) [@davidwendt](https://github.com/davidwendt) +- Upgrade `pandas` to `1.5` ([#11617](https://github.com/rapidsai/cudf/pull/11617)) [@galipremsagar](https://github.com/galipremsagar) +- Change default value of `ordered` to `False` in `CategoricalDtype` ([#11604](https://github.com/rapidsai/cudf/pull/11604)) [@galipremsagar](https://github.com/galipremsagar) +- Move cudf::strings::findall_record to cudf::strings::findall ([#11575](https://github.com/rapidsai/cudf/pull/11575)) [@davidwendt](https://github.com/davidwendt) +- Adding optional parquet reader schema ([#11524](https://github.com/rapidsai/cudf/pull/11524)) [@hyperbolic2346](https://github.com/hyperbolic2346) +- Deprecate `skiprows` and `num_rows` in `read_orc` ([#11522](https://github.com/rapidsai/cudf/pull/11522)) [@galipremsagar](https://github.com/galipremsagar) +- Remove support for skip_rows / num_rows options in the parquet reader. ([#11503](https://github.com/rapidsai/cudf/pull/11503)) [@nvdbaranec](https://github.com/nvdbaranec) +- Drop support for `skiprows` and `num_rows` in `cudf.read_parquet` ([#11480](https://github.com/rapidsai/cudf/pull/11480)) [@galipremsagar](https://github.com/galipremsagar) +- Disable Arrow S3 support by default. ([#11470](https://github.com/rapidsai/cudf/pull/11470)) [@bdice](https://github.com/bdice) +- Convert thrust::optional usages to std::optional ([#11455](https://github.com/rapidsai/cudf/pull/11455)) [@robertmaynard](https://github.com/robertmaynard) +- Remove unused is_struct trait. 
([#11450](https://github.com/rapidsai/cudf/pull/11450)) [@bdice](https://github.com/bdice) +- Refactor the `Buffer` class ([#11447](https://github.com/rapidsai/cudf/pull/11447)) [@madsbk](https://github.com/madsbk) +- Return empty dataframe when reading an ORC file using empty `columns` option ([#11446](https://github.com/rapidsai/cudf/pull/11446)) [@vuule](https://github.com/vuule) +- Refactor pad_side and strip_type enums into side_type enum ([#11438](https://github.com/rapidsai/cudf/pull/11438)) [@davidwendt](https://github.com/davidwendt) +- Remove HASH_SERIAL_MURMUR3 / serial32BitMurmurHash3 ([#11383](https://github.com/rapidsai/cudf/pull/11383)) [@bdice](https://github.com/bdice) +- Use the new JSON parser when the experimental reader is selected ([#11364](https://github.com/rapidsai/cudf/pull/11364)) [@vuule](https://github.com/vuule) +- Remove deprecated Series.applymap. ([#11031](https://github.com/rapidsai/cudf/pull/11031)) [@bdice](https://github.com/bdice) +- Remove deprecated expand parameter from str.findall. 
([#11030](https://github.com/rapidsai/cudf/pull/11030)) [@bdice](https://github.com/bdice) + +## 🐛 Bug Fixes + +- Fixes bug in temporary decompression space estimation before calling nvcomp ([#11879](https://github.com/rapidsai/cudf/pull/11879)) [@abellina](https://github.com/abellina) +- Handle `ptx` file paths during `strings_udf` import ([#11862](https://github.com/rapidsai/cudf/pull/11862)) [@galipremsagar](https://github.com/galipremsagar) +- Disable Zstandard decompression on nvCOMP 2.4 and Pascal GPUs ([#11856](https://github.com/rapidsai/cudf/pull/11856)) [@vuule](https://github.com/vuule) +- Reset `strings_udf` CEC and solve several related issues ([#11846](https://github.com/rapidsai/cudf/pull/11846)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Fix bug in new shuffle-based groupby implementation ([#11836](https://github.com/rapidsai/cudf/pull/11836)) [@rjzamora](https://github.com/rjzamora) +- Fix `is_valid` checks in `Scalar._binaryop` ([#11818](https://github.com/rapidsai/cudf/pull/11818)) [@wence-](https://github.com/wence-) +- Fix operator `NotImplemented` issue with `numpy` ([#11816](https://github.com/rapidsai/cudf/pull/11816)) [@galipremsagar](https://github.com/galipremsagar) +- Disable nvCOMP DEFLATE integration ([#11811](https://github.com/rapidsai/cudf/pull/11811)) [@vuule](https://github.com/vuule) +- Build `strings_udf` package with other python packages in nightlies ([#11808](https://github.com/rapidsai/cudf/pull/11808)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Revert problematic shuffle=explicit-comms changes ([#11803](https://github.com/rapidsai/cudf/pull/11803)) [@rjzamora](https://github.com/rjzamora) +- Fix regex out-of-bounds write in strided rows logic ([#11797](https://github.com/rapidsai/cudf/pull/11797)) 
[@davidwendt](https://github.com/davidwendt) +- Build `cudf` locally before building `strings_udf` conda packages in CI ([#11785](https://github.com/rapidsai/cudf/pull/11785)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Fix an issue in cudf::row_bit_count involving structs and lists at multiple levels. ([#11779](https://github.com/rapidsai/cudf/pull/11779)) [@nvdbaranec](https://github.com/nvdbaranec) +- Fix return type of `Index.isna` & `Index.notna` ([#11769](https://github.com/rapidsai/cudf/pull/11769)) [@galipremsagar](https://github.com/galipremsagar) +- Fix issue with set-item in case of `list` and `struct` types ([#11760](https://github.com/rapidsai/cudf/pull/11760)) [@galipremsagar](https://github.com/galipremsagar) +- Ensure all libcudf APIs run on cudf's default stream ([#11759](https://github.com/rapidsai/cudf/pull/11759)) [@vyasr](https://github.com/vyasr) +- Resolve dask_cudf failures caused by upstream groupby changes ([#11755](https://github.com/rapidsai/cudf/pull/11755)) [@rjzamora](https://github.com/rjzamora) +- Fix ORC string sum statistics ([#11740](https://github.com/rapidsai/cudf/pull/11740)) [@vuule](https://github.com/vuule) +- Add `strings_udf` package for python 3.9 ([#11730](https://github.com/rapidsai/cudf/pull/11730)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Ensure that all tests launch kernels on cudf's default stream ([#11726](https://github.com/rapidsai/cudf/pull/11726)) [@vyasr](https://github.com/vyasr) +- Don't assume stream is a compile-time constant expression ([#11725](https://github.com/rapidsai/cudf/pull/11725)) [@vyasr](https://github.com/vyasr) +- Fix get_thrust.cmake format at patch command ([#11715](https://github.com/rapidsai/cudf/pull/11715)) [@davidwendt](https://github.com/davidwendt) +- Fix 
`cudf::partition*` APIs that do not return offsets for empty output table ([#11709](https://github.com/rapidsai/cudf/pull/11709)) [@ttnghia](https://github.com/ttnghia) +- Fix cudf::lists::sort_lists for NaN and Infinity values ([#11703](https://github.com/rapidsai/cudf/pull/11703)) [@davidwendt](https://github.com/davidwendt) +- Modify ORC reader timestamp parsing to match the apache reader behavior ([#11699](https://github.com/rapidsai/cudf/pull/11699)) [@vuule](https://github.com/vuule) +- Fix `DataFrame.from_arrow` to preserve type metadata ([#11698](https://github.com/rapidsai/cudf/pull/11698)) [@galipremsagar](https://github.com/galipremsagar) +- Fix compile error due to missing header ([#11697](https://github.com/rapidsai/cudf/pull/11697)) [@ttnghia](https://github.com/ttnghia) +- Default to Snappy compression in `to_orc` when using cuDF or Dask ([#11690](https://github.com/rapidsai/cudf/pull/11690)) [@vuule](https://github.com/vuule) +- Fix an issue related to `MultiIndex` when `group_keys=True` ([#11689](https://github.com/rapidsai/cudf/pull/11689)) [@galipremsagar](https://github.com/galipremsagar) +- Transfer correct dtype to exploded column ([#11687](https://github.com/rapidsai/cudf/pull/11687)) [@wence-](https://github.com/wence-) +- Ignore protobuf generated files in `mypy` checks ([#11685](https://github.com/rapidsai/cudf/pull/11685)) [@galipremsagar](https://github.com/galipremsagar) +- Maintain the index name after `.loc` ([#11677](https://github.com/rapidsai/cudf/pull/11677)) [@shwina](https://github.com/shwina) +- Fix issue with extracting nested column data & dtype preservation ([#11671](https://github.com/rapidsai/cudf/pull/11671)) [@galipremsagar](https://github.com/galipremsagar) +- Ensure that all cudf tests and benchmarks are conda env aware 
([#11666](https://github.com/rapidsai/cudf/pull/11666)) [@robertmaynard](https://github.com/robertmaynard) +- Update to Thrust 1.17.2 to fix cub ODR issues ([#11665](https://github.com/rapidsai/cudf/pull/11665)) [@robertmaynard](https://github.com/robertmaynard) +- Fix multi-file remote datasource bug ([#11655](https://github.com/rapidsai/cudf/pull/11655)) [@rjzamora](https://github.com/rjzamora) +- Fix invalid regex quantifier check to not include alternation ([#11654](https://github.com/rapidsai/cudf/pull/11654)) [@davidwendt](https://github.com/davidwendt) +- Fix bug in `device_write()`: it uses an incorrect size ([#11651](https://github.com/rapidsai/cudf/pull/11651)) [@madsbk](https://github.com/madsbk) +- fixes overflows in benchmarks ([#11649](https://github.com/rapidsai/cudf/pull/11649)) [@elstehle](https://github.com/elstehle) +- Fix regex negated classes to not automatically include new-lines ([#11644](https://github.com/rapidsai/cudf/pull/11644)) [@davidwendt](https://github.com/davidwendt) +- Fix compile error in benchmark nested_json.cpp ([#11637](https://github.com/rapidsai/cudf/pull/11637)) [@davidwendt](https://github.com/davidwendt) +- Update zfill to match Python output ([#11634](https://github.com/rapidsai/cudf/pull/11634)) [@davidwendt](https://github.com/davidwendt) +- Removed converted type for INT32 and INT64 since they do not convert ([#11627](https://github.com/rapidsai/cudf/pull/11627)) [@hyperbolic2346](https://github.com/hyperbolic2346) +- Fix host scalars construction of nested types ([#11612](https://github.com/rapidsai/cudf/pull/11612)) [@galipremsagar](https://github.com/galipremsagar) +- Fix compile warning in nested_json_gpu.cu ([#11607](https://github.com/rapidsai/cudf/pull/11607)) [@davidwendt](https://github.com/davidwendt) +- Change default 
value of `ordered` to `False` in `CategoricalDtype` ([#11604](https://github.com/rapidsai/cudf/pull/11604)) [@galipremsagar](https://github.com/galipremsagar) +- Preserve order if necessary when deduping categoricals internally ([#11597](https://github.com/rapidsai/cudf/pull/11597)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Add is_timestamp test for leap second (60) ([#11594](https://github.com/rapidsai/cudf/pull/11594)) [@davidwendt](https://github.com/davidwendt) +- Fix an issue with `to_arrow` when column name type is not a string ([#11590](https://github.com/rapidsai/cudf/pull/11590)) [@galipremsagar](https://github.com/galipremsagar) +- Fix exception in segmented-reduce benchmark ([#11588](https://github.com/rapidsai/cudf/pull/11588)) [@davidwendt](https://github.com/davidwendt) +- Fix encode/decode of negative timestamps in ORC reader/writer ([#11586](https://github.com/rapidsai/cudf/pull/11586)) [@vuule](https://github.com/vuule) +- Correct distribution data type in `quantiles` benchmark ([#11584](https://github.com/rapidsai/cudf/pull/11584)) [@vuule](https://github.com/vuule) +- Fix multibyte_split benchmark for host buffers ([#11583](https://github.com/rapidsai/cudf/pull/11583)) [@upsj](https://github.com/upsj) +- xfail custreamz display test for now ([#11567](https://github.com/rapidsai/cudf/pull/11567)) [@shwina](https://github.com/shwina) +- Fix JNI for TableWithMeta to use schema_info instead of column_names ([#11566](https://github.com/rapidsai/cudf/pull/11566)) [@jlowe](https://github.com/jlowe) +- Reduce code duplication for `dask` & `distributed` nightly/stable installs ([#11565](https://github.com/rapidsai/cudf/pull/11565)) [@galipremsagar](https://github.com/galipremsagar) +- Fix groupby failures in dask_cudf CI 
([#11561](https://github.com/rapidsai/cudf/pull/11561)) [@rjzamora](https://github.com/rjzamora) +- Fix for pivot: error when 'values' is a multicharacter string ([#11538](https://github.com/rapidsai/cudf/pull/11538)) [@shaswat-indian](https://github.com/shaswat-indian) +- find_package(cudf) + arrow9 usable with cudf build directory ([#11535](https://github.com/rapidsai/cudf/pull/11535)) [@robertmaynard](https://github.com/robertmaynard) +- Fixing crash when writing binary nested data in parquet ([#11526](https://github.com/rapidsai/cudf/pull/11526)) [@hyperbolic2346](https://github.com/hyperbolic2346) +- Fix for: error when assigning a value to an empty series ([#11523](https://github.com/rapidsai/cudf/pull/11523)) [@shaswat-indian](https://github.com/shaswat-indian) +- Fix invalid results from conditional-left-anti-join in debug build ([#11517](https://github.com/rapidsai/cudf/pull/11517)) [@davidwendt](https://github.com/davidwendt) +- Fix cmake error after upgrading to Arrow 9 ([#11513](https://github.com/rapidsai/cudf/pull/11513)) [@ttnghia](https://github.com/ttnghia) +- Fix reverse binary operators acting on a host value and cudf.Scalar ([#11512](https://github.com/rapidsai/cudf/pull/11512)) [@bdice](https://github.com/bdice) +- Update parquet fuzz tests to drop support for `skiprows` & `num_rows` ([#11505](https://github.com/rapidsai/cudf/pull/11505)) [@galipremsagar](https://github.com/galipremsagar) +- Use rapids-cmake 22.10 best practice for RAPIDS.cmake location ([#11493](https://github.com/rapidsai/cudf/pull/11493)) [@robertmaynard](https://github.com/robertmaynard) +- Handle some zero-sized corner cases in dlpack interop ([#11449](https://github.com/rapidsai/cudf/pull/11449)) [@wence-](https://github.com/wence-) +- Return empty dataframe when reading an ORC file using empty 
`columns` option ([#11446](https://github.com/rapidsai/cudf/pull/11446)) [@vuule](https://github.com/vuule) +- libcudf c++ example updated to CPM version 0.35.3 ([#11417](https://github.com/rapidsai/cudf/pull/11417)) [@robertmaynard](https://github.com/robertmaynard) +- Fix regex quantifier check to include capture groups ([#11373](https://github.com/rapidsai/cudf/pull/11373)) [@davidwendt](https://github.com/davidwendt) +- Fix read_text when byte_range is aligned with field ([#11371](https://github.com/rapidsai/cudf/pull/11371)) [@upsj](https://github.com/upsj) +- Fix to_timestamps truncated subsecond calculation ([#11367](https://github.com/rapidsai/cudf/pull/11367)) [@davidwendt](https://github.com/davidwendt) +- column: calculate null_count before release()ing the cudf::column ([#11365](https://github.com/rapidsai/cudf/pull/11365)) [@wence-](https://github.com/wence-) + +## 📖 Documentation + +- Update `guide-to-udfs` notebook ([#11861](https://github.com/rapidsai/cudf/pull/11861)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Update docstring for cudf.read_text ([#11799](https://github.com/rapidsai/cudf/pull/11799)) [@GregoryKimball](https://github.com/GregoryKimball) +- Add doc section for `list` & `struct` handling ([#11770](https://github.com/rapidsai/cudf/pull/11770)) [@galipremsagar](https://github.com/galipremsagar) +- Document that minimum required CMake version is now 3.23.1 ([#11751](https://github.com/rapidsai/cudf/pull/11751)) [@robertmaynard](https://github.com/robertmaynard) +- Update libcudf documentation build command in DOCUMENTATION.md ([#11735](https://github.com/rapidsai/cudf/pull/11735)) [@davidwendt](https://github.com/davidwendt) +- Add docs for use of string data to `DataFrame.apply` and `Series.apply` and update guide to UDFs notebook 
([#11733](https://github.com/rapidsai/cudf/pull/11733)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Enable more Pydocstyle rules ([#11582](https://github.com/rapidsai/cudf/pull/11582)) [@bdice](https://github.com/bdice) +- Remove unused cpp/img folder ([#11554](https://github.com/rapidsai/cudf/pull/11554)) [@davidwendt](https://github.com/davidwendt) +- Publish C++ developer docs ([#11475](https://github.com/rapidsai/cudf/pull/11475)) [@vyasr](https://github.com/vyasr) +- Fix a misalignment in `cudf.get_dummies` docstring ([#11443](https://github.com/rapidsai/cudf/pull/11443)) [@galipremsagar](https://github.com/galipremsagar) +- Update contributing doc to include links to the developer guides ([#11390](https://github.com/rapidsai/cudf/pull/11390)) [@davidwendt](https://github.com/davidwendt) +- Fix table_view_base doxygen format ([#11340](https://github.com/rapidsai/cudf/pull/11340)) [@davidwendt](https://github.com/davidwendt) +- Create main developer guide for Python ([#11235](https://github.com/rapidsai/cudf/pull/11235)) [@vyasr](https://github.com/vyasr) +- Add developer documentation for benchmarking ([#11122](https://github.com/rapidsai/cudf/pull/11122)) [@vyasr](https://github.com/vyasr) +- cuDF error handling document ([#7917](https://github.com/rapidsai/cudf/pull/7917)) [@isVoid](https://github.com/isVoid) + +## 🚀 New Features + +- Add hasNull statistic reading ability to ORC ([#11747](https://github.com/rapidsai/cudf/pull/11747)) [@devavret](https://github.com/devavret) +- Add `istitle` to string UDFs ([#11738](https://github.com/rapidsai/cudf/pull/11738)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- JSON Column creation in GPU ([#11714](https://github.com/rapidsai/cudf/pull/11714)) 
[@karthikeyann](https://github.com/karthikeyann) +- Adds option to take explicit nested schema for nested JSON reader ([#11682](https://github.com/rapidsai/cudf/pull/11682)) [@elstehle](https://github.com/elstehle) +- Add BGZIP `data_chunk_reader` ([#11652](https://github.com/rapidsai/cudf/pull/11652)) [@upsj](https://github.com/upsj) +- Support DECIMAL order-by for RANGE window functions ([#11645](https://github.com/rapidsai/cudf/pull/11645)) [@mythrocks](https://github.com/mythrocks) +- changing version of cmake to 3.23.3 ([#11619](https://github.com/rapidsai/cudf/pull/11619)) [@hyperbolic2346](https://github.com/hyperbolic2346) +- Generate unique keys table in java JNI `contiguousSplitGroups` ([#11614](https://github.com/rapidsai/cudf/pull/11614)) [@res-life](https://github.com/res-life) +- Generic type casting to support the new nested JSON reader ([#11613](https://github.com/rapidsai/cudf/pull/11613)) [@elstehle](https://github.com/elstehle) +- JSON tree traversal ([#11610](https://github.com/rapidsai/cudf/pull/11610)) [@karthikeyann](https://github.com/karthikeyann) +- Add casting operators to masked UDFs ([#11578](https://github.com/rapidsai/cudf/pull/11578)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Adds type inference and type conversion for leaf-columns to the nested JSON parser ([#11574](https://github.com/rapidsai/cudf/pull/11574)) [@elstehle](https://github.com/elstehle) +- Add strings 'like' function ([#11558](https://github.com/rapidsai/cudf/pull/11558)) [@davidwendt](https://github.com/davidwendt) +- Handle hyphen as literal for regex cclass when incomplete range ([#11557](https://github.com/rapidsai/cudf/pull/11557)) [@davidwendt](https://github.com/davidwendt) +- Enable ZSTD compression in ORC and Parquet writers 
([#11551](https://github.com/rapidsai/cudf/pull/11551)) [@vuule](https://github.com/vuule) +- Adds support for json lines format to the nested JSON reader ([#11534](https://github.com/rapidsai/cudf/pull/11534)) [@elstehle](https://github.com/elstehle) +- Adding optional parquet reader schema ([#11524](https://github.com/rapidsai/cudf/pull/11524)) [@hyperbolic2346](https://github.com/hyperbolic2346) +- Adds GPU implementation of JSON-token-stream to JSON-tree ([#11518](https://github.com/rapidsai/cudf/pull/11518)) [@karthikeyann](https://github.com/karthikeyann) +- Add `gdb` pretty-printers for simple types ([#11499](https://github.com/rapidsai/cudf/pull/11499)) [@upsj](https://github.com/upsj) +- Add `create_random_column` function to the data generator ([#11490](https://github.com/rapidsai/cudf/pull/11490)) [@vuule](https://github.com/vuule) +- Add fluent API builder to `data_profile` ([#11479](https://github.com/rapidsai/cudf/pull/11479)) [@vuule](https://github.com/vuule) +- Adds Nested Json benchmark ([#11466](https://github.com/rapidsai/cudf/pull/11466)) [@karthikeyann](https://github.com/karthikeyann) +- Convert thrust::optional usages to std::optional ([#11455](https://github.com/rapidsai/cudf/pull/11455)) [@robertmaynard](https://github.com/robertmaynard) +- Python API for the future experimental JSON reader ([#11426](https://github.com/rapidsai/cudf/pull/11426)) [@vuule](https://github.com/vuule) +- Return schema info from JSON reader ([#11419](https://github.com/rapidsai/cudf/pull/11419)) [@vuule](https://github.com/vuule) +- Add regex ASCII flag support for matching builtin character classes ([#11404](https://github.com/rapidsai/cudf/pull/11404)) [@davidwendt](https://github.com/davidwendt) +- Truncate parquet column indexes 
([#11403](https://github.com/rapidsai/cudf/pull/11403)) [@etseidl](https://github.com/etseidl) +- Adds the end-to-end JSON parser implementation ([#11388](https://github.com/rapidsai/cudf/pull/11388)) [@elstehle](https://github.com/elstehle) +- Use the new JSON parser when the experimental reader is selected ([#11364](https://github.com/rapidsai/cudf/pull/11364)) [@vuule](https://github.com/vuule) +- Add placeholder for the experimental JSON reader ([#11334](https://github.com/rapidsai/cudf/pull/11334)) [@vuule](https://github.com/vuule) +- Add read-only functions on string dtypes to `DataFrame.apply` and `Series.apply` ([#11319](https://github.com/rapidsai/cudf/pull/11319)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Added 'crosstab' and 'pivot_table' features ([#11314](https://github.com/rapidsai/cudf/pull/11314)) [@shaswat-indian](https://github.com/shaswat-indian) +- Quickly error out when trying to build with unsupported nvcc versions ([#11297](https://github.com/rapidsai/cudf/pull/11297)) [@robertmaynard](https://github.com/robertmaynard) +- Adds JSON tokenizer ([#11264](https://github.com/rapidsai/cudf/pull/11264)) [@elstehle](https://github.com/elstehle) +- List lexicographic comparator ([#11129](https://github.com/rapidsai/cudf/pull/11129)) [@devavret](https://github.com/devavret) +- Add generic type inference for cuIO ([#11121](https://github.com/rapidsai/cudf/pull/11121)) [@PointKernel](https://github.com/PointKernel) +- Fully support nested types in `cudf::contains` ([#10656](https://github.com/rapidsai/cudf/pull/10656)) [@ttnghia](https://github.com/ttnghia) +- Support nested types in `lists::contains` ([#10548](https://github.com/rapidsai/cudf/pull/10548)) [@ttnghia](https://github.com/ttnghia) + +## 🛠️ Improvements + +- Pin `dask` and 
`distributed` for release ([#11822](https://github.com/rapidsai/cudf/pull/11822)) [@galipremsagar](https://github.com/galipremsagar) +- Add examples for Nested JSON reader ([#11814](https://github.com/rapidsai/cudf/pull/11814)) [@GregoryKimball](https://github.com/GregoryKimball) +- Support shuffle-based groupby aggregations in dask_cudf ([#11800](https://github.com/rapidsai/cudf/pull/11800)) [@rjzamora](https://github.com/rjzamora) +- Update strings udf version updater script ([#11772](https://github.com/rapidsai/cudf/pull/11772)) [@galipremsagar](https://github.com/galipremsagar) +- Remove `kwargs` in `read_csv` & `to_csv` ([#11762](https://github.com/rapidsai/cudf/pull/11762)) [@galipremsagar](https://github.com/galipremsagar) +- Pass `dtype` param to avoid `pd.Series` warnings ([#11761](https://github.com/rapidsai/cudf/pull/11761)) [@galipremsagar](https://github.com/galipremsagar) +- Enable `schema_element` & `keep_quotes` support in json reader ([#11746](https://github.com/rapidsai/cudf/pull/11746)) [@galipremsagar](https://github.com/galipremsagar) +- Add ability to construct `ListColumn` when size is `None` ([#11745](https://github.com/rapidsai/cudf/pull/11745)) [@galipremsagar](https://github.com/galipremsagar) +- Reduces memory requirements in JSON parser and adds bytes/s and peak memory usage to benchmarks ([#11732](https://github.com/rapidsai/cudf/pull/11732)) [@elstehle](https://github.com/elstehle) +- Add missing copyright headers. 
([#11712](https://github.com/rapidsai/cudf/pull/11712)) [@bdice](https://github.com/bdice) +- Fix copyright check issues in pre-commit ([#11711](https://github.com/rapidsai/cudf/pull/11711)) [@bdice](https://github.com/bdice) +- Include decimal in supported types for range window order-by columns ([#11710](https://github.com/rapidsai/cudf/pull/11710)) [@mythrocks](https://github.com/mythrocks) +- Disable very large column gtest for contiguous-split ([#11706](https://github.com/rapidsai/cudf/pull/11706)) [@davidwendt](https://github.com/davidwendt) +- Drop split_out=None test from groupby.agg ([#11704](https://github.com/rapidsai/cudf/pull/11704)) [@wence-](https://github.com/wence-) +- Use CubinLinker for CUDA Minor Version Compatibility ([#11701](https://github.com/rapidsai/cudf/pull/11701)) [@gmarkall](https://github.com/gmarkall) +- Add regex capture-group parameter to auto convert to non-capture groups ([#11695](https://github.com/rapidsai/cudf/pull/11695)) [@davidwendt](https://github.com/davidwendt) +- Add a `__dataframe__` method to the protocol dataframe object ([#11692](https://github.com/rapidsai/cudf/pull/11692)) [@rgommers](https://github.com/rgommers) +- Special-case multibyte_split for single-byte delimiter ([#11681](https://github.com/rapidsai/cudf/pull/11681)) [@upsj](https://github.com/upsj) +- Remove isort exclusions ([#11680](https://github.com/rapidsai/cudf/pull/11680)) [@bdice](https://github.com/bdice) +- Refactor CSV reader benchmarks with nvbench ([#11678](https://github.com/rapidsai/cudf/pull/11678)) [@PointKernel](https://github.com/PointKernel) +- Check conda recipe headers with pre-commit ([#11669](https://github.com/rapidsai/cudf/pull/11669)) [@bdice](https://github.com/bdice) +- Remove redundant style check for clang-format. 
([#11668](https://github.com/rapidsai/cudf/pull/11668)) [@bdice](https://github.com/bdice) +- Add support for `group_keys` in `groupby` ([#11659](https://github.com/rapidsai/cudf/pull/11659)) [@galipremsagar](https://github.com/galipremsagar) +- Fix pandoc pinning. ([#11658](https://github.com/rapidsai/cudf/pull/11658)) [@bdice](https://github.com/bdice) +- Revert removal of skip_rows / num_rows options from the Parquet reader. ([#11657](https://github.com/rapidsai/cudf/pull/11657)) [@nvdbaranec](https://github.com/nvdbaranec) +- Update git metadata ([#11647](https://github.com/rapidsai/cudf/pull/11647)) [@bdice](https://github.com/bdice) +- Call set_null_count on a returning column if null-count is known ([#11646](https://github.com/rapidsai/cudf/pull/11646)) [@davidwendt](https://github.com/davidwendt) +- Fix some libcudf detail calls not passing the stream variable ([#11642](https://github.com/rapidsai/cudf/pull/11642)) [@davidwendt](https://github.com/davidwendt) +- Update to mypy 0.971 ([#11640](https://github.com/rapidsai/cudf/pull/11640)) [@wence-](https://github.com/wence-) +- Refactor strings strip functor to details header ([#11635](https://github.com/rapidsai/cudf/pull/11635)) [@davidwendt](https://github.com/davidwendt) +- Fix incorrect `nullCount` in `get_json_object` ([#11633](https://github.com/rapidsai/cudf/pull/11633)) [@trxcllnt](https://github.com/trxcllnt) +- Simplify `hostdevice_vector` ([#11631](https://github.com/rapidsai/cudf/pull/11631)) [@upsj](https://github.com/upsj) +- Refactor parquet writer benchmarks with nvbench ([#11623](https://github.com/rapidsai/cudf/pull/11623)) [@PointKernel](https://github.com/PointKernel) +- Rework contains_scalar to check nulls at runtime ([#11622](https://github.com/rapidsai/cudf/pull/11622)) 
[@davidwendt](https://github.com/davidwendt) +- Fix incorrect memory resource used in rolling temp columns ([#11618](https://github.com/rapidsai/cudf/pull/11618)) [@mythrocks](https://github.com/mythrocks) +- Upgrade `pandas` to `1.5` ([#11617](https://github.com/rapidsai/cudf/pull/11617)) [@galipremsagar](https://github.com/galipremsagar) +- Move type-dispatcher calls from traits.hpp to traits.cpp ([#11616](https://github.com/rapidsai/cudf/pull/11616)) [@davidwendt](https://github.com/davidwendt) +- Refactor parquet reader benchmarks with nvbench ([#11611](https://github.com/rapidsai/cudf/pull/11611)) [@PointKernel](https://github.com/PointKernel) +- Forward-merge branch-22.08 to branch-22.10 ([#11608](https://github.com/rapidsai/cudf/pull/11608)) [@bdice](https://github.com/bdice) +- Use stream in Java API. ([#11601](https://github.com/rapidsai/cudf/pull/11601)) [@bdice](https://github.com/bdice) +- Refactors of public/detail APIs, CUDF_FUNC_RANGE, stream handling. 
([#11600](https://github.com/rapidsai/cudf/pull/11600)) [@bdice](https://github.com/bdice) +- Improve ORC writer benchmark with nvbench ([#11598](https://github.com/rapidsai/cudf/pull/11598)) [@PointKernel](https://github.com/PointKernel) +- Tune multibyte_split kernel ([#11587](https://github.com/rapidsai/cudf/pull/11587)) [@upsj](https://github.com/upsj) +- Move split_utils.cuh to strings/detail ([#11585](https://github.com/rapidsai/cudf/pull/11585)) [@davidwendt](https://github.com/davidwendt) +- Fix warnings due to compiler regression with `if constexpr` ([#11581](https://github.com/rapidsai/cudf/pull/11581)) [@ttnghia](https://github.com/ttnghia) +- Add full 24-bit dictionary support to Parquet writer ([#11580](https://github.com/rapidsai/cudf/pull/11580)) [@etseidl](https://github.com/etseidl) +- Expose "explicit-comms" option in shuffle-based dask_cudf functions ([#11576](https://github.com/rapidsai/cudf/pull/11576)) [@rjzamora](https://github.com/rjzamora) +- Move cudf::strings::findall_record to cudf::strings::findall ([#11575](https://github.com/rapidsai/cudf/pull/11575)) [@davidwendt](https://github.com/davidwendt) +- Refactor dask_cudf groupby to use apply_concat_apply ([#11571](https://github.com/rapidsai/cudf/pull/11571)) [@rjzamora](https://github.com/rjzamora) +- Add ability to write `list(struct)` columns as `map` type in orc writer ([#11568](https://github.com/rapidsai/cudf/pull/11568)) [@galipremsagar](https://github.com/galipremsagar) +- Add byte_range to multibyte_split benchmark + NVBench refactor ([#11562](https://github.com/rapidsai/cudf/pull/11562)) [@upsj](https://github.com/upsj) +- JNI support for writing binary columns in parquet ([#11556](https://github.com/rapidsai/cudf/pull/11556)) [@revans2](https://github.com/revans2) +- Support additional 
dictionary bit widths in Parquet writer ([#11547](https://github.com/rapidsai/cudf/pull/11547)) [@etseidl](https://github.com/etseidl) +- Refactor string/numeric conversion utilities ([#11545](https://github.com/rapidsai/cudf/pull/11545)) [@davidwendt](https://github.com/davidwendt) +- Removing unnecessary asserts in parquet tests ([#11544](https://github.com/rapidsai/cudf/pull/11544)) [@hyperbolic2346](https://github.com/hyperbolic2346) +- Clean up ORC reader benchmarks with NVBench ([#11543](https://github.com/rapidsai/cudf/pull/11543)) [@PointKernel](https://github.com/PointKernel) +- Reuse MurmurHash3_32 in Parquet page data. ([#11528](https://github.com/rapidsai/cudf/pull/11528)) [@bdice](https://github.com/bdice) +- Add hexadecimal value separators ([#11527](https://github.com/rapidsai/cudf/pull/11527)) [@bdice](https://github.com/bdice) +- Deprecate `skiprows` and `num_rows` in `read_orc` ([#11522](https://github.com/rapidsai/cudf/pull/11522)) [@galipremsagar](https://github.com/galipremsagar) +- Struct support for `NULL_EQUALS` binary operation ([#11520](https://github.com/rapidsai/cudf/pull/11520)) [@rwlee](https://github.com/rwlee) +- Bump hadoop-common from 3.2.3 to 3.2.4 in /java ([#11516](https://github.com/rapidsai/cudf/pull/11516)) [@dependabot[bot]](https://github.com/dependabot[bot]) +- Fix Feather test warning. 
([#11511](https://github.com/rapidsai/cudf/pull/11511)) [@bdice](https://github.com/bdice) +- copy_range ballot_syncs to have no execution dependency ([#11508](https://github.com/rapidsai/cudf/pull/11508)) [@robertmaynard](https://github.com/robertmaynard) +- Upgrade to `arrow-9.x` ([#11507](https://github.com/rapidsai/cudf/pull/11507)) [@galipremsagar](https://github.com/galipremsagar) +- Remove support for skip_rows / num_rows options in the parquet reader. ([#11503](https://github.com/rapidsai/cudf/pull/11503)) [@nvdbaranec](https://github.com/nvdbaranec) +- Single-pass `multibyte_split` ([#11500](https://github.com/rapidsai/cudf/pull/11500)) [@upsj](https://github.com/upsj) +- Sanitize percentile_approx() output for empty input ([#11498](https://github.com/rapidsai/cudf/pull/11498)) [@SrikarVanavasam](https://github.com/SrikarVanavasam) +- Unpin `dask` and `distributed` for development ([#11492](https://github.com/rapidsai/cudf/pull/11492)) [@galipremsagar](https://github.com/galipremsagar) +- Move SparkMurmurHash3_32 functor. 
([#11489](https://github.com/rapidsai/cudf/pull/11489)) [@bdice](https://github.com/bdice) +- Refactor group_nunique.cu to use nullate::DYNAMIC for reduce-by-key functor ([#11482](https://github.com/rapidsai/cudf/pull/11482)) [@davidwendt](https://github.com/davidwendt) +- Drop support for `skiprows` and `num_rows` in `cudf.read_parquet` ([#11480](https://github.com/rapidsai/cudf/pull/11480)) [@galipremsagar](https://github.com/galipremsagar) +- Add reduction `distinct_count` benchmark ([#11473](https://github.com/rapidsai/cudf/pull/11473)) [@ttnghia](https://github.com/ttnghia) +- Add groupby `nunique` aggregation benchmark ([#11472](https://github.com/rapidsai/cudf/pull/11472)) [@ttnghia](https://github.com/ttnghia) +- Disable Arrow S3 support by default. ([#11470](https://github.com/rapidsai/cudf/pull/11470)) [@bdice](https://github.com/bdice) +- Add groupby `max` aggregation benchmark ([#11464](https://github.com/rapidsai/cudf/pull/11464)) [@ttnghia](https://github.com/ttnghia) +- Extract Dremel encoding code from Parquet ([#11461](https://github.com/rapidsai/cudf/pull/11461)) [@vyasr](https://github.com/vyasr) +- Add missing Thrust #includes. ([#11457](https://github.com/rapidsai/cudf/pull/11457)) [@bdice](https://github.com/bdice) +- Make CMake hooks verbose ([#11456](https://github.com/rapidsai/cudf/pull/11456)) [@vyasr](https://github.com/vyasr) +- Control Parquet page size through Python API ([#11454](https://github.com/rapidsai/cudf/pull/11454)) [@etseidl](https://github.com/etseidl) +- Add control of Parquet column index creation to python ([#11453](https://github.com/rapidsai/cudf/pull/11453)) [@etseidl](https://github.com/etseidl) +- Remove unused is_struct trait. 
([#11450](https://github.com/rapidsai/cudf/pull/11450)) [@bdice](https://github.com/bdice) +- Refactor the `Buffer` class ([#11447](https://github.com/rapidsai/cudf/pull/11447)) [@madsbk](https://github.com/madsbk) +- Refactor pad_side and strip_type enums into side_type enum ([#11438](https://github.com/rapidsai/cudf/pull/11438)) [@davidwendt](https://github.com/davidwendt) +- Update to Thrust 1.17.0 ([#11437](https://github.com/rapidsai/cudf/pull/11437)) [@bdice](https://github.com/bdice) +- Add in JNI for parsing JSON data and getting the metadata back too. ([#11431](https://github.com/rapidsai/cudf/pull/11431)) [@revans2](https://github.com/revans2) +- Convert byte_array_view to use std::byte ([#11424](https://github.com/rapidsai/cudf/pull/11424)) [@hyperbolic2346](https://github.com/hyperbolic2346) +- Deprecate unflatten_nested_columns ([#11421](https://github.com/rapidsai/cudf/pull/11421)) [@SrikarVanavasam](https://github.com/SrikarVanavasam) +- Remove HASH_SERIAL_MURMUR3 / serial32BitMurmurHash3 ([#11383](https://github.com/rapidsai/cudf/pull/11383)) [@bdice](https://github.com/bdice) +- Add Spark list hashing Java tests ([#11379](https://github.com/rapidsai/cudf/pull/11379)) [@bdice](https://github.com/bdice) +- Move cmake to the build section. 
([#11376](https://github.com/rapidsai/cudf/pull/11376)) [@vyasr](https://github.com/vyasr) +- Remove use of CUDA driver API calls from libcudf ([#11370](https://github.com/rapidsai/cudf/pull/11370)) [@shwina](https://github.com/shwina) +- Add column constructor from device_uvector&& ([#11356](https://github.com/rapidsai/cudf/pull/11356)) [@SrikarVanavasam](https://github.com/SrikarVanavasam) +- Remove unused custreamz thirdparty directory ([#11343](https://github.com/rapidsai/cudf/pull/11343)) [@vyasr](https://github.com/vyasr) +- Update jni version to 22.10.0-SNAPSHOT ([#11338](https://github.com/rapidsai/cudf/pull/11338)) [@pxLi](https://github.com/pxLi) +- Enable using upstream jitify2 ([#11287](https://github.com/rapidsai/cudf/pull/11287)) [@shwina](https://github.com/shwina) +- Cache cudf.Scalar ([#11246](https://github.com/rapidsai/cudf/pull/11246)) [@shwina](https://github.com/shwina) +- Remove deprecated Series.applymap. ([#11031](https://github.com/rapidsai/cudf/pull/11031)) [@bdice](https://github.com/bdice) +- Remove deprecated expand parameter from str.findall. 
([#11030](https://github.com/rapidsai/cudf/pull/11030)) [@bdice](https://github.com/bdice) + +# cuDF 22.08.00 (17 Aug 2022) + +## 🚨 Breaking Changes + +- Remove legacy join APIs ([#11274](https://github.com/rapidsai/cudf/pull/11274)) [@vyasr](https://github.com/vyasr) +- Remove `lists::drop_list_duplicates` ([#11236](https://github.com/rapidsai/cudf/pull/11236)) [@ttnghia](https://github.com/ttnghia) +- Remove Index.replace API ([#11131](https://github.com/rapidsai/cudf/pull/11131)) [@vyasr](https://github.com/vyasr) +- Remove deprecated Index methods from Frame ([#11073](https://github.com/rapidsai/cudf/pull/11073)) [@vyasr](https://github.com/vyasr) +- Remove public API of cudf.merge_sorted. ([#11032](https://github.com/rapidsai/cudf/pull/11032)) [@bdice](https://github.com/bdice) +- Drop python `3.7` in code-base ([#11029](https://github.com/rapidsai/cudf/pull/11029)) [@galipremsagar](https://github.com/galipremsagar) +- Return empty dataframe when reading a Parquet file using empty `columns` option ([#11018](https://github.com/rapidsai/cudf/pull/11018)) [@vuule](https://github.com/vuule) +- Remove Arrow CUDA IPC code ([#10995](https://github.com/rapidsai/cudf/pull/10995)) [@shwina](https://github.com/shwina) +- Buffer: make `.ptr` read-only ([#10872](https://github.com/rapidsai/cudf/pull/10872)) [@madsbk](https://github.com/madsbk) + +## 🐛 Bug Fixes + +- Fix `distributed` error related to `loop_in_thread` ([#11428](https://github.com/rapidsai/cudf/pull/11428)) [@galipremsagar](https://github.com/galipremsagar) +- Relax arrow pinning to just 8.x and remove cuda build dependency from cudf recipe ([#11412](https://github.com/rapidsai/cudf/pull/11412)) [@kkraus14](https://github.com/kkraus14) +- Revert "Allow CuPy 11" 
([#11409](https://github.com/rapidsai/cudf/pull/11409)) [@jakirkham](https://github.com/jakirkham) +- Fix `moto` timeouts ([#11369](https://github.com/rapidsai/cudf/pull/11369)) [@galipremsagar](https://github.com/galipremsagar) +- Set `+/-infinity` as the `identity` values for floating-point numbers in device operators `min` and `max` ([#11357](https://github.com/rapidsai/cudf/pull/11357)) [@ttnghia](https://github.com/ttnghia) +- Fix memory_usage() for `ListSeries` ([#11355](https://github.com/rapidsai/cudf/pull/11355)) [@thomcom](https://github.com/thomcom) +- Fix constructing Column from column_view with expired mask ([#11354](https://github.com/rapidsai/cudf/pull/11354)) [@shwina](https://github.com/shwina) +- Handle parquet corner case: Columns with more rows than are in the row group. ([#11353](https://github.com/rapidsai/cudf/pull/11353)) [@nvdbaranec](https://github.com/nvdbaranec) +- Fix `DatetimeIndex` & `TimedeltaIndex` constructors ([#11342](https://github.com/rapidsai/cudf/pull/11342)) [@galipremsagar](https://github.com/galipremsagar) +- Fix unsigned-compare compile warning in IntPow binops ([#11339](https://github.com/rapidsai/cudf/pull/11339)) [@davidwendt](https://github.com/davidwendt) +- Fix performance issue and add a new code path to `cudf::detail::contains` ([#11330](https://github.com/rapidsai/cudf/pull/11330)) [@ttnghia](https://github.com/ttnghia) +- Pin `pytorch` to temporarily unblock from `libcupti` errors ([#11289](https://github.com/rapidsai/cudf/pull/11289)) [@galipremsagar](https://github.com/galipremsagar) +- Workaround for nvcomp zstd overwriting blocks for orc due to underestimate of sizes ([#11288](https://github.com/rapidsai/cudf/pull/11288)) [@jbrennan333](https://github.com/jbrennan333) +- Fix inconsistency when hashing two tables in 
`cudf::detail::contains` ([#11284](https://github.com/rapidsai/cudf/pull/11284)) [@ttnghia](https://github.com/ttnghia) +- Fix issue related to numpy array and `category` dtype ([#11282](https://github.com/rapidsai/cudf/pull/11282)) [@galipremsagar](https://github.com/galipremsagar) +- Add NotImplementedError when on is specified in DataFrame.join. ([#11275](https://github.com/rapidsai/cudf/pull/11275)) [@vyasr](https://github.com/vyasr) +- Fix invalid allocate_like() and empty_like() tests. ([#11268](https://github.com/rapidsai/cudf/pull/11268)) [@nvdbaranec](https://github.com/nvdbaranec) +- Returns DataFrame When Concatenating Along Axis 1 ([#11263](https://github.com/rapidsai/cudf/pull/11263)) [@isVoid](https://github.com/isVoid) +- Fix compile error due to missing header ([#11257](https://github.com/rapidsai/cudf/pull/11257)) [@ttnghia](https://github.com/ttnghia) +- Fix a memory aliasing/crash issue in scatter for lists. 
([#11254](https://github.com/rapidsai/cudf/pull/11254)) [@nvdbaranec](https://github.com/nvdbaranec) +- Fix `tests/rolling/empty_input_test` ([#11238](https://github.com/rapidsai/cudf/pull/11238)) [@ttnghia](https://github.com/ttnghia) +- Fix const qualifier when using `host_span<bitmask_type const*>` ([#11220](https://github.com/rapidsai/cudf/pull/11220)) [@ttnghia](https://github.com/ttnghia) +- Avoid using `nvcompBatchedDeflateDecompressGetTempSizeEx` in cuIO ([#11213](https://github.com/rapidsai/cudf/pull/11213)) [@vuule](https://github.com/vuule) +- Generate benchmark data with correct run length regardless of cardinality ([#11205](https://github.com/rapidsai/cudf/pull/11205)) [@vuule](https://github.com/vuule) +- Fix cumulative count index behavior ([#11188](https://github.com/rapidsai/cudf/pull/11188)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Fix assertion in dask_cudf test_struct_explode ([#11170](https://github.com/rapidsai/cudf/pull/11170)) [@rjzamora](https://github.com/rjzamora) +- Provides a method for the user to remove the hook and re-register the hook in a custom shutdown hook manager ([#11161](https://github.com/rapidsai/cudf/pull/11161)) [@res-life](https://github.com/res-life) +- Fix compatibility issues with pandas 1.4.3 ([#11152](https://github.com/rapidsai/cudf/pull/11152)) [@vyasr](https://github.com/vyasr) +- Ensure cuco export set is installed in cmake build ([#11147](https://github.com/rapidsai/cudf/pull/11147)) [@jlowe](https://github.com/jlowe) +- Avoid redundant deepcopy in `cudf.from_pandas` ([#11142](https://github.com/rapidsai/cudf/pull/11142)) [@galipremsagar](https://github.com/galipremsagar) +- Fix compile error due to missing header ([#11126](https://github.com/rapidsai/cudf/pull/11126)) 
[@ttnghia](https://github.com/ttnghia) +- Fix `__cuda_array_interface__` failures ([#11113](https://github.com/rapidsai/cudf/pull/11113)) [@galipremsagar](https://github.com/galipremsagar) +- Support octal and hex within regex character class pattern ([#11112](https://github.com/rapidsai/cudf/pull/11112)) [@davidwendt](https://github.com/davidwendt) +- Fix split_re matching logic for word boundaries ([#11106](https://github.com/rapidsai/cudf/pull/11106)) [@davidwendt](https://github.com/davidwendt) +- Handle multiple files metadata in `read_parquet` ([#11105](https://github.com/rapidsai/cudf/pull/11105)) [@galipremsagar](https://github.com/galipremsagar) +- Fix index alignment for Series objects with repeated index ([#11103](https://github.com/rapidsai/cudf/pull/11103)) [@shwina](https://github.com/shwina) +- FindcuFile now searches in the current CUDA Toolkit location ([#11101](https://github.com/rapidsai/cudf/pull/11101)) [@robertmaynard](https://github.com/robertmaynard) +- Fix regex word boundary logic to include underline ([#11099](https://github.com/rapidsai/cudf/pull/11099)) [@davidwendt](https://github.com/davidwendt) +- Exclude CudaFatalTest when selecting all Java tests ([#11083](https://github.com/rapidsai/cudf/pull/11083)) [@jlowe](https://github.com/jlowe) +- Fix duplicate `cudatoolkit` pinning issue ([#11070](https://github.com/rapidsai/cudf/pull/11070)) [@galipremsagar](https://github.com/galipremsagar) +- Maintain the input index in the result of a groupby-transform ([#11068](https://github.com/rapidsai/cudf/pull/11068)) [@shwina](https://github.com/shwina) +- Fix bug with row count comparison for expect_columns_equivalent(). 
([#11059](https://github.com/rapidsai/cudf/pull/11059)) [@nvdbaranec](https://github.com/nvdbaranec) +- Fix BPE uninitialized size value for null and empty input strings ([#11054](https://github.com/rapidsai/cudf/pull/11054)) [@davidwendt](https://github.com/davidwendt) +- Include missing header for usage of `get_current_device_resource()` ([#11047](https://github.com/rapidsai/cudf/pull/11047)) [@AtlantaPepsi](https://github.com/AtlantaPepsi) +- Fix warn_unused_result error in parquet test ([#11026](https://github.com/rapidsai/cudf/pull/11026)) [@karthikeyann](https://github.com/karthikeyann) +- Return empty dataframe when reading a Parquet file using empty `columns` option ([#11018](https://github.com/rapidsai/cudf/pull/11018)) [@vuule](https://github.com/vuule) +- Fix small error in page row count limiting ([#10991](https://github.com/rapidsai/cudf/pull/10991)) [@etseidl](https://github.com/etseidl) +- Fix a row index entry error in ORC writer issue ([#10989](https://github.com/rapidsai/cudf/pull/10989)) [@vuule](https://github.com/vuule) +- Fix grouped covariance to require both values to be convertible to double. ([#10891](https://github.com/rapidsai/cudf/pull/10891)) [@bdice](https://github.com/bdice) + +## 📖 Documentation + +- Fix issues with day & night modes in python docs ([#11400](https://github.com/rapidsai/cudf/pull/11400)) [@galipremsagar](https://github.com/galipremsagar) +- Update missing data handling APIs in docs ([#11345](https://github.com/rapidsai/cudf/pull/11345)) [@galipremsagar](https://github.com/galipremsagar) +- Add lists filtering APIs to doxygen group. 
([#11336](https://github.com/rapidsai/cudf/pull/11336)) [@bdice](https://github.com/bdice) +- Remove unused import in README sample ([#11318](https://github.com/rapidsai/cudf/pull/11318)) [@vyasr](https://github.com/vyasr) +- Note null behavior in `where` docs ([#11276](https://github.com/rapidsai/cudf/pull/11276)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Update docstring for spans in `get_row_data_range` ([#11271](https://github.com/rapidsai/cudf/pull/11271)) [@vyasr](https://github.com/vyasr) +- Update nvCOMP integration table ([#11231](https://github.com/rapidsai/cudf/pull/11231)) [@vuule](https://github.com/vuule) +- Add dev docs for documentation writing ([#11217](https://github.com/rapidsai/cudf/pull/11217)) [@vyasr](https://github.com/vyasr) +- Documentation fix for concatenate ([#11187](https://github.com/rapidsai/cudf/pull/11187)) [@dagardner-nv](https://github.com/dagardner-nv) +- Fix unresolved links in markdown ([#11173](https://github.com/rapidsai/cudf/pull/11173)) [@karthikeyann](https://github.com/karthikeyann) +- Fix cudf version in README.md install commands ([#11164](https://github.com/rapidsai/cudf/pull/11164)) [@jvanstraten](https://github.com/jvanstraten) +- Switch `language` from `None` to `"en"` in docs build ([#11133](https://github.com/rapidsai/cudf/pull/11133)) [@galipremsagar](https://github.com/galipremsagar) +- Remove docs mentioning scalar_view since no such class exists. ([#11132](https://github.com/rapidsai/cudf/pull/11132)) [@bdice](https://github.com/bdice) +- Add docstring entry for `DataFrame.value_counts` ([#11039](https://github.com/rapidsai/cudf/pull/11039)) [@galipremsagar](https://github.com/galipremsagar) +- Add docs to rolling var, std, count. 
([#11035](https://github.com/rapidsai/cudf/pull/11035)) [@bdice](https://github.com/bdice) +- Fix docs for Numba UDFs. ([#11020](https://github.com/rapidsai/cudf/pull/11020)) [@bdice](https://github.com/bdice) +- Replace column comparison utilities functions with macros ([#11007](https://github.com/rapidsai/cudf/pull/11007)) [@karthikeyann](https://github.com/karthikeyann) +- Fix Doxygen warnings in multiple headers files ([#11003](https://github.com/rapidsai/cudf/pull/11003)) [@karthikeyann](https://github.com/karthikeyann) +- Fix doxygen warnings in utilities/ headers ([#10974](https://github.com/rapidsai/cudf/pull/10974)) [@karthikeyann](https://github.com/karthikeyann) +- Fix Doxygen warnings in table header files ([#10964](https://github.com/rapidsai/cudf/pull/10964)) [@karthikeyann](https://github.com/karthikeyann) +- Fix Doxygen warnings in column header files ([#10963](https://github.com/rapidsai/cudf/pull/10963)) [@karthikeyann](https://github.com/karthikeyann) +- Fix Doxygen warnings in strings / header files ([#10937](https://github.com/rapidsai/cudf/pull/10937)) [@karthikeyann](https://github.com/karthikeyann) +- Generate Doxygen Tag File for Libcudf ([#10932](https://github.com/rapidsai/cudf/pull/10932)) [@isVoid](https://github.com/isVoid) +- Fix doxygen warnings in structs, lists headers ([#10923](https://github.com/rapidsai/cudf/pull/10923)) [@karthikeyann](https://github.com/karthikeyann) +- Fix doxygen warnings in fixed_point.hpp ([#10922](https://github.com/rapidsai/cudf/pull/10922)) [@karthikeyann](https://github.com/karthikeyann) +- Fix doxygen warnings in ast/, rolling, tdigest/, wrappers/, dictionary/ headers ([#10921](https://github.com/rapidsai/cudf/pull/10921)) [@karthikeyann](https://github.com/karthikeyann) +- fix doxygen warnings in 
cudf/io/types.hpp, other header files ([#10913](https://github.com/rapidsai/cudf/pull/10913)) [@karthikeyann](https://github.com/karthikeyann) +- fix doxygen warnings in cudf/io/ avro, csv, json, orc, parquet header files ([#10912](https://github.com/rapidsai/cudf/pull/10912)) [@karthikeyann](https://github.com/karthikeyann) +- Fix doxygen warnings in cudf/*.hpp ([#10896](https://github.com/rapidsai/cudf/pull/10896)) [@karthikeyann](https://github.com/karthikeyann) +- Add missing documentation in aggregation.hpp ([#10887](https://github.com/rapidsai/cudf/pull/10887)) [@karthikeyann](https://github.com/karthikeyann) +- Revise PR template. ([#10774](https://github.com/rapidsai/cudf/pull/10774)) [@bdice](https://github.com/bdice) + +## 🚀 New Features + +- Change cmake to allow controlling Arrow version via cmake variable ([#11429](https://github.com/rapidsai/cudf/pull/11429)) [@kkraus14](https://github.com/kkraus14) +- Adding support for list<int8> columns to be written as byte arrays in parquet ([#11328](https://github.com/rapidsai/cudf/pull/11328)) [@hyperbolic2346](https://github.com/hyperbolic2346) +- Adding byte array view structure ([#11322](https://github.com/rapidsai/cudf/pull/11322)) [@hyperbolic2346](https://github.com/hyperbolic2346) +- Adding byte_array statistics ([#11303](https://github.com/rapidsai/cudf/pull/11303)) [@hyperbolic2346](https://github.com/hyperbolic2346) +- Add column indexes to Parquet writer ([#11302](https://github.com/rapidsai/cudf/pull/11302)) [@etseidl](https://github.com/etseidl) +- Provide an Option for Default Integer and Floating Bitwidth ([#11272](https://github.com/rapidsai/cudf/pull/11272)) [@isVoid](https://github.com/isVoid) +- FST benchmark ([#11243](https://github.com/rapidsai/cudf/pull/11243)) 
[@karthikeyann](https://github.com/karthikeyann) +- Adds the Finite-State Transducer algorithm ([#11242](https://github.com/rapidsai/cudf/pull/11242)) [@elstehle](https://github.com/elstehle) +- Refactor `collect_set` to use `cudf::distinct` and `cudf::lists::distinct` ([#11228](https://github.com/rapidsai/cudf/pull/11228)) [@ttnghia](https://github.com/ttnghia) +- Treat zstd as stable in nvcomp releases 2.3.2 and later ([#11226](https://github.com/rapidsai/cudf/pull/11226)) [@jbrennan333](https://github.com/jbrennan333) +- Add 24 bit dictionary support to Parquet writer ([#11216](https://github.com/rapidsai/cudf/pull/11216)) [@devavret](https://github.com/devavret) +- Enable positive group indices for extractAllRecord on JNI ([#11215](https://github.com/rapidsai/cudf/pull/11215)) [@anthony-chang](https://github.com/anthony-chang) +- JNI bindings for NTH_ELEMENT window aggregation ([#11201](https://github.com/rapidsai/cudf/pull/11201)) [@mythrocks](https://github.com/mythrocks) +- Add JNI bindings for extractAllRecord ([#11196](https://github.com/rapidsai/cudf/pull/11196)) [@anthony-chang](https://github.com/anthony-chang) +- Add `cudf.options` ([#11193](https://github.com/rapidsai/cudf/pull/11193)) [@isVoid](https://github.com/isVoid) +- Add thrift support for parquet column and offset indexes ([#11178](https://github.com/rapidsai/cudf/pull/11178)) [@etseidl](https://github.com/etseidl) +- Adding binary read/write as options for parquet ([#11160](https://github.com/rapidsai/cudf/pull/11160)) [@hyperbolic2346](https://github.com/hyperbolic2346) +- Support `nth_element` for window functions ([#11158](https://github.com/rapidsai/cudf/pull/11158)) [@mythrocks](https://github.com/mythrocks) +- Implement `lists::distinct` and `cudf::detail::stable_distinct` 
([#11149](https://github.com/rapidsai/cudf/pull/11149)) [@ttnghia](https://github.com/ttnghia) +- Implement Groupby pct_change ([#11144](https://github.com/rapidsai/cudf/pull/11144)) [@skirui-source](https://github.com/skirui-source) +- Add JNI for set operations ([#11143](https://github.com/rapidsai/cudf/pull/11143)) [@ttnghia](https://github.com/ttnghia) +- Remove deprecated PER_THREAD_DEFAULT_STREAM ([#11134](https://github.com/rapidsai/cudf/pull/11134)) [@jbrennan333](https://github.com/jbrennan333) +- Added a Java method to check the existence of a list of keys in a map ([#11128](https://github.com/rapidsai/cudf/pull/11128)) [@razajafri](https://github.com/razajafri) +- Feature/python benchmarking ([#11125](https://github.com/rapidsai/cudf/pull/11125)) [@vyasr](https://github.com/vyasr) +- Support `nan_equality` in `cudf::distinct` ([#11118](https://github.com/rapidsai/cudf/pull/11118)) [@ttnghia](https://github.com/ttnghia) +- Added JNI for getMapValueForKeys ([#11104](https://github.com/rapidsai/cudf/pull/11104)) [@razajafri](https://github.com/razajafri) +- Refactor `semi_anti_join` ([#11100](https://github.com/rapidsai/cudf/pull/11100)) [@ttnghia](https://github.com/ttnghia) +- Replace remaining instances of rmm::cuda_stream_default with cudf::default_stream_value ([#11082](https://github.com/rapidsai/cudf/pull/11082)) [@jbrennan333](https://github.com/jbrennan333) +- Adds the Logical Stack algorithm ([#11078](https://github.com/rapidsai/cudf/pull/11078)) [@elstehle](https://github.com/elstehle) +- Add doxygen-check pre-commit hook ([#11076](https://github.com/rapidsai/cudf/pull/11076)) [@karthikeyann](https://github.com/karthikeyann) +- Use new nvCOMP API to optimize the decompression temp memory size ([#11064](https://github.com/rapidsai/cudf/pull/11064)) 
[@vuule](https://github.com/vuule) +- Add Doxygen CI check ([#11057](https://github.com/rapidsai/cudf/pull/11057)) [@karthikeyann](https://github.com/karthikeyann) +- Support `duplicate_keep_option` in `cudf::distinct` ([#11052](https://github.com/rapidsai/cudf/pull/11052)) [@ttnghia](https://github.com/ttnghia) +- Support set operations ([#11043](https://github.com/rapidsai/cudf/pull/11043)) [@ttnghia](https://github.com/ttnghia) +- Support for ZLIB compression in ORC writer ([#11036](https://github.com/rapidsai/cudf/pull/11036)) [@vuule](https://github.com/vuule) +- Adding feature swaplevels ([#11027](https://github.com/rapidsai/cudf/pull/11027)) [@VamsiTallam95](https://github.com/VamsiTallam95) +- Use nvCOMP for ZLIB decompression in ORC reader ([#11024](https://github.com/rapidsai/cudf/pull/11024)) [@vuule](https://github.com/vuule) +- Function for bfill, ffill #9591 ([#11022](https://github.com/rapidsai/cudf/pull/11022)) [@Sreekiran096](https://github.com/Sreekiran096) +- Generate group offsets from element labels ([#11017](https://github.com/rapidsai/cudf/pull/11017)) [@ttnghia](https://github.com/ttnghia) +- Feature axes ([#10979](https://github.com/rapidsai/cudf/pull/10979)) [@VamsiTallam95](https://github.com/VamsiTallam95) +- Generate group labels from offsets ([#10945](https://github.com/rapidsai/cudf/pull/10945)) [@ttnghia](https://github.com/ttnghia) +- Add missing cuIO benchmark coverage for duration types ([#10933](https://github.com/rapidsai/cudf/pull/10933)) [@vuule](https://github.com/vuule) +- Dask-cuDF cumulative groupby ops ([#10889](https://github.com/rapidsai/cudf/pull/10889)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Reindex Improvements ([#10815](https://github.com/rapidsai/cudf/pull/10815)) 
[@brandon-b-miller](https://github.com/brandon-b-miller) +- Implement value_counts for DataFrame ([#10813](https://github.com/rapidsai/cudf/pull/10813)) [@martinfalisse](https://github.com/martinfalisse) + +## 🛠️ Improvements + +- Pin `dask` & `distributed` for release ([#11433](https://github.com/rapidsai/cudf/pull/11433)) [@galipremsagar](https://github.com/galipremsagar) +- Use documented header template for `doxygen` ([#11430](https://github.com/rapidsai/cudf/pull/11430)) [@galipremsagar](https://github.com/galipremsagar) +- Relax arrow version in dev env ([#11418](https://github.com/rapidsai/cudf/pull/11418)) [@galipremsagar](https://github.com/galipremsagar) +- Allow CuPy 11 ([#11393](https://github.com/rapidsai/cudf/pull/11393)) [@jakirkham](https://github.com/jakirkham) +- Improve multibyte_split performance ([#11347](https://github.com/rapidsai/cudf/pull/11347)) [@cwharris](https://github.com/cwharris) +- Switch death test to use explicit trap. ([#11326](https://github.com/rapidsai/cudf/pull/11326)) [@vyasr](https://github.com/vyasr) +- Add --output-on-failure to ctest args. 
([#11321](https://github.com/rapidsai/cudf/pull/11321)) [@vyasr](https://github.com/vyasr) +- Consolidate remaining DataFrame/Series APIs ([#11315](https://github.com/rapidsai/cudf/pull/11315)) [@vyasr](https://github.com/vyasr) +- Add JNI support for the join_strings API ([#11309](https://github.com/rapidsai/cudf/pull/11309)) [@revans2](https://github.com/revans2) +- Add cupy version to setup.py install_requires ([#11306](https://github.com/rapidsai/cudf/pull/11306)) [@vyasr](https://github.com/vyasr) +- removing some unused code ([#11305](https://github.com/rapidsai/cudf/pull/11305)) [@hyperbolic2346](https://github.com/hyperbolic2346) +- Add test of wildcard selection ([#11300](https://github.com/rapidsai/cudf/pull/11300)) [@vyasr](https://github.com/vyasr) +- Update parquet reader to take stream parameter ([#11294](https://github.com/rapidsai/cudf/pull/11294)) [@PointKernel](https://github.com/PointKernel) +- Spark list hashing ([#11292](https://github.com/rapidsai/cudf/pull/11292)) [@bdice](https://github.com/bdice) +- Remove legacy join APIs ([#11274](https://github.com/rapidsai/cudf/pull/11274)) [@vyasr](https://github.com/vyasr) +- Fix `cudf` recipes syntax ([#11273](https://github.com/rapidsai/cudf/pull/11273)) [@ajschmidt8](https://github.com/ajschmidt8) +- Fix `cudf` recipe ([#11267](https://github.com/rapidsai/cudf/pull/11267)) [@ajschmidt8](https://github.com/ajschmidt8) +- Cleanup config files ([#11266](https://github.com/rapidsai/cudf/pull/11266)) [@vyasr](https://github.com/vyasr) +- Run mypy on all packages ([#11265](https://github.com/rapidsai/cudf/pull/11265)) [@vyasr](https://github.com/vyasr) +- Update to isort 5.10.1. 
([#11262](https://github.com/rapidsai/cudf/pull/11262)) [@vyasr](https://github.com/vyasr) +- Consolidate flake8 and pydocstyle configuration ([#11260](https://github.com/rapidsai/cudf/pull/11260)) [@vyasr](https://github.com/vyasr) +- Remove redundant black config specifications. ([#11258](https://github.com/rapidsai/cudf/pull/11258)) [@vyasr](https://github.com/vyasr) +- Ensure DeprecationWarnings are not introduced via pre-commit ([#11255](https://github.com/rapidsai/cudf/pull/11255)) [@wence-](https://github.com/wence-) +- Optimization to gpu::PreprocessColumnData in parquet reader. ([#11252](https://github.com/rapidsai/cudf/pull/11252)) [@nvdbaranec](https://github.com/nvdbaranec) +- Move rolling impl details to detail/ directory. ([#11250](https://github.com/rapidsai/cudf/pull/11250)) [@mythrocks](https://github.com/mythrocks) +- Remove `lists::drop_list_duplicates` ([#11236](https://github.com/rapidsai/cudf/pull/11236)) [@ttnghia](https://github.com/ttnghia) +- Use `cudf::lists::distinct` in Python binding ([#11234](https://github.com/rapidsai/cudf/pull/11234)) [@ttnghia](https://github.com/ttnghia) +- Use `cudf::lists::distinct` in Java binding ([#11233](https://github.com/rapidsai/cudf/pull/11233)) [@ttnghia](https://github.com/ttnghia) +- Use `cudf::distinct` in Java binding ([#11232](https://github.com/rapidsai/cudf/pull/11232)) [@ttnghia](https://github.com/ttnghia) +- Pin `dask-cuda` in dev environment ([#11229](https://github.com/rapidsai/cudf/pull/11229)) [@galipremsagar](https://github.com/galipremsagar) +- Remove cruft in map_lookup ([#11221](https://github.com/rapidsai/cudf/pull/11221)) [@mythrocks](https://github.com/mythrocks) +- Deprecate `skiprows` & `num_rows` in parquet reader ([#11218](https://github.com/rapidsai/cudf/pull/11218)) 
[@galipremsagar](https://github.com/galipremsagar) +- Remove Frame._index ([#11210](https://github.com/rapidsai/cudf/pull/11210)) [@vyasr](https://github.com/vyasr) +- Improve performance for `cudf::contains` when searching for a scalar ([#11202](https://github.com/rapidsai/cudf/pull/11202)) [@ttnghia](https://github.com/ttnghia) +- Document why Development component is needed for CMake. ([#11200](https://github.com/rapidsai/cudf/pull/11200)) [@vyasr](https://github.com/vyasr) +- cleanup unused code in rolling_test.hpp ([#11195](https://github.com/rapidsai/cudf/pull/11195)) [@karthikeyann](https://github.com/karthikeyann) +- Standardize join internals around DataFrame ([#11184](https://github.com/rapidsai/cudf/pull/11184)) [@vyasr](https://github.com/vyasr) +- Move character case table declarations from src to detail ([#11183](https://github.com/rapidsai/cudf/pull/11183)) [@davidwendt](https://github.com/davidwendt) +- Remove usage of Frame in StringMethods ([#11181](https://github.com/rapidsai/cudf/pull/11181)) [@vyasr](https://github.com/vyasr) +- Expose get_json_object_options to Python ([#11180](https://github.com/rapidsai/cudf/pull/11180)) [@SrikarVanavasam](https://github.com/SrikarVanavasam) +- Fix decimal128 stats in parquet writer ([#11179](https://github.com/rapidsai/cudf/pull/11179)) [@etseidl](https://github.com/etseidl) +- Modify CheckPageRows in parquet_test to use datasources ([#11177](https://github.com/rapidsai/cudf/pull/11177)) [@etseidl](https://github.com/etseidl) +- Pin max version of `cuda-python` to `11.7.0` ([#11174](https://github.com/rapidsai/cudf/pull/11174)) [@Ethyling](https://github.com/Ethyling) +- Refactor and optimize Frame.where ([#11168](https://github.com/rapidsai/cudf/pull/11168)) [@vyasr](https://github.com/vyasr) +- Add npos
const static member to cudf::string_view ([#11166](https://github.com/rapidsai/cudf/pull/11166)) [@davidwendt](https://github.com/davidwendt) +- Move _drop_rows_by_label from Frame to IndexedFrame ([#11157](https://github.com/rapidsai/cudf/pull/11157)) [@vyasr](https://github.com/vyasr) +- Clean up _copy_type_metadata ([#11156](https://github.com/rapidsai/cudf/pull/11156)) [@vyasr](https://github.com/vyasr) +- Add `nvcc` conda package in dev environment ([#11154](https://github.com/rapidsai/cudf/pull/11154)) [@galipremsagar](https://github.com/galipremsagar) +- Struct binary comparison op functionality for spark rapids ([#11153](https://github.com/rapidsai/cudf/pull/11153)) [@rwlee](https://github.com/rwlee) +- Refactor inline conditionals. ([#11151](https://github.com/rapidsai/cudf/pull/11151)) [@bdice](https://github.com/bdice) +- Refactor Spark hashing tests ([#11145](https://github.com/rapidsai/cudf/pull/11145)) [@bdice](https://github.com/bdice) +- Add new `_from_data_like_self` factory ([#11140](https://github.com/rapidsai/cudf/pull/11140)) [@vyasr](https://github.com/vyasr) +- Update get_cucollections to use rapids-cmake ([#11139](https://github.com/rapidsai/cudf/pull/11139)) [@vyasr](https://github.com/vyasr) +- Remove unnecessary extra function for libcudacxx detection ([#11138](https://github.com/rapidsai/cudf/pull/11138)) [@vyasr](https://github.com/vyasr) +- Allow initial value for cudf::reduce and cudf::segmented_reduce. 
([#11137](https://github.com/rapidsai/cudf/pull/11137)) [@SrikarVanavasam](https://github.com/SrikarVanavasam) +- Remove Index.replace API ([#11131](https://github.com/rapidsai/cudf/pull/11131)) [@vyasr](https://github.com/vyasr) +- Move char-type table function declarations from src to detail ([#11127](https://github.com/rapidsai/cudf/pull/11127)) [@davidwendt](https://github.com/davidwendt) +- Clean up repo root ([#11124](https://github.com/rapidsai/cudf/pull/11124)) [@bdice](https://github.com/bdice) +- Improve print formatting of strings containing newline characters. ([#11108](https://github.com/rapidsai/cudf/pull/11108)) [@nvdbaranec](https://github.com/nvdbaranec) +- Fix cudf::string_view::find() to return pos for empty string argument ([#11107](https://github.com/rapidsai/cudf/pull/11107)) [@davidwendt](https://github.com/davidwendt) +- Forward-merge branch-22.06 to branch-22.08 ([#11086](https://github.com/rapidsai/cudf/pull/11086)) [@bdice](https://github.com/bdice) +- Take iterators by value in clamp.cu. 
([#11084](https://github.com/rapidsai/cudf/pull/11084)) [@bdice](https://github.com/bdice) +- Performance improvements for row to column conversions ([#11075](https://github.com/rapidsai/cudf/pull/11075)) [@hyperbolic2346](https://github.com/hyperbolic2346) +- Remove deprecated Index methods from Frame ([#11073](https://github.com/rapidsai/cudf/pull/11073)) [@vyasr](https://github.com/vyasr) +- Use per-page max compressed size estimate for compression ([#11066](https://github.com/rapidsai/cudf/pull/11066)) [@devavret](https://github.com/devavret) +- column to row refactor for performance ([#11063](https://github.com/rapidsai/cudf/pull/11063)) [@hyperbolic2346](https://github.com/hyperbolic2346) +- Include `skbuild` directory into `build.sh` `clean` operation ([#11060](https://github.com/rapidsai/cudf/pull/11060)) [@galipremsagar](https://github.com/galipremsagar) +- Unpin `dask` & `distributed` for development ([#11058](https://github.com/rapidsai/cudf/pull/11058)) [@galipremsagar](https://github.com/galipremsagar) +- Add support for `Series.between` ([#11051](https://github.com/rapidsai/cudf/pull/11051)) [@galipremsagar](https://github.com/galipremsagar) +- Fix groupby include ([#11046](https://github.com/rapidsai/cudf/pull/11046)) [@bwyogatama](https://github.com/bwyogatama) +- Regex cleanup internal reclass and reclass_device classes ([#11045](https://github.com/rapidsai/cudf/pull/11045)) [@davidwendt](https://github.com/davidwendt) +- Remove public API of cudf.merge_sorted. 
([#11032](https://github.com/rapidsai/cudf/pull/11032)) [@bdice](https://github.com/bdice) +- Drop python `3.7` in code-base ([#11029](https://github.com/rapidsai/cudf/pull/11029)) [@galipremsagar](https://github.com/galipremsagar) +- Addition & integration of the integer power operator ([#11025](https://github.com/rapidsai/cudf/pull/11025)) [@AtlantaPepsi](https://github.com/AtlantaPepsi) +- Refactor `lists::contains` ([#11019](https://github.com/rapidsai/cudf/pull/11019)) [@ttnghia](https://github.com/ttnghia) +- Change build.sh to find C++ library by default and avoid shadowing CMAKE_ARGS ([#11013](https://github.com/rapidsai/cudf/pull/11013)) [@vyasr](https://github.com/vyasr) +- Clean up parquet unit test ([#11005](https://github.com/rapidsai/cudf/pull/11005)) [@PointKernel](https://github.com/PointKernel) +- Add missing #pragma once to header files ([#11004](https://github.com/rapidsai/cudf/pull/11004)) [@karthikeyann](https://github.com/karthikeyann) +- Cleanup `iterator.cuh` and add fixed point support for `scalar_optional_accessor` ([#10999](https://github.com/rapidsai/cudf/pull/10999)) [@ttnghia](https://github.com/ttnghia) +- Refactor `cudf::contains` ([#10997](https://github.com/rapidsai/cudf/pull/10997)) [@ttnghia](https://github.com/ttnghia) +- Remove Arrow CUDA IPC code ([#10995](https://github.com/rapidsai/cudf/pull/10995)) [@shwina](https://github.com/shwina) +- Change file extension for groupby benchmark ([#10985](https://github.com/rapidsai/cudf/pull/10985)) [@ttnghia](https://github.com/ttnghia) +- Sort recipe include checks. 
([#10984](https://github.com/rapidsai/cudf/pull/10984)) [@bdice](https://github.com/bdice) +- Update cuCollections for thrust upgrade ([#10983](https://github.com/rapidsai/cudf/pull/10983)) [@PointKernel](https://github.com/PointKernel) +- Expose row-group size options in cudf ParquetWriter ([#10980](https://github.com/rapidsai/cudf/pull/10980)) [@rjzamora](https://github.com/rjzamora) +- Cleanup cudf::strings::detail::regex_parser class source ([#10975](https://github.com/rapidsai/cudf/pull/10975)) [@davidwendt](https://github.com/davidwendt) +- Handle missing fields as nulls in get_json_object() ([#10970](https://github.com/rapidsai/cudf/pull/10970)) [@SrikarVanavasam](https://github.com/SrikarVanavasam) +- Fix license families to match all-caps expected by conda-verify. ([#10931](https://github.com/rapidsai/cudf/pull/10931)) [@bdice](https://github.com/bdice) +- Include <optional> for GCC 11 compatibility. 
([#10927](https://github.com/rapidsai/cudf/pull/10927)) [@bdice](https://github.com/bdice) +- Enable builds with scikit-build ([#10919](https://github.com/rapidsai/cudf/pull/10919)) [@vyasr](https://github.com/vyasr) +- Improve `distinct` by using `cuco::static_map::retrieve_all` ([#10916](https://github.com/rapidsai/cudf/pull/10916)) [@PointKernel](https://github.com/PointKernel) +- update cudfjni to 22.08.0-SNAPSHOT ([#10910](https://github.com/rapidsai/cudf/pull/10910)) [@pxLi](https://github.com/pxLi) +- Improve the capture of fatal cuda error ([#10884](https://github.com/rapidsai/cudf/pull/10884)) [@sperlingxx](https://github.com/sperlingxx) +- Cleanup regex compiler operators and operands source ([#10879](https://github.com/rapidsai/cudf/pull/10879)) [@davidwendt](https://github.com/davidwendt) +- Buffer: make `.ptr` read-only ([#10872](https://github.com/rapidsai/cudf/pull/10872)) [@madsbk](https://github.com/madsbk) +- Configurable NaN handling in device_row_comparators ([#10870](https://github.com/rapidsai/cudf/pull/10870)) [@rwlee](https://github.com/rwlee) +- Register `cudf.core.groupby.Grouper` objects to dask `grouper_dispatch` ([#10838](https://github.com/rapidsai/cudf/pull/10838)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Upgrade to `arrow-8` ([#10816](https://github.com/rapidsai/cudf/pull/10816)) [@galipremsagar](https://github.com/galipremsagar) +- Remove _getattr_ method in RangeIndex class ([#10538](https://github.com/rapidsai/cudf/pull/10538)) [@skirui-source](https://github.com/skirui-source) +- Adding bins to value counts ([#8247](https://github.com/rapidsai/cudf/pull/8247)) [@marlenezw](https://github.com/marlenezw) + +# cuDF 22.06.00 (7 Jun 2022) + +## 🚨 Breaking Changes + +- Enable Zstandard decompression only when all nvcomp 
integrations are enabled ([#10944](https://github.com/rapidsai/cudf/pull/10944)) [@vuule](https://github.com/vuule) +- Rename `sliced_child` to `get_sliced_child`. ([#10885](https://github.com/rapidsai/cudf/pull/10885)) [@bdice](https://github.com/bdice) +- Add parameters to control page size in Parquet writer ([#10882](https://github.com/rapidsai/cudf/pull/10882)) [@etseidl](https://github.com/etseidl) +- Make cudf::test::expect_columns_equal() to fail when comparing unsanitary lists. ([#10880](https://github.com/rapidsai/cudf/pull/10880)) [@nvdbaranec](https://github.com/nvdbaranec) +- Cleanup regex compiler fixed quantifiers source ([#10843](https://github.com/rapidsai/cudf/pull/10843)) [@davidwendt](https://github.com/davidwendt) +- Refactor `cudf::contains`, renaming and switching parameters role ([#10802](https://github.com/rapidsai/cudf/pull/10802)) [@ttnghia](https://github.com/ttnghia) +- Generic serialization of all column types ([#10784](https://github.com/rapidsai/cudf/pull/10784)) [@wence-](https://github.com/wence-) +- Return per-file metadata from readers ([#10782](https://github.com/rapidsai/cudf/pull/10782)) [@vuule](https://github.com/vuule) +- HostColumnVectoreCore#isNull should return true for out-of-range rows ([#10779](https://github.com/rapidsai/cudf/pull/10779)) [@gerashegalov](https://github.com/gerashegalov) +- Update `groupby::hash` to use new row operators for keys ([#10770](https://github.com/rapidsai/cudf/pull/10770)) [@PointKernel](https://github.com/PointKernel) +- update mangle_dupe_cols behavior in csv reader to match pandas 1.4.0 behavior ([#10749](https://github.com/rapidsai/cudf/pull/10749)) [@karthikeyann](https://github.com/karthikeyann) +- Rename CUDA_TRY macro to CUDF_CUDA_TRY, rename CHECK_CUDA macro to CUDF_CHECK_CUDA. 
([#10589](https://github.com/rapidsai/cudf/pull/10589)) [@bdice](https://github.com/bdice) +- Upgrade `cudf` to support `pandas` 1.4.x versions ([#10584](https://github.com/rapidsai/cudf/pull/10584)) [@galipremsagar](https://github.com/galipremsagar) +- Move binop methods from Frame to IndexedFrame and standardize the docstring ([#10576](https://github.com/rapidsai/cudf/pull/10576)) [@vyasr](https://github.com/vyasr) +- Add default= kwarg to .list.get() accessor method ([#10547](https://github.com/rapidsai/cudf/pull/10547)) [@shwina](https://github.com/shwina) +- Remove deprecated `decimal_cols_as_float` in the ORC reader ([#10515](https://github.com/rapidsai/cudf/pull/10515)) [@vuule](https://github.com/vuule) +- Support nvComp 2.3 if local, otherwise use nvcomp 2.2 ([#10513](https://github.com/rapidsai/cudf/pull/10513)) [@robertmaynard](https://github.com/robertmaynard) +- Fix findall_record to return empty list for no matches ([#10491](https://github.com/rapidsai/cudf/pull/10491)) [@davidwendt](https://github.com/davidwendt) +- Namespace/Docstring Fixes for Reduction ([#10471](https://github.com/rapidsai/cudf/pull/10471)) [@isVoid](https://github.com/isVoid) +- Additional refactoring of hash functions ([#10462](https://github.com/rapidsai/cudf/pull/10462)) [@bdice](https://github.com/bdice) +- Fix default value of str.split expand parameter. ([#10457](https://github.com/rapidsai/cudf/pull/10457)) [@bdice](https://github.com/bdice) +- Remove deprecated code. 
([#10450](https://github.com/rapidsai/cudf/pull/10450)) [@vyasr](https://github.com/vyasr) + +## 🐛 Bug Fixes + +- Fix single column `MultiIndex` issue in `sort_index` ([#10957](https://github.com/rapidsai/cudf/pull/10957)) [@galipremsagar](https://github.com/galipremsagar) +- Make SerializedTableHeader(numRows) public ([#10949](https://github.com/rapidsai/cudf/pull/10949)) [@gerashegalov](https://github.com/gerashegalov) +- Fix `gcc_linux` version pinning in dev environment ([#10943](https://github.com/rapidsai/cudf/pull/10943)) [@galipremsagar](https://github.com/galipremsagar) +- Fix an issue with reading raw string in `cudf.read_json` ([#10924](https://github.com/rapidsai/cudf/pull/10924)) [@galipremsagar](https://github.com/galipremsagar) +- Make cudf::test::expect_columns_equal() to fail when comparing unsanitary lists. ([#10880](https://github.com/rapidsai/cudf/pull/10880)) [@nvdbaranec](https://github.com/nvdbaranec) +- Fix segmented_reduce on empty column with non-empty offsets ([#10876](https://github.com/rapidsai/cudf/pull/10876)) [@davidwendt](https://github.com/davidwendt) +- Fix dask-cudf groupby handling when grouping by all columns ([#10866](https://github.com/rapidsai/cudf/pull/10866)) [@charlesbluca](https://github.com/charlesbluca) +- Fix a bug in `distinct`: using nested nulls logic ([#10848](https://github.com/rapidsai/cudf/pull/10848)) [@PointKernel](https://github.com/PointKernel) +- Fix constness / references in weak ordering operator() signatures. 
([#10846](https://github.com/rapidsai/cudf/pull/10846)) [@bdice](https://github.com/bdice) +- Suppress sizeof-array-div warnings in thrust found by gcc-11 ([#10840](https://github.com/rapidsai/cudf/pull/10840)) [@robertmaynard](https://github.com/robertmaynard) +- Add handling for string by-columns in dask-cudf groupby ([#10830](https://github.com/rapidsai/cudf/pull/10830)) [@charlesbluca](https://github.com/charlesbluca) +- Fix compile warning in search.cu ([#10827](https://github.com/rapidsai/cudf/pull/10827)) [@davidwendt](https://github.com/davidwendt) +- Fix element access const correctness in `hostdevice_vector` ([#10804](https://github.com/rapidsai/cudf/pull/10804)) [@vuule](https://github.com/vuule) +- Update `cuco` git tag ([#10788](https://github.com/rapidsai/cudf/pull/10788)) [@PointKernel](https://github.com/PointKernel) +- HostColumnVectoreCore#isNull should return true for out-of-range rows ([#10779](https://github.com/rapidsai/cudf/pull/10779)) [@gerashegalov](https://github.com/gerashegalov) +- Fixing deprecation warnings in test_orc.py ([#10772](https://github.com/rapidsai/cudf/pull/10772)) [@hyperbolic2346](https://github.com/hyperbolic2346) +- Enable writing to `s3` storage in chunked parquet writer ([#10769](https://github.com/rapidsai/cudf/pull/10769)) [@galipremsagar](https://github.com/galipremsagar) +- Fix construction of nested structs with EMPTY child ([#10761](https://github.com/rapidsai/cudf/pull/10761)) [@shwina](https://github.com/shwina) +- Fix replace error when regex has only zero match quantifiers ([#10760](https://github.com/rapidsai/cudf/pull/10760)) [@davidwendt](https://github.com/davidwendt) +- Fix an issue with one_level_list schemas in parquet reader. 
([#10750](https://github.com/rapidsai/cudf/pull/10750)) [@nvdbaranec](https://github.com/nvdbaranec) +- update mangle_dupe_cols behavior in csv reader to match pandas 1.4.0 behavior ([#10749](https://github.com/rapidsai/cudf/pull/10749)) [@karthikeyann](https://github.com/karthikeyann) +- Fix `cupy` function in notebook ([#10737](https://github.com/rapidsai/cudf/pull/10737)) [@ajschmidt8](https://github.com/ajschmidt8) +- Fix `fillna` to retain `columns` when it is `MultiIndex` ([#10729](https://github.com/rapidsai/cudf/pull/10729)) [@galipremsagar](https://github.com/galipremsagar) +- Fix scatter for all-empty-string column case ([#10724](https://github.com/rapidsai/cudf/pull/10724)) [@davidwendt](https://github.com/davidwendt) +- Retain series name in `Series.apply` ([#10716](https://github.com/rapidsai/cudf/pull/10716)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Correct build dir `cudf-config` dependency issues for static builds ([#10704](https://github.com/rapidsai/cudf/pull/10704)) [@robertmaynard](https://github.com/robertmaynard) +- Fix list of testing requirements in setup.py. 
([#10678](https://github.com/rapidsai/cudf/pull/10678)) [@bdice](https://github.com/bdice) +- Fix rounding to zero error in stod on very small float numbers ([#10672](https://github.com/rapidsai/cudf/pull/10672)) [@davidwendt](https://github.com/davidwendt) +- cuco isn't a cudf dependency when we are built shared ([#10662](https://github.com/rapidsai/cudf/pull/10662)) [@robertmaynard](https://github.com/robertmaynard) +- Fix to_timestamps to support Z for %z format specifier ([#10617](https://github.com/rapidsai/cudf/pull/10617)) [@davidwendt](https://github.com/davidwendt) +- Verify compression type in Parquet reader ([#10610](https://github.com/rapidsai/cudf/pull/10610)) [@vuule](https://github.com/vuule) +- Fix struct row comparator's exception on empty structs ([#10604](https://github.com/rapidsai/cudf/pull/10604)) [@sperlingxx](https://github.com/sperlingxx) +- Fix strings strip() to accept only str Scalar for to_strip parameter ([#10597](https://github.com/rapidsai/cudf/pull/10597)) [@davidwendt](https://github.com/davidwendt) +- Fix has_atomic_support check in can_use_hash_groupby() ([#10588](https://github.com/rapidsai/cudf/pull/10588)) [@jbrennan333](https://github.com/jbrennan333) +- Revert Thrust 1.16 to Thrust 1.15 ([#10586](https://github.com/rapidsai/cudf/pull/10586)) [@bdice](https://github.com/bdice) +- Fix missing RMM_STATIC_CUDART define when compiling JNI with static CUDA runtime ([#10585](https://github.com/rapidsai/cudf/pull/10585)) [@jlowe](https://github.com/jlowe) +- pin more cmake versions ([#10570](https://github.com/rapidsai/cudf/pull/10570)) [@robertmaynard](https://github.com/robertmaynard) +- Re-enable Build Metrics Report ([#10562](https://github.com/rapidsai/cudf/pull/10562)) [@davidwendt](https://github.com/davidwendt) +- Remove statically 
linked CUDA runtime check in Java build ([#10532](https://github.com/rapidsai/cudf/pull/10532)) [@jlowe](https://github.com/jlowe) +- Fix temp data cleanup in `test_text.py` ([#10524](https://github.com/rapidsai/cudf/pull/10524)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Update pre-commit to run black 22.3.0 ([#10523](https://github.com/rapidsai/cudf/pull/10523)) [@vyasr](https://github.com/vyasr) +- Remove deprecated `decimal_cols_as_float` in the ORC reader ([#10515](https://github.com/rapidsai/cudf/pull/10515)) [@vuule](https://github.com/vuule) +- Fix findall_record to return empty list for no matches ([#10491](https://github.com/rapidsai/cudf/pull/10491)) [@davidwendt](https://github.com/davidwendt) +- Allow users to specify data types for a subset of columns in `read_csv` ([#10484](https://github.com/rapidsai/cudf/pull/10484)) [@vuule](https://github.com/vuule) +- Fix default value of str.split expand parameter. ([#10457](https://github.com/rapidsai/cudf/pull/10457)) [@bdice](https://github.com/bdice) +- Improve coverage of dask-cudf's groupby aggregation, add tests for `dropna` support ([#10449](https://github.com/rapidsai/cudf/pull/10449)) [@charlesbluca](https://github.com/charlesbluca) +- Allow string aggs for `dask_cudf.CudfDataFrameGroupBy.aggregate` ([#10222](https://github.com/rapidsai/cudf/pull/10222)) [@charlesbluca](https://github.com/charlesbluca) +- In-place updates with loc or iloc don't work correctly when the LHS has more than one column ([#9918](https://github.com/rapidsai/cudf/pull/9918)) [@skirui-source](https://github.com/skirui-source) + +## 📖 Documentation + +- Clarify append deprecation notice. 
([#10930](https://github.com/rapidsai/cudf/pull/10930)) [@bdice](https://github.com/bdice) +- Use full name of GPUDirect Storage SDK in docs ([#10904](https://github.com/rapidsai/cudf/pull/10904)) [@vuule](https://github.com/vuule) +- Update Dask + Pandas to Dask + cuDF path ([#10897](https://github.com/rapidsai/cudf/pull/10897)) [@miguelusque](https://github.com/miguelusque) +- Add missing documentation in cudf/types.hpp ([#10895](https://github.com/rapidsai/cudf/pull/10895)) [@karthikeyann](https://github.com/karthikeyann) +- Add strong index iterator docs. ([#10888](https://github.com/rapidsai/cudf/pull/10888)) [@bdice](https://github.com/bdice) +- spell check fixes ([#10865](https://github.com/rapidsai/cudf/pull/10865)) [@karthikeyann](https://github.com/karthikeyann) +- Add missing documentation in scalar/ headers ([#10861](https://github.com/rapidsai/cudf/pull/10861)) [@karthikeyann](https://github.com/karthikeyann) +- Remove typo in ngram documentation ([#10859](https://github.com/rapidsai/cudf/pull/10859)) [@miguelusque](https://github.com/miguelusque) +- fix doxygen warnings ([#10842](https://github.com/rapidsai/cudf/pull/10842)) [@karthikeyann](https://github.com/karthikeyann) +- Add a library_design.md file documenting the core Python data structures and their relationship ([#10817](https://github.com/rapidsai/cudf/pull/10817)) [@vyasr](https://github.com/vyasr) +- Add NumPy to intersphinx references. 
([#10809](https://github.com/rapidsai/cudf/pull/10809)) [@bdice](https://github.com/bdice) +- Add a section to the docs that compares cuDF with Pandas ([#10796](https://github.com/rapidsai/cudf/pull/10796)) [@shwina](https://github.com/shwina) +- Mention 2 cpp-reviewer requirement in pull request template ([#10768](https://github.com/rapidsai/cudf/pull/10768)) [@davidwendt](https://github.com/davidwendt) +- Enable pydocstyle for all packages. ([#10759](https://github.com/rapidsai/cudf/pull/10759)) [@bdice](https://github.com/bdice) +- Enable pydocstyle rules involving quotes ([#10748](https://github.com/rapidsai/cudf/pull/10748)) [@vyasr](https://github.com/vyasr) +- Revise 10 minutes notebook. ([#10738](https://github.com/rapidsai/cudf/pull/10738)) [@bdice](https://github.com/bdice) +- Reorganize cuDF Python docs ([#10691](https://github.com/rapidsai/cudf/pull/10691)) [@shwina](https://github.com/shwina) +- Fix sphinx/jupyter heading issue in UDF notebook ([#10690](https://github.com/rapidsai/cudf/pull/10690)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Migrated user guide notebooks to MyST-NB and added sphinx extension ([#10685](https://github.com/rapidsai/cudf/pull/10685)) [@mmccarty](https://github.com/mmccarty) +- add data generation to benchmark documentation ([#10677](https://github.com/rapidsai/cudf/pull/10677)) [@karthikeyann](https://github.com/karthikeyann) +- Fix some docs build warnings ([#10674](https://github.com/rapidsai/cudf/pull/10674)) [@galipremsagar](https://github.com/galipremsagar) +- Update UDF notebook in User Guide. 
([#10668](https://github.com/rapidsai/cudf/pull/10668)) [@bdice](https://github.com/bdice) +- Improve User Guide docs ([#10663](https://github.com/rapidsai/cudf/pull/10663)) [@bdice](https://github.com/bdice) +- Fix some docstrings formatting ([#10660](https://github.com/rapidsai/cudf/pull/10660)) [@galipremsagar](https://github.com/galipremsagar) +- Remove implementation details from `apply` docstrings ([#10651](https://github.com/rapidsai/cudf/pull/10651)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Revise CONTRIBUTING.md ([#10644](https://github.com/rapidsai/cudf/pull/10644)) [@bdice](https://github.com/bdice) +- Add missing APIs to documentation. ([#10643](https://github.com/rapidsai/cudf/pull/10643)) [@bdice](https://github.com/bdice) +- Use cudf.read_json as documented API name. ([#10640](https://github.com/rapidsai/cudf/pull/10640)) [@bdice](https://github.com/bdice) +- Fix docstring section headings. ([#10639](https://github.com/rapidsai/cudf/pull/10639)) [@bdice](https://github.com/bdice) +- Document cudf.read_text and cudf.read_avro. 
([#10638](https://github.com/rapidsai/cudf/pull/10638)) [@bdice](https://github.com/bdice) +- Fix type-o in docstring for json_reader_options ([#10627](https://github.com/rapidsai/cudf/pull/10627)) [@dagardner-nv](https://github.com/dagardner-nv) +- Update guide to UDFs with notes about `Series.applymap` deprecation and related changes ([#10607](https://github.com/rapidsai/cudf/pull/10607)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Fix doxygen Modules page for cudf::lists::sequences ([#10561](https://github.com/rapidsai/cudf/pull/10561)) [@davidwendt](https://github.com/davidwendt) +- Add Replace Backreferences section to Regex Features page ([#10560](https://github.com/rapidsai/cudf/pull/10560)) [@davidwendt](https://github.com/davidwendt) +- Introduce deprecation policy to developer guide. ([#10252](https://github.com/rapidsai/cudf/pull/10252)) [@vyasr](https://github.com/vyasr) + +## 🚀 New Features + +- Enable Zstandard decompression only when all nvcomp integrations are enabled ([#10944](https://github.com/rapidsai/cudf/pull/10944)) [@vuule](https://github.com/vuule) +- Handle nested types in cudf::concatenate_rows() ([#10890](https://github.com/rapidsai/cudf/pull/10890)) [@nvdbaranec](https://github.com/nvdbaranec) +- Strong index types for equality comparator ([#10883](https://github.com/rapidsai/cudf/pull/10883)) [@ttnghia](https://github.com/ttnghia) +- Add parameters to control page size in Parquet writer ([#10882](https://github.com/rapidsai/cudf/pull/10882)) [@etseidl](https://github.com/etseidl) +- Support for Zstandard decompression in ORC reader ([#10873](https://github.com/rapidsai/cudf/pull/10873)) [@vuule](https://github.com/vuule) +- Use pre-built nvcomp 2.3 binaries by default ([#10851](https://github.com/rapidsai/cudf/pull/10851)) 
[@robertmaynard](https://github.com/robertmaynard) +- Support for Zstandard decompression in Parquet reader ([#10847](https://github.com/rapidsai/cudf/pull/10847)) [@vuule](https://github.com/vuule) +- Add JNI support for apply_boolean_mask ([#10812](https://github.com/rapidsai/cudf/pull/10812)) [@res-life](https://github.com/res-life) +- Segmented Min/Max for Fixed Point Types ([#10794](https://github.com/rapidsai/cudf/pull/10794)) [@isVoid](https://github.com/isVoid) +- Return per-file metadata from readers ([#10782](https://github.com/rapidsai/cudf/pull/10782)) [@vuule](https://github.com/vuule) +- Segmented `apply_boolean_mask` for `LIST` columns ([#10773](https://github.com/rapidsai/cudf/pull/10773)) [@mythrocks](https://github.com/mythrocks) +- Update `groupby::hash` to use new row operators for keys ([#10770](https://github.com/rapidsai/cudf/pull/10770)) [@PointKernel](https://github.com/PointKernel) +- Support purging non-empty null elements from LIST/STRING columns ([#10701](https://github.com/rapidsai/cudf/pull/10701)) [@mythrocks](https://github.com/mythrocks) +- Add `detail::hash_join` ([#10695](https://github.com/rapidsai/cudf/pull/10695)) [@PointKernel](https://github.com/PointKernel) +- Persist string statistics data across multiple calls to orc chunked write ([#10694](https://github.com/rapidsai/cudf/pull/10694)) [@hyperbolic2346](https://github.com/hyperbolic2346) +- Add `.list.astype()` to cast list leaves to specified dtype ([#10693](https://github.com/rapidsai/cudf/pull/10693)) [@shwina](https://github.com/shwina) +- JNI: Add generateListOffsets API ([#10683](https://github.com/rapidsai/cudf/pull/10683)) [@sperlingxx](https://github.com/sperlingxx) +- Support `args` in groupby apply ([#10682](https://github.com/rapidsai/cudf/pull/10682)) 
[@brandon-b-miller](https://github.com/brandon-b-miller) +- Enable segmented_gather in Java package ([#10669](https://github.com/rapidsai/cudf/pull/10669)) [@sperlingxx](https://github.com/sperlingxx) +- Add row hasher with nested column support ([#10641](https://github.com/rapidsai/cudf/pull/10641)) [@devavret](https://github.com/devavret) +- Add support for numeric_only in DataFrame._reduce ([#10629](https://github.com/rapidsai/cudf/pull/10629)) [@martinfalisse](https://github.com/martinfalisse) +- First step toward statistics in ORC files with chunked writes ([#10567](https://github.com/rapidsai/cudf/pull/10567)) [@hyperbolic2346](https://github.com/hyperbolic2346) +- Add support for struct columns to the random table generator ([#10566](https://github.com/rapidsai/cudf/pull/10566)) [@vuule](https://github.com/vuule) +- Enable passing a sequence for the `index` argument to `.list.get()` ([#10564](https://github.com/rapidsai/cudf/pull/10564)) [@shwina](https://github.com/shwina) +- Add python bindings for cudf::list::index_of ([#10549](https://github.com/rapidsai/cudf/pull/10549)) [@ChrisJar](https://github.com/ChrisJar) +- Add default= kwarg to .list.get() accessor method ([#10547](https://github.com/rapidsai/cudf/pull/10547)) [@shwina](https://github.com/shwina) +- Add `cudf.DataFrame.applymap` ([#10542](https://github.com/rapidsai/cudf/pull/10542)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Support nvComp 2.3 if local, otherwise use nvcomp 2.2 ([#10513](https://github.com/rapidsai/cudf/pull/10513)) [@robertmaynard](https://github.com/robertmaynard) +- Add column field ID control in parquet writer ([#10504](https://github.com/rapidsai/cudf/pull/10504)) [@PointKernel](https://github.com/PointKernel) +- Deprecate `Series.applymap` 
([#10497](https://github.com/rapidsai/cudf/pull/10497)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Add option to drop cache in cuIO benchmarks ([#10488](https://github.com/rapidsai/cudf/pull/10488)) [@vuule](https://github.com/vuule) +- move benchmark input generation in device in reduction nvbench ([#10486](https://github.com/rapidsai/cudf/pull/10486)) [@karthikeyann](https://github.com/karthikeyann) +- Support Segmented Min/Max Reduction on String Type ([#10447](https://github.com/rapidsai/cudf/pull/10447)) [@isVoid](https://github.com/isVoid) +- List element Equality comparator ([#10289](https://github.com/rapidsai/cudf/pull/10289)) [@devavret](https://github.com/devavret) +- Implement all methods of groupby rank aggregation in libcudf, python ([#9569](https://github.com/rapidsai/cudf/pull/9569)) [@karthikeyann](https://github.com/karthikeyann) +- Implement DataFrame.eval using libcudf ASTs ([#8022](https://github.com/rapidsai/cudf/pull/8022)) [@vyasr](https://github.com/vyasr) + +## 🛠️ Improvements + +- Use `conda` compilers in env file ([#10915](https://github.com/rapidsai/cudf/pull/10915)) [@galipremsagar](https://github.com/galipremsagar) +- Remove C style artifacts in cuIO ([#10886](https://github.com/rapidsai/cudf/pull/10886)) [@vuule](https://github.com/vuule) +- Rename `sliced_child` to `get_sliced_child`. 
([#10885](https://github.com/rapidsai/cudf/pull/10885)) [@bdice](https://github.com/bdice) +- Replace defaulted stream value for libcudf APIs that use NVCOMP ([#10877](https://github.com/rapidsai/cudf/pull/10877)) [@jbrennan333](https://github.com/jbrennan333) +- Add more unit tests for `cudf::distinct` for nested types with sliced input ([#10860](https://github.com/rapidsai/cudf/pull/10860)) [@ttnghia](https://github.com/ttnghia) +- Changing `list_view.cuh` to `list_view.hpp` ([#10854](https://github.com/rapidsai/cudf/pull/10854)) [@ttnghia](https://github.com/ttnghia) +- More error checking in `from_dlpack` ([#10850](https://github.com/rapidsai/cudf/pull/10850)) [@wence-](https://github.com/wence-) +- Cleanup regex compiler fixed quantifiers source ([#10843](https://github.com/rapidsai/cudf/pull/10843)) [@davidwendt](https://github.com/davidwendt) +- Adds the JNI call for Cuda.deviceSynchronize ([#10839](https://github.com/rapidsai/cudf/pull/10839)) [@abellina](https://github.com/abellina) +- Add missing cuda-python dependency to cudf ([#10833](https://github.com/rapidsai/cudf/pull/10833)) [@bdice](https://github.com/bdice) +- Change std::string parameters in cudf::strings APIs to std::string_view ([#10832](https://github.com/rapidsai/cudf/pull/10832)) [@davidwendt](https://github.com/davidwendt) +- Split up search.cu to improve compile time ([#10831](https://github.com/rapidsai/cudf/pull/10831)) [@davidwendt](https://github.com/davidwendt) +- Add tests for null scalar binaryops ([#10828](https://github.com/rapidsai/cudf/pull/10828)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Cleanup regex compile optimize functions ([#10825](https://github.com/rapidsai/cudf/pull/10825)) [@davidwendt](https://github.com/davidwendt) +- Use `ThreadedMotoServer` instead of 
`subprocess` in spinning up `s3` server ([#10822](https://github.com/rapidsai/cudf/pull/10822)) [@galipremsagar](https://github.com/galipremsagar) +- Import `NA` from `missing` rather than using `cudf.NA` everywhere ([#10821](https://github.com/rapidsai/cudf/pull/10821)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Refactor regex builtin character-class identifiers ([#10814](https://github.com/rapidsai/cudf/pull/10814)) [@davidwendt](https://github.com/davidwendt) +- Change pattern parameter for regex APIs from std::string to std::string_view ([#10810](https://github.com/rapidsai/cudf/pull/10810)) [@davidwendt](https://github.com/davidwendt) +- Make the JNI API to get list offsets as a view public. ([#10807](https://github.com/rapidsai/cudf/pull/10807)) [@revans2](https://github.com/revans2) +- Add cudf JNI docker build github action ([#10806](https://github.com/rapidsai/cudf/pull/10806)) [@pxLi](https://github.com/pxLi) +- Removed `mr` parameter from inplace bitmask operations ([#10805](https://github.com/rapidsai/cudf/pull/10805)) [@AtlantaPepsi](https://github.com/AtlantaPepsi) +- Refactor `cudf::contains`, renaming and switching parameters role ([#10802](https://github.com/rapidsai/cudf/pull/10802)) [@ttnghia](https://github.com/ttnghia) +- Handle closed property in IntervalDtype.from_pandas ([#10798](https://github.com/rapidsai/cudf/pull/10798)) [@wence-](https://github.com/wence-) +- Return weak orderings from `device_row_comparator`. 
([#10793](https://github.com/rapidsai/cudf/pull/10793)) [@rwlee](https://github.com/rwlee) +- Rework `Scalar` imports ([#10791](https://github.com/rapidsai/cudf/pull/10791)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Enable ccache for cudfjni build in Docker ([#10790](https://github.com/rapidsai/cudf/pull/10790)) [@gerashegalov](https://github.com/gerashegalov) +- Generic serialization of all column types ([#10784](https://github.com/rapidsai/cudf/pull/10784)) [@wence-](https://github.com/wence-) +- simplifying skiprows test in test_orc.py ([#10783](https://github.com/rapidsai/cudf/pull/10783)) [@hyperbolic2346](https://github.com/hyperbolic2346) +- Use column_views instead of column_device_views in binary operations. ([#10780](https://github.com/rapidsai/cudf/pull/10780)) [@bdice](https://github.com/bdice) +- Add struct utility functions. ([#10776](https://github.com/rapidsai/cudf/pull/10776)) [@bdice](https://github.com/bdice) +- Add multiple rows to subword tokenizer benchmark ([#10767](https://github.com/rapidsai/cudf/pull/10767)) [@davidwendt](https://github.com/davidwendt) +- Refactor host decompression in ORC reader ([#10764](https://github.com/rapidsai/cudf/pull/10764)) [@vuule](https://github.com/vuule) +- Flush output streams before creating a process to drop caches ([#10762](https://github.com/rapidsai/cudf/pull/10762)) [@vuule](https://github.com/vuule) +- Refactor binaryop/compiled/util.cpp ([#10756](https://github.com/rapidsai/cudf/pull/10756)) [@bdice](https://github.com/bdice) +- Use warp per string for long strings in cudf::strings::contains() ([#10739](https://github.com/rapidsai/cudf/pull/10739)) [@davidwendt](https://github.com/davidwendt) +- Use generator expressions in any/all functions. 
([#10736](https://github.com/rapidsai/cudf/pull/10736)) [@bdice](https://github.com/bdice) +- Use canonical "magic methods" (replace `x.__repr__()` with `repr(x)`). ([#10735](https://github.com/rapidsai/cudf/pull/10735)) [@bdice](https://github.com/bdice) +- Improve use of isinstance. ([#10734](https://github.com/rapidsai/cudf/pull/10734)) [@bdice](https://github.com/bdice) +- Rename tests from multiIndex to multiindex. ([#10732](https://github.com/rapidsai/cudf/pull/10732)) [@bdice](https://github.com/bdice) +- Two-table comparators with strong index types ([#10730](https://github.com/rapidsai/cudf/pull/10730)) [@bdice](https://github.com/bdice) +- Replace std::make_pair with std::pair (C++17 CTAD) ([#10727](https://github.com/rapidsai/cudf/pull/10727)) [@karthikeyann](https://github.com/karthikeyann) +- Use structured bindings instead of std::tie ([#10726](https://github.com/rapidsai/cudf/pull/10726)) [@karthikeyann](https://github.com/karthikeyann) +- Missing `f` prefix on f-strings fix ([#10721](https://github.com/rapidsai/cudf/pull/10721)) [@code-review-doctor](https://github.com/code-review-doctor) +- Add `max_file_size` parameter to chunked parquet dataset writer ([#10718](https://github.com/rapidsai/cudf/pull/10718)) [@galipremsagar](https://github.com/galipremsagar) +- Deprecate `merge_sorted`, change dask cudf usage to internal method ([#10713](https://github.com/rapidsai/cudf/pull/10713)) [@isVoid](https://github.com/isVoid) +- Prepare dask_cudf test_parquet.py for upcoming API changes ([#10709](https://github.com/rapidsai/cudf/pull/10709)) [@rjzamora](https://github.com/rjzamora) +- Remove or simplify various utility functions ([#10705](https://github.com/rapidsai/cudf/pull/10705)) [@vyasr](https://github.com/vyasr) +- Allow building arrow with parquet and not 
python ([#10702](https://github.com/rapidsai/cudf/pull/10702)) [@revans2](https://github.com/revans2) +- Partial cuIO GPU decompression refactor ([#10699](https://github.com/rapidsai/cudf/pull/10699)) [@vuule](https://github.com/vuule) +- Cython API refactor: `merge.pyx` ([#10698](https://github.com/rapidsai/cudf/pull/10698)) [@isVoid](https://github.com/isVoid) +- Fix random string data length to become variable ([#10697](https://github.com/rapidsai/cudf/pull/10697)) [@galipremsagar](https://github.com/galipremsagar) +- Add bindings for index_of with column search key ([#10696](https://github.com/rapidsai/cudf/pull/10696)) [@ChrisJar](https://github.com/ChrisJar) +- Deprecate index merging ([#10689](https://github.com/rapidsai/cudf/pull/10689)) [@vyasr](https://github.com/vyasr) +- Remove cudf::strings::string namespace ([#10684](https://github.com/rapidsai/cudf/pull/10684)) [@davidwendt](https://github.com/davidwendt) +- Standardize imports. ([#10680](https://github.com/rapidsai/cudf/pull/10680)) [@bdice](https://github.com/bdice) +- Standardize usage of collections.abc. 
([#10679](https://github.com/rapidsai/cudf/pull/10679)) [@bdice](https://github.com/bdice) +- Cython API Refactor: `transpose.pyx`, `sort.pyx` ([#10675](https://github.com/rapidsai/cudf/pull/10675)) [@isVoid](https://github.com/isVoid) +- Add device_memory_resource parameter to create_string_vector_from_column ([#10673](https://github.com/rapidsai/cudf/pull/10673)) [@davidwendt](https://github.com/davidwendt) +- Split up mixed-join kernels source files ([#10671](https://github.com/rapidsai/cudf/pull/10671)) [@davidwendt](https://github.com/davidwendt) +- Use `std::filesystem` for temporary directory location and deletion ([#10664](https://github.com/rapidsai/cudf/pull/10664)) [@vuule](https://github.com/vuule) +- cleanup benchmark includes ([#10661](https://github.com/rapidsai/cudf/pull/10661)) [@karthikeyann](https://github.com/karthikeyann) +- Use upstream clang-format pre-commit hook. ([#10659](https://github.com/rapidsai/cudf/pull/10659)) [@bdice](https://github.com/bdice) +- Clean up C++ includes to use <> instead of "". ([#10658](https://github.com/rapidsai/cudf/pull/10658)) [@bdice](https://github.com/bdice) +- Handle RuntimeError thrown by CUDA Python in `validate_setup` ([#10653](https://github.com/rapidsai/cudf/pull/10653)) [@shwina](https://github.com/shwina) +- Rework JNI CMake to leverage rapids_find_package ([#10649](https://github.com/rapidsai/cudf/pull/10649)) [@jlowe](https://github.com/jlowe) +- Use conda to build python packages during GPU tests ([#10648](https://github.com/rapidsai/cudf/pull/10648)) [@Ethyling](https://github.com/Ethyling) +- Deprecate various functions that don't need to be defined for Index. ([#10647](https://github.com/rapidsai/cudf/pull/10647)) [@vyasr](https://github.com/vyasr) +- Update pinning to allow newer CMake versions. 
([#10646](https://github.com/rapidsai/cudf/pull/10646)) [@vyasr](https://github.com/vyasr) +- Bump hadoop-common from 3.1.4 to 3.2.3 in /java ([#10645](https://github.com/rapidsai/cudf/pull/10645)) [@dependabot[bot]](https://github.com/dependabot[bot]) +- Remove `concurrent_unordered_multimap`. ([#10642](https://github.com/rapidsai/cudf/pull/10642)) [@bdice](https://github.com/bdice) +- Improve parquet dictionary encoding ([#10635](https://github.com/rapidsai/cudf/pull/10635)) [@PointKernel](https://github.com/PointKernel) +- Improve cudf::cuda_error ([#10630](https://github.com/rapidsai/cudf/pull/10630)) [@sperlingxx](https://github.com/sperlingxx) +- Add support for null and non-numeric types in Series.diff and DataFrame.diff ([#10625](https://github.com/rapidsai/cudf/pull/10625)) [@Matt711](https://github.com/Matt711) +- Branch 22.06 merge 22.04 ([#10624](https://github.com/rapidsai/cudf/pull/10624)) [@vyasr](https://github.com/vyasr) +- Unpin `dask` & `distributed` for development ([#10623](https://github.com/rapidsai/cudf/pull/10623)) [@galipremsagar](https://github.com/galipremsagar) +- Slightly improve accuracy of stod in to_floats ([#10622](https://github.com/rapidsai/cudf/pull/10622)) [@davidwendt](https://github.com/davidwendt) +- Allow libcudfjni to be built as a static library ([#10619](https://github.com/rapidsai/cudf/pull/10619)) [@jlowe](https://github.com/jlowe) +- Change stack-based regex state data to use global memory ([#10600](https://github.com/rapidsai/cudf/pull/10600)) [@davidwendt](https://github.com/davidwendt) +- Resolve Forward merging of `branch-22.04` into `branch-22.06` ([#10598](https://github.com/rapidsai/cudf/pull/10598)) [@galipremsagar](https://github.com/galipremsagar) +- KvikIO as an alternative GDS backend 
([#10593](https://github.com/rapidsai/cudf/pull/10593)) [@madsbk](https://github.com/madsbk) +- Rename CUDA_TRY macro to CUDF_CUDA_TRY, rename CHECK_CUDA macro to CUDF_CHECK_CUDA. ([#10589](https://github.com/rapidsai/cudf/pull/10589)) [@bdice](https://github.com/bdice) +- Upgrade `cudf` to support `pandas` 1.4.x versions ([#10584](https://github.com/rapidsai/cudf/pull/10584)) [@galipremsagar](https://github.com/galipremsagar) +- Refactor binary ops for timedelta and datetime columns ([#10581](https://github.com/rapidsai/cudf/pull/10581)) [@vyasr](https://github.com/vyasr) +- Refactor cudf::strings::count_re API to use count_matches utility ([#10580](https://github.com/rapidsai/cudf/pull/10580)) [@davidwendt](https://github.com/davidwendt) +- Update `Programming Language :: Python` Versions to 3.8 & 3.9 ([#10579](https://github.com/rapidsai/cudf/pull/10579)) [@madsbk](https://github.com/madsbk) +- Automate Java cudf jar build with statically linked dependencies ([#10578](https://github.com/rapidsai/cudf/pull/10578)) [@gerashegalov](https://github.com/gerashegalov) +- Add patch for thrust-cub 1.16 to fix sort compile times ([#10577](https://github.com/rapidsai/cudf/pull/10577)) [@davidwendt](https://github.com/davidwendt) +- Move binop methods from Frame to IndexedFrame and standardize the docstring ([#10576](https://github.com/rapidsai/cudf/pull/10576)) [@vyasr](https://github.com/vyasr) +- Cleanup libcudf strings regex classes ([#10573](https://github.com/rapidsai/cudf/pull/10573)) [@davidwendt](https://github.com/davidwendt) +- Simplify preprocessing of arguments for DataFrame binops ([#10563](https://github.com/rapidsai/cudf/pull/10563)) [@vyasr](https://github.com/vyasr) +- Reduce kernel calls to build strings findall results ([#10559](https://github.com/rapidsai/cudf/pull/10559)) 
[@davidwendt](https://github.com/davidwendt) +- Forward-merge branch-22.04 to branch-22.06 ([#10557](https://github.com/rapidsai/cudf/pull/10557)) [@bdice](https://github.com/bdice) +- Update strings contains benchmark to measure varying match rates ([#10555](https://github.com/rapidsai/cudf/pull/10555)) [@davidwendt](https://github.com/davidwendt) +- JNI: throw CUDA errors more specifically ([#10551](https://github.com/rapidsai/cudf/pull/10551)) [@sperlingxx](https://github.com/sperlingxx) +- Enable building static libs ([#10545](https://github.com/rapidsai/cudf/pull/10545)) [@trxcllnt](https://github.com/trxcllnt) +- Remove pip requirements files. ([#10543](https://github.com/rapidsai/cudf/pull/10543)) [@bdice](https://github.com/bdice) +- Remove Click pinnings that are unnecessary after upgrading black. ([#10541](https://github.com/rapidsai/cudf/pull/10541)) [@vyasr](https://github.com/vyasr) +- Refactor `memory_usage` to improve performance ([#10537](https://github.com/rapidsai/cudf/pull/10537)) [@galipremsagar](https://github.com/galipremsagar) +- Adjust the valid range of group index for replace_with_backrefs ([#10530](https://github.com/rapidsai/cudf/pull/10530)) [@sperlingxx](https://github.com/sperlingxx) +- add accidentally removed comment. ([#10526](https://github.com/rapidsai/cudf/pull/10526)) [@vyasr](https://github.com/vyasr) +- Update conda environment. 
([#10525](https://github.com/rapidsai/cudf/pull/10525)) [@vyasr](https://github.com/vyasr) +- Remove ColumnBase.__getitem__ ([#10516](https://github.com/rapidsai/cudf/pull/10516)) [@vyasr](https://github.com/vyasr) +- Optimize `left_semi_join` by materializing the gather mask ([#10511](https://github.com/rapidsai/cudf/pull/10511)) [@cheinger](https://github.com/cheinger) +- Define proper binary operation APIs for columns ([#10509](https://github.com/rapidsai/cudf/pull/10509)) [@vyasr](https://github.com/vyasr) +- Upgrade `arrow-cpp` & `pyarrow` to `7.0.0` ([#10503](https://github.com/rapidsai/cudf/pull/10503)) [@galipremsagar](https://github.com/galipremsagar) +- Update to Thrust 1.16 ([#10489](https://github.com/rapidsai/cudf/pull/10489)) [@bdice](https://github.com/bdice) +- Namespace/Docstring Fixes for Reduction ([#10471](https://github.com/rapidsai/cudf/pull/10471)) [@isVoid](https://github.com/isVoid) +- Update cudfjni 22.06.0-SNAPSHOT ([#10467](https://github.com/rapidsai/cudf/pull/10467)) [@pxLi](https://github.com/pxLi) +- Use Lists of Columns for Various Files ([#10463](https://github.com/rapidsai/cudf/pull/10463)) [@isVoid](https://github.com/isVoid) +- Additional refactoring of hash functions ([#10462](https://github.com/rapidsai/cudf/pull/10462)) [@bdice](https://github.com/bdice) +- Fix Series.str.findall behavior for expand=False. ([#10459](https://github.com/rapidsai/cudf/pull/10459)) [@bdice](https://github.com/bdice) +- Remove deprecated code. ([#10450](https://github.com/rapidsai/cudf/pull/10450)) [@vyasr](https://github.com/vyasr) +- Update cmake-format version. 
([#10440](https://github.com/rapidsai/cudf/pull/10440)) [@vyasr](https://github.com/vyasr) +- Consolidate C++ `conda` recipes and add `libcudf-tests` package ([#10326](https://github.com/rapidsai/cudf/pull/10326)) [@ajschmidt8](https://github.com/ajschmidt8) +- Use conda compilers ([#10275](https://github.com/rapidsai/cudf/pull/10275)) [@Ethyling](https://github.com/Ethyling) +- Add row bitmask as a `detail::hash_join` member ([#10248](https://github.com/rapidsai/cudf/pull/10248)) [@PointKernel](https://github.com/PointKernel) + +# cuDF 22.04.00 (6 Apr 2022) + +## 🚨 Breaking Changes + +- Drop unsupported method argument from nunique and distinct_count. ([#10411](https://github.com/rapidsai/cudf/pull/10411)) [@bdice](https://github.com/bdice) +- Refactor stream compaction APIs ([#10370](https://github.com/rapidsai/cudf/pull/10370)) [@PointKernel](https://github.com/PointKernel) +- Add scan_aggregation and reduce_aggregation derived types. 
([#10357](https://github.com/rapidsai/cudf/pull/10357)) [@nvdbaranec](https://github.com/nvdbaranec) +- Avoid `decimal` type narrowing for decimal binops ([#10299](https://github.com/rapidsai/cudf/pull/10299)) [@galipremsagar](https://github.com/galipremsagar) +- Rewrites `sample` API ([#10262](https://github.com/rapidsai/cudf/pull/10262)) [@isVoid](https://github.com/isVoid) +- Remove probe-time null equality parameters in `cudf::hash_join` ([#10260](https://github.com/rapidsai/cudf/pull/10260)) [@PointKernel](https://github.com/PointKernel) +- Enable proper `Index` round-tripping in `orc` reader and writer ([#10170](https://github.com/rapidsai/cudf/pull/10170)) [@galipremsagar](https://github.com/galipremsagar) +- Add JNI for `strings::split_re` and `strings::split_record_re` ([#10139](https://github.com/rapidsai/cudf/pull/10139)) [@ttnghia](https://github.com/ttnghia) +- Change cudf::strings::find_multiple to return a lists column ([#10134](https://github.com/rapidsai/cudf/pull/10134)) [@davidwendt](https://github.com/davidwendt) +- Remove the option to completely disable decimal128 columns in the ORC reader ([#10127](https://github.com/rapidsai/cudf/pull/10127)) [@vuule](https://github.com/vuule) +- Remove deprecated code ([#10124](https://github.com/rapidsai/cudf/pull/10124)) [@vyasr](https://github.com/vyasr) +- Update gpu_utils.py to reflect current CUDA support. ([#10113](https://github.com/rapidsai/cudf/pull/10113)) [@bdice](https://github.com/bdice) +- Optimize compaction operations ([#10030](https://github.com/rapidsai/cudf/pull/10030)) [@PointKernel](https://github.com/PointKernel) +- Remove deprecated method Series.set_index. 
([#9945](https://github.com/rapidsai/cudf/pull/9945)) [@bdice](https://github.com/bdice) +- Add cudf::strings::findall_record API ([#9911](https://github.com/rapidsai/cudf/pull/9911)) [@davidwendt](https://github.com/davidwendt) +- Upgrade `arrow` & `pyarrow` to `6.0.1` ([#9686](https://github.com/rapidsai/cudf/pull/9686)) [@galipremsagar](https://github.com/galipremsagar) + +## 🐛 Bug Fixes + +- Fix an issue with tdigest merge aggregations. ([#10506](https://github.com/rapidsai/cudf/pull/10506)) [@nvdbaranec](https://github.com/nvdbaranec) +- Batch of fixes for index overflows in grid stride loops. ([#10448](https://github.com/rapidsai/cudf/pull/10448)) [@nvdbaranec](https://github.com/nvdbaranec) +- Update dask_cudf imports to be compatible with latest dask ([#10442](https://github.com/rapidsai/cudf/pull/10442)) [@rlratzel](https://github.com/rlratzel) +- Fix for integer overflow in contiguous-split ([#10437](https://github.com/rapidsai/cudf/pull/10437)) [@jbrennan333](https://github.com/jbrennan333) +- Fix has_null predicate for drop_list_duplicates on nested structs ([#10436](https://github.com/rapidsai/cudf/pull/10436)) [@sperlingxx](https://github.com/sperlingxx) +- Fix empty reduce with List output and non-List input ([#10435](https://github.com/rapidsai/cudf/pull/10435)) [@sperlingxx](https://github.com/sperlingxx) +- Fix `list` and `struct` meta generation issue in `dask-cudf` ([#10434](https://github.com/rapidsai/cudf/pull/10434)) [@galipremsagar](https://github.com/galipremsagar) +- Fix error in `cudf.to_numeric` when a `bool` input is passed ([#10431](https://github.com/rapidsai/cudf/pull/10431)) [@galipremsagar](https://github.com/galipremsagar) +- Support cupy array in `quantile` input ([#10429](https://github.com/rapidsai/cudf/pull/10429)) 
[@galipremsagar](https://github.com/galipremsagar) +- Fix benchmarks to work with new aggregation types ([#10428](https://github.com/rapidsai/cudf/pull/10428)) [@davidwendt](https://github.com/davidwendt) +- Fix cudf::shift to handle offset greater than column size ([#10414](https://github.com/rapidsai/cudf/pull/10414)) [@davidwendt](https://github.com/davidwendt) +- Fix lifespan of the temporary directory that holds cuFile configuration file ([#10403](https://github.com/rapidsai/cudf/pull/10403)) [@vuule](https://github.com/vuule) +- Fix error thrown in compiled-binaryop benchmark ([#10398](https://github.com/rapidsai/cudf/pull/10398)) [@davidwendt](https://github.com/davidwendt) +- Limiting async allocator using alignment of 512 ([#10395](https://github.com/rapidsai/cudf/pull/10395)) [@rongou](https://github.com/rongou) +- Include <optional> in multibyte split. ([#10385](https://github.com/rapidsai/cudf/pull/10385)) [@bdice](https://github.com/bdice) +- Fix issue with column and scalar re-assignment ([#10377](https://github.com/rapidsai/cudf/pull/10377)) [@galipremsagar](https://github.com/galipremsagar) +- Fix floating point data generation in benchmarks ([#10372](https://github.com/rapidsai/cudf/pull/10372)) [@vuule](https://github.com/vuule) +- Avoid overflow in fused_concatenate_kernel output_index ([#10344](https://github.com/rapidsai/cudf/pull/10344)) [@abellina](https://github.com/abellina) +- Remove is_relationally_comparable for table device views ([#10342](https://github.com/rapidsai/cudf/pull/10342)) [@davidwendt](https://github.com/davidwendt) +- Fix debug compile error in device_span to column_view conversion ([#10331](https://github.com/rapidsai/cudf/pull/10331)) [@davidwendt](https://github.com/davidwendt) +- Add Pascal support to JCUDF transcode (row_conversion) 
([#10329](https://github.com/rapidsai/cudf/pull/10329)) [@mythrocks](https://github.com/mythrocks) +- Fix `std::bad_alloc` exception due to JIT reserving a huge buffer ([#10317](https://github.com/rapidsai/cudf/pull/10317)) [@ttnghia](https://github.com/ttnghia) +- Fixes up the overflowed fixed-point round on nullable column ([#10316](https://github.com/rapidsai/cudf/pull/10316)) [@sperlingxx](https://github.com/sperlingxx) +- Fix DataFrame slicing issues for empty cases ([#10310](https://github.com/rapidsai/cudf/pull/10310)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Fix documentation issues ([#10307](https://github.com/rapidsai/cudf/pull/10307)) [@ajschmidt8](https://github.com/ajschmidt8) +- Allow Java bindings to use default decimal precisions when writing columns ([#10276](https://github.com/rapidsai/cudf/pull/10276)) [@sperlingxx](https://github.com/sperlingxx) +- Fix incorrect slicing of GDS read/write calls ([#10274](https://github.com/rapidsai/cudf/pull/10274)) [@vuule](https://github.com/vuule) +- Fix out-of-memory error in compiled-binaryop benchmark ([#10269](https://github.com/rapidsai/cudf/pull/10269)) [@davidwendt](https://github.com/davidwendt) +- Add tests of reflected ufuncs and fix behavior of logical reflected ufuncs ([#10261](https://github.com/rapidsai/cudf/pull/10261)) [@vyasr](https://github.com/vyasr) +- Remove probe-time null equality parameters in `cudf::hash_join` ([#10260](https://github.com/rapidsai/cudf/pull/10260)) [@PointKernel](https://github.com/PointKernel) +- Fix out-of-memory error in UrlDecode benchmark ([#10258](https://github.com/rapidsai/cudf/pull/10258)) [@davidwendt](https://github.com/davidwendt) +- Fix groupby reductions that perform operations on source type instead of target type 
([#10250](https://github.com/rapidsai/cudf/pull/10250)) [@ttnghia](https://github.com/ttnghia) +- Fix small leak in explode ([#10245](https://github.com/rapidsai/cudf/pull/10245)) [@revans2](https://github.com/revans2) +- Yet another small JNI memory leak ([#10238](https://github.com/rapidsai/cudf/pull/10238)) [@revans2](https://github.com/revans2) +- Fix regex octal parsing to limit to 3 characters ([#10233](https://github.com/rapidsai/cudf/pull/10233)) [@davidwendt](https://github.com/davidwendt) +- Fix string to decimal128 conversion handling large exponents ([#10231](https://github.com/rapidsai/cudf/pull/10231)) [@davidwendt](https://github.com/davidwendt) +- Fix JNI leak on copy to device ([#10229](https://github.com/rapidsai/cudf/pull/10229)) [@revans2](https://github.com/revans2) +- Fix the data generator element size for decimal types ([#10225](https://github.com/rapidsai/cudf/pull/10225)) [@vuule](https://github.com/vuule) +- Fix `decimal` metadata in parquet writer ([#10224](https://github.com/rapidsai/cudf/pull/10224)) [@galipremsagar](https://github.com/galipremsagar) +- Fix strings handling of hex in regex pattern ([#10220](https://github.com/rapidsai/cudf/pull/10220)) [@davidwendt](https://github.com/davidwendt) +- Fix docs builds ([#10216](https://github.com/rapidsai/cudf/pull/10216)) [@ajschmidt8](https://github.com/ajschmidt8) +- Fix a leftover _has_nulls change from Nullate ([#10211](https://github.com/rapidsai/cudf/pull/10211)) [@devavret](https://github.com/devavret) +- Fix bitmask of the output for JNI of `lists::drop_list_duplicates` ([#10210](https://github.com/rapidsai/cudf/pull/10210)) [@ttnghia](https://github.com/ttnghia) +- Fix compile error in `binaryop/compiled/util.cpp` ([#10209](https://github.com/rapidsai/cudf/pull/10209)) 
[@ttnghia](https://github.com/ttnghia) +- Skip ORC and Parquet readers' benchmark cases that are not currently supported ([#10194](https://github.com/rapidsai/cudf/pull/10194)) [@vuule](https://github.com/vuule) +- Fix JNI leak of a cudf::column_view native class. ([#10171](https://github.com/rapidsai/cudf/pull/10171)) [@revans2](https://github.com/revans2) +- Enable proper `Index` round-tripping in `orc` reader and writer ([#10170](https://github.com/rapidsai/cudf/pull/10170)) [@galipremsagar](https://github.com/galipremsagar) +- Convert Column Name to String Before Using Struct Column Factory ([#10156](https://github.com/rapidsai/cudf/pull/10156)) [@isVoid](https://github.com/isVoid) +- Preserve the correct `ListDtype` while creating an identical empty column ([#10151](https://github.com/rapidsai/cudf/pull/10151)) [@galipremsagar](https://github.com/galipremsagar) +- benchmark fixture - static object pointer fix ([#10145](https://github.com/rapidsai/cudf/pull/10145)) [@karthikeyann](https://github.com/karthikeyann) +- Fix UDF Caching ([#10133](https://github.com/rapidsai/cudf/pull/10133)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Raise duplicate column error in `DataFrame.rename` ([#10120](https://github.com/rapidsai/cudf/pull/10120)) [@galipremsagar](https://github.com/galipremsagar) +- Fix flaky memory usage test by guaranteeing array size. 
([#10114](https://github.com/rapidsai/cudf/pull/10114)) [@vyasr](https://github.com/vyasr) +- Encode values from python callback for C++ ([#10103](https://github.com/rapidsai/cudf/pull/10103)) [@jdye64](https://github.com/jdye64) +- Add check for regex instructions causing an infinite-loop ([#10095](https://github.com/rapidsai/cudf/pull/10095)) [@davidwendt](https://github.com/davidwendt) +- Remove metadata singleton from nvtext normalizer ([#10090](https://github.com/rapidsai/cudf/pull/10090)) [@davidwendt](https://github.com/davidwendt) +- Column equality testing fixes ([#10011](https://github.com/rapidsai/cudf/pull/10011)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Pin libcudf runtime dependency for cudf / libcudf-kafka nightlies ([#9847](https://github.com/rapidsai/cudf/pull/9847)) [@charlesbluca](https://github.com/charlesbluca) + +## 📖 Documentation + +- Fix documentation for DataFrame.corr and Series.corr. ([#10493](https://github.com/rapidsai/cudf/pull/10493)) [@bdice](https://github.com/bdice) +- Add `cut` to API docs ([#10479](https://github.com/rapidsai/cudf/pull/10479)) [@shwina](https://github.com/shwina) +- Remove documentation for methods removed in #10124. ([#10366](https://github.com/rapidsai/cudf/pull/10366)) [@bdice](https://github.com/bdice) +- Fix documentation issues ([#10306](https://github.com/rapidsai/cudf/pull/10306)) [@ajschmidt8](https://github.com/ajschmidt8) +- Fix `fixed_point` binary operation documentation ([#10198](https://github.com/rapidsai/cudf/pull/10198)) [@codereport](https://github.com/codereport) +- Remove cleaned up methods from docs ([#10189](https://github.com/rapidsai/cudf/pull/10189)) [@galipremsagar](https://github.com/galipremsagar) +- Update developer guide to recommend no default stream parameter. 
([#10136](https://github.com/rapidsai/cudf/pull/10136)) [@bdice](https://github.com/bdice) +- Update benchmarking guide to use NVBench. ([#10093](https://github.com/rapidsai/cudf/pull/10093)) [@bdice](https://github.com/bdice) + +## 🚀 New Features + +- Add StringIO support to read_text ([#10465](https://github.com/rapidsai/cudf/pull/10465)) [@cwharris](https://github.com/cwharris) +- Add support for tdigest and merge_tdigest aggregations through cudf::reduce ([#10433](https://github.com/rapidsai/cudf/pull/10433)) [@nvdbaranec](https://github.com/nvdbaranec) +- JNI support for Collect Ops in Reduction ([#10427](https://github.com/rapidsai/cudf/pull/10427)) [@sperlingxx](https://github.com/sperlingxx) +- Enable read_text with dask_cudf using byte_range ([#10407](https://github.com/rapidsai/cudf/pull/10407)) [@ChrisJar](https://github.com/ChrisJar) +- Add `cudf::stable_sort_by_key` ([#10387](https://github.com/rapidsai/cudf/pull/10387)) [@PointKernel](https://github.com/PointKernel) +- Implement `maps_column_view` abstraction over `LIST<STRUCT<K,V>>` ([#10380](https://github.com/rapidsai/cudf/pull/10380)) [@mythrocks](https://github.com/mythrocks) +- Support Java bindings for Avro reader ([#10373](https://github.com/rapidsai/cudf/pull/10373)) [@HaoYang670](https://github.com/HaoYang670) +- Refactor stream compaction APIs ([#10370](https://github.com/rapidsai/cudf/pull/10370)) [@PointKernel](https://github.com/PointKernel) +- Support collect aggregations in reduction ([#10353](https://github.com/rapidsai/cudf/pull/10353)) [@sperlingxx](https://github.com/sperlingxx) +- Refactor array_ufunc for Index and unify across all classes ([#10346](https://github.com/rapidsai/cudf/pull/10346)) [@vyasr](https://github.com/vyasr) +- Add JNI for extract_list_element with index column 
([#10341](https://github.com/rapidsai/cudf/pull/10341)) [@firestarman](https://github.com/firestarman) +- Support `min` and `max` operations for structs in rolling window ([#10332](https://github.com/rapidsai/cudf/pull/10332)) [@ttnghia](https://github.com/ttnghia) +- Add device create_sequence_table for benchmarks ([#10300](https://github.com/rapidsai/cudf/pull/10300)) [@karthikeyann](https://github.com/karthikeyann) +- Enable numpy ufuncs for DataFrame ([#10287](https://github.com/rapidsai/cudf/pull/10287)) [@vyasr](https://github.com/vyasr) +- move input generation for json benchmark to device ([#10281](https://github.com/rapidsai/cudf/pull/10281)) [@karthikeyann](https://github.com/karthikeyann) +- move input generation for type dispatcher benchmark to device ([#10280](https://github.com/rapidsai/cudf/pull/10280)) [@karthikeyann](https://github.com/karthikeyann) +- move input generation for copy benchmark to device ([#10279](https://github.com/rapidsai/cudf/pull/10279)) [@karthikeyann](https://github.com/karthikeyann) +- generate url decode benchmark input in device ([#10278](https://github.com/rapidsai/cudf/pull/10278)) [@karthikeyann](https://github.com/karthikeyann) +- device input generation in join bench ([#10277](https://github.com/rapidsai/cudf/pull/10277)) [@karthikeyann](https://github.com/karthikeyann) +- Add nvtext::byte_pair_encoding API ([#10270](https://github.com/rapidsai/cudf/pull/10270)) [@davidwendt](https://github.com/davidwendt) +- Prevent internal usage of expensive APIs ([#10263](https://github.com/rapidsai/cudf/pull/10263)) [@vyasr](https://github.com/vyasr) +- Column to JCUDF row for tables with strings ([#10235](https://github.com/rapidsai/cudf/pull/10235)) [@hyperbolic2346](https://github.com/hyperbolic2346) +- Support `percent_rank()` aggregation 
([#10227](https://github.com/rapidsai/cudf/pull/10227)) [@mythrocks](https://github.com/mythrocks) +- Refactor Series.__array_ufunc__ ([#10217](https://github.com/rapidsai/cudf/pull/10217)) [@vyasr](https://github.com/vyasr) +- Reduce pytest runtime ([#10203](https://github.com/rapidsai/cudf/pull/10203)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Add regex flags parameter to python cudf strings split ([#10185](https://github.com/rapidsai/cudf/pull/10185)) [@davidwendt](https://github.com/davidwendt) +- Support for `MOD`, `PMOD` and `PYMOD` for `decimal32/64/128` ([#10179](https://github.com/rapidsai/cudf/pull/10179)) [@codereport](https://github.com/codereport) +- Adding string row size iterator for row to column and column to row conversion ([#10157](https://github.com/rapidsai/cudf/pull/10157)) [@hyperbolic2346](https://github.com/hyperbolic2346) +- Add file size counter to cuIO benchmarks ([#10154](https://github.com/rapidsai/cudf/pull/10154)) [@vuule](https://github.com/vuule) +- byte_range support for multibyte_split/read_text ([#10150](https://github.com/rapidsai/cudf/pull/10150)) [@cwharris](https://github.com/cwharris) +- Add JNI for `strings::split_re` and `strings::split_record_re` ([#10139](https://github.com/rapidsai/cudf/pull/10139)) [@ttnghia](https://github.com/ttnghia) +- Add `maxSplit` parameter to Java binding for `strings:split` ([#10137](https://github.com/rapidsai/cudf/pull/10137)) [@ttnghia](https://github.com/ttnghia) +- Add libcudf strings split API that accepts regex pattern ([#10128](https://github.com/rapidsai/cudf/pull/10128)) [@davidwendt](https://github.com/davidwendt) +- generate benchmark input in device ([#10109](https://github.com/rapidsai/cudf/pull/10109)) [@karthikeyann](https://github.com/karthikeyann) +- Avoid 
`nan_as_null` op if `nan_count` is 0 ([#10082](https://github.com/rapidsai/cudf/pull/10082)) [@galipremsagar](https://github.com/galipremsagar) +- Add Dataframe and Index nunique ([#10077](https://github.com/rapidsai/cudf/pull/10077)) [@martinfalisse](https://github.com/martinfalisse) +- Support nanosecond timestamps in parquet ([#10063](https://github.com/rapidsai/cudf/pull/10063)) [@PointKernel](https://github.com/PointKernel) +- Java bindings for mixed semi and anti joins ([#10040](https://github.com/rapidsai/cudf/pull/10040)) [@jlowe](https://github.com/jlowe) +- Implement mixed equality/conditional semi/anti joins ([#10037](https://github.com/rapidsai/cudf/pull/10037)) [@vyasr](https://github.com/vyasr) +- Optimize compaction operations ([#10030](https://github.com/rapidsai/cudf/pull/10030)) [@PointKernel](https://github.com/PointKernel) +- Support `args=` in `Series.apply` ([#9982](https://github.com/rapidsai/cudf/pull/9982)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Add cudf::strings::findall_record API ([#9911](https://github.com/rapidsai/cudf/pull/9911)) [@davidwendt](https://github.com/davidwendt) +- Add covariance for sort groupby (python) ([#9889](https://github.com/rapidsai/cudf/pull/9889)) [@mayankanand007](https://github.com/mayankanand007) +- Implement DataFrame diff() ([#9817](https://github.com/rapidsai/cudf/pull/9817)) [@skirui-source](https://github.com/skirui-source) +- Implement DataFrame pct_change ([#9805](https://github.com/rapidsai/cudf/pull/9805)) [@skirui-source](https://github.com/skirui-source) +- Support segmented reductions and null mask reductions ([#9621](https://github.com/rapidsai/cudf/pull/9621)) [@isVoid](https://github.com/isVoid) +- Add 'spearman' correlation method for `dataframe.corr` and `series.corr` 
([#7141](https://github.com/rapidsai/cudf/pull/7141)) [@dominicshanshan](https://github.com/dominicshanshan) + +## 🛠️ Improvements + +- Add `scipy` skip for a test ([#10502](https://github.com/rapidsai/cudf/pull/10502)) [@galipremsagar](https://github.com/galipremsagar) +- Temporarily disable new `ops-bot` functionality ([#10496](https://github.com/rapidsai/cudf/pull/10496)) [@ajschmidt8](https://github.com/ajschmidt8) +- Include <cstddef> to fix compilation of parquet reader on GCC 11. ([#10483](https://github.com/rapidsai/cudf/pull/10483)) [@bdice](https://github.com/bdice) +- Pin `dask` and `distributed` ([#10481](https://github.com/rapidsai/cudf/pull/10481)) [@galipremsagar](https://github.com/galipremsagar) +- MD5 refactoring. ([#10445](https://github.com/rapidsai/cudf/pull/10445)) [@bdice](https://github.com/bdice) +- Remove or split up Frame methods that use the index ([#10439](https://github.com/rapidsai/cudf/pull/10439)) [@vyasr](https://github.com/vyasr) +- Centralization of tdigest aggregation code. 
([#10422](https://github.com/rapidsai/cudf/pull/10422)) [@nvdbaranec](https://github.com/nvdbaranec) +- Simplify column binary operations ([#10421](https://github.com/rapidsai/cudf/pull/10421)) [@vyasr](https://github.com/vyasr) +- Add `.github/ops-bot.yaml` config file ([#10420](https://github.com/rapidsai/cudf/pull/10420)) [@ajschmidt8](https://github.com/ajschmidt8) +- Use list of columns for methods in `Groupby.pyx` ([#10419](https://github.com/rapidsai/cudf/pull/10419)) [@isVoid](https://github.com/isVoid) +- Remove warnings in `test_timedelta.py` ([#10418](https://github.com/rapidsai/cudf/pull/10418)) [@galipremsagar](https://github.com/galipremsagar) +- Fix some warnings in `test_parquet.py` ([#10416](https://github.com/rapidsai/cudf/pull/10416)) [@galipremsagar](https://github.com/galipremsagar) +- JNI support for segmented reduce ([#10413](https://github.com/rapidsai/cudf/pull/10413)) [@revans2](https://github.com/revans2) +- Clean up null mask after purging null entries ([#10412](https://github.com/rapidsai/cudf/pull/10412)) [@sperlingxx](https://github.com/sperlingxx) +- Drop unsupported method argument from nunique and distinct_count. ([#10411](https://github.com/rapidsai/cudf/pull/10411)) [@bdice](https://github.com/bdice) +- Use str instead of builtins.str. ([#10410](https://github.com/rapidsai/cudf/pull/10410)) [@bdice](https://github.com/bdice) +- Fix warnings in `test_rolling` ([#10405](https://github.com/rapidsai/cudf/pull/10405)) [@bdice](https://github.com/bdice) +- Enable `codecov` github-check in CI ([#10404](https://github.com/rapidsai/cudf/pull/10404)) [@galipremsagar](https://github.com/galipremsagar) +- Fix warnings in test_cuda_apply, test_numerical, test_pickling, test_unaops. 
([#10402](https://github.com/rapidsai/cudf/pull/10402)) [@bdice](https://github.com/bdice) +- Set column names in `_from_columns_like_self` factory ([#10400](https://github.com/rapidsai/cudf/pull/10400)) [@isVoid](https://github.com/isVoid) +- Refactor `nvtx` annotations in `cudf` & `dask-cudf` ([#10396](https://github.com/rapidsai/cudf/pull/10396)) [@galipremsagar](https://github.com/galipremsagar) +- Consolidate .cov and .corr for sort groupby ([#10386](https://github.com/rapidsai/cudf/pull/10386)) [@skirui-source](https://github.com/skirui-source) +- Consolidate some Frame APIs ([#10381](https://github.com/rapidsai/cudf/pull/10381)) [@vyasr](https://github.com/vyasr) +- Refactor hash functions and `hash_combine` ([#10379](https://github.com/rapidsai/cudf/pull/10379)) [@bdice](https://github.com/bdice) +- Add `nvtx` annotations for `Series` and `Index` ([#10374](https://github.com/rapidsai/cudf/pull/10374)) [@galipremsagar](https://github.com/galipremsagar) +- Refactor `filling.repeat` API ([#10371](https://github.com/rapidsai/cudf/pull/10371)) [@isVoid](https://github.com/isVoid) +- Move standalone UTF8 functions from string_view.hpp to utf8.hpp ([#10369](https://github.com/rapidsai/cudf/pull/10369)) [@davidwendt](https://github.com/davidwendt) +- Remove doc for deprecated function `one_hot_encoding` ([#10367](https://github.com/rapidsai/cudf/pull/10367)) [@isVoid](https://github.com/isVoid) +- Refactor array function ([#10364](https://github.com/rapidsai/cudf/pull/10364)) [@vyasr](https://github.com/vyasr) +- Fix warnings in test_csv.py. 
([#10362](https://github.com/rapidsai/cudf/pull/10362)) [@bdice](https://github.com/bdice) +- Implement a mixin for binops ([#10360](https://github.com/rapidsai/cudf/pull/10360)) [@vyasr](https://github.com/vyasr) +- Refactor cython interface: `copying.pyx` ([#10359](https://github.com/rapidsai/cudf/pull/10359)) [@isVoid](https://github.com/isVoid) +- Implement a mixin for scans ([#10358](https://github.com/rapidsai/cudf/pull/10358)) [@vyasr](https://github.com/vyasr) +- Add scan_aggregation and reduce_aggregation derived types. ([#10357](https://github.com/rapidsai/cudf/pull/10357)) [@nvdbaranec](https://github.com/nvdbaranec) +- Add cleanup of python artifacts ([#10355](https://github.com/rapidsai/cudf/pull/10355)) [@galipremsagar](https://github.com/galipremsagar) +- Fix warnings in test_categorical.py. ([#10354](https://github.com/rapidsai/cudf/pull/10354)) [@bdice](https://github.com/bdice) +- Create a dispatcher for invoking regex kernel functions ([#10349](https://github.com/rapidsai/cudf/pull/10349)) [@davidwendt](https://github.com/davidwendt) +- Fix `codecov` in CI ([#10347](https://github.com/rapidsai/cudf/pull/10347)) [@galipremsagar](https://github.com/galipremsagar) +- Enable caching for `memory_usage` calculation in `Column` ([#10345](https://github.com/rapidsai/cudf/pull/10345)) [@galipremsagar](https://github.com/galipremsagar) +- C++17 cleanup: traits replace std::enable_if<>::type with std::enable_if_t ([#10343](https://github.com/rapidsai/cudf/pull/10343)) [@karthikeyann](https://github.com/karthikeyann) +- JNI: Support appending DECIMAL128 into ColumnBuilder in terms of byte array ([#10338](https://github.com/rapidsai/cudf/pull/10338)) [@sperlingxx](https://github.com/sperlingxx) +- multibyte_split test improvements 
([#10328](https://github.com/rapidsai/cudf/pull/10328)) [@vuule](https://github.com/vuule) +- Fix warnings in test_binops.py. ([#10327](https://github.com/rapidsai/cudf/pull/10327)) [@bdice](https://github.com/bdice) +- Fix warnings from pandas in test_array_ufunc.py. ([#10324](https://github.com/rapidsai/cudf/pull/10324)) [@bdice](https://github.com/bdice) +- Update upload script ([#10321](https://github.com/rapidsai/cudf/pull/10321)) [@ajschmidt8](https://github.com/ajschmidt8) +- Move hash type declarations to hashing.hpp ([#10320](https://github.com/rapidsai/cudf/pull/10320)) [@davidwendt](https://github.com/davidwendt) +- C++17 cleanup: traits replace `::value` with `_v` ([#10319](https://github.com/rapidsai/cudf/pull/10319)) [@karthikeyann](https://github.com/karthikeyann) +- Remove internal columns usage ([#10315](https://github.com/rapidsai/cudf/pull/10315)) [@vyasr](https://github.com/vyasr) +- Remove extraneous `build.sh` parameter ([#10313](https://github.com/rapidsai/cudf/pull/10313)) [@ajschmidt8](https://github.com/ajschmidt8) +- Add const qualifier to MurmurHash3_32::hash_combine ([#10311](https://github.com/rapidsai/cudf/pull/10311)) [@davidwendt](https://github.com/davidwendt) +- Remove `TODO` in `libcudf_kafka` recipe ([#10309](https://github.com/rapidsai/cudf/pull/10309)) [@ajschmidt8](https://github.com/ajschmidt8) +- Add conversions between column_view and device_span<T const>. 
([#10302](https://github.com/rapidsai/cudf/pull/10302)) [@bdice](https://github.com/bdice) +- Avoid `decimal` type narrowing for decimal binops ([#10299](https://github.com/rapidsai/cudf/pull/10299)) [@galipremsagar](https://github.com/galipremsagar) +- Deprecate `DataFrame.iteritems` and introduce `.items` ([#10298](https://github.com/rapidsai/cudf/pull/10298)) [@galipremsagar](https://github.com/galipremsagar) +- Explicitly request CMake use `gnu++17` over `c++17` ([#10297](https://github.com/rapidsai/cudf/pull/10297)) [@robertmaynard](https://github.com/robertmaynard) +- Add copyright check as pre-commit hook. ([#10290](https://github.com/rapidsai/cudf/pull/10290)) [@vyasr](https://github.com/vyasr) +- DataFrame `insert` and creation optimizations ([#10285](https://github.com/rapidsai/cudf/pull/10285)) [@galipremsagar](https://github.com/galipremsagar) +- Improve hash join detail functions ([#10273](https://github.com/rapidsai/cudf/pull/10273)) [@PointKernel](https://github.com/PointKernel) +- Replace custom `cached_property` implementation with functools ([#10272](https://github.com/rapidsai/cudf/pull/10272)) [@shwina](https://github.com/shwina) +- Rewrites `sample` API ([#10262](https://github.com/rapidsai/cudf/pull/10262)) [@isVoid](https://github.com/isVoid) +- Bump hadoop-common from 3.1.0 to 3.1.4 in /java ([#10259](https://github.com/rapidsai/cudf/pull/10259)) [@dependabot[bot]](https://github.com/dependabot[bot]) +- Remove making redundant `copy` across code-base ([#10257](https://github.com/rapidsai/cudf/pull/10257)) [@galipremsagar](https://github.com/galipremsagar) +- Add more `nvtx` annotations ([#10256](https://github.com/rapidsai/cudf/pull/10256)) [@galipremsagar](https://github.com/galipremsagar) +- Add `copyright` check in `cudf` 
([#10253](https://github.com/rapidsai/cudf/pull/10253)) [@galipremsagar](https://github.com/galipremsagar) +- Remove redundant copies in `fillna` to improve performance ([#10241](https://github.com/rapidsai/cudf/pull/10241)) [@galipremsagar](https://github.com/galipremsagar) +- Remove `std::numeric_limit` specializations for timestamp & durations ([#10239](https://github.com/rapidsai/cudf/pull/10239)) [@codereport](https://github.com/codereport) +- Optimize `DataFrame` creation across code-base ([#10236](https://github.com/rapidsai/cudf/pull/10236)) [@galipremsagar](https://github.com/galipremsagar) +- Change pytest distribution algorithm and increase parallelism in CI ([#10232](https://github.com/rapidsai/cudf/pull/10232)) [@galipremsagar](https://github.com/galipremsagar) +- Add environment variables for I/O thread pool and slice sizes ([#10218](https://github.com/rapidsai/cudf/pull/10218)) [@vuule](https://github.com/vuule) +- Add regex flags to strings findall functions ([#10208](https://github.com/rapidsai/cudf/pull/10208)) [@davidwendt](https://github.com/davidwendt) +- Update dask-cudf parquet tests to reflect upstream bugfixes to `_metadata` ([#10206](https://github.com/rapidsai/cudf/pull/10206)) [@charlesbluca](https://github.com/charlesbluca) +- Remove unnecessary nunique function in Series. ([#10205](https://github.com/rapidsai/cudf/pull/10205)) [@martinfalisse](https://github.com/martinfalisse) +- Refactor DataFrame tests. 
([#10204](https://github.com/rapidsai/cudf/pull/10204)) [@bdice](https://github.com/bdice) +- Rewrites `column.__setitem__`, Use `boolean_mask_scatter` ([#10202](https://github.com/rapidsai/cudf/pull/10202)) [@isVoid](https://github.com/isVoid) +- Java utilities to aid in accelerating aggregations on 128-bit types ([#10201](https://github.com/rapidsai/cudf/pull/10201)) [@jlowe](https://github.com/jlowe) +- Fix docstrings alignment in `Frame` methods ([#10199](https://github.com/rapidsai/cudf/pull/10199)) [@galipremsagar](https://github.com/galipremsagar) +- Fix cuco pair issue in hash join ([#10195](https://github.com/rapidsai/cudf/pull/10195)) [@PointKernel](https://github.com/PointKernel) +- Replace `dask` groupby `.index` usages with `.by` ([#10193](https://github.com/rapidsai/cudf/pull/10193)) [@galipremsagar](https://github.com/galipremsagar) +- Add regex flags to strings extract function ([#10192](https://github.com/rapidsai/cudf/pull/10192)) [@davidwendt](https://github.com/davidwendt) +- Forward-merge branch-22.02 to branch-22.04 ([#10191](https://github.com/rapidsai/cudf/pull/10191)) [@bdice](https://github.com/bdice) +- Add CMake `install` rule for tests ([#10190](https://github.com/rapidsai/cudf/pull/10190)) [@ajschmidt8](https://github.com/ajschmidt8) +- Unpin `dask` & `distributed` ([#10182](https://github.com/rapidsai/cudf/pull/10182)) [@galipremsagar](https://github.com/galipremsagar) +- Add comments to explain test validation ([#10176](https://github.com/rapidsai/cudf/pull/10176)) [@galipremsagar](https://github.com/galipremsagar) +- Reduce warnings in pytest output ([#10168](https://github.com/rapidsai/cudf/pull/10168)) [@bdice](https://github.com/bdice) +- Some consolidation of indexed frame methods 
([#10167](https://github.com/rapidsai/cudf/pull/10167)) [@vyasr](https://github.com/vyasr) +- Refactor isin implementations ([#10165](https://github.com/rapidsai/cudf/pull/10165)) [@vyasr](https://github.com/vyasr) +- Faster struct row comparator ([#10164](https://github.com/rapidsai/cudf/pull/10164)) [@devavret](https://github.com/devavret) +- Refactor groupby::get_groups. ([#10161](https://github.com/rapidsai/cudf/pull/10161)) [@bdice](https://github.com/bdice) +- Deprecate `decimal_cols_as_float` in ORC reader (C++ layer) ([#10152](https://github.com/rapidsai/cudf/pull/10152)) [@vuule](https://github.com/vuule) +- Replace `ccache` with `sccache` ([#10146](https://github.com/rapidsai/cudf/pull/10146)) [@ajschmidt8](https://github.com/ajschmidt8) +- Murmur3 hash kernel cleanup ([#10143](https://github.com/rapidsai/cudf/pull/10143)) [@rwlee](https://github.com/rwlee) +- Deprecate `decimal_cols_as_float` in ORC reader ([#10142](https://github.com/rapidsai/cudf/pull/10142)) [@galipremsagar](https://github.com/galipremsagar) +- Run pyupgrade 2.31.0. ([#10141](https://github.com/rapidsai/cudf/pull/10141)) [@bdice](https://github.com/bdice) +- Remove `drop_nan` from internal `IndexedFrame._drop_na_rows`. ([#10140](https://github.com/rapidsai/cudf/pull/10140)) [@bdice](https://github.com/bdice) +- Change cudf::strings::find_multiple to return a lists column ([#10134](https://github.com/rapidsai/cudf/pull/10134)) [@davidwendt](https://github.com/davidwendt) +- Update cmake-format script for branch 22.04. 
([#10132](https://github.com/rapidsai/cudf/pull/10132)) [@bdice](https://github.com/bdice) +- Accept r-value references in convert_table_for_return(): ([#10131](https://github.com/rapidsai/cudf/pull/10131)) [@mythrocks](https://github.com/mythrocks) +- Remove the option to completely disable decimal128 columns in the ORC reader ([#10127](https://github.com/rapidsai/cudf/pull/10127)) [@vuule](https://github.com/vuule) +- Remove deprecated code ([#10124](https://github.com/rapidsai/cudf/pull/10124)) [@vyasr](https://github.com/vyasr) +- Update gpu_utils.py to reflect current CUDA support. ([#10113](https://github.com/rapidsai/cudf/pull/10113)) [@bdice](https://github.com/bdice) +- Remove benchmarks suffix ([#10112](https://github.com/rapidsai/cudf/pull/10112)) [@bdice](https://github.com/bdice) +- Update cudf java binding version to 22.04.0-SNAPSHOT ([#10084](https://github.com/rapidsai/cudf/pull/10084)) [@pxLi](https://github.com/pxLi) +- Remove unnecessary docker files. 
([#10069](https://github.com/rapidsai/cudf/pull/10069)) [@vyasr](https://github.com/vyasr) +- Limit benchmark iterations using environment variable ([#10060](https://github.com/rapidsai/cudf/pull/10060)) [@karthikeyann](https://github.com/karthikeyann) +- Add timing chart for libcudf build metrics report page ([#10038](https://github.com/rapidsai/cudf/pull/10038)) [@davidwendt](https://github.com/davidwendt) +- JNI: Rewrite growBuffersAndRows to accelerate the HostColumnBuilder ([#10025](https://github.com/rapidsai/cudf/pull/10025)) [@sperlingxx](https://github.com/sperlingxx) +- Reduce redundant code in CUDF JNI ([#10019](https://github.com/rapidsai/cudf/pull/10019)) [@mythrocks](https://github.com/mythrocks) +- Make snappy decompress check more efficient ([#9995](https://github.com/rapidsai/cudf/pull/9995)) [@cheinger](https://github.com/cheinger) +- Remove deprecated method Series.set_index. ([#9945](https://github.com/rapidsai/cudf/pull/9945)) [@bdice](https://github.com/bdice) +- Implement a mixin for reductions ([#9925](https://github.com/rapidsai/cudf/pull/9925)) [@vyasr](https://github.com/vyasr) +- JNI: Push back decimal utils from spark-rapids ([#9907](https://github.com/rapidsai/cudf/pull/9907)) [@sperlingxx](https://github.com/sperlingxx) +- Add `assert_column_memory_*` ([#9882](https://github.com/rapidsai/cudf/pull/9882)) [@isVoid](https://github.com/isVoid) +- Add CUDF_UNREACHABLE macro. 
([#9727](https://github.com/rapidsai/cudf/pull/9727)) [@bdice](https://github.com/bdice) +- Upgrade `arrow` & `pyarrow` to `6.0.1` ([#9686](https://github.com/rapidsai/cudf/pull/9686)) [@galipremsagar](https://github.com/galipremsagar) + +# cuDF 22.02.00 (2 Feb 2022) + +## 🚨 Breaking Changes + +- ORC writer API changes for granular statistics ([#10058](https://github.com/rapidsai/cudf/pull/10058)) [@mythrocks](https://github.com/mythrocks) +- `decimal128` Support for `to/from_arrow` ([#9986](https://github.com/rapidsai/cudf/pull/9986)) [@codereport](https://github.com/codereport) +- Remove deprecated method `one_hot_encoding` ([#9977](https://github.com/rapidsai/cudf/pull/9977)) [@isVoid](https://github.com/isVoid) +- Remove str.subword_tokenize ([#9968](https://github.com/rapidsai/cudf/pull/9968)) [@VibhuJawa](https://github.com/VibhuJawa) +- Remove deprecated `method` parameter from `merge` and `join`. ([#9944](https://github.com/rapidsai/cudf/pull/9944)) [@bdice](https://github.com/bdice) +- Remove deprecated method DataFrame.hash_columns. ([#9943](https://github.com/rapidsai/cudf/pull/9943)) [@bdice](https://github.com/bdice) +- Remove deprecated method Series.hash_encode. 
([#9942](https://github.com/rapidsai/cudf/pull/9942)) [@bdice](https://github.com/bdice) +- Refactoring ceil/round/floor code for datetime64 types ([#9926](https://github.com/rapidsai/cudf/pull/9926)) [@mayankanand007](https://github.com/mayankanand007) +- Introduce `nan_as_null` parameter for `cudf.Index` ([#9893](https://github.com/rapidsai/cudf/pull/9893)) [@galipremsagar](https://github.com/galipremsagar) +- Add regex_flags parameter to strings replace_re functions ([#9878](https://github.com/rapidsai/cudf/pull/9878)) [@davidwendt](https://github.com/davidwendt) +- Break tie for `top` categorical columns in `Series.describe` ([#9867](https://github.com/rapidsai/cudf/pull/9867)) [@isVoid](https://github.com/isVoid) +- Add partitioning support in parquet writer ([#9810](https://github.com/rapidsai/cudf/pull/9810)) [@devavret](https://github.com/devavret) +- Move `drop_duplicates`, `drop_na`, `_gather`, `take` to IndexFrame and create their `_base_index` counterparts ([#9807](https://github.com/rapidsai/cudf/pull/9807)) [@isVoid](https://github.com/isVoid) +- Raise temporary error for `decimal128` types in parquet reader ([#9804](https://github.com/rapidsai/cudf/pull/9804)) [@galipremsagar](https://github.com/galipremsagar) +- Change default `dtype` of all nulls column from `float` to `object` ([#9803](https://github.com/rapidsai/cudf/pull/9803)) [@galipremsagar](https://github.com/galipremsagar) +- Remove unused masked udf cython/c++ code ([#9792](https://github.com/rapidsai/cudf/pull/9792)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Pick smallest decimal type with required precision in ORC reader ([#9775](https://github.com/rapidsai/cudf/pull/9775)) [@vuule](https://github.com/vuule) +- Add decimal128 support to Parquet reader and writer 
([#9765](https://github.com/rapidsai/cudf/pull/9765)) [@vuule](https://github.com/vuule) +- Refactor TableTest assertion methods to a separate utility class ([#9762](https://github.com/rapidsai/cudf/pull/9762)) [@jlowe](https://github.com/jlowe) +- Use cuFile direct device reads/writes by default in cuIO ([#9722](https://github.com/rapidsai/cudf/pull/9722)) [@vuule](https://github.com/vuule) +- Match pandas scalar result types in reductions ([#9717](https://github.com/rapidsai/cudf/pull/9717)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Add parameters to control row group size in Parquet writer ([#9677](https://github.com/rapidsai/cudf/pull/9677)) [@vuule](https://github.com/vuule) +- Refactor bit counting APIs, introduce valid/null count functions, and split host/device side code for segmented counts. ([#9588](https://github.com/rapidsai/cudf/pull/9588)) [@bdice](https://github.com/bdice) +- Add support for `decimal128` in cudf python ([#9533](https://github.com/rapidsai/cudf/pull/9533)) [@galipremsagar](https://github.com/galipremsagar) +- Implement `lists::index_of()` to find positions in list rows ([#9510](https://github.com/rapidsai/cudf/pull/9510)) [@mythrocks](https://github.com/mythrocks) +- Rewriting row/column conversions for Spark <-> cudf data conversions ([#8444](https://github.com/rapidsai/cudf/pull/8444)) [@hyperbolic2346](https://github.com/hyperbolic2346) + +## 🐛 Bug Fixes + +- Add check for negative stripe index in ORC reader ([#10074](https://github.com/rapidsai/cudf/pull/10074)) [@vuule](https://github.com/vuule) +- Update Java tests to expect DECIMAL128 from Arrow ([#10073](https://github.com/rapidsai/cudf/pull/10073)) [@jlowe](https://github.com/jlowe) +- Avoid index materialization when `DataFrame` is created with un-named `Series` objects 
([#10071](https://github.com/rapidsai/cudf/pull/10071)) [@galipremsagar](https://github.com/galipremsagar) +- fix gcc 11 compilation errors ([#10067](https://github.com/rapidsai/cudf/pull/10067)) [@rongou](https://github.com/rongou) +- Fix `columns` ordering issue in parquet reader ([#10066](https://github.com/rapidsai/cudf/pull/10066)) [@galipremsagar](https://github.com/galipremsagar) +- Fix dataframe setitem with `ndarray` types ([#10056](https://github.com/rapidsai/cudf/pull/10056)) [@galipremsagar](https://github.com/galipremsagar) +- Remove implicit copy due to conversion from cudf::size_type and size_t ([#10045](https://github.com/rapidsai/cudf/pull/10045)) [@robertmaynard](https://github.com/robertmaynard) +- Include <optional> in headers that use std::optional ([#10044](https://github.com/rapidsai/cudf/pull/10044)) [@robertmaynard](https://github.com/robertmaynard) +- Fix repr and concat of `StructColumn` ([#10042](https://github.com/rapidsai/cudf/pull/10042)) [@galipremsagar](https://github.com/galipremsagar) +- Include row group level stats when writing ORC files ([#10041](https://github.com/rapidsai/cudf/pull/10041)) [@vuule](https://github.com/vuule) +- build.sh respects the `--build_metrics` and `--incl_cache_stats` flags ([#10035](https://github.com/rapidsai/cudf/pull/10035)) [@robertmaynard](https://github.com/robertmaynard) +- Fix memory leaks in JNI native code. 
([#10029](https://github.com/rapidsai/cudf/pull/10029)) [@mythrocks](https://github.com/mythrocks) +- Update JNI to use new arena mr constructor ([#10027](https://github.com/rapidsai/cudf/pull/10027)) [@rongou](https://github.com/rongou) +- Fix null check when comparing structs in `arg_min` operation of reduction/groupby ([#10026](https://github.com/rapidsai/cudf/pull/10026)) [@ttnghia](https://github.com/ttnghia) +- Wrap CI script shell variables in quotes to fix local testing. ([#10018](https://github.com/rapidsai/cudf/pull/10018)) [@bdice](https://github.com/bdice) +- cudftestutil no longer propagates compiler flags to external users ([#10017](https://github.com/rapidsai/cudf/pull/10017)) [@robertmaynard](https://github.com/robertmaynard) +- Remove `CUDA_DEVICE_CALLABLE` macro usage ([#10015](https://github.com/rapidsai/cudf/pull/10015)) [@hyperbolic2346](https://github.com/hyperbolic2346) +- Add missing list filling header in meta.yaml ([#10007](https://github.com/rapidsai/cudf/pull/10007)) [@devavret](https://github.com/devavret) +- Fix `conda` recipes for `custreamz` & `cudf_kafka` ([#10003](https://github.com/rapidsai/cudf/pull/10003)) [@ajschmidt8](https://github.com/ajschmidt8) +- Fix matching regex word-boundary (`\b`) in strings replace ([#9997](https://github.com/rapidsai/cudf/pull/9997)) [@davidwendt](https://github.com/davidwendt) +- Fix null check when comparing structs in `min` and `max` reduction/groupby operations ([#9994](https://github.com/rapidsai/cudf/pull/9994)) [@ttnghia](https://github.com/ttnghia) +- Fix octal pattern matching in regex string ([#9993](https://github.com/rapidsai/cudf/pull/9993)) [@davidwendt](https://github.com/davidwendt) +- `decimal128` Support for `to/from_arrow` ([#9986](https://github.com/rapidsai/cudf/pull/9986)) 
[@codereport](https://github.com/codereport) +- Fix groupby shift/diff/fill after selecting from a `GroupBy` ([#9984](https://github.com/rapidsai/cudf/pull/9984)) [@shwina](https://github.com/shwina) +- Fix the overflow problem of decimal rescale ([#9966](https://github.com/rapidsai/cudf/pull/9966)) [@sperlingxx](https://github.com/sperlingxx) +- Use default value for decimal precision in parquet writer when not specified ([#9963](https://github.com/rapidsai/cudf/pull/9963)) [@devavret](https://github.com/devavret) +- Fix cudf java build error. ([#9958](https://github.com/rapidsai/cudf/pull/9958)) [@firestarman](https://github.com/firestarman) +- Use gpuci_mamba_retry to install local artifacts. ([#9951](https://github.com/rapidsai/cudf/pull/9951)) [@bdice](https://github.com/bdice) +- Fix regression HostColumnVectorCore requiring native libs ([#9948](https://github.com/rapidsai/cudf/pull/9948)) [@jlowe](https://github.com/jlowe) +- Rename aggregate_metadata in writer to fix name collision ([#9938](https://github.com/rapidsai/cudf/pull/9938)) [@devavret](https://github.com/devavret) +- Fixed issue with percentile_approx where output tdigests could have uninitialized data at the end. ([#9931](https://github.com/rapidsai/cudf/pull/9931)) [@nvdbaranec](https://github.com/nvdbaranec) +- Resolve racecheck errors in ORC kernels ([#9916](https://github.com/rapidsai/cudf/pull/9916)) [@vuule](https://github.com/vuule) +- Fix the java build after parquet partitioning support ([#9908](https://github.com/rapidsai/cudf/pull/9908)) [@revans2](https://github.com/revans2) +- Fix compilation of benchmark for parquet writer. 
([#9905](https://github.com/rapidsai/cudf/pull/9905)) [@bdice](https://github.com/bdice) +- Fix a memcheck error in ORC writer ([#9896](https://github.com/rapidsai/cudf/pull/9896)) [@vuule](https://github.com/vuule) +- Introduce `nan_as_null` parameter for `cudf.Index` ([#9893](https://github.com/rapidsai/cudf/pull/9893)) [@galipremsagar](https://github.com/galipremsagar) +- Fix fallback to sort aggregation for grouping only hash aggregate ([#9891](https://github.com/rapidsai/cudf/pull/9891)) [@abellina](https://github.com/abellina) +- Add zlib to cudfjni link when using static libcudf library dependency ([#9890](https://github.com/rapidsai/cudf/pull/9890)) [@jlowe](https://github.com/jlowe) +- TimedeltaIndex constructor raises an AttributeError. ([#9884](https://github.com/rapidsai/cudf/pull/9884)) [@skirui-source](https://github.com/skirui-source) +- Fix cudf.Scalar string datetime construction ([#9875](https://github.com/rapidsai/cudf/pull/9875)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Load libcufile.so with RTLD_NODELETE flag ([#9872](https://github.com/rapidsai/cudf/pull/9872)) [@vuule](https://github.com/vuule) +- Break tie for `top` categorical columns in `Series.describe` ([#9867](https://github.com/rapidsai/cudf/pull/9867)) [@isVoid](https://github.com/isVoid) +- Fix null handling for structs `min` and `arg_min` in groupby, groupby scan, reduction, and inclusive_scan ([#9864](https://github.com/rapidsai/cudf/pull/9864)) [@ttnghia](https://github.com/ttnghia) +- Add one-level list encoding support in parquet reader ([#9848](https://github.com/rapidsai/cudf/pull/9848)) [@PointKernel](https://github.com/PointKernel) +- Fix an out-of-bounds read in validity copying in contiguous_split. 
([#9842](https://github.com/rapidsai/cudf/pull/9842)) [@nvdbaranec](https://github.com/nvdbaranec) +- Fix join of MultiIndex to Index with one column and overlapping name. ([#9830](https://github.com/rapidsai/cudf/pull/9830)) [@vyasr](https://github.com/vyasr) +- Fix caching in `Series.applymap` ([#9821](https://github.com/rapidsai/cudf/pull/9821)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Enforce boolean `ascending` for dask-cudf `sort_values` ([#9814](https://github.com/rapidsai/cudf/pull/9814)) [@charlesbluca](https://github.com/charlesbluca) +- Fix ORC writer crash with empty input columns ([#9808](https://github.com/rapidsai/cudf/pull/9808)) [@vuule](https://github.com/vuule) +- Change default `dtype` of all nulls column from `float` to `object` ([#9803](https://github.com/rapidsai/cudf/pull/9803)) [@galipremsagar](https://github.com/galipremsagar) +- Load native dependencies when Java ColumnView is loaded ([#9800](https://github.com/rapidsai/cudf/pull/9800)) [@jlowe](https://github.com/jlowe) +- Fix dtype-argument bug in dask_cudf read_csv ([#9796](https://github.com/rapidsai/cudf/pull/9796)) [@rjzamora](https://github.com/rjzamora) +- Fix overflow for min calculation in strings::from_timestamps ([#9793](https://github.com/rapidsai/cudf/pull/9793)) [@revans2](https://github.com/revans2) +- Fix memory error due to lambda return type deduction limitation ([#9778](https://github.com/rapidsai/cudf/pull/9778)) [@karthikeyann](https://github.com/karthikeyann) +- Revert regex $/EOL end-of-string new-line special case handling ([#9774](https://github.com/rapidsai/cudf/pull/9774)) [@davidwendt](https://github.com/davidwendt) +- Fix missing streams ([#9767](https://github.com/rapidsai/cudf/pull/9767)) [@karthikeyann](https://github.com/karthikeyann) +- Fix 
make_empty_scalar_like on list_type ([#9759](https://github.com/rapidsai/cudf/pull/9759)) [@sperlingxx](https://github.com/sperlingxx) +- Update cmake and conda to 22.02 ([#9746](https://github.com/rapidsai/cudf/pull/9746)) [@devavret](https://github.com/devavret) +- Fix out-of-bounds memory write in decimal128-to-string conversion ([#9740](https://github.com/rapidsai/cudf/pull/9740)) [@davidwendt](https://github.com/davidwendt) +- Match pandas scalar result types in reductions ([#9717](https://github.com/rapidsai/cudf/pull/9717)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Fix regex non-multiline EOL/$ matching strings ending with a new-line ([#9715](https://github.com/rapidsai/cudf/pull/9715)) [@davidwendt](https://github.com/davidwendt) +- Fixed build by adding more checks for int8, int16 ([#9707](https://github.com/rapidsai/cudf/pull/9707)) [@razajafri](https://github.com/razajafri) +- Fix `null` handling when `boolean` dtype is passed ([#9691](https://github.com/rapidsai/cudf/pull/9691)) [@galipremsagar](https://github.com/galipremsagar) +- Fix stream usage in `segmented_gather()` ([#9679](https://github.com/rapidsai/cudf/pull/9679)) [@mythrocks](https://github.com/mythrocks) + +## 📖 Documentation + +- Update `decimal` dtypes related docs entries ([#10072](https://github.com/rapidsai/cudf/pull/10072)) [@galipremsagar](https://github.com/galipremsagar) +- Fix regex doc describing hexadecimal escape characters ([#10009](https://github.com/rapidsai/cudf/pull/10009)) [@davidwendt](https://github.com/davidwendt) +- Fix cudf compilation instructions. 
([#9956](https://github.com/rapidsai/cudf/pull/9956)) [@esoha-nvidia](https://github.com/esoha-nvidia) +- Fix see also links for IO APIs ([#9895](https://github.com/rapidsai/cudf/pull/9895)) [@galipremsagar](https://github.com/galipremsagar) +- Fix build instructions for libcudf doxygen ([#9837](https://github.com/rapidsai/cudf/pull/9837)) [@davidwendt](https://github.com/davidwendt) +- Fix some doxygen warnings and add missing documentation ([#9770](https://github.com/rapidsai/cudf/pull/9770)) [@karthikeyann](https://github.com/karthikeyann) +- update cuda version in local build ([#9736](https://github.com/rapidsai/cudf/pull/9736)) [@karthikeyann](https://github.com/karthikeyann) +- Fix doxygen for enum types in libcudf ([#9724](https://github.com/rapidsai/cudf/pull/9724)) [@davidwendt](https://github.com/davidwendt) +- Spell check fixes ([#9682](https://github.com/rapidsai/cudf/pull/9682)) [@karthikeyann](https://github.com/karthikeyann) +- Fix links in C++ Developer Guide. 
([#9675](https://github.com/rapidsai/cudf/pull/9675)) [@bdice](https://github.com/bdice) + +## 🚀 New Features + +- Remove libcudacxx patch needed for nvcc 11.4 ([#10057](https://github.com/rapidsai/cudf/pull/10057)) [@robertmaynard](https://github.com/robertmaynard) +- Allow CuPy 10 ([#10048](https://github.com/rapidsai/cudf/pull/10048)) [@jakirkham](https://github.com/jakirkham) +- Add in support for NULL_LOGICAL_AND and NULL_LOGICAL_OR binops ([#10016](https://github.com/rapidsai/cudf/pull/10016)) [@revans2](https://github.com/revans2) +- Add `groupby.transform` (only support for aggregations) ([#10005](https://github.com/rapidsai/cudf/pull/10005)) [@shwina](https://github.com/shwina) +- Add partitioning support to Parquet chunked writer ([#10000](https://github.com/rapidsai/cudf/pull/10000)) [@devavret](https://github.com/devavret) +- Add jni for sequences ([#9972](https://github.com/rapidsai/cudf/pull/9972)) [@wbo4958](https://github.com/wbo4958) +- Java bindings for mixed left, inner, and full joins ([#9941](https://github.com/rapidsai/cudf/pull/9941)) [@jlowe](https://github.com/jlowe) +- Java bindings for JSON reader support ([#9940](https://github.com/rapidsai/cudf/pull/9940)) [@wbo4958](https://github.com/wbo4958) +- Enable transpose for string columns in cudf python ([#9937](https://github.com/rapidsai/cudf/pull/9937)) [@galipremsagar](https://github.com/galipremsagar) +- Support structs for `cudf::contains` with column/scalar input ([#9929](https://github.com/rapidsai/cudf/pull/9929)) [@ttnghia](https://github.com/ttnghia) +- Implement mixed equality/conditional joins ([#9917](https://github.com/rapidsai/cudf/pull/9917)) [@vyasr](https://github.com/vyasr) +- Add cudf::strings::extract_all API ([#9909](https://github.com/rapidsai/cudf/pull/9909)) 
[@davidwendt](https://github.com/davidwendt) +- Implement JNI for `cudf::scatter` APIs ([#9903](https://github.com/rapidsai/cudf/pull/9903)) [@ttnghia](https://github.com/ttnghia) +- JNI: Function to copy and set validity from bool column. ([#9901](https://github.com/rapidsai/cudf/pull/9901)) [@mythrocks](https://github.com/mythrocks) +- Add dictionary support to cudf::copy_if_else ([#9887](https://github.com/rapidsai/cudf/pull/9887)) [@davidwendt](https://github.com/davidwendt) +- add run_benchmarks target for running benchmarks with json output ([#9879](https://github.com/rapidsai/cudf/pull/9879)) [@karthikeyann](https://github.com/karthikeyann) +- Add regex_flags parameter to strings replace_re functions ([#9878](https://github.com/rapidsai/cudf/pull/9878)) [@davidwendt](https://github.com/davidwendt) +- Add_suffix and add_prefix for DataFrames and Series ([#9846](https://github.com/rapidsai/cudf/pull/9846)) [@mayankanand007](https://github.com/mayankanand007) +- Add JNI for `cudf::drop_duplicates` ([#9841](https://github.com/rapidsai/cudf/pull/9841)) [@ttnghia](https://github.com/ttnghia) +- Implement per-list sequence ([#9839](https://github.com/rapidsai/cudf/pull/9839)) [@ttnghia](https://github.com/ttnghia) +- adding `series.transpose` ([#9835](https://github.com/rapidsai/cudf/pull/9835)) [@mayankanand007](https://github.com/mayankanand007) +- Adding support for `Series.autocorr` ([#9833](https://github.com/rapidsai/cudf/pull/9833)) [@mayankanand007](https://github.com/mayankanand007) +- Support round operation on datetime64 datatypes ([#9820](https://github.com/rapidsai/cudf/pull/9820)) [@mayankanand007](https://github.com/mayankanand007) +- Add partitioning support in parquet writer ([#9810](https://github.com/rapidsai/cudf/pull/9810)) 
[@devavret](https://github.com/devavret) +- Raise temporary error for `decimal128` types in parquet reader ([#9804](https://github.com/rapidsai/cudf/pull/9804)) [@galipremsagar](https://github.com/galipremsagar) +- Add decimal128 support to Parquet reader and writer ([#9765](https://github.com/rapidsai/cudf/pull/9765)) [@vuule](https://github.com/vuule) +- Optimize `groupby::scan` ([#9754](https://github.com/rapidsai/cudf/pull/9754)) [@PointKernel](https://github.com/PointKernel) +- Add sample JNI API ([#9728](https://github.com/rapidsai/cudf/pull/9728)) [@res-life](https://github.com/res-life) +- Support `min` and `max` in inclusive scan for structs ([#9725](https://github.com/rapidsai/cudf/pull/9725)) [@ttnghia](https://github.com/ttnghia) +- Add `first` and `last` method to `IndexedFrame` ([#9710](https://github.com/rapidsai/cudf/pull/9710)) [@isVoid](https://github.com/isVoid) +- Support `min` and `max` reduction for structs ([#9697](https://github.com/rapidsai/cudf/pull/9697)) [@ttnghia](https://github.com/ttnghia) +- Add parameters to control row group size in Parquet writer ([#9677](https://github.com/rapidsai/cudf/pull/9677)) [@vuule](https://github.com/vuule) +- Run compute-sanitizer in nightly build ([#9641](https://github.com/rapidsai/cudf/pull/9641)) [@karthikeyann](https://github.com/karthikeyann) +- Implement Series.datetime.floor ([#9571](https://github.com/rapidsai/cudf/pull/9571)) [@skirui-source](https://github.com/skirui-source) +- ceil/floor for `DatetimeIndex` ([#9554](https://github.com/rapidsai/cudf/pull/9554)) [@mayankanand007](https://github.com/mayankanand007) +- Add support for `decimal128` in cudf python ([#9533](https://github.com/rapidsai/cudf/pull/9533)) [@galipremsagar](https://github.com/galipremsagar) +- Implement `lists::index_of()` 
to find positions in list rows ([#9510](https://github.com/rapidsai/cudf/pull/9510)) [@mythrocks](https://github.com/mythrocks) +- custreamz oauth callback for kafka (librdkafka) ([#9486](https://github.com/rapidsai/cudf/pull/9486)) [@jdye64](https://github.com/jdye64) +- Add Pearson correlation for sort groupby (python) ([#9166](https://github.com/rapidsai/cudf/pull/9166)) [@skirui-source](https://github.com/skirui-source) +- Interchange dataframe protocol ([#9071](https://github.com/rapidsai/cudf/pull/9071)) [@iskode](https://github.com/iskode) +- Rewriting row/column conversions for Spark <-> cudf data conversions ([#8444](https://github.com/rapidsai/cudf/pull/8444)) [@hyperbolic2346](https://github.com/hyperbolic2346) + +## 🛠️ Improvements + +- Prepare upload scripts for Python 3.7 removal ([#10092](https://github.com/rapidsai/cudf/pull/10092)) [@Ethyling](https://github.com/Ethyling) +- Simplify custreamz and cudf_kafka recipes files ([#10065](https://github.com/rapidsai/cudf/pull/10065)) [@Ethyling](https://github.com/Ethyling) +- ORC writer API changes for granular statistics ([#10058](https://github.com/rapidsai/cudf/pull/10058)) [@mythrocks](https://github.com/mythrocks) +- Remove python constraints in custreamz and cudf_kafka recipes ([#10052](https://github.com/rapidsai/cudf/pull/10052)) [@Ethyling](https://github.com/Ethyling) +- Unpin `dask` and `distributed` in CI ([#10028](https://github.com/rapidsai/cudf/pull/10028)) [@galipremsagar](https://github.com/galipremsagar) +- Add `_from_column_like_self` factory ([#10022](https://github.com/rapidsai/cudf/pull/10022)) [@isVoid](https://github.com/isVoid) +- Replace custom CUDA bindings previously provided by RMM with official CUDA Python bindings ([#10008](https://github.com/rapidsai/cudf/pull/10008)) 
[@shwina](https://github.com/shwina) +- Use `cuda::std::is_arithmetic` in `cudf::is_numeric` trait. ([#9996](https://github.com/rapidsai/cudf/pull/9996)) [@bdice](https://github.com/bdice) +- Clean up CUDA stream use in cuIO ([#9991](https://github.com/rapidsai/cudf/pull/9991)) [@vuule](https://github.com/vuule) +- Use addressed-ordered first fit for the pinned memory pool ([#9989](https://github.com/rapidsai/cudf/pull/9989)) [@rongou](https://github.com/rongou) +- Add strings tests to transpose_test.cpp ([#9985](https://github.com/rapidsai/cudf/pull/9985)) [@davidwendt](https://github.com/davidwendt) +- Use gpuci_mamba_retry on Java CI. ([#9983](https://github.com/rapidsai/cudf/pull/9983)) [@bdice](https://github.com/bdice) +- Remove deprecated method `one_hot_encoding` ([#9977](https://github.com/rapidsai/cudf/pull/9977)) [@isVoid](https://github.com/isVoid) +- Minor cleanup of unused Python functions ([#9974](https://github.com/rapidsai/cudf/pull/9974)) [@vyasr](https://github.com/vyasr) +- Use new efficient partitioned parquet writing in cuDF ([#9971](https://github.com/rapidsai/cudf/pull/9971)) [@devavret](https://github.com/devavret) +- Remove str.subword_tokenize ([#9968](https://github.com/rapidsai/cudf/pull/9968)) [@VibhuJawa](https://github.com/VibhuJawa) +- Forward-merge branch-21.12 to branch-22.02 ([#9947](https://github.com/rapidsai/cudf/pull/9947)) [@bdice](https://github.com/bdice) +- Remove deprecated `method` parameter from `merge` and `join`. ([#9944](https://github.com/rapidsai/cudf/pull/9944)) [@bdice](https://github.com/bdice) +- Remove deprecated method DataFrame.hash_columns. ([#9943](https://github.com/rapidsai/cudf/pull/9943)) [@bdice](https://github.com/bdice) +- Remove deprecated method Series.hash_encode. 
([#9942](https://github.com/rapidsai/cudf/pull/9942)) [@bdice](https://github.com/bdice) +- use ninja in java ci build ([#9933](https://github.com/rapidsai/cudf/pull/9933)) [@rongou](https://github.com/rongou) +- Add build-time publish step to cpu build script ([#9927](https://github.com/rapidsai/cudf/pull/9927)) [@davidwendt](https://github.com/davidwendt) +- Refactoring ceil/round/floor code for datetime64 types ([#9926](https://github.com/rapidsai/cudf/pull/9926)) [@mayankanand007](https://github.com/mayankanand007) +- Remove various unused functions ([#9922](https://github.com/rapidsai/cudf/pull/9922)) [@vyasr](https://github.com/vyasr) +- Raise in `query` if dtype is not supported ([#9921](https://github.com/rapidsai/cudf/pull/9921)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Add missing imports tests ([#9920](https://github.com/rapidsai/cudf/pull/9920)) [@Ethyling](https://github.com/Ethyling) +- Spark Decimal128 hashing ([#9919](https://github.com/rapidsai/cudf/pull/9919)) [@rwlee](https://github.com/rwlee) +- Replace `thrust/std::get` with structured bindings ([#9915](https://github.com/rapidsai/cudf/pull/9915)) [@codereport](https://github.com/codereport) +- Upgrade thrust version to 1.15 ([#9912](https://github.com/rapidsai/cudf/pull/9912)) [@robertmaynard](https://github.com/robertmaynard) +- Remove conda envs for CUDA 11.0 and 11.2. ([#9910](https://github.com/rapidsai/cudf/pull/9910)) [@bdice](https://github.com/bdice) +- Return count of set bits from inplace_bitmask_and. 
([#9904](https://github.com/rapidsai/cudf/pull/9904)) [@bdice](https://github.com/bdice) +- Use dynamic nullate for join hasher and equality comparator ([#9902](https://github.com/rapidsai/cudf/pull/9902)) [@davidwendt](https://github.com/davidwendt) +- Update ucx-py version on release using rvc ([#9897](https://github.com/rapidsai/cudf/pull/9897)) [@Ethyling](https://github.com/Ethyling) +- Remove `IncludeCategories` from `.clang-format` ([#9876](https://github.com/rapidsai/cudf/pull/9876)) [@codereport](https://github.com/codereport) +- Support statically linking CUDA runtime for Java bindings ([#9873](https://github.com/rapidsai/cudf/pull/9873)) [@jlowe](https://github.com/jlowe) +- Add `clang-tidy` to libcudf ([#9860](https://github.com/rapidsai/cudf/pull/9860)) [@codereport](https://github.com/codereport) +- Remove deprecated methods from Java Table class ([#9853](https://github.com/rapidsai/cudf/pull/9853)) [@jlowe](https://github.com/jlowe) +- Add test for map column metadata handling in ORC writer ([#9852](https://github.com/rapidsai/cudf/pull/9852)) [@vuule](https://github.com/vuule) +- Use pandas `to_offset` to parse frequency string in `date_range` ([#9843](https://github.com/rapidsai/cudf/pull/9843)) [@isVoid](https://github.com/isVoid) +- add templated benchmark with fixture ([#9838](https://github.com/rapidsai/cudf/pull/9838)) [@karthikeyann](https://github.com/karthikeyann) +- Use list of column inputs for `apply_boolean_mask` ([#9832](https://github.com/rapidsai/cudf/pull/9832)) [@isVoid](https://github.com/isVoid) +- Added a few more tests for Decimal to String cast ([#9818](https://github.com/rapidsai/cudf/pull/9818)) [@razajafri](https://github.com/razajafri) +- Run doctests. 
([#9815](https://github.com/rapidsai/cudf/pull/9815)) [@bdice](https://github.com/bdice) +- Avoid overflow for fixed_point round ([#9809](https://github.com/rapidsai/cudf/pull/9809)) [@sperlingxx](https://github.com/sperlingxx) +- Move `drop_duplicates`, `drop_na`, `_gather`, `take` to IndexFrame and create their `_base_index` counterparts ([#9807](https://github.com/rapidsai/cudf/pull/9807)) [@isVoid](https://github.com/isVoid) +- Use vector factories for host-device copies. ([#9806](https://github.com/rapidsai/cudf/pull/9806)) [@bdice](https://github.com/bdice) +- Refactor host device macros ([#9797](https://github.com/rapidsai/cudf/pull/9797)) [@vyasr](https://github.com/vyasr) +- Remove unused masked udf cython/c++ code ([#9792](https://github.com/rapidsai/cudf/pull/9792)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Allow custom sort functions for dask-cudf `sort_values` ([#9789](https://github.com/rapidsai/cudf/pull/9789)) [@charlesbluca](https://github.com/charlesbluca) +- Improve build time of libcudf iterator tests ([#9788](https://github.com/rapidsai/cudf/pull/9788)) [@davidwendt](https://github.com/davidwendt) +- Copy Java native dependencies directly into classpath ([#9787](https://github.com/rapidsai/cudf/pull/9787)) [@jlowe](https://github.com/jlowe) +- Add decimal types to cuIO benchmarks ([#9776](https://github.com/rapidsai/cudf/pull/9776)) [@vuule](https://github.com/vuule) +- Pick smallest decimal type with required precision in ORC reader ([#9775](https://github.com/rapidsai/cudf/pull/9775)) [@vuule](https://github.com/vuule) +- Avoid overflow for `fixed_point` `cudf::cast` and performance optimization ([#9772](https://github.com/rapidsai/cudf/pull/9772)) [@codereport](https://github.com/codereport) +- Use CTAD with Thrust function objects 
([#9768](https://github.com/rapidsai/cudf/pull/9768)) [@codereport](https://github.com/codereport) +- Refactor TableTest assertion methods to a separate utility class ([#9762](https://github.com/rapidsai/cudf/pull/9762)) [@jlowe](https://github.com/jlowe) +- Use Java classloader to find test resources ([#9760](https://github.com/rapidsai/cudf/pull/9760)) [@jlowe](https://github.com/jlowe) +- Allow cast decimal128 to string and add tests ([#9756](https://github.com/rapidsai/cudf/pull/9756)) [@razajafri](https://github.com/razajafri) +- Load balance optimization for contiguous_split ([#9755](https://github.com/rapidsai/cudf/pull/9755)) [@nvdbaranec](https://github.com/nvdbaranec) +- Consolidate and improve `reset_index` ([#9750](https://github.com/rapidsai/cudf/pull/9750)) [@isVoid](https://github.com/isVoid) +- Update to UCX-Py 0.24 ([#9748](https://github.com/rapidsai/cudf/pull/9748)) [@pentschev](https://github.com/pentschev) +- Skip cufile tests in JNI build script ([#9744](https://github.com/rapidsai/cudf/pull/9744)) [@pxLi](https://github.com/pxLi) +- Enable string to decimal 128 cast ([#9742](https://github.com/rapidsai/cudf/pull/9742)) [@razajafri](https://github.com/razajafri) +- Use stop instead of stop_. 
([#9735](https://github.com/rapidsai/cudf/pull/9735)) [@bdice](https://github.com/bdice) +- Forward-merge branch-21.12 to branch-22.02 ([#9730](https://github.com/rapidsai/cudf/pull/9730)) [@bdice](https://github.com/bdice) +- Improve cmake format script ([#9723](https://github.com/rapidsai/cudf/pull/9723)) [@vyasr](https://github.com/vyasr) +- Use cuFile direct device reads/writes by default in cuIO ([#9722](https://github.com/rapidsai/cudf/pull/9722)) [@vuule](https://github.com/vuule) +- Add directory-partitioned data support to cudf.read_parquet ([#9720](https://github.com/rapidsai/cudf/pull/9720)) [@rjzamora](https://github.com/rjzamora) +- Use stream allocator adaptor for hash join table ([#9704](https://github.com/rapidsai/cudf/pull/9704)) [@PointKernel](https://github.com/PointKernel) +- Update check for inf/nan strings in libcudf float conversion to ignore case ([#9694](https://github.com/rapidsai/cudf/pull/9694)) [@davidwendt](https://github.com/davidwendt) +- Update cudf JNI to 22.02.0-SNAPSHOT ([#9681](https://github.com/rapidsai/cudf/pull/9681)) [@pxLi](https://github.com/pxLi) +- Replace cudf's concurrent_ordered_map with cuco::static_map in semi/anti joins ([#9666](https://github.com/rapidsai/cudf/pull/9666)) [@vyasr](https://github.com/vyasr) +- Some improvements to `parse_decimal` function and bindings for `is_fixed_point` ([#9658](https://github.com/rapidsai/cudf/pull/9658)) [@razajafri](https://github.com/razajafri) +- Add utility to format ninja-log build times ([#9631](https://github.com/rapidsai/cudf/pull/9631)) [@davidwendt](https://github.com/davidwendt) +- Allow runtime has_nulls parameter for row operators ([#9623](https://github.com/rapidsai/cudf/pull/9623)) [@davidwendt](https://github.com/davidwendt) +- Use fsspec.parquet for improved read_parquet 
performance from remote storage ([#9589](https://github.com/rapidsai/cudf/pull/9589)) [@rjzamora](https://github.com/rjzamora) +- Refactor bit counting APIs, introduce valid/null count functions, and split host/device side code for segmented counts. ([#9588](https://github.com/rapidsai/cudf/pull/9588)) [@bdice](https://github.com/bdice) +- Use List of Columns as Input for `drop_nulls`, `gather` and `drop_duplicates` ([#9558](https://github.com/rapidsai/cudf/pull/9558)) [@isVoid](https://github.com/isVoid) +- Simplify merge internals and reduce overhead ([#9516](https://github.com/rapidsai/cudf/pull/9516)) [@vyasr](https://github.com/vyasr) +- Add `struct` generation support in datagenerator & fuzz tests ([#9180](https://github.com/rapidsai/cudf/pull/9180)) [@galipremsagar](https://github.com/galipremsagar) +- Simplify write_csv by removing unnecessary writer/impl classes ([#9089](https://github.com/rapidsai/cudf/pull/9089)) [@cwharris](https://github.com/cwharris) + +# cuDF 21.12.00 (9 Dec 2021) + +## 🚨 Breaking Changes + +- Update `bitmask_and` and `bitmask_or` to return a pair of resulting mask and count of unset bits ([#9616](https://github.com/rapidsai/cudf/pull/9616)) [@PointKernel](https://github.com/PointKernel) +- Remove sizeof and standardize on memory_usage ([#9544](https://github.com/rapidsai/cudf/pull/9544)) [@vyasr](https://github.com/vyasr) +- Add support for single-line regex anchors ^/$ in contains_re ([#9482](https://github.com/rapidsai/cudf/pull/9482)) [@davidwendt](https://github.com/davidwendt) +- Refactor sorting APIs ([#9464](https://github.com/rapidsai/cudf/pull/9464)) [@vyasr](https://github.com/vyasr) +- Update Java nvcomp JNI bindings to nvcomp 2.x API ([#9384](https://github.com/rapidsai/cudf/pull/9384)) [@jbrennan333](https://github.com/jbrennan333) +- Support 
Python UDFs written in terms of rows ([#9343](https://github.com/rapidsai/cudf/pull/9343)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- JNI: Support nested types in ORC writer ([#9334](https://github.com/rapidsai/cudf/pull/9334)) [@firestarman](https://github.com/firestarman) +- Optionally nullify out-of-bounds indices in segmented_gather(). ([#9318](https://github.com/rapidsai/cudf/pull/9318)) [@mythrocks](https://github.com/mythrocks) +- Refactor cuIO timestamp processing with `cuda::std::chrono` ([#9278](https://github.com/rapidsai/cudf/pull/9278)) [@PointKernel](https://github.com/PointKernel) +- Various internal MultiIndex improvements ([#9243](https://github.com/rapidsai/cudf/pull/9243)) [@vyasr](https://github.com/vyasr) + +## 🐛 Bug Fixes + +- Fix read_parquet bug for bytes input ([#9669](https://github.com/rapidsai/cudf/pull/9669)) [@rjzamora](https://github.com/rjzamora) +- Use `_gather` internal for `sort_*` ([#9668](https://github.com/rapidsai/cudf/pull/9668)) [@isVoid](https://github.com/isVoid) +- Fix behavior of equals for non-DataFrame Frames and add tests. 
([#9653](https://github.com/rapidsai/cudf/pull/9653)) [@vyasr](https://github.com/vyasr) +- Don't recompute output size if it is already available ([#9649](https://github.com/rapidsai/cudf/pull/9649)) [@abellina](https://github.com/abellina) +- Fix read_parquet bug for extended dtypes from remote storage ([#9638](https://github.com/rapidsai/cudf/pull/9638)) [@rjzamora](https://github.com/rjzamora) +- add const when getting data from a JNI data wrapper ([#9637](https://github.com/rapidsai/cudf/pull/9637)) [@wjxiz1992](https://github.com/wjxiz1992) +- Fix debrotli issue on CUDA 11.5 ([#9632](https://github.com/rapidsai/cudf/pull/9632)) [@vuule](https://github.com/vuule) +- Use std::size_t when computing join output size ([#9626](https://github.com/rapidsai/cudf/pull/9626)) [@jlowe](https://github.com/jlowe) +- Fix `usecols` parameter handling in `dask_cudf.read_csv` ([#9618](https://github.com/rapidsai/cudf/pull/9618)) [@galipremsagar](https://github.com/galipremsagar) +- Add support for string `'nan', 'inf' & '-inf'` values while type-casting to `float` ([#9613](https://github.com/rapidsai/cudf/pull/9613)) [@galipremsagar](https://github.com/galipremsagar) +- Avoid passing NativeFileDatasource to pyarrow in read_parquet ([#9608](https://github.com/rapidsai/cudf/pull/9608)) [@rjzamora](https://github.com/rjzamora) +- Fix test failure with cuda 11.5 in row_bit_count tests. 
([#9581](https://github.com/rapidsai/cudf/pull/9581)) [@nvdbaranec](https://github.com/nvdbaranec) +- Correct _LIBCUDACXX_CUDACC_VER value computation ([#9579](https://github.com/rapidsai/cudf/pull/9579)) [@robertmaynard](https://github.com/robertmaynard) +- Increase max RLE stream size estimate to avoid potential overflows ([#9568](https://github.com/rapidsai/cudf/pull/9568)) [@vuule](https://github.com/vuule) +- Fix edge case in tdigest scalar generation for groups containing all nulls. ([#9551](https://github.com/rapidsai/cudf/pull/9551)) [@nvdbaranec](https://github.com/nvdbaranec) +- Fix pytests failing in `cuda-11.5` environment ([#9547](https://github.com/rapidsai/cudf/pull/9547)) [@galipremsagar](https://github.com/galipremsagar) +- compile libnvcomp with PTDS if requested ([#9540](https://github.com/rapidsai/cudf/pull/9540)) [@jbrennan333](https://github.com/jbrennan333) +- Fix `segmented_gather()` for null LIST rows ([#9537](https://github.com/rapidsai/cudf/pull/9537)) [@mythrocks](https://github.com/mythrocks) +- Deprecate DataFrame.label_encoding, use private _label_encoding method internally. ([#9535](https://github.com/rapidsai/cudf/pull/9535)) [@bdice](https://github.com/bdice) +- Fix several test and benchmark issues related to bitmask allocations. ([#9521](https://github.com/rapidsai/cudf/pull/9521)) [@nvdbaranec](https://github.com/nvdbaranec) +- Fix for inserting duplicates in groupby result cache ([#9508](https://github.com/rapidsai/cudf/pull/9508)) [@karthikeyann](https://github.com/karthikeyann) +- Fix mismatched types error in clip() when using non int64 numeric types ([#9498](https://github.com/rapidsai/cudf/pull/9498)) [@davidwendt](https://github.com/davidwendt) +- Match conda pinnings for style checks (revert part of #9412, #9433). 
([#9490](https://github.com/rapidsai/cudf/pull/9490)) [@bdice](https://github.com/bdice) +- Make sure all dask-cudf supported aggs are handled in `_tree_node_agg` ([#9487](https://github.com/rapidsai/cudf/pull/9487)) [@charlesbluca](https://github.com/charlesbluca) +- Resolve `hash_columns` `FutureWarning` in `dask_cudf` ([#9481](https://github.com/rapidsai/cudf/pull/9481)) [@pentschev](https://github.com/pentschev) +- Add fixed point to AllTypes in libcudf unit tests ([#9472](https://github.com/rapidsai/cudf/pull/9472)) [@karthikeyann](https://github.com/karthikeyann) +- Fix regex handling of embedded null characters ([#9470](https://github.com/rapidsai/cudf/pull/9470)) [@davidwendt](https://github.com/davidwendt) +- Fix memcheck error in copy-if-else ([#9467](https://github.com/rapidsai/cudf/pull/9467)) [@davidwendt](https://github.com/davidwendt) +- Fix bug in dask_cudf.read_parquet for index=False ([#9453](https://github.com/rapidsai/cudf/pull/9453)) [@rjzamora](https://github.com/rjzamora) +- Preserve the decimal scale when creating a default scalar ([#9449](https://github.com/rapidsai/cudf/pull/9449)) [@revans2](https://github.com/revans2) +- Push down parent nulls when flattening nested columns. 
([#9443](https://github.com/rapidsai/cudf/pull/9443)) [@mythrocks](https://github.com/mythrocks) +- Fix memcheck error in gtest SegmentedGatherTest/GatherSliced ([#9442](https://github.com/rapidsai/cudf/pull/9442)) [@davidwendt](https://github.com/davidwendt) +- Revert "Fix quantile division / partition handling for dask-cudf sort… ([#9438](https://github.com/rapidsai/cudf/pull/9438)) [@charlesbluca](https://github.com/charlesbluca) +- Allow int-like objects for the `decimals` argument in `round` ([#9428](https://github.com/rapidsai/cudf/pull/9428)) [@shwina](https://github.com/shwina) +- Fix stream compaction's `drop_duplicates` API to use stable sort ([#9417](https://github.com/rapidsai/cudf/pull/9417)) [@ttnghia](https://github.com/ttnghia) +- Skip Comparing Uniform Window Results in Var/std Tests ([#9416](https://github.com/rapidsai/cudf/pull/9416)) [@isVoid](https://github.com/isVoid) +- Fix `StructColumn.to_pandas` type handling issues ([#9388](https://github.com/rapidsai/cudf/pull/9388)) [@galipremsagar](https://github.com/galipremsagar) +- Correct issues in the build dir cudf-config.cmake ([#9386](https://github.com/rapidsai/cudf/pull/9386)) [@robertmaynard](https://github.com/robertmaynard) +- Fix Java table partition test to account for non-deterministic ordering ([#9385](https://github.com/rapidsai/cudf/pull/9385)) [@jlowe](https://github.com/jlowe) +- Fix timestamp truncation/overflow bugs in orc/parquet ([#9382](https://github.com/rapidsai/cudf/pull/9382)) [@PointKernel](https://github.com/PointKernel) +- Fix the crash in stats code ([#9368](https://github.com/rapidsai/cudf/pull/9368)) [@devavret](https://github.com/devavret) +- Make Series.hash_encode results reproducible. 
([#9366](https://github.com/rapidsai/cudf/pull/9366)) [@bdice](https://github.com/bdice) +- Fix libcudf compile warnings on debug 11.4 build ([#9360](https://github.com/rapidsai/cudf/pull/9360)) [@davidwendt](https://github.com/davidwendt) +- Fail gracefully when compiling python UDFs that attempt to access columns with unsupported dtypes ([#9359](https://github.com/rapidsai/cudf/pull/9359)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Set pass_filenames: false in mypy pre-commit configuration. ([#9349](https://github.com/rapidsai/cudf/pull/9349)) [@bdice](https://github.com/bdice) +- Fix cudf_assert in cudf::io::orc::gpu::gpuDecodeOrcColumnData ([#9348](https://github.com/rapidsai/cudf/pull/9348)) [@davidwendt](https://github.com/davidwendt) +- Fix memcheck error in groupby-tdigest get_scalar_minmax ([#9339](https://github.com/rapidsai/cudf/pull/9339)) [@davidwendt](https://github.com/davidwendt) +- Optimizations for `cudf.concat` when `axis=1` ([#9333](https://github.com/rapidsai/cudf/pull/9333)) [@galipremsagar](https://github.com/galipremsagar) +- Use f-string in join helper warning message. 
([#9325](https://github.com/rapidsai/cudf/pull/9325)) [@bdice](https://github.com/bdice) +- Avoid casting to list or struct dtypes in dask_cudf.read_parquet ([#9314](https://github.com/rapidsai/cudf/pull/9314)) [@rjzamora](https://github.com/rjzamora) +- Fix null count in statistics for parquet ([#9303](https://github.com/rapidsai/cudf/pull/9303)) [@devavret](https://github.com/devavret) +- Potential overflow of `decimal32` when casting to `int64_t` ([#9287](https://github.com/rapidsai/cudf/pull/9287)) [@codereport](https://github.com/codereport) +- Fix quantile division / partition handling for dask-cudf sort on null dataframes ([#9259](https://github.com/rapidsai/cudf/pull/9259)) [@charlesbluca](https://github.com/charlesbluca) +- Updating cudf version also updates rapids cmake branch ([#9249](https://github.com/rapidsai/cudf/pull/9249)) [@robertmaynard](https://github.com/robertmaynard) +- Implement `one_hot_encoding` in libcudf and bind to python ([#9229](https://github.com/rapidsai/cudf/pull/9229)) [@isVoid](https://github.com/isVoid) +- BUG FIX: CSV Writer ignores the header parameter when no metadata is provided ([#8740](https://github.com/rapidsai/cudf/pull/8740)) [@skirui-source](https://github.com/skirui-source) + +## 📖 Documentation + +- Update Documentation to use `TYPED_TEST_SUITE` ([#9654](https://github.com/rapidsai/cudf/pull/9654)) [@codereport](https://github.com/codereport) +- Add dedicated page for `StringHandling` in python docs ([#9624](https://github.com/rapidsai/cudf/pull/9624)) [@galipremsagar](https://github.com/galipremsagar) +- Update docstring of `DataFrame.merge` ([#9572](https://github.com/rapidsai/cudf/pull/9572)) [@galipremsagar](https://github.com/galipremsagar) +- Use raw strings to avoid SyntaxErrors in parsed docstrings. 
([#9526](https://github.com/rapidsai/cudf/pull/9526)) [@bdice](https://github.com/bdice) +- Add example to docstrings in `rolling.apply` ([#9522](https://github.com/rapidsai/cudf/pull/9522)) [@isVoid](https://github.com/isVoid) +- Update help message to escape quotes in ./build.sh --cmake-args. ([#9494](https://github.com/rapidsai/cudf/pull/9494)) [@bdice](https://github.com/bdice) +- Improve Python docstring formatting. ([#9493](https://github.com/rapidsai/cudf/pull/9493)) [@bdice](https://github.com/bdice) +- Update table of I/O supported types ([#9476](https://github.com/rapidsai/cudf/pull/9476)) [@vuule](https://github.com/vuule) +- Document invalid regex patterns as undefined behavior ([#9473](https://github.com/rapidsai/cudf/pull/9473)) [@davidwendt](https://github.com/davidwendt) +- Miscellaneous documentation fixes to `cudf` ([#9471](https://github.com/rapidsai/cudf/pull/9471)) [@galipremsagar](https://github.com/galipremsagar) +- Fix many documentation errors in libcudf. ([#9355](https://github.com/rapidsai/cudf/pull/9355)) [@karthikeyann](https://github.com/karthikeyann) +- Fixing SubwordTokenizer docs issue ([#9354](https://github.com/rapidsai/cudf/pull/9354)) [@mayankanand007](https://github.com/mayankanand007) +- Improved deprecation warnings. ([#9347](https://github.com/rapidsai/cudf/pull/9347)) [@bdice](https://github.com/bdice) +- doc reorder mr, stream to stream, mr ([#9308](https://github.com/rapidsai/cudf/pull/9308)) [@karthikeyann](https://github.com/karthikeyann) +- Deprecate method parameters to DataFrame.join, DataFrame.merge. 
([#9291](https://github.com/rapidsai/cudf/pull/9291)) [@bdice](https://github.com/bdice) +- Added deprecation warning for `.label_encoding()` ([#9289](https://github.com/rapidsai/cudf/pull/9289)) [@mayankanand007](https://github.com/mayankanand007) + +## 🚀 New Features + +- Enable Series.divide and DataFrame.divide ([#9630](https://github.com/rapidsai/cudf/pull/9630)) [@vyasr](https://github.com/vyasr) +- Update `bitmask_and` and `bitmask_or` to return a pair of resulting mask and count of unset bits ([#9616](https://github.com/rapidsai/cudf/pull/9616)) [@PointKernel](https://github.com/PointKernel) +- Add handling of mixed numeric types in `to_dlpack` ([#9585](https://github.com/rapidsai/cudf/pull/9585)) [@galipremsagar](https://github.com/galipremsagar) +- Support re.Pattern object for pat arg in str.replace ([#9573](https://github.com/rapidsai/cudf/pull/9573)) [@davidwendt](https://github.com/davidwendt) +- Add JNI for `lists::drop_list_duplicates` with keys-values input column ([#9553](https://github.com/rapidsai/cudf/pull/9553)) [@ttnghia](https://github.com/ttnghia) +- Support structs column in `min`, `max`, `argmin` and `argmax` groupby aggregate() and scan() ([#9545](https://github.com/rapidsai/cudf/pull/9545)) [@ttnghia](https://github.com/ttnghia) +- Move libcudacxx to use `rapids_cpm` and use newer versions ([#9539](https://github.com/rapidsai/cudf/pull/9539)) [@robertmaynard](https://github.com/robertmaynard) +- Add scan min/max support for chrono types to libcudf reduction-scan (not groupby scan) ([#9518](https://github.com/rapidsai/cudf/pull/9518)) [@davidwendt](https://github.com/davidwendt) +- Support `args=` in `apply` ([#9514](https://github.com/rapidsai/cudf/pull/9514)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Add groupby scan min/max support for strings 
values ([#9502](https://github.com/rapidsai/cudf/pull/9502)) [@davidwendt](https://github.com/davidwendt) +- Add list output option to character_ngrams() function ([#9499](https://github.com/rapidsai/cudf/pull/9499)) [@davidwendt](https://github.com/davidwendt) +- More granular column selection in ORC reader ([#9496](https://github.com/rapidsai/cudf/pull/9496)) [@vuule](https://github.com/vuule) +- add min_periods, ddof to groupby covariance, & correlation aggregation ([#9492](https://github.com/rapidsai/cudf/pull/9492)) [@karthikeyann](https://github.com/karthikeyann) +- Implement Series.datetime.floor ([#9488](https://github.com/rapidsai/cudf/pull/9488)) [@skirui-source](https://github.com/skirui-source) +- Enable linting of CMake files using pre-commit ([#9484](https://github.com/rapidsai/cudf/pull/9484)) [@vyasr](https://github.com/vyasr) +- Add support for single-line regex anchors ^/$ in contains_re ([#9482](https://github.com/rapidsai/cudf/pull/9482)) [@davidwendt](https://github.com/davidwendt) +- Augment `order_by` to Accept a List of `null_precedence` ([#9455](https://github.com/rapidsai/cudf/pull/9455)) [@isVoid](https://github.com/isVoid) +- Add format API for list column of strings ([#9454](https://github.com/rapidsai/cudf/pull/9454)) [@davidwendt](https://github.com/davidwendt) +- Enable Datetime/Timedelta dtypes in Masked UDFs ([#9451](https://github.com/rapidsai/cudf/pull/9451)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Add cudf python groupby.diff ([#9446](https://github.com/rapidsai/cudf/pull/9446)) [@karthikeyann](https://github.com/karthikeyann) +- Implement `lists::stable_sort_lists` for stable sorting of elements within each row of lists column ([#9425](https://github.com/rapidsai/cudf/pull/9425)) [@ttnghia](https://github.com/ttnghia) 
+- add ctest memcheck using cuda-sanitizer ([#9414](https://github.com/rapidsai/cudf/pull/9414)) [@karthikeyann](https://github.com/karthikeyann) +- Support Unary Operations in Masked UDF ([#9409](https://github.com/rapidsai/cudf/pull/9409)) [@isVoid](https://github.com/isVoid) +- Move Several Series Function to Frame ([#9394](https://github.com/rapidsai/cudf/pull/9394)) [@isVoid](https://github.com/isVoid) +- MD5 Python hash API ([#9390](https://github.com/rapidsai/cudf/pull/9390)) [@bdice](https://github.com/bdice) +- Add cudf strings is_title API ([#9380](https://github.com/rapidsai/cudf/pull/9380)) [@davidwendt](https://github.com/davidwendt) +- Enable casting to int64, uint64, and double in AST code. ([#9379](https://github.com/rapidsai/cudf/pull/9379)) [@vyasr](https://github.com/vyasr) +- Add support for writing ORC with map columns ([#9369](https://github.com/rapidsai/cudf/pull/9369)) [@vuule](https://github.com/vuule) +- extract_list_elements() with column_view indices ([#9367](https://github.com/rapidsai/cudf/pull/9367)) [@mythrocks](https://github.com/mythrocks) +- Reimplement `lists::drop_list_duplicates` for keys-values lists columns ([#9345](https://github.com/rapidsai/cudf/pull/9345)) [@ttnghia](https://github.com/ttnghia) +- Support Python UDFs written in terms of rows ([#9343](https://github.com/rapidsai/cudf/pull/9343)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- JNI: Support nested types in ORC writer ([#9334](https://github.com/rapidsai/cudf/pull/9334)) [@firestarman](https://github.com/firestarman) +- Optionally nullify out-of-bounds indices in segmented_gather(). 
([#9318](https://github.com/rapidsai/cudf/pull/9318)) [@mythrocks](https://github.com/mythrocks) +- Add shallow hash function and shallow equality comparison for column_view ([#9312](https://github.com/rapidsai/cudf/pull/9312)) [@karthikeyann](https://github.com/karthikeyann) +- Add CudaMemoryBuffer for cudaMalloc memory using RMM cuda_memory_resource ([#9311](https://github.com/rapidsai/cudf/pull/9311)) [@rongou](https://github.com/rongou) +- Add parameters to control row index stride and stripe size in ORC writer ([#9310](https://github.com/rapidsai/cudf/pull/9310)) [@vuule](https://github.com/vuule) +- Add `na_position` param to dask-cudf `sort_values` ([#9264](https://github.com/rapidsai/cudf/pull/9264)) [@charlesbluca](https://github.com/charlesbluca) +- Add `ascending` parameter for dask-cudf `sort_values` ([#9250](https://github.com/rapidsai/cudf/pull/9250)) [@charlesbluca](https://github.com/charlesbluca) +- New array conversion methods ([#9236](https://github.com/rapidsai/cudf/pull/9236)) [@vyasr](https://github.com/vyasr) +- Series `apply` method backed by masked UDFs ([#9217](https://github.com/rapidsai/cudf/pull/9217)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Grouping by frequency and resampling ([#9178](https://github.com/rapidsai/cudf/pull/9178)) [@shwina](https://github.com/shwina) +- Pure-python masked UDFs ([#9174](https://github.com/rapidsai/cudf/pull/9174)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Add Covariance, Pearson correlation for sort groupby (libcudf) ([#9154](https://github.com/rapidsai/cudf/pull/9154)) [@karthikeyann](https://github.com/karthikeyann) +- Add `calendrical_month_sequence` in c++ and `date_range` in python ([#8886](https://github.com/rapidsai/cudf/pull/8886)) [@shwina](https://github.com/shwina) + 
+## 🛠️ Improvements + +- Followup to PR 9088 comments ([#9659](https://github.com/rapidsai/cudf/pull/9659)) [@cwharris](https://github.com/cwharris) +- Update cuCollections to version that supports installed libcudacxx ([#9633](https://github.com/rapidsai/cudf/pull/9633)) [@robertmaynard](https://github.com/robertmaynard) +- Add `11.5` dev.yml to `cudf` ([#9617](https://github.com/rapidsai/cudf/pull/9617)) [@galipremsagar](https://github.com/galipremsagar) +- Add `xfail` for parquet reader `11.5` issue ([#9612](https://github.com/rapidsai/cudf/pull/9612)) [@galipremsagar](https://github.com/galipremsagar) +- remove deprecated Rmm.initialize method ([#9607](https://github.com/rapidsai/cudf/pull/9607)) [@rongou](https://github.com/rongou) +- Use HostColumnVectorCore for child columns in JCudfSerialization.unpackHostColumnVectors ([#9596](https://github.com/rapidsai/cudf/pull/9596)) [@sperlingxx](https://github.com/sperlingxx) +- Set RMM pool to a fixed size in JNI ([#9583](https://github.com/rapidsai/cudf/pull/9583)) [@rongou](https://github.com/rongou) +- Use nvCOMP for Snappy compression/decompression ([#9582](https://github.com/rapidsai/cudf/pull/9582)) [@vuule](https://github.com/vuule) +- Build CUDA version agnostic packages for dask-cudf ([#9578](https://github.com/rapidsai/cudf/pull/9578)) [@Ethyling](https://github.com/Ethyling) +- Fixed tests warning: "TYPED_TEST_CASE is deprecated, please use TYPED_TEST_SUITE" ([#9574](https://github.com/rapidsai/cudf/pull/9574)) [@ttnghia](https://github.com/ttnghia) +- Enable CMake format in CI and fix style ([#9570](https://github.com/rapidsai/cudf/pull/9570)) [@vyasr](https://github.com/vyasr) +- Add NVTX Start/End Ranges to JNI ([#9563](https://github.com/rapidsai/cudf/pull/9563)) [@abellina](https://github.com/abellina) +- Add 
librdkafka and python-confluent-kafka to dev conda environments s… ([#9562](https://github.com/rapidsai/cudf/pull/9562)) [@jdye64](https://github.com/jdye64) +- Add offsets_begin/end() to strings_column_view ([#9559](https://github.com/rapidsai/cudf/pull/9559)) [@davidwendt](https://github.com/davidwendt) +- remove alignment options for RMM jni ([#9550](https://github.com/rapidsai/cudf/pull/9550)) [@rongou](https://github.com/rongou) +- Add axis parameter passthrough to `DataFrame` and `Series` take for pandas API compatibility ([#9549](https://github.com/rapidsai/cudf/pull/9549)) [@dantegd](https://github.com/dantegd) +- Remove sizeof and standardize on memory_usage ([#9544](https://github.com/rapidsai/cudf/pull/9544)) [@vyasr](https://github.com/vyasr) +- Adds cudaProfilerStart/cudaProfilerStop in JNI api ([#9543](https://github.com/rapidsai/cudf/pull/9543)) [@abellina](https://github.com/abellina) +- Generalize comparison binary operations ([#9542](https://github.com/rapidsai/cudf/pull/9542)) [@vyasr](https://github.com/vyasr) +- Expose APIs to wrap CUDA or RMM allocations with a Java device buffer instance ([#9538](https://github.com/rapidsai/cudf/pull/9538)) [@jlowe](https://github.com/jlowe) +- Add scan sum support for duration types to libcudf ([#9536](https://github.com/rapidsai/cudf/pull/9536)) [@davidwendt](https://github.com/davidwendt) +- Force inlining to improve AST performance ([#9530](https://github.com/rapidsai/cudf/pull/9530)) [@vyasr](https://github.com/vyasr) +- Generalize some more indexed frame methods ([#9529](https://github.com/rapidsai/cudf/pull/9529)) [@vyasr](https://github.com/vyasr) +- Add Java bindings for rolling window stddev aggregation ([#9527](https://github.com/rapidsai/cudf/pull/9527)) [@razajafri](https://github.com/razajafri) +- catch 
rmm::out_of_memory exceptions in jni ([#9525](https://github.com/rapidsai/cudf/pull/9525)) [@rongou](https://github.com/rongou) +- Add an overload of `make_empty_column` with `type_id` parameter ([#9524](https://github.com/rapidsai/cudf/pull/9524)) [@ttnghia](https://github.com/ttnghia) +- Accelerate conditional inner joins with larger right tables ([#9523](https://github.com/rapidsai/cudf/pull/9523)) [@vyasr](https://github.com/vyasr) +- Initial pass of generalizing `decimal` support in `cudf` python layer ([#9517](https://github.com/rapidsai/cudf/pull/9517)) [@galipremsagar](https://github.com/galipremsagar) +- Cleanup for flattening nested columns ([#9509](https://github.com/rapidsai/cudf/pull/9509)) [@rwlee](https://github.com/rwlee) +- Enable running tests using RMM arena and async memory resources ([#9506](https://github.com/rapidsai/cudf/pull/9506)) [@rongou](https://github.com/rongou) +- Remove dependency on six. ([#9495](https://github.com/rapidsai/cudf/pull/9495)) [@bdice](https://github.com/bdice) +- Cleanup some libcudf strings gtests ([#9489](https://github.com/rapidsai/cudf/pull/9489)) [@davidwendt](https://github.com/davidwendt) +- Rename strings/array_tests.cu to strings/array_tests.cpp ([#9480](https://github.com/rapidsai/cudf/pull/9480)) [@davidwendt](https://github.com/davidwendt) +- Refactor sorting APIs ([#9464](https://github.com/rapidsai/cudf/pull/9464)) [@vyasr](https://github.com/vyasr) +- Implement DataFrame.hash_values, deprecate DataFrame.hash_columns. ([#9458](https://github.com/rapidsai/cudf/pull/9458)) [@bdice](https://github.com/bdice) +- Deprecate Series.hash_encode. 
([#9457](https://github.com/rapidsai/cudf/pull/9457)) [@bdice](https://github.com/bdice) +- Update `conda` recipes for Enhanced Compatibility effort ([#9456](https://github.com/rapidsai/cudf/pull/9456)) [@ajschmidt8](https://github.com/ajschmidt8) +- Small clean up to simplify column selection code in ORC reader ([#9444](https://github.com/rapidsai/cudf/pull/9444)) [@vuule](https://github.com/vuule) +- add missing stream to scalar.is_valid() wherever stream is available ([#9436](https://github.com/rapidsai/cudf/pull/9436)) [@karthikeyann](https://github.com/karthikeyann) +- Adds Deprecation Warnings to `one_hot_encoding` and Implement `get_dummies` with Cython API ([#9435](https://github.com/rapidsai/cudf/pull/9435)) [@isVoid](https://github.com/isVoid) +- Update pre-commit hook URLs. ([#9433](https://github.com/rapidsai/cudf/pull/9433)) [@bdice](https://github.com/bdice) +- Remove pyarrow import in `dask_cudf.io.parquet` ([#9429](https://github.com/rapidsai/cudf/pull/9429)) [@charlesbluca](https://github.com/charlesbluca) +- Miscellaneous improvements for UDFs ([#9422](https://github.com/rapidsai/cudf/pull/9422)) [@isVoid](https://github.com/isVoid) +- Use pre-commit for CI ([#9412](https://github.com/rapidsai/cudf/pull/9412)) [@vyasr](https://github.com/vyasr) +- Update to UCX-Py 0.23 ([#9407](https://github.com/rapidsai/cudf/pull/9407)) [@pentschev](https://github.com/pentschev) +- Expose OutOfBoundsPolicy in JNI for Table.gather ([#9406](https://github.com/rapidsai/cudf/pull/9406)) [@abellina](https://github.com/abellina) +- Improvements to tdigest aggregation code. 
([#9403](https://github.com/rapidsai/cudf/pull/9403)) [@nvdbaranec](https://github.com/nvdbaranec) +- Add Java API to deserialize a table to host columns ([#9402](https://github.com/rapidsai/cudf/pull/9402)) [@jlowe](https://github.com/jlowe) +- Frame copy to use __class__ instead of type() ([#9397](https://github.com/rapidsai/cudf/pull/9397)) [@madsbk](https://github.com/madsbk) +- Change all DeprecationWarnings to FutureWarning. ([#9392](https://github.com/rapidsai/cudf/pull/9392)) [@bdice](https://github.com/bdice) +- Update Java nvcomp JNI bindings to nvcomp 2.x API ([#9384](https://github.com/rapidsai/cudf/pull/9384)) [@jbrennan333](https://github.com/jbrennan333) +- Add IndexedFrame class and move SingleColumnFrame to a separate module ([#9378](https://github.com/rapidsai/cudf/pull/9378)) [@vyasr](https://github.com/vyasr) +- Support Arrow NativeFile and PythonFile for remote ORC storage ([#9377](https://github.com/rapidsai/cudf/pull/9377)) [@rjzamora](https://github.com/rjzamora) +- Use Arrow PythonFile for remote CSV storage ([#9376](https://github.com/rapidsai/cudf/pull/9376)) [@rjzamora](https://github.com/rjzamora) +- Add multi-threaded writing to GDS writes ([#9372](https://github.com/rapidsai/cudf/pull/9372)) [@devavret](https://github.com/devavret) +- Miscellaneous column cleanup ([#9370](https://github.com/rapidsai/cudf/pull/9370)) [@vyasr](https://github.com/vyasr) +- Use single kernel to extract all groups in cudf::strings::extract ([#9358](https://github.com/rapidsai/cudf/pull/9358)) [@davidwendt](https://github.com/davidwendt) +- Consolidate binary ops into `Frame` ([#9357](https://github.com/rapidsai/cudf/pull/9357)) [@isVoid](https://github.com/isVoid) +- Move rank scan implementations from scan_inclusive.cu to rank_scan.cu 
([#9351](https://github.com/rapidsai/cudf/pull/9351)) [@davidwendt](https://github.com/davidwendt) +- Remove usage of deprecated thrust::host_space_tag. ([#9350](https://github.com/rapidsai/cudf/pull/9350)) [@bdice](https://github.com/bdice) +- Use Default Memory Resource for Temporaries in `reduction.cpp` ([#9344](https://github.com/rapidsai/cudf/pull/9344)) [@isVoid](https://github.com/isVoid) +- Fix Cython compilation warnings. ([#9327](https://github.com/rapidsai/cudf/pull/9327)) [@bdice](https://github.com/bdice) +- Fix some unused variable warnings in libcudf ([#9326](https://github.com/rapidsai/cudf/pull/9326)) [@davidwendt](https://github.com/davidwendt) +- Use optional-iterator for copy-if-else kernel ([#9324](https://github.com/rapidsai/cudf/pull/9324)) [@davidwendt](https://github.com/davidwendt) +- Remove Table class ([#9315](https://github.com/rapidsai/cudf/pull/9315)) [@vyasr](https://github.com/vyasr) +- Unpin `dask` and `distributed` in CI ([#9307](https://github.com/rapidsai/cudf/pull/9307)) [@galipremsagar](https://github.com/galipremsagar) +- Add optional-iterator support to indexalator ([#9306](https://github.com/rapidsai/cudf/pull/9306)) [@davidwendt](https://github.com/davidwendt) +- Consolidate more methods in Frame ([#9305](https://github.com/rapidsai/cudf/pull/9305)) [@vyasr](https://github.com/vyasr) +- Add Arrow-NativeFile and PythonFile support to read_parquet and read_csv in cudf ([#9304](https://github.com/rapidsai/cudf/pull/9304)) [@rjzamora](https://github.com/rjzamora) +- Pin mypy in .pre-commit-config.yaml to match conda environment pinning. 
([#9300](https://github.com/rapidsai/cudf/pull/9300)) [@bdice](https://github.com/bdice) +- Use gather.hpp when gather-map exists in device memory ([#9299](https://github.com/rapidsai/cudf/pull/9299)) [@davidwendt](https://github.com/davidwendt) +- Fix Automerger for `Branch-21.12` from `branch-21.10` ([#9285](https://github.com/rapidsai/cudf/pull/9285)) [@galipremsagar](https://github.com/galipremsagar) +- Refactor cuIO timestamp processing with `cuda::std::chrono` ([#9278](https://github.com/rapidsai/cudf/pull/9278)) [@PointKernel](https://github.com/PointKernel) +- Change strings copy_if_else to use optional-iterator instead of pair-iterator ([#9266](https://github.com/rapidsai/cudf/pull/9266)) [@davidwendt](https://github.com/davidwendt) +- Update cudf java bindings to 21.12.0-SNAPSHOT ([#9248](https://github.com/rapidsai/cudf/pull/9248)) [@pxLi](https://github.com/pxLi) +- Various internal MultiIndex improvements ([#9243](https://github.com/rapidsai/cudf/pull/9243)) [@vyasr](https://github.com/vyasr) +- Add detail interface for `split` and `slice(table_view)`, refactors both function with `host_span` ([#9226](https://github.com/rapidsai/cudf/pull/9226)) [@isVoid](https://github.com/isVoid) +- Refactor MD5 implementation. ([#9212](https://github.com/rapidsai/cudf/pull/9212)) [@bdice](https://github.com/bdice) +- Update groupby result_cache to allow sharing intermediate results based on column_view instead of requests. 
([#9195](https://github.com/rapidsai/cudf/pull/9195)) [@karthikeyann](https://github.com/karthikeyann) +- Use nvcomp's snappy decompressor in avro reader ([#9181](https://github.com/rapidsai/cudf/pull/9181)) [@devavret](https://github.com/devavret) +- Add `isocalendar` API support ([#9169](https://github.com/rapidsai/cudf/pull/9169)) [@marlenezw](https://github.com/marlenezw) +- Simplify read_json by removing unnecessary reader/impl classes ([#9088](https://github.com/rapidsai/cudf/pull/9088)) [@cwharris](https://github.com/cwharris) +- Simplify read_csv by removing unnecessary reader/impl classes ([#9041](https://github.com/rapidsai/cudf/pull/9041)) [@cwharris](https://github.com/cwharris) +- Refactor hash join with cuCollections multimap ([#8934](https://github.com/rapidsai/cudf/pull/8934)) [@PointKernel](https://github.com/PointKernel) + +# cuDF 21.10.00 (7 Oct 2021) + +## 🚨 Breaking Changes + +- Remove Cython APIs for table view generation ([#9199](https://github.com/rapidsai/cudf/pull/9199)) [@vyasr](https://github.com/vyasr) +- Upgrade `pandas` version in `cudf` ([#9147](https://github.com/rapidsai/cudf/pull/9147)) [@galipremsagar](https://github.com/galipremsagar) +- Make AST operators nullable ([#9096](https://github.com/rapidsai/cudf/pull/9096)) [@vyasr](https://github.com/vyasr) +- Remove the option to pass data types as strings to `read_csv` and `read_json` ([#9079](https://github.com/rapidsai/cudf/pull/9079)) [@vuule](https://github.com/vuule) +- Update JNI java CSV APIs to not use deprecated API ([#9066](https://github.com/rapidsai/cudf/pull/9066)) [@revans2](https://github.com/revans2) +- Support additional format specifiers in from_timestamps ([#9047](https://github.com/rapidsai/cudf/pull/9047)) [@davidwendt](https://github.com/davidwendt) +- Expose expression 
base class publicly and simplify public AST API ([#9045](https://github.com/rapidsai/cudf/pull/9045)) [@vyasr](https://github.com/vyasr) +- Add support for struct type in ORC writer ([#9025](https://github.com/rapidsai/cudf/pull/9025)) [@vuule](https://github.com/vuule) +- Remove aliases of various api.types APIs from utils.dtypes. ([#9011](https://github.com/rapidsai/cudf/pull/9011)) [@vyasr](https://github.com/vyasr) +- Java bindings for conditional join output sizes ([#9002](https://github.com/rapidsai/cudf/pull/9002)) [@jlowe](https://github.com/jlowe) +- Move compute_column API out of ast namespace ([#8957](https://github.com/rapidsai/cudf/pull/8957)) [@vyasr](https://github.com/vyasr) +- `cudf.dtype` function ([#8949](https://github.com/rapidsai/cudf/pull/8949)) [@shwina](https://github.com/shwina) +- Refactor Frame reductions ([#8944](https://github.com/rapidsai/cudf/pull/8944)) [@vyasr](https://github.com/vyasr) +- Add nested column selection to parquet reader ([#8933](https://github.com/rapidsai/cudf/pull/8933)) [@devavret](https://github.com/devavret) +- JNI Aggregation Type Changes ([#8919](https://github.com/rapidsai/cudf/pull/8919)) [@revans2](https://github.com/revans2) +- Add groupby_aggregation and groupby_scan_aggregation classes and force their usage. 
([#8906](https://github.com/rapidsai/cudf/pull/8906)) [@nvdbaranec](https://github.com/nvdbaranec) +- Expand CSV and JSON reader APIs to accept `dtypes` as a vector or map of `data_type` objects ([#8856](https://github.com/rapidsai/cudf/pull/8856)) [@vuule](https://github.com/vuule) +- Change cudf docs theme to pydata theme ([#8746](https://github.com/rapidsai/cudf/pull/8746)) [@galipremsagar](https://github.com/galipremsagar) +- Enable compiled binary ops in libcudf, python and java ([#8741](https://github.com/rapidsai/cudf/pull/8741)) [@karthikeyann](https://github.com/karthikeyann) +- Make groupby transform-like op order match original data order ([#8720](https://github.com/rapidsai/cudf/pull/8720)) [@isVoid](https://github.com/isVoid) + +## 🐛 Bug Fixes + +- `fixed_point` `cudf::groupby` for `mean` aggregation ([#9296](https://github.com/rapidsai/cudf/pull/9296)) [@codereport](https://github.com/codereport) +- Fix `interleave_columns` when the input string lists column having empty child column ([#9292](https://github.com/rapidsai/cudf/pull/9292)) [@ttnghia](https://github.com/ttnghia) +- Update nvcomp to include fixes for installation of headers ([#9276](https://github.com/rapidsai/cudf/pull/9276)) [@devavret](https://github.com/devavret) +- Fix Java column leak in testParquetWriteMap ([#9271](https://github.com/rapidsai/cudf/pull/9271)) [@jlowe](https://github.com/jlowe) +- Fix call to thrust::reduce_by_key in argmin/argmax libcudf groupby ([#9263](https://github.com/rapidsai/cudf/pull/9263)) [@davidwendt](https://github.com/davidwendt) +- Fixing empty input to getMapValue crashing ([#9262](https://github.com/rapidsai/cudf/pull/9262)) [@hyperbolic2346](https://github.com/hyperbolic2346) +- Fix duplicate names issue in `MultiIndex.deserialize ` 
([#9258](https://github.com/rapidsai/cudf/pull/9258)) [@galipremsagar](https://github.com/galipremsagar) +- `Dataframe.sort_index` optimizations ([#9238](https://github.com/rapidsai/cudf/pull/9238)) [@galipremsagar](https://github.com/galipremsagar) +- Temporarily disabling problematic test in parquet writer ([#9230](https://github.com/rapidsai/cudf/pull/9230)) [@devavret](https://github.com/devavret) +- Explicitly disable groupby on unsupported key types. ([#9227](https://github.com/rapidsai/cudf/pull/9227)) [@mythrocks](https://github.com/mythrocks) +- Fix `gather` for sliced input structs column ([#9218](https://github.com/rapidsai/cudf/pull/9218)) [@ttnghia](https://github.com/ttnghia) +- Fix JNI code for left semi and anti joins ([#9207](https://github.com/rapidsai/cudf/pull/9207)) [@jlowe](https://github.com/jlowe) +- Only install thrust when using a non 'system' version ([#9206](https://github.com/rapidsai/cudf/pull/9206)) [@robertmaynard](https://github.com/robertmaynard) +- Remove zlib from libcudf public CMake dependencies ([#9204](https://github.com/rapidsai/cudf/pull/9204)) [@robertmaynard](https://github.com/robertmaynard) +- Fix out-of-bounds memory read in orc gpuEncodeOrcColumnData ([#9196](https://github.com/rapidsai/cudf/pull/9196)) [@davidwendt](https://github.com/davidwendt) +- Fix `gather()` for `STRUCT` inputs with no nulls in members. 
([#9194](https://github.com/rapidsai/cudf/pull/9194)) [@mythrocks](https://github.com/mythrocks) +- get_cucollections properly uses rapids_cpm_find ([#9189](https://github.com/rapidsai/cudf/pull/9189)) [@robertmaynard](https://github.com/robertmaynard) +- rapids-export correctly reference build code block and doc strings ([#9186](https://github.com/rapidsai/cudf/pull/9186)) [@robertmaynard](https://github.com/robertmaynard) +- Fix logic while parsing the sum statistic for numerical orc columns ([#9183](https://github.com/rapidsai/cudf/pull/9183)) [@ayushdg](https://github.com/ayushdg) +- Add handling for nulls in `dask_cudf.sorting.quantile_divisions` ([#9171](https://github.com/rapidsai/cudf/pull/9171)) [@charlesbluca](https://github.com/charlesbluca) +- Approximate overflow detection in ORC statistics ([#9163](https://github.com/rapidsai/cudf/pull/9163)) [@vuule](https://github.com/vuule) +- Use decimal precision metadata when reading from parquet files ([#9162](https://github.com/rapidsai/cudf/pull/9162)) [@shwina](https://github.com/shwina) +- Fix variable name in Java build script ([#9161](https://github.com/rapidsai/cudf/pull/9161)) [@jlowe](https://github.com/jlowe) +- Import rapids-cmake modules using the correct cmake variable. ([#9149](https://github.com/rapidsai/cudf/pull/9149)) [@robertmaynard](https://github.com/robertmaynard) +- Fix conditional joins with empty left table ([#9146](https://github.com/rapidsai/cudf/pull/9146)) [@vyasr](https://github.com/vyasr) +- Fix joining on indexes with duplicate level names ([#9137](https://github.com/rapidsai/cudf/pull/9137)) [@shwina](https://github.com/shwina) +- Fixes missing child column name in dtype while reading ORC file. 
([#9134](https://github.com/rapidsai/cudf/pull/9134)) [@rgsl888prabhu](https://github.com/rgsl888prabhu) +- Apply type metadata after column is slice-copied ([#9131](https://github.com/rapidsai/cudf/pull/9131)) [@isVoid](https://github.com/isVoid) +- Fix a bug: inner_join_size return zero if build table is empty ([#9128](https://github.com/rapidsai/cudf/pull/9128)) [@PointKernel](https://github.com/PointKernel) +- Fix multi hive-partition parquet reading in dask-cudf ([#9122](https://github.com/rapidsai/cudf/pull/9122)) [@rjzamora](https://github.com/rjzamora) +- Support null literals in expressions ([#9117](https://github.com/rapidsai/cudf/pull/9117)) [@vyasr](https://github.com/vyasr) +- Fix cudf::hash_join output size for struct joins ([#9107](https://github.com/rapidsai/cudf/pull/9107)) [@jlowe](https://github.com/jlowe) +- Import fix ([#9104](https://github.com/rapidsai/cudf/pull/9104)) [@shwina](https://github.com/shwina) +- Fix cudf::strings::is_fixed_point checking of overflow for decimal32 ([#9093](https://github.com/rapidsai/cudf/pull/9093)) [@davidwendt](https://github.com/davidwendt) +- Fix branch_stack calculation in `row_bit_count()` ([#9076](https://github.com/rapidsai/cudf/pull/9076)) [@mythrocks](https://github.com/mythrocks) +- Fetch rapids-cmake to work around cuCollection cmake issue ([#9075](https://github.com/rapidsai/cudf/pull/9075)) [@jlowe](https://github.com/jlowe) +- Fix compilation errors in groupby benchmarks. 
([#9072](https://github.com/rapidsai/cudf/pull/9072)) [@nvdbaranec](https://github.com/nvdbaranec) +- Preserve float16 upscaling ([#9069](https://github.com/rapidsai/cudf/pull/9069)) [@galipremsagar](https://github.com/galipremsagar) +- Fix memcheck read error in libcudf contiguous_split ([#9067](https://github.com/rapidsai/cudf/pull/9067)) [@davidwendt](https://github.com/davidwendt) +- Add support for reading ORC file with no row group index ([#9060](https://github.com/rapidsai/cudf/pull/9060)) [@rgsl888prabhu](https://github.com/rgsl888prabhu) +- Various multiindex related fixes ([#9036](https://github.com/rapidsai/cudf/pull/9036)) [@shwina](https://github.com/shwina) +- Avoid rebuilding cython in build.sh ([#9034](https://github.com/rapidsai/cudf/pull/9034)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Add support for percentile dispatch in `dask_cudf` ([#9031](https://github.com/rapidsai/cudf/pull/9031)) [@galipremsagar](https://github.com/galipremsagar) +- cudf resolve nvcc 11.0 compiler crashes during codegen ([#9028](https://github.com/rapidsai/cudf/pull/9028)) [@robertmaynard](https://github.com/robertmaynard) +- Fetch correct grouping keys `agg` of dask groupby ([#9022](https://github.com/rapidsai/cudf/pull/9022)) [@galipremsagar](https://github.com/galipremsagar) +- Allow `where()` to work with a Series and `other=cudf.NA` ([#9019](https://github.com/rapidsai/cudf/pull/9019)) [@sarahyurick](https://github.com/sarahyurick) +- Use correct index when returning Series from `GroupBy.apply()` ([#9016](https://github.com/rapidsai/cudf/pull/9016)) [@charlesbluca](https://github.com/charlesbluca) +- Fix `Dataframe` indexer setitem when array is passed ([#9006](https://github.com/rapidsai/cudf/pull/9006)) [@galipremsagar](https://github.com/galipremsagar) +- 
Fix ORC reading of files with struct columns that have null values ([#9005](https://github.com/rapidsai/cudf/pull/9005)) [@vuule](https://github.com/vuule) +- Ensure JNI native libraries load when CompiledExpression loads ([#8997](https://github.com/rapidsai/cudf/pull/8997)) [@jlowe](https://github.com/jlowe) +- Fix memory read error in get_dremel_data in page_enc.cu ([#8995](https://github.com/rapidsai/cudf/pull/8995)) [@davidwendt](https://github.com/davidwendt) +- Fix memory write error in get_list_child_to_list_row_mapping utility ([#8994](https://github.com/rapidsai/cudf/pull/8994)) [@davidwendt](https://github.com/davidwendt) +- Fix debug compile error for csv_test.cpp ([#8981](https://github.com/rapidsai/cudf/pull/8981)) [@davidwendt](https://github.com/davidwendt) +- Fix memory read/write error in concatenate_lists_ignore_null ([#8978](https://github.com/rapidsai/cudf/pull/8978)) [@davidwendt](https://github.com/davidwendt) +- Fix concatenation of `cudf.RangeIndex` ([#8970](https://github.com/rapidsai/cudf/pull/8970)) [@galipremsagar](https://github.com/galipremsagar) +- Java conditional joins should not require matching column counts ([#8955](https://github.com/rapidsai/cudf/pull/8955)) [@jlowe](https://github.com/jlowe) +- Fix concatenate empty structs ([#8947](https://github.com/rapidsai/cudf/pull/8947)) [@sperlingxx](https://github.com/sperlingxx) +- Fix cuda-memcheck errors for some libcudf functions ([#8941](https://github.com/rapidsai/cudf/pull/8941)) [@davidwendt](https://github.com/davidwendt) +- Apply series name to result of `SeriesGroupby.apply()` ([#8939](https://github.com/rapidsai/cudf/pull/8939)) [@charlesbluca](https://github.com/charlesbluca) +- `cdef packed_columns` as `cppclass` instead of `struct` ([#8936](https://github.com/rapidsai/cudf/pull/8936)) 
[@charlesbluca](https://github.com/charlesbluca) +- Inserting a `cudf.NA` into a DataFrame ([#8923](https://github.com/rapidsai/cudf/pull/8923)) [@sarahyurick](https://github.com/sarahyurick) +- Support casting with Pandas dtype aliases ([#8920](https://github.com/rapidsai/cudf/pull/8920)) [@sarahyurick](https://github.com/sarahyurick) +- Allow `sort_values` to accept same `kind` values as Pandas ([#8912](https://github.com/rapidsai/cudf/pull/8912)) [@sarahyurick](https://github.com/sarahyurick) +- Enable casting to pandas nullable dtypes ([#8889](https://github.com/rapidsai/cudf/pull/8889)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Fix libcudf memory errors ([#8884](https://github.com/rapidsai/cudf/pull/8884)) [@karthikeyann](https://github.com/karthikeyann) +- Throw KeyError when accessing field from struct with nonexistent key ([#8880](https://github.com/rapidsai/cudf/pull/8880)) [@NV-jpt](https://github.com/NV-jpt) +- replace auto with auto& ref for cast<&> ([#8866](https://github.com/rapidsai/cudf/pull/8866)) [@karthikeyann](https://github.com/karthikeyann) +- Add missing include<optional> in binops ([#8864](https://github.com/rapidsai/cudf/pull/8864)) [@karthikeyann](https://github.com/karthikeyann) +- Fix `select_dtypes` to work when non-class dtypes present in dataframe ([#8849](https://github.com/rapidsai/cudf/pull/8849)) [@sarahyurick](https://github.com/sarahyurick) +- Re-enable JSON tests ([#8843](https://github.com/rapidsai/cudf/pull/8843)) [@vuule](https://github.com/vuule) +- Support header with embedded delimiter in csv writer ([#8798](https://github.com/rapidsai/cudf/pull/8798)) [@davidwendt](https://github.com/davidwendt) + +## 📖 Documentation + +- Add IO docs page in `cudf` documentation ([#9145](https://github.com/rapidsai/cudf/pull/9145)) 
[@galipremsagar](https://github.com/galipremsagar) +- use correct namespace in cuio code examples ([#9037](https://github.com/rapidsai/cudf/pull/9037)) [@cwharris](https://github.com/cwharris) +- Restructuring `Contributing doc` ([#9026](https://github.com/rapidsai/cudf/pull/9026)) [@iskode](https://github.com/iskode) +- Update stable version in readme ([#9008](https://github.com/rapidsai/cudf/pull/9008)) [@galipremsagar](https://github.com/galipremsagar) +- Add spans and more include guidelines to libcudf developer guide ([#8931](https://github.com/rapidsai/cudf/pull/8931)) [@harrism](https://github.com/harrism) +- Update Java build instructions to mention Arrow S3 and Docker ([#8867](https://github.com/rapidsai/cudf/pull/8867)) [@jlowe](https://github.com/jlowe) +- List GDS-enabled formats in the docs ([#8805](https://github.com/rapidsai/cudf/pull/8805)) [@vuule](https://github.com/vuule) +- Change cudf docs theme to pydata theme ([#8746](https://github.com/rapidsai/cudf/pull/8746)) [@galipremsagar](https://github.com/galipremsagar) + +## 🚀 New Features + +- Revert "Add shallow hash function and shallow equality comparison for column_view ([#9185](https://github.com/rapidsai/cudf/pull/9185))" ([#9283](https://github.com/rapidsai/cudf/pull/9283)) [@karthikeyann](https://github.com/karthikeyann) +- Align `DataFrame.apply` signature with pandas ([#9275](https://github.com/rapidsai/cudf/pull/9275)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Add struct type support for `drop_list_duplicates` ([#9202](https://github.com/rapidsai/cudf/pull/9202)) [@ttnghia](https://github.com/ttnghia) +- support CUDA async memory resource in JNI ([#9201](https://github.com/rapidsai/cudf/pull/9201)) [@rongou](https://github.com/rongou) +- Add shallow hash function and shallow equality comparison for column_view 
([#9185](https://github.com/rapidsai/cudf/pull/9185)) [@karthikeyann](https://github.com/karthikeyann) +- Superimpose null masks for STRUCT columns. ([#9144](https://github.com/rapidsai/cudf/pull/9144)) [@mythrocks](https://github.com/mythrocks) +- Implemented bindings for `ceil` timestamp operation ([#9141](https://github.com/rapidsai/cudf/pull/9141)) [@shaneding](https://github.com/shaneding) +- Adding MAP type support for ORC Reader ([#9132](https://github.com/rapidsai/cudf/pull/9132)) [@rgsl888prabhu](https://github.com/rgsl888prabhu) +- Implement `interleave_columns` for lists with arbitrary nested type ([#9130](https://github.com/rapidsai/cudf/pull/9130)) [@ttnghia](https://github.com/ttnghia) +- Add python bindings to fixed-size window and groupby `rolling.var`, `rolling.std` ([#9097](https://github.com/rapidsai/cudf/pull/9097)) [@isVoid](https://github.com/isVoid) +- Make AST operators nullable ([#9096](https://github.com/rapidsai/cudf/pull/9096)) [@vyasr](https://github.com/vyasr) +- Java bindings for approx_percentile ([#9094](https://github.com/rapidsai/cudf/pull/9094)) [@andygrove](https://github.com/andygrove) +- Add `dseries.struct.explode` ([#9086](https://github.com/rapidsai/cudf/pull/9086)) [@isVoid](https://github.com/isVoid) +- Add support for BaseIndexer in Rolling APIs ([#9085](https://github.com/rapidsai/cudf/pull/9085)) [@galipremsagar](https://github.com/galipremsagar) +- Remove the option to pass data types as strings to `read_csv` and `read_json` ([#9079](https://github.com/rapidsai/cudf/pull/9079)) [@vuule](https://github.com/vuule) +- Add handling for nested dicts in dask-cudf groupby ([#9054](https://github.com/rapidsai/cudf/pull/9054)) [@charlesbluca](https://github.com/charlesbluca) +- Added Series.dt.is_quarter_start and Series.dt.is_quarter_end 
([#9046](https://github.com/rapidsai/cudf/pull/9046)) [@TravisHester](https://github.com/TravisHester) +- Support nested types for nth_element reduction ([#9043](https://github.com/rapidsai/cudf/pull/9043)) [@sperlingxx](https://github.com/sperlingxx) +- Update sort groupby to use non-atomic operation ([#9035](https://github.com/rapidsai/cudf/pull/9035)) [@karthikeyann](https://github.com/karthikeyann) +- Add support for struct type in ORC writer ([#9025](https://github.com/rapidsai/cudf/pull/9025)) [@vuule](https://github.com/vuule) +- Implement `interleave_columns` for structs columns ([#9012](https://github.com/rapidsai/cudf/pull/9012)) [@ttnghia](https://github.com/ttnghia) +- Add groupby first and last aggregations ([#9004](https://github.com/rapidsai/cudf/pull/9004)) [@shwina](https://github.com/shwina) +- Add `DecimalBaseColumn` and move `as_decimal_column` ([#9001](https://github.com/rapidsai/cudf/pull/9001)) [@isVoid](https://github.com/isVoid) +- Python/Cython bindings for multibyte_split ([#8998](https://github.com/rapidsai/cudf/pull/8998)) [@jdye64](https://github.com/jdye64) +- Support scalar `months` in `add_calendrical_months`, extends API to INT32 support ([#8991](https://github.com/rapidsai/cudf/pull/8991)) [@isVoid](https://github.com/isVoid) +- Added Series.dt.is_month_end ([#8989](https://github.com/rapidsai/cudf/pull/8989)) [@TravisHester](https://github.com/TravisHester) +- Support for using tdigests to compute approximate percentiles. 
([#8983](https://github.com/rapidsai/cudf/pull/8983)) [@nvdbaranec](https://github.com/nvdbaranec) +- Support "unflatten" of columns flattened via `flatten_nested_columns()`: ([#8956](https://github.com/rapidsai/cudf/pull/8956)) [@mythrocks](https://github.com/mythrocks) +- Implement timestamp ceil ([#8942](https://github.com/rapidsai/cudf/pull/8942)) [@shaneding](https://github.com/shaneding) +- Add nested column selection to parquet reader ([#8933](https://github.com/rapidsai/cudf/pull/8933)) [@devavret](https://github.com/devavret) +- Expose conditional join size calculation ([#8928](https://github.com/rapidsai/cudf/pull/8928)) [@vyasr](https://github.com/vyasr) +- Support Nulls in Timeseries Generator ([#8925](https://github.com/rapidsai/cudf/pull/8925)) [@isVoid](https://github.com/isVoid) +- Avoid index equality check in `_CPackedColumns.from_py_table()` ([#8917](https://github.com/rapidsai/cudf/pull/8917)) [@charlesbluca](https://github.com/charlesbluca) +- Add dot product binary op ([#8909](https://github.com/rapidsai/cudf/pull/8909)) [@charlesbluca](https://github.com/charlesbluca) +- Expose `days_in_month` function in libcudf and add python bindings ([#8892](https://github.com/rapidsai/cudf/pull/8892)) [@isVoid](https://github.com/isVoid) +- Series string repeat ([#8882](https://github.com/rapidsai/cudf/pull/8882)) [@sarahyurick](https://github.com/sarahyurick) +- Python binding for quarters ([#8862](https://github.com/rapidsai/cudf/pull/8862)) [@shaneding](https://github.com/shaneding) +- Expand CSV and JSON reader APIs to accept `dtypes` as a vector or map of `data_type` objects ([#8856](https://github.com/rapidsai/cudf/pull/8856)) [@vuule](https://github.com/vuule) +- Add Java bindings for AST transform ([#8846](https://github.com/rapidsai/cudf/pull/8846)) 
[@jlowe](https://github.com/jlowe) +- Series datetime is_month_start ([#8844](https://github.com/rapidsai/cudf/pull/8844)) [@sarahyurick](https://github.com/sarahyurick) +- Support bracket syntax for cudf::strings::replace_with_backrefs group index values ([#8841](https://github.com/rapidsai/cudf/pull/8841)) [@davidwendt](https://github.com/davidwendt) +- Support `VARIANCE` and `STD` aggregation in rolling op ([#8809](https://github.com/rapidsai/cudf/pull/8809)) [@isVoid](https://github.com/isVoid) +- Add quarters to libcudf datetime ([#8779](https://github.com/rapidsai/cudf/pull/8779)) [@shaneding](https://github.com/shaneding) +- Linear Interpolation of `nan`s via `cupy` ([#8767](https://github.com/rapidsai/cudf/pull/8767)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Enable compiled binary ops in libcudf, python and java ([#8741](https://github.com/rapidsai/cudf/pull/8741)) [@karthikeyann](https://github.com/karthikeyann) +- Make groupby transform-like op order match original data order ([#8720](https://github.com/rapidsai/cudf/pull/8720)) [@isVoid](https://github.com/isVoid) +- multibyte_split ([#8702](https://github.com/rapidsai/cudf/pull/8702)) [@cwharris](https://github.com/cwharris) +- Implement JNI for `strings:repeat_strings` that repeats each string separately by different numbers of times ([#8572](https://github.com/rapidsai/cudf/pull/8572)) [@ttnghia](https://github.com/ttnghia) + +## 🛠️ Improvements + +- Pin max `dask` and `distributed` versions to `2021.09.1` ([#9286](https://github.com/rapidsai/cudf/pull/9286)) [@galipremsagar](https://github.com/galipremsagar) +- Optimized fsspec data transfer for remote file-systems ([#9265](https://github.com/rapidsai/cudf/pull/9265)) [@rjzamora](https://github.com/rjzamora) +- Skip dask-cudf tests on arm64 
([#9252](https://github.com/rapidsai/cudf/pull/9252)) [@Ethyling](https://github.com/Ethyling) +- Use nvcomp's snappy compressor in ORC writer ([#9242](https://github.com/rapidsai/cudf/pull/9242)) [@devavret](https://github.com/devavret) +- Only run imports tests on x86_64 ([#9241](https://github.com/rapidsai/cudf/pull/9241)) [@Ethyling](https://github.com/Ethyling) +- Remove unnecessary call to device_uvector::release() ([#9237](https://github.com/rapidsai/cudf/pull/9237)) [@harrism](https://github.com/harrism) +- Use nvcomp's snappy decompression in ORC reader ([#9235](https://github.com/rapidsai/cudf/pull/9235)) [@devavret](https://github.com/devavret) +- Add grouped_rolling test with STRUCT groupby keys. ([#9228](https://github.com/rapidsai/cudf/pull/9228)) [@mythrocks](https://github.com/mythrocks) +- Optimize `cudf.concat` for `axis=0` ([#9222](https://github.com/rapidsai/cudf/pull/9222)) [@galipremsagar](https://github.com/galipremsagar) +- Fix some libcudf calls not passing the stream parameter ([#9220](https://github.com/rapidsai/cudf/pull/9220)) [@davidwendt](https://github.com/davidwendt) +- Add min and max bounds for random dataframe generator numeric types ([#9211](https://github.com/rapidsai/cudf/pull/9211)) [@galipremsagar](https://github.com/galipremsagar) +- Improve performance of expression evaluation ([#9210](https://github.com/rapidsai/cudf/pull/9210)) [@vyasr](https://github.com/vyasr) +- Misc optimizations in `cudf` ([#9203](https://github.com/rapidsai/cudf/pull/9203)) [@galipremsagar](https://github.com/galipremsagar) +- Remove Cython APIs for table view generation ([#9199](https://github.com/rapidsai/cudf/pull/9199)) [@vyasr](https://github.com/vyasr) +- Add JNI support for drop_list_duplicates 
([#9198](https://github.com/rapidsai/cudf/pull/9198)) [@revans2](https://github.com/revans2) +- Update pandas versions in conda recipes and requirements.txt files ([#9197](https://github.com/rapidsai/cudf/pull/9197)) [@galipremsagar](https://github.com/galipremsagar) +- Minor C++17 cleanup of `groupby.cu`: structured bindings, more concise lambda, etc ([#9193](https://github.com/rapidsai/cudf/pull/9193)) [@codereport](https://github.com/codereport) +- Explicit about bitwidth difference between cudf boolean and arrow boolean ([#9192](https://github.com/rapidsai/cudf/pull/9192)) [@isVoid](https://github.com/isVoid) +- Remove _source_index from MultiIndex ([#9191](https://github.com/rapidsai/cudf/pull/9191)) [@vyasr](https://github.com/vyasr) +- Fix typo in the name of `cudf-testing-targets.cmake` ([#9190](https://github.com/rapidsai/cudf/pull/9190)) [@trxcllnt](https://github.com/trxcllnt) +- Add support for single-digits in cudf::to_timestamps ([#9173](https://github.com/rapidsai/cudf/pull/9173)) [@davidwendt](https://github.com/davidwendt) +- Fix cufilejni build include path ([#9168](https://github.com/rapidsai/cudf/pull/9168)) [@pxLi](https://github.com/pxLi) +- `dask_cudf` dispatch registering cleanup ([#9160](https://github.com/rapidsai/cudf/pull/9160)) [@galipremsagar](https://github.com/galipremsagar) +- Remove unneeded stream/mr from a cudf::make_strings_column ([#9148](https://github.com/rapidsai/cudf/pull/9148)) [@davidwendt](https://github.com/davidwendt) +- Upgrade `pandas` version in `cudf` ([#9147](https://github.com/rapidsai/cudf/pull/9147)) [@galipremsagar](https://github.com/galipremsagar) +- make data chunk reader return unique_ptr ([#9129](https://github.com/rapidsai/cudf/pull/9129)) [@cwharris](https://github.com/cwharris) +- Add backend for 
`percentile_lookup` dispatch ([#9118](https://github.com/rapidsai/cudf/pull/9118)) [@galipremsagar](https://github.com/galipremsagar) +- Refactor implementation of column setitem ([#9110](https://github.com/rapidsai/cudf/pull/9110)) [@vyasr](https://github.com/vyasr) +- Fix compile warnings found using nvcc 11.4 ([#9101](https://github.com/rapidsai/cudf/pull/9101)) [@davidwendt](https://github.com/davidwendt) +- Update to UCX-Py 0.22 ([#9099](https://github.com/rapidsai/cudf/pull/9099)) [@pentschev](https://github.com/pentschev) +- Simplify read_avro by removing unnecessary writer/impl classes ([#9090](https://github.com/rapidsai/cudf/pull/9090)) [@cwharris](https://github.com/cwharris) +- Allowing %f in format to return nanoseconds ([#9081](https://github.com/rapidsai/cudf/pull/9081)) [@marlenezw](https://github.com/marlenezw) +- Java bindings for cudf::hash_join ([#9080](https://github.com/rapidsai/cudf/pull/9080)) [@jlowe](https://github.com/jlowe) +- Remove stale code in `ColumnBase._fill` ([#9078](https://github.com/rapidsai/cudf/pull/9078)) [@isVoid](https://github.com/isVoid) +- Add support for `get_group` in GroupBy ([#9070](https://github.com/rapidsai/cudf/pull/9070)) [@galipremsagar](https://github.com/galipremsagar) +- Remove remaining "support" methods from DataFrame ([#9068](https://github.com/rapidsai/cudf/pull/9068)) [@vyasr](https://github.com/vyasr) +- Update JNI java CSV APIs to not use deprecated API ([#9066](https://github.com/rapidsai/cudf/pull/9066)) [@revans2](https://github.com/revans2) +- Added method to remove null_masks if the column has no nulls ([#9061](https://github.com/rapidsai/cudf/pull/9061)) [@razajafri](https://github.com/razajafri) +- Consolidate Several Series and Dataframe Methods 
([#9059](https://github.com/rapidsai/cudf/pull/9059)) [@isVoid](https://github.com/isVoid) +- Remove usage of string based `set_dtypes` for `csv` & `json` readers ([#9049](https://github.com/rapidsai/cudf/pull/9049)) [@galipremsagar](https://github.com/galipremsagar) +- Remove some debug print statements from gtests ([#9048](https://github.com/rapidsai/cudf/pull/9048)) [@davidwendt](https://github.com/davidwendt) +- Support additional format specifiers in from_timestamps ([#9047](https://github.com/rapidsai/cudf/pull/9047)) [@davidwendt](https://github.com/davidwendt) +- Expose expression base class publicly and simplify public AST API ([#9045](https://github.com/rapidsai/cudf/pull/9045)) [@vyasr](https://github.com/vyasr) +- move filepath and mmap logic out of json/csv up to functions.cpp ([#9040](https://github.com/rapidsai/cudf/pull/9040)) [@cwharris](https://github.com/cwharris) +- Refactor Index hierarchy ([#9039](https://github.com/rapidsai/cudf/pull/9039)) [@vyasr](https://github.com/vyasr) +- cudf now leverages rapids-cmake to reduce CMake boilerplate ([#9030](https://github.com/rapidsai/cudf/pull/9030)) [@robertmaynard](https://github.com/robertmaynard) +- Add support for `STRUCT` input to `groupby` ([#9024](https://github.com/rapidsai/cudf/pull/9024)) [@mythrocks](https://github.com/mythrocks) +- Refactor Frame scans ([#9021](https://github.com/rapidsai/cudf/pull/9021)) [@vyasr](https://github.com/vyasr) +- Remove duplicate `set_categories` code ([#9018](https://github.com/rapidsai/cudf/pull/9018)) [@isVoid](https://github.com/isVoid) +- Map support for ParquetWriter ([#9013](https://github.com/rapidsai/cudf/pull/9013)) [@razajafri](https://github.com/razajafri) +- Remove aliases of various api.types APIs from utils.dtypes. 
([#9011](https://github.com/rapidsai/cudf/pull/9011)) [@vyasr](https://github.com/vyasr) +- Java bindings for conditional join output sizes ([#9002](https://github.com/rapidsai/cudf/pull/9002)) [@jlowe](https://github.com/jlowe) +- Remove _copy_construct factory ([#8999](https://github.com/rapidsai/cudf/pull/8999)) [@vyasr](https://github.com/vyasr) +- ENH Allow arbitrary CMake config options in build.sh ([#8996](https://github.com/rapidsai/cudf/pull/8996)) [@dillon-cullinan](https://github.com/dillon-cullinan) +- A small optimization for JNI copy column view to column vector ([#8985](https://github.com/rapidsai/cudf/pull/8985)) [@revans2](https://github.com/revans2) +- Fix nvcc warnings in ORC writer ([#8975](https://github.com/rapidsai/cudf/pull/8975)) [@devavret](https://github.com/devavret) +- Support nested structs in rank and dense rank ([#8962](https://github.com/rapidsai/cudf/pull/8962)) [@rwlee](https://github.com/rwlee) +- Move compute_column API out of ast namespace ([#8957](https://github.com/rapidsai/cudf/pull/8957)) [@vyasr](https://github.com/vyasr) +- Series datetime is_year_end and is_year_start ([#8954](https://github.com/rapidsai/cudf/pull/8954)) [@marlenezw](https://github.com/marlenezw) +- Make Java AstNode public ([#8953](https://github.com/rapidsai/cudf/pull/8953)) [@jlowe](https://github.com/jlowe) +- Replace allocate with device_uvector for subword_tokenize internal tables ([#8952](https://github.com/rapidsai/cudf/pull/8952)) [@davidwendt](https://github.com/davidwendt) +- `cudf.dtype` function ([#8949](https://github.com/rapidsai/cudf/pull/8949)) [@shwina](https://github.com/shwina) +- Refactor Frame reductions ([#8944](https://github.com/rapidsai/cudf/pull/8944)) [@vyasr](https://github.com/vyasr) +- Add deprecation warning for 
`Series.set_mask` API ([#8943](https://github.com/rapidsai/cudf/pull/8943)) [@galipremsagar](https://github.com/galipremsagar) +- Move AST evaluator into a separate header ([#8930](https://github.com/rapidsai/cudf/pull/8930)) [@vyasr](https://github.com/vyasr) +- JNI Aggregation Type Changes ([#8919](https://github.com/rapidsai/cudf/pull/8919)) [@revans2](https://github.com/revans2) +- Move template parameter to function parameter in cudf::detail::left_semi_anti_join ([#8914](https://github.com/rapidsai/cudf/pull/8914)) [@davidwendt](https://github.com/davidwendt) +- Upgrade `arrow` & `pyarrow` to `5.0.0` ([#8908](https://github.com/rapidsai/cudf/pull/8908)) [@galipremsagar](https://github.com/galipremsagar) +- Add groupby_aggregation and groupby_scan_aggregation classes and force their usage. ([#8906](https://github.com/rapidsai/cudf/pull/8906)) [@nvdbaranec](https://github.com/nvdbaranec) +- Move `structs_column_tests.cu` to `.cpp`. 
([#8902](https://github.com/rapidsai/cudf/pull/8902)) [@mythrocks](https://github.com/mythrocks) +- Add stream and memory-resource parameters to struct-scalar copy ctor ([#8901](https://github.com/rapidsai/cudf/pull/8901)) [@davidwendt](https://github.com/davidwendt) +- Combine linearizer and ast_plan ([#8900](https://github.com/rapidsai/cudf/pull/8900)) [@vyasr](https://github.com/vyasr) +- Add Java bindings for conditional join gather maps ([#8888](https://github.com/rapidsai/cudf/pull/8888)) [@jlowe](https://github.com/jlowe) +- Remove max version pin for `dask` & `distributed` on development branch ([#8881](https://github.com/rapidsai/cudf/pull/8881)) [@galipremsagar](https://github.com/galipremsagar) +- fix cufilejni build w/ c++17 ([#8877](https://github.com/rapidsai/cudf/pull/8877)) [@pxLi](https://github.com/pxLi) +- Add struct accessor to dask-cudf ([#8874](https://github.com/rapidsai/cudf/pull/8874)) [@NV-jpt](https://github.com/NV-jpt) +- Migrate dask-cudf CudfEngine to leverage ArrowDatasetEngine ([#8871](https://github.com/rapidsai/cudf/pull/8871)) [@rjzamora](https://github.com/rjzamora) +- Add JNI for extract_quarter, add_calendrical_months, and is_leap_year ([#8863](https://github.com/rapidsai/cudf/pull/8863)) [@revans2](https://github.com/revans2) +- Change cudf::scalar copy and move constructors to protected ([#8857](https://github.com/rapidsai/cudf/pull/8857)) [@davidwendt](https://github.com/davidwendt) +- Replace `is_same<>::value` with `is_same_v<>` ([#8852](https://github.com/rapidsai/cudf/pull/8852)) [@codereport](https://github.com/codereport) +- Add min `pytorch` version to `importorskip` in pytest ([#8851](https://github.com/rapidsai/cudf/pull/8851)) [@galipremsagar](https://github.com/galipremsagar) +- Java bindings for regex replace 
([#8847](https://github.com/rapidsai/cudf/pull/8847)) [@jlowe](https://github.com/jlowe) +- Remove make strings children with null mask ([#8830](https://github.com/rapidsai/cudf/pull/8830)) [@davidwendt](https://github.com/davidwendt) +- Refactor conditional joins ([#8815](https://github.com/rapidsai/cudf/pull/8815)) [@vyasr](https://github.com/vyasr) +- Small cleanup (unused headers / commented code removals) ([#8799](https://github.com/rapidsai/cudf/pull/8799)) [@codereport](https://github.com/codereport) +- ENH Replace gpuci_conda_retry with gpuci_mamba_retry ([#8770](https://github.com/rapidsai/cudf/pull/8770)) [@dillon-cullinan](https://github.com/dillon-cullinan) +- Update cudf java bindings to 21.10.0-SNAPSHOT ([#8765](https://github.com/rapidsai/cudf/pull/8765)) [@pxLi](https://github.com/pxLi) +- Refactor and improve join benchmarks with nvbench ([#8734](https://github.com/rapidsai/cudf/pull/8734)) [@PointKernel](https://github.com/PointKernel) +- Refactor Python factories and remove usage of Table for libcudf output handling ([#8687](https://github.com/rapidsai/cudf/pull/8687)) [@vyasr](https://github.com/vyasr) +- Optimize URL Decoding ([#8622](https://github.com/rapidsai/cudf/pull/8622)) [@gaohao95](https://github.com/gaohao95) +- Parquet writer dictionary encoding refactor ([#8476](https://github.com/rapidsai/cudf/pull/8476)) [@devavret](https://github.com/devavret) +- Use nvcomp's snappy decompression in parquet reader ([#8252](https://github.com/rapidsai/cudf/pull/8252)) [@devavret](https://github.com/devavret) +- Use nvcomp's snappy compressor in parquet writer ([#8229](https://github.com/rapidsai/cudf/pull/8229)) [@devavret](https://github.com/devavret) + +# cuDF 21.08.00 (4 Aug 2021) + +## 🚨 Breaking Changes + +- Fix a crash in pack() when being handed tables 
with no columns. ([#8697](https://github.com/rapidsai/cudf/pull/8697)) [@nvdbaranec](https://github.com/nvdbaranec) +- Remove unused cudf::strings::create_offsets ([#8663](https://github.com/rapidsai/cudf/pull/8663)) [@davidwendt](https://github.com/davidwendt) +- Add delimiter parameter to cudf::strings::capitalize() ([#8620](https://github.com/rapidsai/cudf/pull/8620)) [@davidwendt](https://github.com/davidwendt) +- Change default datetime index resolution to ns to match pandas ([#8611](https://github.com/rapidsai/cudf/pull/8611)) [@vyasr](https://github.com/vyasr) +- Add sequence_type parameter to cudf::strings::title function ([#8602](https://github.com/rapidsai/cudf/pull/8602)) [@davidwendt](https://github.com/davidwendt) +- Add `strings::repeat_strings` API that can repeat each string a different number of times ([#8561](https://github.com/rapidsai/cudf/pull/8561)) [@ttnghia](https://github.com/ttnghia) +- String-to-boolean conversion is different from Pandas ([#8549](https://github.com/rapidsai/cudf/pull/8549)) [@skirui-source](https://github.com/skirui-source) +- Add accurate hash join size functions ([#8453](https://github.com/rapidsai/cudf/pull/8453)) [@PointKernel](https://github.com/PointKernel) +- Expose a Decimal32Dtype in cuDF Python ([#8438](https://github.com/rapidsai/cudf/pull/8438)) [@skirui-source](https://github.com/skirui-source) +- Update dask make_meta changes to be compatible with dask upstream ([#8426](https://github.com/rapidsai/cudf/pull/8426)) [@galipremsagar](https://github.com/galipremsagar) +- Adapt `cudf::scalar` classes to changes in `rmm::device_scalar` ([#8411](https://github.com/rapidsai/cudf/pull/8411)) [@harrism](https://github.com/harrism) +- Remove special Index class from the general index class hierarchy 
([#8309](https://github.com/rapidsai/cudf/pull/8309)) [@vyasr](https://github.com/vyasr) +- Add first-class dtype utilities ([#8308](https://github.com/rapidsai/cudf/pull/8308)) [@vyasr](https://github.com/vyasr) +- ORC - Support reading multiple orc files/buffers in a single operation ([#8142](https://github.com/rapidsai/cudf/pull/8142)) [@jdye64](https://github.com/jdye64) +- Upgrade arrow to 4.0.1 ([#7495](https://github.com/rapidsai/cudf/pull/7495)) [@galipremsagar](https://github.com/galipremsagar) + +## 🐛 Bug Fixes + +- Fix `contains` check in string column ([#8834](https://github.com/rapidsai/cudf/pull/8834)) [@galipremsagar](https://github.com/galipremsagar) +- Remove unused variable from `row_bit_count_test`. ([#8829](https://github.com/rapidsai/cudf/pull/8829)) [@mythrocks](https://github.com/mythrocks) +- Fixes issue with null struct columns in ORC reader ([#8819](https://github.com/rapidsai/cudf/pull/8819)) [@rgsl888prabhu](https://github.com/rgsl888prabhu) +- Set CMake vars for python/parquet support in libarrow builds ([#8808](https://github.com/rapidsai/cudf/pull/8808)) [@vyasr](https://github.com/vyasr) +- Handle empty child columns in row_bit_count() ([#8791](https://github.com/rapidsai/cudf/pull/8791)) [@mythrocks](https://github.com/mythrocks) +- Revert "Remove cudf unneeded build time requirement of the cuda driver" ([#8784](https://github.com/rapidsai/cudf/pull/8784)) [@robertmaynard](https://github.com/robertmaynard) +- Fix isort error in utils.pyx ([#8771](https://github.com/rapidsai/cudf/pull/8771)) [@charlesbluca](https://github.com/charlesbluca) +- Handle sliced struct/list columns properly in concatenate() bounds checking. 
([#8760](https://github.com/rapidsai/cudf/pull/8760)) [@nvdbaranec](https://github.com/nvdbaranec) +- Fix issues with `_CPackedColumns.serialize()` handling of host and device data ([#8759](https://github.com/rapidsai/cudf/pull/8759)) [@charlesbluca](https://github.com/charlesbluca) +- Fix issues with `MultiIndex` in `dropna`, `stack` & `reset_index` ([#8753](https://github.com/rapidsai/cudf/pull/8753)) [@galipremsagar](https://github.com/galipremsagar) +- Write pandas extension types to parquet file metadata ([#8749](https://github.com/rapidsai/cudf/pull/8749)) [@devavret](https://github.com/devavret) +- Fix `where` to handle `DataFrame` & `Series` input combination ([#8747](https://github.com/rapidsai/cudf/pull/8747)) [@galipremsagar](https://github.com/galipremsagar) +- Fix `replace` to handle null values correctly ([#8744](https://github.com/rapidsai/cudf/pull/8744)) [@galipremsagar](https://github.com/galipremsagar) +- Handle sliced structs properly in pack/contiguous_split. ([#8739](https://github.com/rapidsai/cudf/pull/8739)) [@nvdbaranec](https://github.com/nvdbaranec) +- Fix issue in slice() where columns with a positive offset were computing null counts incorrectly. 
([#8738](https://github.com/rapidsai/cudf/pull/8738)) [@nvdbaranec](https://github.com/nvdbaranec) +- Fix `cudf.Series` constructor to handle list of sequences ([#8735](https://github.com/rapidsai/cudf/pull/8735)) [@galipremsagar](https://github.com/galipremsagar) +- Fix min/max sorted groupby aggregation on string column with nulls (argmin, argmax sentinel value missing on nulls) ([#8731](https://github.com/rapidsai/cudf/pull/8731)) [@karthikeyann](https://github.com/karthikeyann) +- Fix orc reader assert on create data_type in debug ([#8706](https://github.com/rapidsai/cudf/pull/8706)) [@davidwendt](https://github.com/davidwendt) +- Fix min/max inclusive cudf::scan for strings column ([#8705](https://github.com/rapidsai/cudf/pull/8705)) [@davidwendt](https://github.com/davidwendt) +- JNI: Fix driver version assertion logic in testGetCudaRuntimeInfo ([#8701](https://github.com/rapidsai/cudf/pull/8701)) [@sperlingxx](https://github.com/sperlingxx) +- Adding fix for skip_rows and crash in orc reader ([#8700](https://github.com/rapidsai/cudf/pull/8700)) [@rgsl888prabhu](https://github.com/rgsl888prabhu) +- Bug fix: `replace_nulls_policy` functor not returning correct indices for gathermap ([#8699](https://github.com/rapidsai/cudf/pull/8699)) [@isVoid](https://github.com/isVoid) +- Fix a crash in pack() when being handed tables with no columns. 
([#8697](https://github.com/rapidsai/cudf/pull/8697)) [@nvdbaranec](https://github.com/nvdbaranec) +- Add post-processing steps to `dask_cudf.groupby.CudfSeriesGroupby.aggregate` ([#8694](https://github.com/rapidsai/cudf/pull/8694)) [@charlesbluca](https://github.com/charlesbluca) +- JNI build no longer looks for Arrow in conda environment ([#8686](https://github.com/rapidsai/cudf/pull/8686)) [@jlowe](https://github.com/jlowe) +- Handle arbitrarily different data in null list column rows when checking for equivalency. ([#8666](https://github.com/rapidsai/cudf/pull/8666)) [@nvdbaranec](https://github.com/nvdbaranec) +- Add ConfigureNVBench to avoid concurrent main() entry points ([#8662](https://github.com/rapidsai/cudf/pull/8662)) [@PointKernel](https://github.com/PointKernel) +- Pin `*arrow` to use `*cuda` in `run` ([#8651](https://github.com/rapidsai/cudf/pull/8651)) [@jakirkham](https://github.com/jakirkham) +- Add proper support for tolerances in testing methods. 
([#8649](https://github.com/rapidsai/cudf/pull/8649)) [@vyasr](https://github.com/vyasr) +- Support multi-char case conversion in capitalize function ([#8647](https://github.com/rapidsai/cudf/pull/8647)) [@davidwendt](https://github.com/davidwendt) +- Fix repeated mangled names in read_csv with duplicate column names ([#8645](https://github.com/rapidsai/cudf/pull/8645)) [@karthikeyann](https://github.com/karthikeyann) +- Temporarily disable libcudf example build tests ([#8642](https://github.com/rapidsai/cudf/pull/8642)) [@isVoid](https://github.com/isVoid) +- Use conda-sourced cudf artifacts for libcudf example in CI ([#8638](https://github.com/rapidsai/cudf/pull/8638)) [@isVoid](https://github.com/isVoid) +- Ensure dev environment uses Arrow GPU packages ([#8637](https://github.com/rapidsai/cudf/pull/8637)) [@charlesbluca](https://github.com/charlesbluca) +- Fix bug that columns only initialized once when specified `columns` and `index` in dataframe ctor ([#8628](https://github.com/rapidsai/cudf/pull/8628)) [@isVoid](https://github.com/isVoid) +- Propagate **kwargs through to as_*_column methods ([#8618](https://github.com/rapidsai/cudf/pull/8618)) [@shwina](https://github.com/shwina) +- Fix orc_reader_benchmark.cpp compile error ([#8609](https://github.com/rapidsai/cudf/pull/8609)) [@davidwendt](https://github.com/davidwendt) +- Fix missed renumbering of Aggregation values ([#8600](https://github.com/rapidsai/cudf/pull/8600)) [@revans2](https://github.com/revans2) +- Update cmake to 3.20.5 in the Java Docker image ([#8593](https://github.com/rapidsai/cudf/pull/8593)) [@NvTimLiu](https://github.com/NvTimLiu) +- Fix bug in replace_with_backrefs when group has greedy quantifier ([#8575](https://github.com/rapidsai/cudf/pull/8575)) [@davidwendt](https://github.com/davidwendt) 
+- Apply metadata to keys before returning in `Frame._encode` ([#8560](https://github.com/rapidsai/cudf/pull/8560)) [@charlesbluca](https://github.com/charlesbluca) +- Fix for strings containing special JSON characters in get_json_object(). ([#8556](https://github.com/rapidsai/cudf/pull/8556)) [@nvdbaranec](https://github.com/nvdbaranec) +- Fix debug compile error in gather_struct_tests.cpp ([#8554](https://github.com/rapidsai/cudf/pull/8554)) [@davidwendt](https://github.com/davidwendt) +- String-to-boolean conversion is different from Pandas ([#8549](https://github.com/rapidsai/cudf/pull/8549)) [@skirui-source](https://github.com/skirui-source) +- Fix `__repr__` output with `display.max_rows` is `None` ([#8547](https://github.com/rapidsai/cudf/pull/8547)) [@galipremsagar](https://github.com/galipremsagar) +- Fix size passed to column constructors in _with_type_metadata ([#8539](https://github.com/rapidsai/cudf/pull/8539)) [@shwina](https://github.com/shwina) +- Properly retrieve last column when `-1` is specified for column index ([#8529](https://github.com/rapidsai/cudf/pull/8529)) [@isVoid](https://github.com/isVoid) +- Fix importing `apply` from `dask` ([#8517](https://github.com/rapidsai/cudf/pull/8517)) [@galipremsagar](https://github.com/galipremsagar) +- Fix offset of the string dictionary length stream ([#8515](https://github.com/rapidsai/cudf/pull/8515)) [@vuule](https://github.com/vuule) +- Fix double counting of selected columns in CSV reader ([#8508](https://github.com/rapidsai/cudf/pull/8508)) [@ochan1](https://github.com/ochan1) +- Incorrect map size in scatter_to_gather corrupts struct columns ([#8507](https://github.com/rapidsai/cudf/pull/8507)) [@gerashegalov](https://github.com/gerashegalov) +- replace_nulls properly propagates memory resource to gather calls 
([#8500](https://github.com/rapidsai/cudf/pull/8500)) [@robertmaynard](https://github.com/robertmaynard) +- Disallow groupby aggs for `StructColumns` ([#8499](https://github.com/rapidsai/cudf/pull/8499)) [@charlesbluca](https://github.com/charlesbluca) +- Fixes out-of-bounds access for small files in unzip ([#8498](https://github.com/rapidsai/cudf/pull/8498)) [@elstehle](https://github.com/elstehle) +- Adding support for writing empty dataframe ([#8490](https://github.com/rapidsai/cudf/pull/8490)) [@shaneding](https://github.com/shaneding) +- Fix exclusive scan when including nulls and improve testing ([#8478](https://github.com/rapidsai/cudf/pull/8478)) [@harrism](https://github.com/harrism) +- Add workaround for crash in libcudf debug build using output_indexalator in thrust::lower_bound ([#8432](https://github.com/rapidsai/cudf/pull/8432)) [@davidwendt](https://github.com/davidwendt) +- Install only the same Thrust files that Thrust itself installs ([#8420](https://github.com/rapidsai/cudf/pull/8420)) [@robertmaynard](https://github.com/robertmaynard) +- Add nightly version for ucx-py in ci script ([#8419](https://github.com/rapidsai/cudf/pull/8419)) [@galipremsagar](https://github.com/galipremsagar) +- Fix null_equality config of rolling_collect_set ([#8415](https://github.com/rapidsai/cudf/pull/8415)) [@sperlingxx](https://github.com/sperlingxx) +- CollectSetAggregation: implement RollingAggregation interface ([#8406](https://github.com/rapidsai/cudf/pull/8406)) [@sperlingxx](https://github.com/sperlingxx) +- Handle pre-sliced nested columns in contiguous_split. 
([#8391](https://github.com/rapidsai/cudf/pull/8391)) [@nvdbaranec](https://github.com/nvdbaranec) +- Fix bitmask_tests.cpp host accessing device memory ([#8370](https://github.com/rapidsai/cudf/pull/8370)) [@davidwendt](https://github.com/davidwendt) +- Fix concurrent_unordered_map to prevent accessing padding bits in pair_type ([#8348](https://github.com/rapidsai/cudf/pull/8348)) [@davidwendt](https://github.com/davidwendt) +- BUG FIX: Raise appropriate strings error when concatenating strings column ([#8290](https://github.com/rapidsai/cudf/pull/8290)) [@skirui-source](https://github.com/skirui-source) +- Make gpuCI and pre-commit style configurations consistent ([#8215](https://github.com/rapidsai/cudf/pull/8215)) [@charlesbluca](https://github.com/charlesbluca) +- Add collect list to dask-cudf groupby aggregations ([#8045](https://github.com/rapidsai/cudf/pull/8045)) [@charlesbluca](https://github.com/charlesbluca) + +## 📖 Documentation + +- Update Python UDFs notebook ([#8810](https://github.com/rapidsai/cudf/pull/8810)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Fix dask.dataframe API docs links after reorg ([#8772](https://github.com/rapidsai/cudf/pull/8772)) [@jsignell](https://github.com/jsignell) +- Fix instructions for running cuDF/dask-cuDF tests in CONTRIBUTING.md ([#8724](https://github.com/rapidsai/cudf/pull/8724)) [@shwina](https://github.com/shwina) +- Translate Markdown documentation to rST and remove recommonmark ([#8698](https://github.com/rapidsai/cudf/pull/8698)) [@vyasr](https://github.com/vyasr) +- Fixed spelling mistakes in libcudf documentation ([#8664](https://github.com/rapidsai/cudf/pull/8664)) [@karthikeyann](https://github.com/karthikeyann) +- Custom Sphinx Extension: `PandasCompat` ([#8643](https://github.com/rapidsai/cudf/pull/8643)) 
[@isVoid](https://github.com/isVoid) +- Fix README.md ([#8535](https://github.com/rapidsai/cudf/pull/8535)) [@ajschmidt8](https://github.com/ajschmidt8) +- Change namespace contains_nulls to struct ([#8523](https://github.com/rapidsai/cudf/pull/8523)) [@davidwendt](https://github.com/davidwendt) +- Add info about NVTX ranges to dev guide ([#8461](https://github.com/rapidsai/cudf/pull/8461)) [@jrhemstad](https://github.com/jrhemstad) +- Fixed documentation bug in groupby agg method ([#8325](https://github.com/rapidsai/cudf/pull/8325)) [@ahmet-uyar](https://github.com/ahmet-uyar) + +## 🚀 New Features + +- Fix concatenating structs ([#8811](https://github.com/rapidsai/cudf/pull/8811)) [@shaneding](https://github.com/shaneding) +- Implement JNI for groupby aggregations `M2` and `MERGE_M2` ([#8763](https://github.com/rapidsai/cudf/pull/8763)) [@ttnghia](https://github.com/ttnghia) +- Bump `isort` to `5.6.4` and remove `isort` overrides made for 5.0.7 ([#8755](https://github.com/rapidsai/cudf/pull/8755)) [@charlesbluca](https://github.com/charlesbluca) +- Implement `__setitem__` for `StructColumn` ([#8737](https://github.com/rapidsai/cudf/pull/8737)) [@shaneding](https://github.com/shaneding) +- Add `is_leap_year` to `DateTimeProperties` and `DatetimeIndex` ([#8736](https://github.com/rapidsai/cudf/pull/8736)) [@isVoid](https://github.com/isVoid) +- Add `struct.explode()` method ([#8729](https://github.com/rapidsai/cudf/pull/8729)) [@shwina](https://github.com/shwina) +- Add `DataFrame.to_struct()` method to convert a DataFrame to a struct Series ([#8728](https://github.com/rapidsai/cudf/pull/8728)) [@shwina](https://github.com/shwina) +- Add support for list type in ORC writer ([#8723](https://github.com/rapidsai/cudf/pull/8723)) [@vuule](https://github.com/vuule) +- Fix 
slicing from struct columns and accessing struct columns ([#8719](https://github.com/rapidsai/cudf/pull/8719)) [@shaneding](https://github.com/shaneding) +- Add `datetime::is_leap_year` ([#8711](https://github.com/rapidsai/cudf/pull/8711)) [@isVoid](https://github.com/isVoid) +- Accessing struct columns from `dask_cudf` ([#8675](https://github.com/rapidsai/cudf/pull/8675)) [@shaneding](https://github.com/shaneding) +- Added pct_change to Series ([#8650](https://github.com/rapidsai/cudf/pull/8650)) [@TravisHester](https://github.com/TravisHester) +- Add strings support to cudf::shift function ([#8648](https://github.com/rapidsai/cudf/pull/8648)) [@davidwendt](https://github.com/davidwendt) +- Support Scatter `struct_scalar` ([#8630](https://github.com/rapidsai/cudf/pull/8630)) [@isVoid](https://github.com/isVoid) +- Struct scalar from host dictionary ([#8629](https://github.com/rapidsai/cudf/pull/8629)) [@shaneding](https://github.com/shaneding) +- Add dayofyear and day_of_year to Series, DatetimeColumn, and DatetimeIndex ([#8626](https://github.com/rapidsai/cudf/pull/8626)) [@beckernick](https://github.com/beckernick) +- JNI support for capitalize ([#8624](https://github.com/rapidsai/cudf/pull/8624)) [@firestarman](https://github.com/firestarman) +- Add delimiter parameter to cudf::strings::capitalize() ([#8620](https://github.com/rapidsai/cudf/pull/8620)) [@davidwendt](https://github.com/davidwendt) +- Add NVBench in CMake ([#8619](https://github.com/rapidsai/cudf/pull/8619)) [@PointKernel](https://github.com/PointKernel) +- Change default datetime index resolution to ns to match pandas ([#8611](https://github.com/rapidsai/cudf/pull/8611)) [@vyasr](https://github.com/vyasr) +- ListColumn `__setitem__` ([#8606](https://github.com/rapidsai/cudf/pull/8606)) 
[@brandon-b-miller](https://github.com/brandon-b-miller) +- Implement groupby aggregations `M2` and `MERGE_M2` ([#8605](https://github.com/rapidsai/cudf/pull/8605)) [@ttnghia](https://github.com/ttnghia) +- Add sequence_type parameter to cudf::strings::title function ([#8602](https://github.com/rapidsai/cudf/pull/8602)) [@davidwendt](https://github.com/davidwendt) +- Adding support for list and struct type in ORC Reader ([#8599](https://github.com/rapidsai/cudf/pull/8599)) [@rgsl888prabhu](https://github.com/rgsl888prabhu) +- Benchmark for `strings::repeat_strings` APIs ([#8589](https://github.com/rapidsai/cudf/pull/8589)) [@ttnghia](https://github.com/ttnghia) +- Nested scalar support for copy if else ([#8588](https://github.com/rapidsai/cudf/pull/8588)) [@gerashegalov](https://github.com/gerashegalov) +- User specified decimal columns to float64 ([#8587](https://github.com/rapidsai/cudf/pull/8587)) [@jdye64](https://github.com/jdye64) +- Add `get_element` for struct column ([#8578](https://github.com/rapidsai/cudf/pull/8578)) [@isVoid](https://github.com/isVoid) +- Python changes for adding `__getitem__` for `struct` ([#8577](https://github.com/rapidsai/cudf/pull/8577)) [@shaneding](https://github.com/shaneding) +- Add `strings::repeat_strings` API that can repeat each string a different number of times ([#8561](https://github.com/rapidsai/cudf/pull/8561)) [@ttnghia](https://github.com/ttnghia) +- Refactor `tests/iterator_utilities.hpp` functions ([#8540](https://github.com/rapidsai/cudf/pull/8540)) [@ttnghia](https://github.com/ttnghia) +- Support MERGE_LISTS and MERGE_SETS in Java package ([#8516](https://github.com/rapidsai/cudf/pull/8516)) [@sperlingxx](https://github.com/sperlingxx) +- Decimal support csv reader ([#8511](https://github.com/rapidsai/cudf/pull/8511)) 
[@elstehle](https://github.com/elstehle) +- Add column type tests ([#8505](https://github.com/rapidsai/cudf/pull/8505)) [@isVoid](https://github.com/isVoid) +- Warn when downscaling decimal columns ([#8492](https://github.com/rapidsai/cudf/pull/8492)) [@ChrisJar](https://github.com/ChrisJar) +- Add JNI for `strings::repeat_strings` ([#8491](https://github.com/rapidsai/cudf/pull/8491)) [@ttnghia](https://github.com/ttnghia) +- Add `Index.get_loc` for Numerical, String Index support ([#8489](https://github.com/rapidsai/cudf/pull/8489)) [@isVoid](https://github.com/isVoid) +- Expose half_up rounding in cuDF ([#8477](https://github.com/rapidsai/cudf/pull/8477)) [@shwina](https://github.com/shwina) +- Java APIs to fetch CUDA runtime info ([#8465](https://github.com/rapidsai/cudf/pull/8465)) [@sperlingxx](https://github.com/sperlingxx) +- Add `str.edit_distance_matrix` ([#8463](https://github.com/rapidsai/cudf/pull/8463)) [@isVoid](https://github.com/isVoid) +- Support constructing `cudf.Scalar` objects from host side lists ([#8459](https://github.com/rapidsai/cudf/pull/8459)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Add accurate hash join size functions ([#8453](https://github.com/rapidsai/cudf/pull/8453)) [@PointKernel](https://github.com/PointKernel) +- Add cudf::strings::integer_to_hex convert API ([#8450](https://github.com/rapidsai/cudf/pull/8450)) [@davidwendt](https://github.com/davidwendt) +- Create objects from iterables that contain cudf.NA ([#8442](https://github.com/rapidsai/cudf/pull/8442)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- JNI bindings for sort_lists ([#8439](https://github.com/rapidsai/cudf/pull/8439)) [@sperlingxx](https://github.com/sperlingxx) +- Expose a Decimal32Dtype in cuDF Python 
([#8438](https://github.com/rapidsai/cudf/pull/8438)) [@skirui-source](https://github.com/skirui-source) +- Replace `all_null()` and `all_valid()` by `iterator_all_nulls()` and `iterator_no_null()` in tests ([#8437](https://github.com/rapidsai/cudf/pull/8437)) [@ttnghia](https://github.com/ttnghia) +- Implement groupby `MERGE_LISTS` and `MERGE_SETS` aggregates ([#8436](https://github.com/rapidsai/cudf/pull/8436)) [@ttnghia](https://github.com/ttnghia) +- Add public libcudf match_dictionaries API ([#8429](https://github.com/rapidsai/cudf/pull/8429)) [@davidwendt](https://github.com/davidwendt) +- Add move constructors for `string_scalar` and `struct_scalar` ([#8428](https://github.com/rapidsai/cudf/pull/8428)) [@ttnghia](https://github.com/ttnghia) +- Implement `strings::repeat_strings` ([#8423](https://github.com/rapidsai/cudf/pull/8423)) [@ttnghia](https://github.com/ttnghia) +- STRUCT column support for cudf::merge. 
([#8422](https://github.com/rapidsai/cudf/pull/8422)) [@nvdbaranec](https://github.com/nvdbaranec) +- Implement reverse in libcudf ([#8410](https://github.com/rapidsai/cudf/pull/8410)) [@shaneding](https://github.com/shaneding) +- Support multiple input files/buffers for read_json ([#8403](https://github.com/rapidsai/cudf/pull/8403)) [@jdye64](https://github.com/jdye64) +- Improve test coverage for struct search ([#8396](https://github.com/rapidsai/cudf/pull/8396)) [@ttnghia](https://github.com/ttnghia) +- Add `groupby.fillna` ([#8362](https://github.com/rapidsai/cudf/pull/8362)) [@isVoid](https://github.com/isVoid) +- Enable AST-based joining ([#8214](https://github.com/rapidsai/cudf/pull/8214)) [@vyasr](https://github.com/vyasr) +- Generalized null support in user defined functions ([#8213](https://github.com/rapidsai/cudf/pull/8213)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Add compiled binary operation ([#8192](https://github.com/rapidsai/cudf/pull/8192)) [@karthikeyann](https://github.com/karthikeyann) +- Implement `.describe() ` for `DataFrameGroupBy` ([#8179](https://github.com/rapidsai/cudf/pull/8179)) [@skirui-source](https://github.com/skirui-source) +- ORC - Support reading multiple orc files/buffers in a single operation ([#8142](https://github.com/rapidsai/cudf/pull/8142)) [@jdye64](https://github.com/jdye64) +- Add Python bindings for `lists::concatenate_list_elements` and expose them as `.list.concat()` ([#8006](https://github.com/rapidsai/cudf/pull/8006)) [@shwina](https://github.com/shwina) +- Use Arrow URI FileSystem backed instance to retrieve remote files ([#7709](https://github.com/rapidsai/cudf/pull/7709)) [@jdye64](https://github.com/jdye64) +- Example to build custom application and link to libcudf 
([#7671](https://github.com/rapidsai/cudf/pull/7671)) [@isVoid](https://github.com/isVoid) +- Upgrade arrow to 4.0.1 ([#7495](https://github.com/rapidsai/cudf/pull/7495)) [@galipremsagar](https://github.com/galipremsagar) + +## 🛠️ Improvements + +- Provide a better error message when `CUDA::cuda_driver` not found ([#8794](https://github.com/rapidsai/cudf/pull/8794)) [@robertmaynard](https://github.com/robertmaynard) +- Remove anonymous namespace from null_mask.cuh ([#8786](https://github.com/rapidsai/cudf/pull/8786)) [@nvdbaranec](https://github.com/nvdbaranec) +- Allow cudf to be built without libcuda.so existing ([#8751](https://github.com/rapidsai/cudf/pull/8751)) [@robertmaynard](https://github.com/robertmaynard) +- Pin `mimesis` to `<4.1` ([#8745](https://github.com/rapidsai/cudf/pull/8745)) [@galipremsagar](https://github.com/galipremsagar) +- Update `conda` environment name for CI ([#8692](https://github.com/rapidsai/cudf/pull/8692)) [@ajschmidt8](https://github.com/ajschmidt8) +- Remove flatbuffers dependency ([#8671](https://github.com/rapidsai/cudf/pull/8671)) [@Ethyling](https://github.com/Ethyling) +- Add options to build Arrow with Python and Parquet support ([#8670](https://github.com/rapidsai/cudf/pull/8670)) [@trxcllnt](https://github.com/trxcllnt) +- Remove unused cudf::strings::create_offsets ([#8663](https://github.com/rapidsai/cudf/pull/8663)) [@davidwendt](https://github.com/davidwendt) +- Update GDS lib version to 1.0.0 ([#8654](https://github.com/rapidsai/cudf/pull/8654)) [@pxLi](https://github.com/pxLi) +- Support for groupby/scan rank and dense_rank aggregations ([#8652](https://github.com/rapidsai/cudf/pull/8652)) [@rwlee](https://github.com/rwlee) +- Fix usage of deprecated arrow ipc API ([#8632](https://github.com/rapidsai/cudf/pull/8632)) 
[@revans2](https://github.com/revans2) +- Use absolute imports in `cudf` ([#8631](https://github.com/rapidsai/cudf/pull/8631)) [@galipremsagar](https://github.com/galipremsagar) +- ENH Add Java CI build script ([#8627](https://github.com/rapidsai/cudf/pull/8627)) [@dillon-cullinan](https://github.com/dillon-cullinan) +- Add DeprecationWarning to `ser.str.subword_tokenize` ([#8603](https://github.com/rapidsai/cudf/pull/8603)) [@VibhuJawa](https://github.com/VibhuJawa) +- Rewrite binary operations for improved performance and additional type support ([#8598](https://github.com/rapidsai/cudf/pull/8598)) [@vyasr](https://github.com/vyasr) +- Fix `mypy` errors surfacing because of `numpy-1.21.0` ([#8595](https://github.com/rapidsai/cudf/pull/8595)) [@galipremsagar](https://github.com/galipremsagar) +- Remove unneeded includes from cudf::string_view headers ([#8594](https://github.com/rapidsai/cudf/pull/8594)) [@davidwendt](https://github.com/davidwendt) +- Use cmake 3.20.1 as it is now required by rmm ([#8586](https://github.com/rapidsai/cudf/pull/8586)) [@robertmaynard](https://github.com/robertmaynard) +- Remove device debug symbols from cmake CUDF_CUDA_FLAGS ([#8584](https://github.com/rapidsai/cudf/pull/8584)) [@davidwendt](https://github.com/davidwendt) +- Dask-CuDF: use default Dask Dataframe optimizer ([#8581](https://github.com/rapidsai/cudf/pull/8581)) [@madsbk](https://github.com/madsbk) +- Remove checking if an unsigned value is less than zero ([#8579](https://github.com/rapidsai/cudf/pull/8579)) [@robertmaynard](https://github.com/robertmaynard) +- Remove strings_count parameter from cudf::strings::detail::create_chars_child_column ([#8576](https://github.com/rapidsai/cudf/pull/8576)) [@davidwendt](https://github.com/davidwendt) +- Make `cudf.api.types` imports consistent 
([#8571](https://github.com/rapidsai/cudf/pull/8571)) [@galipremsagar](https://github.com/galipremsagar) +- Modernize libcudf basic example CMakeFile; updates CI build tests ([#8568](https://github.com/rapidsai/cudf/pull/8568)) [@isVoid](https://github.com/isVoid) +- Rename concatenate_tests.cu to .cpp ([#8555](https://github.com/rapidsai/cudf/pull/8555)) [@davidwendt](https://github.com/davidwendt) +- enable window lead/lag test on struct ([#8548](https://github.com/rapidsai/cudf/pull/8548)) [@wbo4958](https://github.com/wbo4958) +- Add Java methods to split and write column views ([#8546](https://github.com/rapidsai/cudf/pull/8546)) [@razajafri](https://github.com/razajafri) +- Small cleanup ([#8534](https://github.com/rapidsai/cudf/pull/8534)) [@codereport](https://github.com/codereport) +- Unpin `dask` version in CI ([#8533](https://github.com/rapidsai/cudf/pull/8533)) [@galipremsagar](https://github.com/galipremsagar) +- Added optional flag for building Arrow with S3 filesystem support ([#8531](https://github.com/rapidsai/cudf/pull/8531)) [@jdye64](https://github.com/jdye64) +- Minor clean up of various internal column and frame utilities ([#8528](https://github.com/rapidsai/cudf/pull/8528)) [@vyasr](https://github.com/vyasr) +- Rename some copying_test source files .cu to .cpp ([#8527](https://github.com/rapidsai/cudf/pull/8527)) [@davidwendt](https://github.com/davidwendt) +- Correct the last warnings and issues when using newer cuda versions ([#8525](https://github.com/rapidsai/cudf/pull/8525)) [@robertmaynard](https://github.com/robertmaynard) +- Correct unused parameter warnings in transform and unary ops ([#8521](https://github.com/rapidsai/cudf/pull/8521)) [@robertmaynard](https://github.com/robertmaynard) +- Correct unused parameter warnings in string algorithms 
([#8509](https://github.com/rapidsai/cudf/pull/8509)) [@robertmaynard](https://github.com/robertmaynard) +- Add in JNI APIs for scan, replace_nulls, group_by.scan, and group_by.replace_nulls ([#8503](https://github.com/rapidsai/cudf/pull/8503)) [@revans2](https://github.com/revans2) +- Fix `21.08` forward-merge conflicts ([#8502](https://github.com/rapidsai/cudf/pull/8502)) [@ajschmidt8](https://github.com/ajschmidt8) +- Fix Cython formatting command in Contributing.md. ([#8496](https://github.com/rapidsai/cudf/pull/8496)) [@marlenezw](https://github.com/marlenezw) +- Bug/correct unused parameters in reshape and text ([#8495](https://github.com/rapidsai/cudf/pull/8495)) [@robertmaynard](https://github.com/robertmaynard) +- Correct unused parameter warnings in partitioning and stream compact ([#8494](https://github.com/rapidsai/cudf/pull/8494)) [@robertmaynard](https://github.com/robertmaynard) +- Correct unused parameter warnings in labelling and list algorithms ([#8493](https://github.com/rapidsai/cudf/pull/8493)) [@robertmaynard](https://github.com/robertmaynard) +- Refactor index construction ([#8485](https://github.com/rapidsai/cudf/pull/8485)) [@vyasr](https://github.com/vyasr) +- Correct unused parameter warnings in replace algorithms ([#8483](https://github.com/rapidsai/cudf/pull/8483)) [@robertmaynard](https://github.com/robertmaynard) +- Correct unused parameter warnings in reduction algorithms ([#8481](https://github.com/rapidsai/cudf/pull/8481)) [@robertmaynard](https://github.com/robertmaynard) +- Correct unused parameter warnings in io algorithms ([#8480](https://github.com/rapidsai/cudf/pull/8480)) [@robertmaynard](https://github.com/robertmaynard) +- Correct unused parameter warnings in interop algorithms ([#8479](https://github.com/rapidsai/cudf/pull/8479)) 
[@robertmaynard](https://github.com/robertmaynard) +- Correct unused parameter warnings in filling algorithms ([#8468](https://github.com/rapidsai/cudf/pull/8468)) [@robertmaynard](https://github.com/robertmaynard) +- Correct unused parameter warnings in groupby ([#8467](https://github.com/rapidsai/cudf/pull/8467)) [@robertmaynard](https://github.com/robertmaynard) +- use libcu++ time_point as timestamp ([#8466](https://github.com/rapidsai/cudf/pull/8466)) [@karthikeyann](https://github.com/karthikeyann) +- Modify reprog_device::extract to return groups in a single pass ([#8460](https://github.com/rapidsai/cudf/pull/8460)) [@davidwendt](https://github.com/davidwendt) +- Update minimum Dask requirement to 2021.6.0 ([#8458](https://github.com/rapidsai/cudf/pull/8458)) [@pentschev](https://github.com/pentschev) +- Fix failures when performing binary operations on DataFrames with empty columns ([#8452](https://github.com/rapidsai/cudf/pull/8452)) [@ChrisJar](https://github.com/ChrisJar) +- Fix conflicts in `8447` ([#8448](https://github.com/rapidsai/cudf/pull/8448)) [@ajschmidt8](https://github.com/ajschmidt8) +- Add serialization methods for `List` and `StructDtype` ([#8441](https://github.com/rapidsai/cudf/pull/8441)) [@charlesbluca](https://github.com/charlesbluca) +- Replace make_empty_strings_column with make_empty_column ([#8435](https://github.com/rapidsai/cudf/pull/8435)) [@davidwendt](https://github.com/davidwendt) +- JNI bindings for get_element ([#8433](https://github.com/rapidsai/cudf/pull/8433)) [@revans2](https://github.com/revans2) +- Update dask make_meta changes to be compatible with dask upstream ([#8426](https://github.com/rapidsai/cudf/pull/8426)) [@galipremsagar](https://github.com/galipremsagar) +- Unpin dask version on CI 
([#8425](https://github.com/rapidsai/cudf/pull/8425)) [@galipremsagar](https://github.com/galipremsagar) +- Add benchmark for strings/fixed_point convert APIs ([#8417](https://github.com/rapidsai/cudf/pull/8417)) [@davidwendt](https://github.com/davidwendt) +- Adapt `cudf::scalar` classes to changes in `rmm::device_scalar` ([#8411](https://github.com/rapidsai/cudf/pull/8411)) [@harrism](https://github.com/harrism) +- Add benchmark for strings/integers convert APIs ([#8402](https://github.com/rapidsai/cudf/pull/8402)) [@davidwendt](https://github.com/davidwendt) +- Enable multi-file partitioning in dask_cudf.read_parquet ([#8393](https://github.com/rapidsai/cudf/pull/8393)) [@rjzamora](https://github.com/rjzamora) +- Correct unused parameter warnings in rolling algorithms ([#8390](https://github.com/rapidsai/cudf/pull/8390)) [@robertmaynard](https://github.com/robertmaynard) +- Correct unused parameters in column round and search ([#8389](https://github.com/rapidsai/cudf/pull/8389)) [@robertmaynard](https://github.com/robertmaynard) +- Add functionality to apply `Dtype` metadata to `ColumnBase` ([#8373](https://github.com/rapidsai/cudf/pull/8373)) [@charlesbluca](https://github.com/charlesbluca) +- Refactor setting stack size in regex code ([#8358](https://github.com/rapidsai/cudf/pull/8358)) [@davidwendt](https://github.com/davidwendt) +- Update Java bindings to 21.08-SNAPSHOT ([#8344](https://github.com/rapidsai/cudf/pull/8344)) [@pxLi](https://github.com/pxLi) +- Replace remaining uses of device_vector ([#8343](https://github.com/rapidsai/cudf/pull/8343)) [@harrism](https://github.com/harrism) +- Statically link libnvcomp into libcudfjni ([#8334](https://github.com/rapidsai/cudf/pull/8334)) [@jlowe](https://github.com/jlowe) +- Resolve auto merge conflicts for Branch 21.08 
from branch 21.06 ([#8329](https://github.com/rapidsai/cudf/pull/8329)) [@galipremsagar](https://github.com/galipremsagar) +- Minor code refactor for sorted_order ([#8326](https://github.com/rapidsai/cudf/pull/8326)) [@wbo4958](https://github.com/wbo4958) +- Remove special Index class from the general index class hierarchy ([#8309](https://github.com/rapidsai/cudf/pull/8309)) [@vyasr](https://github.com/vyasr) +- Add first-class dtype utilities ([#8308](https://github.com/rapidsai/cudf/pull/8308)) [@vyasr](https://github.com/vyasr) +- Add option to link Java bindings with Arrow dynamically ([#8307](https://github.com/rapidsai/cudf/pull/8307)) [@jlowe](https://github.com/jlowe) +- Refactor ColumnMethods and its subclasses to remove `column` argument and require `parent` argument ([#8306](https://github.com/rapidsai/cudf/pull/8306)) [@shwina](https://github.com/shwina) +- Refactor `scatter` for list columns ([#8255](https://github.com/rapidsai/cudf/pull/8255)) [@isVoid](https://github.com/isVoid) +- Expose pack/unpack API to Python ([#8153](https://github.com/rapidsai/cudf/pull/8153)) [@charlesbluca](https://github.com/charlesbluca) +- Adding cudf.cut method ([#8002](https://github.com/rapidsai/cudf/pull/8002)) [@marlenezw](https://github.com/marlenezw) +- Optimize string gather performance for large strings ([#7980](https://github.com/rapidsai/cudf/pull/7980)) [@gaohao95](https://github.com/gaohao95) +- Add peak memory usage tracking to cuIO benchmarks ([#7770](https://github.com/rapidsai/cudf/pull/7770)) [@devavret](https://github.com/devavret) +- Updating Clang Version to 11.0.0 ([#6695](https://github.com/rapidsai/cudf/pull/6695)) [@codereport](https://github.com/codereport) + +# cuDF 21.06.00 (9 Jun 2021) + +## 🚨 Breaking Changes + +- Add support for `make_meta_obj` 
dispatch in `dask-cudf` ([#8342](https://github.com/rapidsai/cudf/pull/8342)) [@galipremsagar](https://github.com/galipremsagar) +- Add separator-on-null parameter to strings concatenate APIs ([#8282](https://github.com/rapidsai/cudf/pull/8282)) [@davidwendt](https://github.com/davidwendt) +- Introduce a common parent class for NumericalColumn and DecimalColumn ([#8278](https://github.com/rapidsai/cudf/pull/8278)) [@vyasr](https://github.com/vyasr) +- Update ORC statistics API to use C++17 standard library ([#8241](https://github.com/rapidsai/cudf/pull/8241)) [@vuule](https://github.com/vuule) +- Preserve column hierarchy when getting NULL row from `LIST` column ([#8206](https://github.com/rapidsai/cudf/pull/8206)) [@isVoid](https://github.com/isVoid) +- `Groupby.shift` c++ API refactor and python binding ([#8131](https://github.com/rapidsai/cudf/pull/8131)) [@isVoid](https://github.com/isVoid) + +## 🐛 Bug Fixes + +- Fix struct flattening to add a validity column only when the input column has null element ([#8374](https://github.com/rapidsai/cudf/pull/8374)) [@ttnghia](https://github.com/ttnghia) +- Compilation fix: Remove redefinition for `std::is_same_v()` ([#8369](https://github.com/rapidsai/cudf/pull/8369)) [@mythrocks](https://github.com/mythrocks) +- Add backward compatibility for `dask-cudf` to work with other versions of `dask` ([#8368](https://github.com/rapidsai/cudf/pull/8368)) [@galipremsagar](https://github.com/galipremsagar) +- Handle empty results with nested types in copy_if_else ([#8359](https://github.com/rapidsai/cudf/pull/8359)) [@nvdbaranec](https://github.com/nvdbaranec) +- Handle nested column types properly for empty parquet files. 
([#8350](https://github.com/rapidsai/cudf/pull/8350)) [@nvdbaranec](https://github.com/nvdbaranec) +- Raise error when unsupported arguments are passed to `dask_cudf.DataFrame.sort_values` ([#8349](https://github.com/rapidsai/cudf/pull/8349)) [@galipremsagar](https://github.com/galipremsagar) +- Raise `NotImplementedError` for axis=1 in `rank` ([#8347](https://github.com/rapidsai/cudf/pull/8347)) [@galipremsagar](https://github.com/galipremsagar) +- Add support for `make_meta_obj` dispatch in `dask-cudf` ([#8342](https://github.com/rapidsai/cudf/pull/8342)) [@galipremsagar](https://github.com/galipremsagar) +- Update Java string concatenate test for single column ([#8330](https://github.com/rapidsai/cudf/pull/8330)) [@tgravescs](https://github.com/tgravescs) +- Use empty_like in scatter ([#8314](https://github.com/rapidsai/cudf/pull/8314)) [@revans2](https://github.com/revans2) +- Fix concatenate_lists_ignore_null on rows of all_nulls ([#8312](https://github.com/rapidsai/cudf/pull/8312)) [@sperlingxx](https://github.com/sperlingxx) +- Add separator-on-null parameter to strings concatenate APIs ([#8282](https://github.com/rapidsai/cudf/pull/8282)) [@davidwendt](https://github.com/davidwendt) +- COLLECT_LIST support returning empty output columns. 
([#8279](https://github.com/rapidsai/cudf/pull/8279)) [@mythrocks](https://github.com/mythrocks) +- Update io util to convert path like object to string ([#8275](https://github.com/rapidsai/cudf/pull/8275)) [@ayushdg](https://github.com/ayushdg) +- Fix result column types for empty inputs to rolling window ([#8274](https://github.com/rapidsai/cudf/pull/8274)) [@mythrocks](https://github.com/mythrocks) +- Actually test equality in assert_groupby_results_equal ([#8272](https://github.com/rapidsai/cudf/pull/8272)) [@shwina](https://github.com/shwina) +- CMake always explicitly specify a source files extension ([#8270](https://github.com/rapidsai/cudf/pull/8270)) [@robertmaynard](https://github.com/robertmaynard) +- Fix struct binary search and struct flattening ([#8268](https://github.com/rapidsai/cudf/pull/8268)) [@ttnghia](https://github.com/ttnghia) +- Revert "patch thrust to fix intmax num elements limitation in scan_by_key" ([#8263](https://github.com/rapidsai/cudf/pull/8263)) [@cwharris](https://github.com/cwharris) +- upgrade dlpack to 0.5 ([#8262](https://github.com/rapidsai/cudf/pull/8262)) [@cwharris](https://github.com/cwharris) +- Fixes CSV-reader type inference for thousands separator and decimal point ([#8261](https://github.com/rapidsai/cudf/pull/8261)) [@elstehle](https://github.com/elstehle) +- Fix incorrect assertion in Java concat ([#8258](https://github.com/rapidsai/cudf/pull/8258)) [@sperlingxx](https://github.com/sperlingxx) +- Copy nested types upon construction ([#8244](https://github.com/rapidsai/cudf/pull/8244)) [@isVoid](https://github.com/isVoid) +- Preserve column hierarchy when getting NULL row from `LIST` column ([#8206](https://github.com/rapidsai/cudf/pull/8206)) [@isVoid](https://github.com/isVoid) +- Clip decimal binary op precision at max 
precision ([#8194](https://github.com/rapidsai/cudf/pull/8194)) [@ChrisJar](https://github.com/ChrisJar) + +## 📖 Documentation + +- Add docstring for `dask_cudf.read_csv` ([#8355](https://github.com/rapidsai/cudf/pull/8355)) [@galipremsagar](https://github.com/galipremsagar) +- Fix cudf release version in readme ([#8331](https://github.com/rapidsai/cudf/pull/8331)) [@galipremsagar](https://github.com/galipremsagar) +- Fix structs column description in dev docs ([#8318](https://github.com/rapidsai/cudf/pull/8318)) [@isVoid](https://github.com/isVoid) +- Update readme with correct CUDA versions ([#8315](https://github.com/rapidsai/cudf/pull/8315)) [@raydouglass](https://github.com/raydouglass) +- Add description of the cuIO GDS integration ([#8293](https://github.com/rapidsai/cudf/pull/8293)) [@vuule](https://github.com/vuule) +- Remove unused parameter from copy_partition kernel documentation ([#8283](https://github.com/rapidsai/cudf/pull/8283)) [@robertmaynard](https://github.com/robertmaynard) + +## 🚀 New Features + +- Add support merging b/w categorical data ([#8332](https://github.com/rapidsai/cudf/pull/8332)) [@galipremsagar](https://github.com/galipremsagar) +- Java: Support struct scalar ([#8327](https://github.com/rapidsai/cudf/pull/8327)) [@sperlingxx](https://github.com/sperlingxx) +- added _is_homogeneous property ([#8299](https://github.com/rapidsai/cudf/pull/8299)) [@shaneding](https://github.com/shaneding) +- Added decimal writing for CSV writer ([#8296](https://github.com/rapidsai/cudf/pull/8296)) [@kaatish](https://github.com/kaatish) +- Java: Support creating a scalar from utf8 string ([#8294](https://github.com/rapidsai/cudf/pull/8294)) [@firestarman](https://github.com/firestarman) +- Add Java API for Concatenate strings with separator 
([#8289](https://github.com/rapidsai/cudf/pull/8289)) [@tgravescs](https://github.com/tgravescs) +- `strings::join_list_elements` options for empty list inputs ([#8285](https://github.com/rapidsai/cudf/pull/8285)) [@ttnghia](https://github.com/ttnghia) +- Return python lists for __getitem__ calls to list type series ([#8265](https://github.com/rapidsai/cudf/pull/8265)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- add unit tests for lead/lag on list for row window ([#8259](https://github.com/rapidsai/cudf/pull/8259)) [@wbo4958](https://github.com/wbo4958) +- Create a String column from UTF8 String byte arrays ([#8257](https://github.com/rapidsai/cudf/pull/8257)) [@firestarman](https://github.com/firestarman) +- Support scattering `list_scalar` ([#8256](https://github.com/rapidsai/cudf/pull/8256)) [@isVoid](https://github.com/isVoid) +- Implement `lists::concatenate_list_elements` ([#8231](https://github.com/rapidsai/cudf/pull/8231)) [@ttnghia](https://github.com/ttnghia) +- Support for struct scalars. 
([#8220](https://github.com/rapidsai/cudf/pull/8220)) [@nvdbaranec](https://github.com/nvdbaranec) +- Add support for decimal types in ORC writer ([#8198](https://github.com/rapidsai/cudf/pull/8198)) [@vuule](https://github.com/vuule) +- Support create lists column from a `list_scalar` ([#8185](https://github.com/rapidsai/cudf/pull/8185)) [@isVoid](https://github.com/isVoid) +- `Groupby.shift` c++ API refactor and python binding ([#8131](https://github.com/rapidsai/cudf/pull/8131)) [@isVoid](https://github.com/isVoid) +- Add `groupby::replace_nulls(replace_policy)` api ([#7118](https://github.com/rapidsai/cudf/pull/7118)) [@isVoid](https://github.com/isVoid) + +## 🛠️ Improvements + +- Support Dask + Distributed 2021.05.1 ([#8392](https://github.com/rapidsai/cudf/pull/8392)) [@jakirkham](https://github.com/jakirkham) +- Add aliases for string methods ([#8353](https://github.com/rapidsai/cudf/pull/8353)) [@shwina](https://github.com/shwina) +- Update environment variable used to determine `cuda_version` ([#8321](https://github.com/rapidsai/cudf/pull/8321)) [@ajschmidt8](https://github.com/ajschmidt8) +- JNI: Refactor the code of making column from scalar ([#8310](https://github.com/rapidsai/cudf/pull/8310)) [@firestarman](https://github.com/firestarman) +- Update `CHANGELOG.md` links for calver ([#8303](https://github.com/rapidsai/cudf/pull/8303)) [@ajschmidt8](https://github.com/ajschmidt8) +- Merge `branch-0.19` into `branch-21.06` ([#8302](https://github.com/rapidsai/cudf/pull/8302)) [@ajschmidt8](https://github.com/ajschmidt8) +- use address and length for GDS reads/writes ([#8301](https://github.com/rapidsai/cudf/pull/8301)) [@rongou](https://github.com/rongou) +- Update cudfjni version to 21.06.0 ([#8292](https://github.com/rapidsai/cudf/pull/8292)) 
[@pxLi](https://github.com/pxLi) +- Update docs build script ([#8284](https://github.com/rapidsai/cudf/pull/8284)) [@ajschmidt8](https://github.com/ajschmidt8) +- Make device_buffer streams explicit and enforce move construction ([#8280](https://github.com/rapidsai/cudf/pull/8280)) [@harrism](https://github.com/harrism) +- Introduce a common parent class for NumericalColumn and DecimalColumn ([#8278](https://github.com/rapidsai/cudf/pull/8278)) [@vyasr](https://github.com/vyasr) +- Do not add nulls to the hash table when null_equality::NOT_EQUAL is passed to left_semi_join and left_anti_join ([#8277](https://github.com/rapidsai/cudf/pull/8277)) [@nvdbaranec](https://github.com/nvdbaranec) +- Enable implicit casting when concatenating mixed types ([#8276](https://github.com/rapidsai/cudf/pull/8276)) [@ChrisJar](https://github.com/ChrisJar) +- Fix CMake FindPackage rmm, pin dev envs' dlpack to v0.3 ([#8271](https://github.com/rapidsai/cudf/pull/8271)) [@trxcllnt](https://github.com/trxcllnt) +- Update cudfjni version to 21.06 ([#8267](https://github.com/rapidsai/cudf/pull/8267)) [@pxLi](https://github.com/pxLi) +- support RMM aligned resource adapter in JNI ([#8266](https://github.com/rapidsai/cudf/pull/8266)) [@rongou](https://github.com/rongou) +- Pass compiler environment variables to conda python build ([#8260](https://github.com/rapidsai/cudf/pull/8260)) [@Ethyling](https://github.com/Ethyling) +- Remove abc inheritance from Serializable ([#8254](https://github.com/rapidsai/cudf/pull/8254)) [@vyasr](https://github.com/vyasr) +- Move more methods into SingleColumnFrame ([#8253](https://github.com/rapidsai/cudf/pull/8253)) [@vyasr](https://github.com/vyasr) +- Update ORC statistics API to use C++17 standard library ([#8241](https://github.com/rapidsai/cudf/pull/8241)) 
[@vuule](https://github.com/vuule) +- Correct unused parameter warnings in dictionary algorithms ([#8239](https://github.com/rapidsai/cudf/pull/8239)) [@robertmaynard](https://github.com/robertmaynard) +- Correct unused parameters in the copying algorithms ([#8232](https://github.com/rapidsai/cudf/pull/8232)) [@robertmaynard](https://github.com/robertmaynard) +- IO statistics cleanup ([#8191](https://github.com/rapidsai/cudf/pull/8191)) [@kaatish](https://github.com/kaatish) +- Refactor of rolling_window implementation. ([#8158](https://github.com/rapidsai/cudf/pull/8158)) [@nvdbaranec](https://github.com/nvdbaranec) +- Add a flag for allowing single quotes in JSON strings. ([#8144](https://github.com/rapidsai/cudf/pull/8144)) [@nvdbaranec](https://github.com/nvdbaranec) +- Column refactoring 2 ([#8130](https://github.com/rapidsai/cudf/pull/8130)) [@vyasr](https://github.com/vyasr) +- support space in workspace ([#7956](https://github.com/rapidsai/cudf/pull/7956)) [@jolorunyomi](https://github.com/jolorunyomi) +- Support collect_set on rolling window ([#7881](https://github.com/rapidsai/cudf/pull/7881)) [@sperlingxx](https://github.com/sperlingxx) + +# cuDF 0.19.0 (21 Apr 2021) + +## 🚨 Breaking Changes + +- Allow hash_partition to take a seed value ([#7771](https://github.com/rapidsai/cudf/pull/7771)) [@magnatelee](https://github.com/magnatelee) +- Allow merging index column with data column using keyword "on" ([#7736](https://github.com/rapidsai/cudf/pull/7736)) [@skirui-source](https://github.com/skirui-source) +- Change JNI API to avoid loading native dependencies when creating sort order classes. 
([#7729](https://github.com/rapidsai/cudf/pull/7729)) [@revans2](https://github.com/revans2) +- Replace device_vector with device_uvector in null_mask ([#7715](https://github.com/rapidsai/cudf/pull/7715)) [@harrism](https://github.com/harrism) +- Don't identify decimals as strings. ([#7710](https://github.com/rapidsai/cudf/pull/7710)) [@vyasr](https://github.com/vyasr) +- Fix Java Parquet write after writer API changes ([#7655](https://github.com/rapidsai/cudf/pull/7655)) [@revans2](https://github.com/revans2) +- Convert cudf::concatenate APIs to use spans and device_uvector ([#7621](https://github.com/rapidsai/cudf/pull/7621)) [@harrism](https://github.com/harrism) +- Update missing docstring examples in python public APIs ([#7546](https://github.com/rapidsai/cudf/pull/7546)) [@galipremsagar](https://github.com/galipremsagar) +- Remove unneeded step parameter from strings::detail::copy_slice ([#7525](https://github.com/rapidsai/cudf/pull/7525)) [@davidwendt](https://github.com/davidwendt) +- Rename ARROW_STATIC_LIB because it conflicts with one in FindArrow.cmake ([#7518](https://github.com/rapidsai/cudf/pull/7518)) [@trxcllnt](https://github.com/trxcllnt) +- Match Pandas logic for comparing two objects with nulls ([#7490](https://github.com/rapidsai/cudf/pull/7490)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Add struct support to parquet writer ([#7461](https://github.com/rapidsai/cudf/pull/7461)) [@devavret](https://github.com/devavret) +- Join APIs that return gathermaps ([#7454](https://github.com/rapidsai/cudf/pull/7454)) [@shwina](https://github.com/shwina) +- `fixed_point` + `cudf::binary_operation` API Changes ([#7435](https://github.com/rapidsai/cudf/pull/7435)) [@codereport](https://github.com/codereport) +- Fix BUG: Exception when PYTHONOPTIMIZE=2 
([#7434](https://github.com/rapidsai/cudf/pull/7434)) [@skirui-source](https://github.com/skirui-source) +- Change nvtext::load_vocabulary_file to return a unique ptr ([#7424](https://github.com/rapidsai/cudf/pull/7424)) [@davidwendt](https://github.com/davidwendt) +- Refactor strings column factories ([#7397](https://github.com/rapidsai/cudf/pull/7397)) [@harrism](https://github.com/harrism) +- Use CMAKE_CUDA_ARCHITECTURES ([#7391](https://github.com/rapidsai/cudf/pull/7391)) [@robertmaynard](https://github.com/robertmaynard) +- Upgrade pandas to 1.2 ([#7375](https://github.com/rapidsai/cudf/pull/7375)) [@galipremsagar](https://github.com/galipremsagar) +- Rename `logical_cast` to `bit_cast` and allow additional conversions ([#7373](https://github.com/rapidsai/cudf/pull/7373)) [@ttnghia](https://github.com/ttnghia) +- Rework libcudf CMakeLists.txt to export targets for CPM ([#7107](https://github.com/rapidsai/cudf/pull/7107)) [@trxcllnt](https://github.com/trxcllnt) + +## 🐛 Bug Fixes + +- Fix a `NameError` in meta dispatch API ([#7996](https://github.com/rapidsai/cudf/pull/7996)) [@galipremsagar](https://github.com/galipremsagar) +- Reindex in `DataFrame.__setitem__` ([#7957](https://github.com/rapidsai/cudf/pull/7957)) [@galipremsagar](https://github.com/galipremsagar) +- jitify direct-to-cubin compilation and caching. 
([#7919](https://github.com/rapidsai/cudf/pull/7919)) [@cwharris](https://github.com/cwharris) +- Use dynamic cudart for nvcomp in java build ([#7896](https://github.com/rapidsai/cudf/pull/7896)) [@abellina](https://github.com/abellina) +- fix "incompatible redefinition" warnings ([#7894](https://github.com/rapidsai/cudf/pull/7894)) [@cwharris](https://github.com/cwharris) +- cudf consistently specifies the cuda runtime ([#7887](https://github.com/rapidsai/cudf/pull/7887)) [@robertmaynard](https://github.com/robertmaynard) +- disable verbose output for jitify_preprocess ([#7886](https://github.com/rapidsai/cudf/pull/7886)) [@cwharris](https://github.com/cwharris) +- CMake jit_preprocess_files function only runs when needed ([#7872](https://github.com/rapidsai/cudf/pull/7872)) [@robertmaynard](https://github.com/robertmaynard) +- Push DeviceScalar construction into cython for list.contains ([#7864](https://github.com/rapidsai/cudf/pull/7864)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- cudf now sets an install rpath of $ORIGIN ([#7863](https://github.com/rapidsai/cudf/pull/7863)) [@robertmaynard](https://github.com/robertmaynard) +- Don't install Thrust examples, tests, docs, and python files ([#7811](https://github.com/rapidsai/cudf/pull/7811)) [@robertmaynard](https://github.com/robertmaynard) +- Sort by index in groupby tests more consistently ([#7802](https://github.com/rapidsai/cudf/pull/7802)) [@shwina](https://github.com/shwina) +- Revert "Update conda recipes pinning of repo dependencies (#7743)" ([#7793](https://github.com/rapidsai/cudf/pull/7793)) [@raydouglass](https://github.com/raydouglass) +- Add decimal column handling in copy_type_metadata ([#7788](https://github.com/rapidsai/cudf/pull/7788)) [@shwina](https://github.com/shwina) +- Add 
column names validation in parquet writer ([#7786](https://github.com/rapidsai/cudf/pull/7786)) [@galipremsagar](https://github.com/galipremsagar) +- Fix Java explode outer unit tests ([#7782](https://github.com/rapidsai/cudf/pull/7782)) [@jlowe](https://github.com/jlowe) +- Fix compiler warning about non-POD types passed through ellipsis ([#7781](https://github.com/rapidsai/cudf/pull/7781)) [@jrhemstad](https://github.com/jrhemstad) +- User resource fix for replace_nulls ([#7769](https://github.com/rapidsai/cudf/pull/7769)) [@magnatelee](https://github.com/magnatelee) +- Fix type dispatch for columnar replace_nulls ([#7768](https://github.com/rapidsai/cudf/pull/7768)) [@jlowe](https://github.com/jlowe) +- Add `ignore_order` parameter to dask-cudf concat dispatch ([#7765](https://github.com/rapidsai/cudf/pull/7765)) [@galipremsagar](https://github.com/galipremsagar) +- Fix slicing and arrow representations of decimal columns ([#7755](https://github.com/rapidsai/cudf/pull/7755)) [@vyasr](https://github.com/vyasr) +- Fixing issue with explode_outer position not nulling position entries of null rows ([#7754](https://github.com/rapidsai/cudf/pull/7754)) [@hyperbolic2346](https://github.com/hyperbolic2346) +- Implement scatter for struct columns ([#7752](https://github.com/rapidsai/cudf/pull/7752)) [@ttnghia](https://github.com/ttnghia) +- Fix data corruption in string columns ([#7746](https://github.com/rapidsai/cudf/pull/7746)) [@galipremsagar](https://github.com/galipremsagar) +- Fix string length in stripe dictionary building ([#7744](https://github.com/rapidsai/cudf/pull/7744)) [@kaatish](https://github.com/kaatish) +- Update conda recipes pinning of repo dependencies ([#7743](https://github.com/rapidsai/cudf/pull/7743)) [@mike-wendt](https://github.com/mike-wendt) +- Enable 
dask dispatch to cuDF's `is_categorical_dtype` for cuDF objects ([#7740](https://github.com/rapidsai/cudf/pull/7740)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Fix dictionary size computation in ORC writer ([#7737](https://github.com/rapidsai/cudf/pull/7737)) [@vuule](https://github.com/vuule) +- Fix `cudf::cast` overflow for `decimal64` to `int32_t` or smaller in certain cases ([#7733](https://github.com/rapidsai/cudf/pull/7733)) [@codereport](https://github.com/codereport) +- Change JNI API to avoid loading native dependencies when creating sort order classes. ([#7729](https://github.com/rapidsai/cudf/pull/7729)) [@revans2](https://github.com/revans2) +- Disable column_view data accessors for unsupported types ([#7725](https://github.com/rapidsai/cudf/pull/7725)) [@jrhemstad](https://github.com/jrhemstad) +- Materialize `RangeIndex` when `index=True` in parquet writer ([#7711](https://github.com/rapidsai/cudf/pull/7711)) [@galipremsagar](https://github.com/galipremsagar) +- Don't identify decimals as strings. 
([#7710](https://github.com/rapidsai/cudf/pull/7710)) [@vyasr](https://github.com/vyasr) +- Fix return type of `DataFrame.argsort` ([#7706](https://github.com/rapidsai/cudf/pull/7706)) [@galipremsagar](https://github.com/galipremsagar) +- Fix/correct cudf installed package requirements ([#7688](https://github.com/rapidsai/cudf/pull/7688)) [@robertmaynard](https://github.com/robertmaynard) +- Fix SparkMurmurHash3_32 hash inconsistencies with Apache Spark ([#7672](https://github.com/rapidsai/cudf/pull/7672)) [@jlowe](https://github.com/jlowe) +- Fix ORC reader issue with reading empty string columns ([#7656](https://github.com/rapidsai/cudf/pull/7656)) [@rgsl888prabhu](https://github.com/rgsl888prabhu) +- Fix Java Parquet write after writer API changes ([#7655](https://github.com/rapidsai/cudf/pull/7655)) [@revans2](https://github.com/revans2) +- Fixing empty null lists throwing explode_outer for a loop. 
([#7649](https://github.com/rapidsai/cudf/pull/7649)) [@hyperbolic2346](https://github.com/hyperbolic2346) +- Fix internal compiler error during JNI Docker build ([#7645](https://github.com/rapidsai/cudf/pull/7645)) [@jlowe](https://github.com/jlowe) +- Fix Debug build break with device_uvectors in grouped_rolling.cu ([#7633](https://github.com/rapidsai/cudf/pull/7633)) [@mythrocks](https://github.com/mythrocks) +- Parquet reader: Fix issue when using skip_rows on non-nested columns containing nulls ([#7627](https://github.com/rapidsai/cudf/pull/7627)) [@nvdbaranec](https://github.com/nvdbaranec) +- Fix ORC reader for empty DataFrame/Table ([#7624](https://github.com/rapidsai/cudf/pull/7624)) [@rgsl888prabhu](https://github.com/rgsl888prabhu) +- Fix specifying GPU architecture in JNI build ([#7612](https://github.com/rapidsai/cudf/pull/7612)) [@jlowe](https://github.com/jlowe) +- Fix ORC writer OOM issue ([#7605](https://github.com/rapidsai/cudf/pull/7605)) [@vuule](https://github.com/vuule) +- Fix 0.18 --> 0.19 automerge ([#7589](https://github.com/rapidsai/cudf/pull/7589)) [@kkraus14](https://github.com/kkraus14) +- Fix ORC issue with incorrect timestamp nanosecond values ([#7581](https://github.com/rapidsai/cudf/pull/7581)) [@vuule](https://github.com/vuule) +- Fix missing Dask imports ([#7580](https://github.com/rapidsai/cudf/pull/7580)) [@kkraus14](https://github.com/kkraus14) +- CMAKE_CUDA_ARCHITECTURES doesn't change when build-system invokes cmake ([#7579](https://github.com/rapidsai/cudf/pull/7579)) [@robertmaynard](https://github.com/robertmaynard) +- Another fix for offsets_end() iterator in lists_column_view ([#7575](https://github.com/rapidsai/cudf/pull/7575)) [@ttnghia](https://github.com/ttnghia) +- Fix ORC writer output corruption with string columns 
([#7565](https://github.com/rapidsai/cudf/pull/7565)) [@vuule](https://github.com/vuule) +- Fix cudf::lists::sort_lists failing for sliced column ([#7564](https://github.com/rapidsai/cudf/pull/7564)) [@ttnghia](https://github.com/ttnghia) +- FIX Fix Anaconda upload args ([#7558](https://github.com/rapidsai/cudf/pull/7558)) [@dillon-cullinan](https://github.com/dillon-cullinan) +- Fix index mismatch issue in equality related APIs ([#7555](https://github.com/rapidsai/cudf/pull/7555)) [@galipremsagar](https://github.com/galipremsagar) +- FIX Revert gpuci_conda_retry on conda file output locations ([#7552](https://github.com/rapidsai/cudf/pull/7552)) [@dillon-cullinan](https://github.com/dillon-cullinan) +- Fix offset_end iterator for lists_column_view, which was not correctl… ([#7551](https://github.com/rapidsai/cudf/pull/7551)) [@ttnghia](https://github.com/ttnghia) +- Fix no such file dlpack.h error when build libcudf ([#7549](https://github.com/rapidsai/cudf/pull/7549)) [@chenrui17](https://github.com/chenrui17) +- Update missing docstring examples in python public APIs ([#7546](https://github.com/rapidsai/cudf/pull/7546)) [@galipremsagar](https://github.com/galipremsagar) +- Decimal32 Build Fix ([#7544](https://github.com/rapidsai/cudf/pull/7544)) [@razajafri](https://github.com/razajafri) +- FIX Retry conda output location ([#7540](https://github.com/rapidsai/cudf/pull/7540)) [@dillon-cullinan](https://github.com/dillon-cullinan) +- fix missing renames of dask git branches from master to main ([#7535](https://github.com/rapidsai/cudf/pull/7535)) [@kkraus14](https://github.com/kkraus14) +- Remove detail from device_span ([#7533](https://github.com/rapidsai/cudf/pull/7533)) [@rwlee](https://github.com/rwlee) +- Change dask and distributed branch to main 
([#7532](https://github.com/rapidsai/cudf/pull/7532)) [@dantegd](https://github.com/dantegd) +- Update JNI build to use CUDF_USE_ARROW_STATIC ([#7526](https://github.com/rapidsai/cudf/pull/7526)) [@jlowe](https://github.com/jlowe) +- Make sure rmm::rmm CMake target is visible to cudf users ([#7524](https://github.com/rapidsai/cudf/pull/7524)) [@robertmaynard](https://github.com/robertmaynard) +- Fix contiguous_split not properly handling output partitions > 2 GB. ([#7515](https://github.com/rapidsai/cudf/pull/7515)) [@nvdbaranec](https://github.com/nvdbaranec) +- Change jit launch to safe_launch ([#7510](https://github.com/rapidsai/cudf/pull/7510)) [@devavret](https://github.com/devavret) +- Fix comparison between Datetime/Timedelta columns and NULL scalars ([#7504](https://github.com/rapidsai/cudf/pull/7504)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Fix off-by-one error in char-parallel string scalar replace ([#7502](https://github.com/rapidsai/cudf/pull/7502)) [@jlowe](https://github.com/jlowe) +- Fix JNI deprecation of all, put it on the wrong version before ([#7501](https://github.com/rapidsai/cudf/pull/7501)) [@revans2](https://github.com/revans2) +- Fix Series/Dataframe Mixed Arithmetic ([#7491](https://github.com/rapidsai/cudf/pull/7491)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Fix JNI build after removal of libcudf sub-libraries ([#7486](https://github.com/rapidsai/cudf/pull/7486)) [@jlowe](https://github.com/jlowe) +- Correctly compile benchmarks ([#7485](https://github.com/rapidsai/cudf/pull/7485)) [@robertmaynard](https://github.com/robertmaynard) +- Fix bool column corruption with ORC Reader ([#7483](https://github.com/rapidsai/cudf/pull/7483)) [@rgsl888prabhu](https://github.com/rgsl888prabhu) +- Fix `__repr__` for 
categorical dtype ([#7476](https://github.com/rapidsai/cudf/pull/7476)) [@galipremsagar](https://github.com/galipremsagar) +- Java cleaner synchronization ([#7474](https://github.com/rapidsai/cudf/pull/7474)) [@abellina](https://github.com/abellina) +- Fix java float/double parsing tests ([#7473](https://github.com/rapidsai/cudf/pull/7473)) [@revans2](https://github.com/revans2) +- Pass stream and user resource to make_default_constructed_scalar ([#7469](https://github.com/rapidsai/cudf/pull/7469)) [@magnatelee](https://github.com/magnatelee) +- Improve stability of dask_cudf.DataFrame.var and dask_cudf.DataFrame.std ([#7453](https://github.com/rapidsai/cudf/pull/7453)) [@rjzamora](https://github.com/rjzamora) +- Missing `device_storage_dispatch` change affecting `cudf::gather` ([#7449](https://github.com/rapidsai/cudf/pull/7449)) [@codereport](https://github.com/codereport) +- fix cuFile JNI compile errors ([#7445](https://github.com/rapidsai/cudf/pull/7445)) [@rongou](https://github.com/rongou) +- Support `Series.__setitem__` with key to a new row ([#7443](https://github.com/rapidsai/cudf/pull/7443)) [@isVoid](https://github.com/isVoid) +- Fix BUG: Exception when PYTHONOPTIMIZE=2 ([#7434](https://github.com/rapidsai/cudf/pull/7434)) [@skirui-source](https://github.com/skirui-source) +- Make inclusive scan safe for cases with leading nulls ([#7432](https://github.com/rapidsai/cudf/pull/7432)) [@magnatelee](https://github.com/magnatelee) +- Fix typo in list_device_view::pair_rep_end() ([#7423](https://github.com/rapidsai/cudf/pull/7423)) [@mythrocks](https://github.com/mythrocks) +- Fix string to double conversion and row equivalent comparison ([#7410](https://github.com/rapidsai/cudf/pull/7410)) [@ttnghia](https://github.com/ttnghia) +- Fix thrust failure when transferring 
data from device_vector to host_vector with vectors of size 1 ([#7382](https://github.com/rapidsai/cudf/pull/7382)) [@ttnghia](https://github.com/ttnghia) +- Fix std::exception catch-by-reference gcc9 compile error ([#7380](https://github.com/rapidsai/cudf/pull/7380)) [@davidwendt](https://github.com/davidwendt) +- Fix skiprows issue with ORC Reader ([#7359](https://github.com/rapidsai/cudf/pull/7359)) [@rgsl888prabhu](https://github.com/rgsl888prabhu) +- fix Arrow CMake file ([#7358](https://github.com/rapidsai/cudf/pull/7358)) [@rongou](https://github.com/rongou) +- Fix lists::contains() for NaN and Decimals ([#7349](https://github.com/rapidsai/cudf/pull/7349)) [@mythrocks](https://github.com/mythrocks) +- Handle cupy array in `Dataframe.__setitem__` ([#7340](https://github.com/rapidsai/cudf/pull/7340)) [@galipremsagar](https://github.com/galipremsagar) +- Fix invalid-device-fn error in cudf::strings::replace_re with multiple regex's ([#7336](https://github.com/rapidsai/cudf/pull/7336)) [@davidwendt](https://github.com/davidwendt) +- FIX Add codecov upload block to gpu script ([#6860](https://github.com/rapidsai/cudf/pull/6860)) [@dillon-cullinan](https://github.com/dillon-cullinan) + +## 📖 Documentation + +- Fix join API doxygen ([#7890](https://github.com/rapidsai/cudf/pull/7890)) [@shwina](https://github.com/shwina) +- Add Resources to README. 
([#7697](https://github.com/rapidsai/cudf/pull/7697)) [@bdice](https://github.com/bdice) +- Add `isin` examples in Docstring ([#7479](https://github.com/rapidsai/cudf/pull/7479)) [@galipremsagar](https://github.com/galipremsagar) +- Resolving unlinked type shorthands in cudf doc ([#7416](https://github.com/rapidsai/cudf/pull/7416)) [@isVoid](https://github.com/isVoid) +- Fix typo in regex.md doc page ([#7363](https://github.com/rapidsai/cudf/pull/7363)) [@davidwendt](https://github.com/davidwendt) +- Fix incorrect strings_column_view::chars_size documentation ([#7360](https://github.com/rapidsai/cudf/pull/7360)) [@jlowe](https://github.com/jlowe) + +## 🚀 New Features + +- Enable basic reductions for decimal columns ([#7776](https://github.com/rapidsai/cudf/pull/7776)) [@ChrisJar](https://github.com/ChrisJar) +- Enable join on decimal columns ([#7764](https://github.com/rapidsai/cudf/pull/7764)) [@ChrisJar](https://github.com/ChrisJar) +- Allow merging index column with data column using keyword "on" ([#7736](https://github.com/rapidsai/cudf/pull/7736)) [@skirui-source](https://github.com/skirui-source) +- Implement DecimalColumn + Scalar and add cudf.Scalars of Decimal64Dtype ([#7732](https://github.com/rapidsai/cudf/pull/7732)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Add support for `unique` groupby aggregation ([#7726](https://github.com/rapidsai/cudf/pull/7726)) [@shwina](https://github.com/shwina) +- Expose libcudf's label_bins function to cudf ([#7724](https://github.com/rapidsai/cudf/pull/7724)) [@vyasr](https://github.com/vyasr) +- Adding support for equi-join on struct ([#7720](https://github.com/rapidsai/cudf/pull/7720)) [@hyperbolic2346](https://github.com/hyperbolic2346) +- Add decimal column comparison operations 
([#7716](https://github.com/rapidsai/cudf/pull/7716)) [@isVoid](https://github.com/isVoid) +- Implement scan operations for decimal columns ([#7707](https://github.com/rapidsai/cudf/pull/7707)) [@ChrisJar](https://github.com/ChrisJar) +- Enable typecasting between decimal and int ([#7691](https://github.com/rapidsai/cudf/pull/7691)) [@ChrisJar](https://github.com/ChrisJar) +- Enable decimal support in parquet writer ([#7673](https://github.com/rapidsai/cudf/pull/7673)) [@devavret](https://github.com/devavret) +- Adds `list.unique` API ([#7664](https://github.com/rapidsai/cudf/pull/7664)) [@isVoid](https://github.com/isVoid) +- Fix NaN handling in drop_list_duplicates ([#7662](https://github.com/rapidsai/cudf/pull/7662)) [@ttnghia](https://github.com/ttnghia) +- Add `lists.sort_values` API ([#7657](https://github.com/rapidsai/cudf/pull/7657)) [@isVoid](https://github.com/isVoid) +- Add is_integer API that can check for the validity of a string-to-integer conversion ([#7642](https://github.com/rapidsai/cudf/pull/7642)) [@ttnghia](https://github.com/ttnghia) +- Adds `explode` API ([#7607](https://github.com/rapidsai/cudf/pull/7607)) [@isVoid](https://github.com/isVoid) +- Adds `list.take`, python binding for `cudf::lists::segmented_gather` ([#7591](https://github.com/rapidsai/cudf/pull/7591)) [@isVoid](https://github.com/isVoid) +- Implement cudf::label_bins() ([#7554](https://github.com/rapidsai/cudf/pull/7554)) [@vyasr](https://github.com/vyasr) +- Add Python bindings for `lists::contains` ([#7547](https://github.com/rapidsai/cudf/pull/7547)) [@skirui-source](https://github.com/skirui-source) +- cudf::row_bit_count() support. 
([#7534](https://github.com/rapidsai/cudf/pull/7534)) [@nvdbaranec](https://github.com/nvdbaranec) +- Implement drop_list_duplicates ([#7528](https://github.com/rapidsai/cudf/pull/7528)) [@ttnghia](https://github.com/ttnghia) +- Add Python bindings for `lists::extract_lists_element` ([#7505](https://github.com/rapidsai/cudf/pull/7505)) [@skirui-source](https://github.com/skirui-source) +- Add explode_outer and explode_outer_position ([#7499](https://github.com/rapidsai/cudf/pull/7499)) [@hyperbolic2346](https://github.com/hyperbolic2346) +- Match Pandas logic for comparing two objects with nulls ([#7490](https://github.com/rapidsai/cudf/pull/7490)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Add struct support to parquet writer ([#7461](https://github.com/rapidsai/cudf/pull/7461)) [@devavret](https://github.com/devavret) +- Enable type conversion from float to decimal type ([#7450](https://github.com/rapidsai/cudf/pull/7450)) [@ChrisJar](https://github.com/ChrisJar) +- Add cython for converting strings/fixed-point functions ([#7429](https://github.com/rapidsai/cudf/pull/7429)) [@davidwendt](https://github.com/davidwendt) +- Add struct column support to cudf::sort and cudf::sorted_order ([#7422](https://github.com/rapidsai/cudf/pull/7422)) [@karthikeyann](https://github.com/karthikeyann) +- Implement groupby collect_set ([#7420](https://github.com/rapidsai/cudf/pull/7420)) [@ttnghia](https://github.com/ttnghia) +- Merge branch-0.18 into branch-0.19 ([#7411](https://github.com/rapidsai/cudf/pull/7411)) [@raydouglass](https://github.com/raydouglass) +- Refactor strings column factories ([#7397](https://github.com/rapidsai/cudf/pull/7397)) [@harrism](https://github.com/harrism) +- Add groupby scan operations (sort groupby) 
([#7387](https://github.com/rapidsai/cudf/pull/7387)) [@karthikeyann](https://github.com/karthikeyann) +- Add cudf::explode_position ([#7376](https://github.com/rapidsai/cudf/pull/7376)) [@hyperbolic2346](https://github.com/hyperbolic2346) +- Add string conversion to/from decimal values libcudf APIs ([#7364](https://github.com/rapidsai/cudf/pull/7364)) [@davidwendt](https://github.com/davidwendt) +- Add groupby SUM_OF_SQUARES support ([#7362](https://github.com/rapidsai/cudf/pull/7362)) [@karthikeyann](https://github.com/karthikeyann) +- Add `Series.drop` api ([#7304](https://github.com/rapidsai/cudf/pull/7304)) [@isVoid](https://github.com/isVoid) +- get_json_object() implementation ([#7286](https://github.com/rapidsai/cudf/pull/7286)) [@nvdbaranec](https://github.com/nvdbaranec) +- Python API for `ListMethods.len()` ([#7283](https://github.com/rapidsai/cudf/pull/7283)) [@isVoid](https://github.com/isVoid) +- Support null_policy::EXCLUDE for COLLECT rolling aggregation ([#7264](https://github.com/rapidsai/cudf/pull/7264)) [@mythrocks](https://github.com/mythrocks) +- Add support for special tokens in nvtext::subword_tokenizer ([#7254](https://github.com/rapidsai/cudf/pull/7254)) [@davidwendt](https://github.com/davidwendt) +- Fix inplace update of data and add Series.update ([#7201](https://github.com/rapidsai/cudf/pull/7201)) [@galipremsagar](https://github.com/galipremsagar) +- Implement `cudf::group_by` (hash) for `decimal32` and `decimal64` ([#7190](https://github.com/rapidsai/cudf/pull/7190)) [@codereport](https://github.com/codereport) +- Adding support to specify "level" parameter for `Dataframe.rename` ([#7135](https://github.com/rapidsai/cudf/pull/7135)) [@skirui-source](https://github.com/skirui-source) + +## 🛠️ Improvements + +- fix GDS include path for version
0.95 ([#7877](https://github.com/rapidsai/cudf/pull/7877)) [@rongou](https://github.com/rongou) +- Update `dask` + `distributed` to `2021.4.0` ([#7858](https://github.com/rapidsai/cudf/pull/7858)) [@jakirkham](https://github.com/jakirkham) +- Add ability to extract include dirs from `CUDF_HOME` ([#7848](https://github.com/rapidsai/cudf/pull/7848)) [@galipremsagar](https://github.com/galipremsagar) +- Add USE_GDS as an option in build script ([#7833](https://github.com/rapidsai/cudf/pull/7833)) [@pxLi](https://github.com/pxLi) +- add an allocate method with stream in java DeviceMemoryBuffer ([#7826](https://github.com/rapidsai/cudf/pull/7826)) [@rongou](https://github.com/rongou) +- Constrain dask and distributed versions to 2021.3.1 ([#7825](https://github.com/rapidsai/cudf/pull/7825)) [@shwina](https://github.com/shwina) +- Revert dask versioning of concat dispatch ([#7823](https://github.com/rapidsai/cudf/pull/7823)) [@galipremsagar](https://github.com/galipremsagar) +- add copy methods in Java memory buffer ([#7791](https://github.com/rapidsai/cudf/pull/7791)) [@rongou](https://github.com/rongou) +- Update README and CONTRIBUTING for 0.19 ([#7778](https://github.com/rapidsai/cudf/pull/7778)) [@robertmaynard](https://github.com/robertmaynard) +- Allow hash_partition to take a seed value ([#7771](https://github.com/rapidsai/cudf/pull/7771)) [@magnatelee](https://github.com/magnatelee) +- Turn on NVTX by default in java build ([#7761](https://github.com/rapidsai/cudf/pull/7761)) [@tgravescs](https://github.com/tgravescs) +- Add Java bindings to join gather map APIs ([#7751](https://github.com/rapidsai/cudf/pull/7751)) [@jlowe](https://github.com/jlowe) +- Add replacements column support for Java replaceNulls ([#7750](https://github.com/rapidsai/cudf/pull/7750)) 
[@jlowe](https://github.com/jlowe) +- Add Java bindings for row_bit_count ([#7749](https://github.com/rapidsai/cudf/pull/7749)) [@jlowe](https://github.com/jlowe) +- Remove unused JVM array creation ([#7748](https://github.com/rapidsai/cudf/pull/7748)) [@jlowe](https://github.com/jlowe) +- Added JNI support for new is_integer ([#7739](https://github.com/rapidsai/cudf/pull/7739)) [@revans2](https://github.com/revans2) +- Create and promote library aliases in libcudf installations ([#7734](https://github.com/rapidsai/cudf/pull/7734)) [@trxcllnt](https://github.com/trxcllnt) +- Support groupby operations for decimal dtypes ([#7731](https://github.com/rapidsai/cudf/pull/7731)) [@vyasr](https://github.com/vyasr) +- Memory map the input file only when GDS compatibility mode is not used ([#7717](https://github.com/rapidsai/cudf/pull/7717)) [@vuule](https://github.com/vuule) +- Replace device_vector with device_uvector in null_mask ([#7715](https://github.com/rapidsai/cudf/pull/7715)) [@harrism](https://github.com/harrism) +- Struct hashing support for SerialMurmur3 and SparkMurmur3 ([#7714](https://github.com/rapidsai/cudf/pull/7714)) [@jlowe](https://github.com/jlowe) +- Add gbenchmark for nvtext replace-tokens function ([#7708](https://github.com/rapidsai/cudf/pull/7708)) [@davidwendt](https://github.com/davidwendt) +- Use stream in groupby calls ([#7705](https://github.com/rapidsai/cudf/pull/7705)) [@karthikeyann](https://github.com/karthikeyann) +- Update codeowners file ([#7701](https://github.com/rapidsai/cudf/pull/7701)) [@ajschmidt8](https://github.com/ajschmidt8) +- Cleanup groupby to use host_span, device_span, device_uvector ([#7698](https://github.com/rapidsai/cudf/pull/7698)) [@karthikeyann](https://github.com/karthikeyann) +- Add gbenchmark for nvtext ngrams 
functions ([#7693](https://github.com/rapidsai/cudf/pull/7693)) [@davidwendt](https://github.com/davidwendt) +- Misc Python/Cython optimizations ([#7686](https://github.com/rapidsai/cudf/pull/7686)) [@shwina](https://github.com/shwina) +- Add gbenchmark for nvtext tokenize functions ([#7684](https://github.com/rapidsai/cudf/pull/7684)) [@davidwendt](https://github.com/davidwendt) +- Add column_device_view to orc writer ([#7676](https://github.com/rapidsai/cudf/pull/7676)) [@kaatish](https://github.com/kaatish) +- cudf_kafka now uses cuDF CMake export targets (CPM) ([#7674](https://github.com/rapidsai/cudf/pull/7674)) [@robertmaynard](https://github.com/robertmaynard) +- Add gbenchmark for nvtext normalize functions ([#7668](https://github.com/rapidsai/cudf/pull/7668)) [@davidwendt](https://github.com/davidwendt) +- Resolve unnecessary import of thrust/optional.hpp in types.hpp ([#7667](https://github.com/rapidsai/cudf/pull/7667)) [@vyasr](https://github.com/vyasr) +- Feature/optimize accessor copy ([#7660](https://github.com/rapidsai/cudf/pull/7660)) [@vyasr](https://github.com/vyasr) +- Fix `find_package(cudf)` ([#7658](https://github.com/rapidsai/cudf/pull/7658)) [@trxcllnt](https://github.com/trxcllnt) +- Work-around for gcc7 compile error on Centos7 ([#7652](https://github.com/rapidsai/cudf/pull/7652)) [@davidwendt](https://github.com/davidwendt) +- Add in JNI support for count_elements ([#7651](https://github.com/rapidsai/cudf/pull/7651)) [@revans2](https://github.com/revans2) +- Fix issues with building cudf in a non-conda environment ([#7647](https://github.com/rapidsai/cudf/pull/7647)) [@galipremsagar](https://github.com/galipremsagar) +- Refactor ConfigureCUDA to not conditionally insert compiler flags ([#7643](https://github.com/rapidsai/cudf/pull/7643)) 
[@robertmaynard](https://github.com/robertmaynard) +- Add gbenchmark for converting strings to/from timestamps ([#7641](https://github.com/rapidsai/cudf/pull/7641)) [@davidwendt](https://github.com/davidwendt) +- Handle constructing a `cudf.Scalar` from a `cudf.Scalar` ([#7639](https://github.com/rapidsai/cudf/pull/7639)) [@shwina](https://github.com/shwina) +- Add in JNI support for table partition ([#7637](https://github.com/rapidsai/cudf/pull/7637)) [@revans2](https://github.com/revans2) +- Add explicit fixed_point merge test ([#7635](https://github.com/rapidsai/cudf/pull/7635)) [@codereport](https://github.com/codereport) +- Add JNI support for IDENTITY hash partitioning ([#7626](https://github.com/rapidsai/cudf/pull/7626)) [@revans2](https://github.com/revans2) +- Java support on explode_outer ([#7625](https://github.com/rapidsai/cudf/pull/7625)) [@sperlingxx](https://github.com/sperlingxx) +- Java support of casting string from/to decimal ([#7623](https://github.com/rapidsai/cudf/pull/7623)) [@sperlingxx](https://github.com/sperlingxx) +- Convert cudf::concatenate APIs to use spans and device_uvector ([#7621](https://github.com/rapidsai/cudf/pull/7621)) [@harrism](https://github.com/harrism) +- Add gbenchmark for cudf::strings::translate function ([#7617](https://github.com/rapidsai/cudf/pull/7617)) [@davidwendt](https://github.com/davidwendt) +- Use file(COPY ) over file(INSTALL ) so cmake output is reduced ([#7616](https://github.com/rapidsai/cudf/pull/7616)) [@robertmaynard](https://github.com/robertmaynard) +- Use rmm::device_uvector in place of rmm::device_vector for ORC reader/writer and cudf::io::column_buffer ([#7614](https://github.com/rapidsai/cudf/pull/7614)) [@vuule](https://github.com/vuule) +- Refactor Java host-side buffer concatenation to expose separate steps 
([#7610](https://github.com/rapidsai/cudf/pull/7610)) [@jlowe](https://github.com/jlowe) +- Add gbenchmarks for string substrings functions ([#7603](https://github.com/rapidsai/cudf/pull/7603)) [@davidwendt](https://github.com/davidwendt) +- Refactor string conversion check ([#7599](https://github.com/rapidsai/cudf/pull/7599)) [@ttnghia](https://github.com/ttnghia) +- JNI: Pass names of children struct columns to native Arrow IPC writer ([#7598](https://github.com/rapidsai/cudf/pull/7598)) [@firestarman](https://github.com/firestarman) +- Revert "ENH Fix stale GHA and prevent duplicates " ([#7595](https://github.com/rapidsai/cudf/pull/7595)) [@mike-wendt](https://github.com/mike-wendt) +- ENH Fix stale GHA and prevent duplicates ([#7594](https://github.com/rapidsai/cudf/pull/7594)) [@mike-wendt](https://github.com/mike-wendt) +- Fix auto-detecting GPU architectures ([#7593](https://github.com/rapidsai/cudf/pull/7593)) [@trxcllnt](https://github.com/trxcllnt) +- Reduce cudf library size ([#7583](https://github.com/rapidsai/cudf/pull/7583)) [@robertmaynard](https://github.com/robertmaynard) +- Optimize cudf::make_strings_column for long strings ([#7576](https://github.com/rapidsai/cudf/pull/7576)) [@davidwendt](https://github.com/davidwendt) +- Always build and export the cudf::cudftestutil target ([#7574](https://github.com/rapidsai/cudf/pull/7574)) [@trxcllnt](https://github.com/trxcllnt) +- Eliminate literal parameters to uvector::set_element_async and device_scalar::set_value ([#7563](https://github.com/rapidsai/cudf/pull/7563)) [@harrism](https://github.com/harrism) +- Add gbenchmark for strings::concatenate ([#7560](https://github.com/rapidsai/cudf/pull/7560)) [@davidwendt](https://github.com/davidwendt) +- Update Changelog Link 
([#7550](https://github.com/rapidsai/cudf/pull/7550)) [@ajschmidt8](https://github.com/ajschmidt8) +- Add gbenchmarks for strings replace regex functions ([#7541](https://github.com/rapidsai/cudf/pull/7541)) [@davidwendt](https://github.com/davidwendt) +- Add `__repr__` for Column and ColumnAccessor ([#7531](https://github.com/rapidsai/cudf/pull/7531)) [@shwina](https://github.com/shwina) +- Support Decimal DIV changes in cudf ([#7527](https://github.com/rapidsai/cudf/pull/7527)) [@razajafri](https://github.com/razajafri) +- Remove unneeded step parameter from strings::detail::copy_slice ([#7525](https://github.com/rapidsai/cudf/pull/7525)) [@davidwendt](https://github.com/davidwendt) +- Use device_uvector, device_span in sort groupby ([#7523](https://github.com/rapidsai/cudf/pull/7523)) [@karthikeyann](https://github.com/karthikeyann) +- Add gbenchmarks for strings extract function ([#7522](https://github.com/rapidsai/cudf/pull/7522)) [@davidwendt](https://github.com/davidwendt) +- Rename ARROW_STATIC_LIB because it conflicts with one in FindArrow.cmake ([#7518](https://github.com/rapidsai/cudf/pull/7518)) [@trxcllnt](https://github.com/trxcllnt) +- Reduce compile time/size for scan.cu ([#7516](https://github.com/rapidsai/cudf/pull/7516)) [@davidwendt](https://github.com/davidwendt) +- Change device_vector to device_uvector in nvtext source files ([#7512](https://github.com/rapidsai/cudf/pull/7512)) [@davidwendt](https://github.com/davidwendt) +- Removed unneeded includes from traits.hpp ([#7509](https://github.com/rapidsai/cudf/pull/7509)) [@davidwendt](https://github.com/davidwendt) +- FIX Remove random build directory generation for ccache ([#7508](https://github.com/rapidsai/cudf/pull/7508)) [@dillon-cullinan](https://github.com/dillon-cullinan) +- xfail failing pytest in 
pandas 1.2.3 ([#7507](https://github.com/rapidsai/cudf/pull/7507)) [@galipremsagar](https://github.com/galipremsagar) +- JNI bit cast ([#7493](https://github.com/rapidsai/cudf/pull/7493)) [@revans2](https://github.com/revans2) +- Combine rolling window function tests ([#7480](https://github.com/rapidsai/cudf/pull/7480)) [@mythrocks](https://github.com/mythrocks) +- Prepare Changelog for Automation ([#7477](https://github.com/rapidsai/cudf/pull/7477)) [@ajschmidt8](https://github.com/ajschmidt8) +- Java support for explode position ([#7471](https://github.com/rapidsai/cudf/pull/7471)) [@sperlingxx](https://github.com/sperlingxx) +- Update 0.18 changelog entry ([#7463](https://github.com/rapidsai/cudf/pull/7463)) [@ajschmidt8](https://github.com/ajschmidt8) +- JNI: Support skipping nulls for collect aggregation ([#7457](https://github.com/rapidsai/cudf/pull/7457)) [@firestarman](https://github.com/firestarman) +- Join APIs that return gathermaps ([#7454](https://github.com/rapidsai/cudf/pull/7454)) [@shwina](https://github.com/shwina) +- Remove dependence on managed memory for multimap test ([#7451](https://github.com/rapidsai/cudf/pull/7451)) [@jrhemstad](https://github.com/jrhemstad) +- Use cuFile for Parquet IO when available ([#7444](https://github.com/rapidsai/cudf/pull/7444)) [@vuule](https://github.com/vuule) +- Statistics cleanup ([#7439](https://github.com/rapidsai/cudf/pull/7439)) [@kaatish](https://github.com/kaatish) +- Add gbenchmarks for strings filter functions ([#7438](https://github.com/rapidsai/cudf/pull/7438)) [@davidwendt](https://github.com/davidwendt) +- `fixed_point` + `cudf::binary_operation` API Changes ([#7435](https://github.com/rapidsai/cudf/pull/7435)) [@codereport](https://github.com/codereport) +- Improve string gather performance 
([#7433](https://github.com/rapidsai/cudf/pull/7433)) [@jlowe](https://github.com/jlowe) +- Don't use user resource for a temporary allocation in sort_by_key ([#7431](https://github.com/rapidsai/cudf/pull/7431)) [@magnatelee](https://github.com/magnatelee) +- Detail APIs for datetime functions ([#7430](https://github.com/rapidsai/cudf/pull/7430)) [@magnatelee](https://github.com/magnatelee) +- Replace thrust::max_element with thrust::reduce in strings findall_re ([#7428](https://github.com/rapidsai/cudf/pull/7428)) [@davidwendt](https://github.com/davidwendt) +- Add gbenchmark for strings split/split_record functions ([#7427](https://github.com/rapidsai/cudf/pull/7427)) [@davidwendt](https://github.com/davidwendt) +- Update JNI build to use CMAKE_CUDA_ARCHITECTURES ([#7425](https://github.com/rapidsai/cudf/pull/7425)) [@jlowe](https://github.com/jlowe) +- Change nvtext::load_vocabulary_file to return a unique ptr ([#7424](https://github.com/rapidsai/cudf/pull/7424)) [@davidwendt](https://github.com/davidwendt) +- Simplify type dispatch with `device_storage_dispatch` ([#7419](https://github.com/rapidsai/cudf/pull/7419)) [@codereport](https://github.com/codereport) +- Java support for casting of nested child columns ([#7417](https://github.com/rapidsai/cudf/pull/7417)) [@razajafri](https://github.com/razajafri) +- Improve scalar string replace performance for long strings ([#7415](https://github.com/rapidsai/cudf/pull/7415)) [@jlowe](https://github.com/jlowe) +- Remove unneeded temporary device vector for strings scatter specialization ([#7409](https://github.com/rapidsai/cudf/pull/7409)) [@davidwendt](https://github.com/davidwendt) +- bitmask_or implementation with bitmask refactor ([#7406](https://github.com/rapidsai/cudf/pull/7406)) [@rwlee](https://github.com/rwlee) +- Add 
other cudf::strings::replace functions to current strings replace gbenchmark ([#7403](https://github.com/rapidsai/cudf/pull/7403)) [@davidwendt](https://github.com/davidwendt) +- Clean up included headers in `device_operators.cuh` ([#7401](https://github.com/rapidsai/cudf/pull/7401)) [@codereport](https://github.com/codereport) +- Move nullable index iterator to indexalator factory ([#7399](https://github.com/rapidsai/cudf/pull/7399)) [@davidwendt](https://github.com/davidwendt) +- ENH Pass ccache variables to conda recipe & use Ninja in CI ([#7398](https://github.com/rapidsai/cudf/pull/7398)) [@Ethyling](https://github.com/Ethyling) +- upgrade maven-antrun-plugin to support maven parallel builds ([#7393](https://github.com/rapidsai/cudf/pull/7393)) [@rongou](https://github.com/rongou) +- Add gbenchmark for strings find/contains functions ([#7392](https://github.com/rapidsai/cudf/pull/7392)) [@davidwendt](https://github.com/davidwendt) +- Use CMAKE_CUDA_ARCHITECTURES ([#7391](https://github.com/rapidsai/cudf/pull/7391)) [@robertmaynard](https://github.com/robertmaynard) +- Refactor libcudf strings::replace to use make_strings_children utility ([#7384](https://github.com/rapidsai/cudf/pull/7384)) [@davidwendt](https://github.com/davidwendt) +- Added in JNI support for out of core sort algorithm ([#7381](https://github.com/rapidsai/cudf/pull/7381)) [@revans2](https://github.com/revans2) +- Upgrade pandas to 1.2 ([#7375](https://github.com/rapidsai/cudf/pull/7375)) [@galipremsagar](https://github.com/galipremsagar) +- Rename `logical_cast` to `bit_cast` and allow additional conversions ([#7373](https://github.com/rapidsai/cudf/pull/7373)) [@ttnghia](https://github.com/ttnghia) +- jitify 2 support ([#7372](https://github.com/rapidsai/cudf/pull/7372)) 
[@cwharris](https://github.com/cwharris) +- compile_udf: Cache PTX for similar functions ([#7371](https://github.com/rapidsai/cudf/pull/7371)) [@gmarkall](https://github.com/gmarkall) +- Add string scalar replace benchmark ([#7369](https://github.com/rapidsai/cudf/pull/7369)) [@jlowe](https://github.com/jlowe) +- Add gbenchmark for strings contains_re/count_re functions ([#7366](https://github.com/rapidsai/cudf/pull/7366)) [@davidwendt](https://github.com/davidwendt) +- Update orc reader and writer fuzz tests ([#7357](https://github.com/rapidsai/cudf/pull/7357)) [@galipremsagar](https://github.com/galipremsagar) +- Improve url_decode performance for long strings ([#7353](https://github.com/rapidsai/cudf/pull/7353)) [@jlowe](https://github.com/jlowe) +- `cudf::ast` Small Refactorings ([#7352](https://github.com/rapidsai/cudf/pull/7352)) [@codereport](https://github.com/codereport) +- Remove std::cout and print in the scatter test function EmptyListsOfNullableStrings. ([#7342](https://github.com/rapidsai/cudf/pull/7342)) [@ttnghia](https://github.com/ttnghia) +- Use `cudf::detail::make_counting_transform_iterator` ([#7338](https://github.com/rapidsai/cudf/pull/7338)) [@codereport](https://github.com/codereport) +- Change block size parameter from a global to a template param. 
([#7333](https://github.com/rapidsai/cudf/pull/7333)) [@nvdbaranec](https://github.com/nvdbaranec) +- Partial clean up of ORC writer ([#7324](https://github.com/rapidsai/cudf/pull/7324)) [@vuule](https://github.com/vuule) +- Add gbenchmark for cudf::strings::to_lower ([#7316](https://github.com/rapidsai/cudf/pull/7316)) [@davidwendt](https://github.com/davidwendt) +- Update Java bindings version to 0.19-SNAPSHOT ([#7307](https://github.com/rapidsai/cudf/pull/7307)) [@pxLi](https://github.com/pxLi) +- Move `cudf::test::make_counting_transform_iterator` to `cudf/detail/iterator.cuh` ([#7306](https://github.com/rapidsai/cudf/pull/7306)) [@codereport](https://github.com/codereport) +- Use string literals in `fixed_point` `release_assert`s ([#7303](https://github.com/rapidsai/cudf/pull/7303)) [@codereport](https://github.com/codereport) +- Fix merge conflicts for #7295 ([#7297](https://github.com/rapidsai/cudf/pull/7297)) [@ajschmidt8](https://github.com/ajschmidt8) +- Add UTF-8 chars to create_random_column<string_view> benchmark utility ([#7292](https://github.com/rapidsai/cudf/pull/7292)) [@davidwendt](https://github.com/davidwendt) +- Abstracting block reduce and block scan from cuIO kernels with `cub` apis ([#7278](https://github.com/rapidsai/cudf/pull/7278)) [@rgsl888prabhu](https://github.com/rgsl888prabhu) +- Build.sh use cmake --build to drive build system invocation ([#7270](https://github.com/rapidsai/cudf/pull/7270)) [@robertmaynard](https://github.com/robertmaynard) +- Refactor dictionary support for reductions any/all ([#7242](https://github.com/rapidsai/cudf/pull/7242)) [@davidwendt](https://github.com/davidwendt) +- Replace stream.value() with stream for stream_view args ([#7236](https://github.com/rapidsai/cudf/pull/7236)) 
[@karthikeyann](https://github.com/karthikeyann) +- Interval index and interval_range ([#7182](https://github.com/rapidsai/cudf/pull/7182)) [@marlenezw](https://github.com/marlenezw) +- avro reader integration tests ([#7156](https://github.com/rapidsai/cudf/pull/7156)) [@cwharris](https://github.com/cwharris) +- Rework libcudf CMakeLists.txt to export targets for CPM ([#7107](https://github.com/rapidsai/cudf/pull/7107)) [@trxcllnt](https://github.com/trxcllnt) +- Adding Interval Dtype ([#6984](https://github.com/rapidsai/cudf/pull/6984)) [@marlenezw](https://github.com/marlenezw) +- Cleaning up `for` loops with `make_(counting_)transform_iterator` ([#6546](https://github.com/rapidsai/cudf/pull/6546)) [@codereport](https://github.com/codereport) + +# cuDF 0.18.0 (24 Feb 2021) + +## Breaking Changes 🚨 + +- Default `groupby` to `sort=False` (#7180) @isVoid +- Add libcudf API for parsing of ORC statistics (#7136) @vuule +- Replace ORC writer api with class (#7099) @rgsl888prabhu +- Pack/unpack functionality to convert tables to and from a serialized format. 
(#7096) @nvdbaranec +- Replace parquet writer api with class (#7058) @rgsl888prabhu +- Add days check to cudf::is_timestamp using cuda::std::chrono classes (#7028) @davidwendt +- Fix default parameter values of `write_csv` and `write_parquet` (#6967) @vuule +- Align `Series.groupby` API to match Pandas (#6964) @kkraus14 +- Share `factorize` implementation with Index and cudf module (#6885) @brandon-b-miller + +## Bug Fixes 🐛 + +- Remove incorrect std::move call on return variable (#7319) @davidwendt +- Fix failing CI ORC test (#7313) @vuule +- Disallow constructing frames from a ColumnAccessor (#7298) @shwina +- fix java cuFile tests (#7296) @rongou +- Fix style issues related to NumPy (#7279) @shwina +- Fix bug when `iloc` slice terminates at before-the-zero position (#7277) @isVoid +- Fix copying dtype metadata after calling libcudf functions (#7271) @shwina +- Move lists utility function definition out of header (#7266) @mythrocks +- Throw if bool column would cause incorrect result when writing to ORC (#7261) @vuule +- Use `uvector` in `replace_nulls`; Fix `sort_helper::grouped_value` doc (#7256) @isVoid +- Remove floating point types from cudf::sort fast-path (#7250) @davidwendt +- Disallow picking output columns from nested columns. 
(#7248) @devavret +- Fix `loc` for Series with a MultiIndex (#7243) @shwina +- Fix Arrow column test leaks (#7241) @tgravescs +- Fix test column vector leak (#7238) @kuhushukla +- Fix some bugs in java scalar support for decimal (#7237) @revans2 +- Improve `assert_eq` handling of scalar (#7220) @isVoid +- Fix missing null_count() comparison in test framework and related failures (#7219) @nvdbaranec +- Remove floating point types from radix sort fast-path (#7215) @davidwendt +- Fixing parquet benchmarks (#7214) @rgsl888prabhu +- Handle various parameter combinations in `replace` API (#7207) @galipremsagar +- Export mock aws credentials for s3 tests (#7176) @ayushdg +- Add `MultiIndex.rename` API (#7172) @isVoid +- Fix importing list & struct types in `from_arrow` (#7162) @galipremsagar +- Fixing parquet precision writing failing if scale is equal to precision (#7146) @hyperbolic2346 +- Update s3 tests to use moto_server (#7144) @ayushdg +- Fix JIT cache multi-process test flakiness in slow drives (#7142) @devavret +- Fix compilation errors in libcudf (#7138) @galipremsagar +- Fix compilation failure caused by `-Wall` addition. 
(#7134) @codereport +- Add informative error message for `sep` in CSV writer (#7095) @galipremsagar +- Add JIT cache per compute capability (#7090) @devavret +- Implement `__hash__` method for ListDtype (#7081) @galipremsagar +- Only upload packages that were built (#7077) @raydouglass +- Fix comparisons between Series and cudf.NA (#7072) @brandon-b-miller +- Handle `nan` values correctly in `Series.one_hot_encoding` (#7059) @galipremsagar +- Add `unstack()` support for non-multiindexed dataframes (#7054) @isVoid +- Fix `read_orc` for decimal type (#7034) @rgsl888prabhu +- Fix backward compatibility of loading a 0.16 pkl file (#7033) @galipremsagar +- Decimal casts in JNI became a NOOP (#7032) @revans2 +- Restore usual instance/subclass checking to cudf.DateOffset (#7029) @shwina +- Add days check to cudf::is_timestamp using cuda::std::chrono classes (#7028) @davidwendt +- Fix to_csv delimiter handling of timestamp format (#7023) @davidwendt +- Pin librdkafka to gcc 7 compatible version (#7021) @raydouglass +- Fix `fillna` & `dropna` to also consider `np.nan` as a missing value (#7019) @galipremsagar +- Fix round operator's HALF_EVEN computation for negative integers (#7014) @nartal1 +- Skip Thrust sort patch if already applied (#7009) @harrism +- Fix `cudf::hash_partition` for `decimal32` and `decimal64` (#7006) @codereport +- Fix Thrust unroll patch command (#7002) @harrism +- Fix loc behaviour when key of incorrect type is used (#6993) @shwina +- Fix int to datetime conversion in csv_read (#6991) @kaatish +- fix excluding cufile tests by default (#6988) @rongou +- Fix java cufile tests when cufile is not installed (#6987) @revans2 +- Make `cudf::round` for `fixed_point` when `scale = -decimal_places` a no-op (#6975) @codereport +- Fix type comparison for java (#6970) @revans2 +- Fix default parameter values of `write_csv` and `write_parquet` (#6967) @vuule +- Align `Series.groupby` API to match Pandas (#6964) @kkraus14 +- Fix timestamp parsing in ORC reader for 
timezones without transitions (#6959) @vuule +- Fix typo in numerical.py (#6957) @rgsl888prabhu +- `fixed_point_value` double-shifts in `fixed_point` construction (#6950) @codereport +- fix libcu++ include path for jni (#6948) @rongou +- Fix groupby agg/apply behaviour when no key columns are provided (#6945) @shwina +- Avoid inserting null elements into join hash table when nulls are treated as unequal (#6943) @hyperbolic2346 +- Fix cudf::merge gtest for dictionary columns (#6942) @davidwendt +- Pass numeric scalars of the same dtype through numeric binops (#6938) @brandon-b-miller +- Fix N/A detection for empty fields in CSV reader (#6922) @vuule +- Fix rmm_mode=managed parameter for gtests (#6912) @davidwendt +- Fix nullmask offset handling in parquet and orc writer (#6889) @kaatish +- Correct the sampling range when sampling with replacement (#6884) @ChrisJar +- Handle nested string columns with no children in contiguous_split. (#6864) @nvdbaranec +- Fix `columns` & `index` handling in dataframe constructor (#6838) @galipremsagar + +## Documentation 📖 + +- Update readme (#7318) @shwina +- Fix typo in cudf.core.column.string.extract docs (#7253) @adelevie +- Update doxyfile project number (#7161) @davidwendt +- Update 10 minutes to cuDF and CuPy with new APIs (#7158) @ChrisJar +- Cross link RMM & libcudf Doxygen docs (#7149) @ajschmidt8 +- Add documentation for support dtypes in all IO formats (#7139) @galipremsagar +- Add groupby docs (#7100) @shwina +- Update cudf python docstrings with new null representation (`<NA>`) (#7050) @galipremsagar +- Make Doxygen comments formatting consistent (#7041) @vuule +- Add docs for working with missing data (#7010) @galipremsagar +- Remove warning in from_dlpack and to_dlpack methods (#7001) @miguelusque +- libcudf Developer Guide (#6977) @harrism +- Add JNI wrapper for the cuFile API (GDS) (#6940) @rongou + +## New Features 🚀 + +- Support `numeric_only` field for `rank()` (#7213) @isVoid +- Add support for 
`cudf::binary_operation` `TRUE_DIV` for `decimal32` and `decimal64` (#7198) @codereport +- Implement COLLECT rolling window aggregation (#7189) @mythrocks +- Add support for array-like inputs in `cudf.get_dummies` (#7181) @galipremsagar +- Default `groupby` to `sort=False` (#7180) @isVoid +- Add libcudf lists column count_elements API (#7173) @davidwendt +- Implement `cudf::group_by` (sort) for `decimal32` and `decimal64` (#7169) @codereport +- Add encoding and compression argument to CSV writer (#7168) @VibhuJawa +- `cudf::rolling_window` `SUM` support for `decimal32` and `decimal64` (#7147) @codereport +- Adding support for explode to cuDF (#7140) @hyperbolic2346 +- Add libcudf API for parsing of ORC statistics (#7136) @vuule +- update GDS/cuFile location for 0.9 release (#7131) @rongou +- Add Segmented sort (#7122) @karthikeyann +- Add `cudf::binary_operation` `NULL_MIN`, `NULL_MAX` & `NULL_EQUALS` for `decimal32` and `decimal64` (#7119) @codereport +- Add `scale` and `value` methods to `fixed_point` (#7109) @codereport +- Replace ORC writer api with class (#7099) @rgsl888prabhu +- Pack/unpack functionality to convert tables to and from a serialized format. 
(#7096) @nvdbaranec +- Improve `digitize` API (#7071) @isVoid +- Add List types support in data generator (#7064) @galipremsagar +- `cudf::scan` support for `decimal32` and `decimal64` (#7063) @codereport +- `cudf::rolling` `ROW_NUMBER` support for `decimal32` and `decimal64` (#7061) @codereport +- Replace parquet writer api with class (#7058) @rgsl888prabhu +- Support contains() on lists of primitives (#7039) @mythrocks +- Implement `cudf::rolling` for `decimal32` and `decimal64` (#7037) @codereport +- Add `ffill` and `bfill` to string columns (#7036) @isVoid +- Enable round in cudf for DataFrame and Series (#7022) @ChrisJar +- Extend `replace_nulls_policy` to `string` and `dictionary` type (#7004) @isVoid +- Add segmented_gather(list_column, gather_list) (#7003) @karthikeyann +- Add `method` field to `fillna` for fixed width columns (#6998) @isVoid +- Manual merge of branch 0.17 into branch 0.18 (#6995) @shwina +- Implement `cudf::reduce` for `decimal32` and `decimal64` (part 2) (#6980) @codereport +- Add Ufunc alias look up for appropriate numpy ufunc dispatching (#6973) @VibhuJawa +- Add pytest-xdist to dev environment.yml (#6958) @galipremsagar +- Add `Index.set_names` api (#6929) @galipremsagar +- Add `replace_null` API with `replace_policy` parameter, `fixed_width` column support (#6907) @isVoid +- Share `factorize` implementation with Index and cudf module (#6885) @brandon-b-miller +- Implement update() function (#6883) @skirui-source +- Add groupby idxmin, idxmax aggregation (#6856) @karthikeyann +- Implement `cudf::reduce` for `decimal32` and `decimal64` (part 1) (#6814) @codereport +- Implement cudf.DateOffset for months (#6775) @brandon-b-miller +- Add Python DecimalColumn (#6715) @shwina +- Add dictionary support to libcudf groupby functions (#6585) @davidwendt + +## Improvements 🛠️ + +- Update stale GHA with exemptions & new labels (#7395) @mike-wendt +- Add GHA to mark issues/prs as stale/rotten (#7388) @Ethyling +- Unpin from numpy < 1.20 (#7335) 
@shwina +- Prepare Changelog for Automation (#7309) @galipremsagar +- Prepare Changelog for Automation (#7272) @ajschmidt8 +- Add JNI support for converting Arrow buffers to CUDF ColumnVectors (#7222) @tgravescs +- Add coverage for `skiprows` and `num_rows` in parquet reader fuzz testing (#7216) @galipremsagar +- Define and implement more behavior for merging on categorical variables (#7209) @brandon-b-miller +- Add CudfSeriesGroupBy to optimize dask_cudf groupby-mean (#7194) @rjzamora +- Add dictionary column support to rolling_window (#7186) @davidwendt +- Modify the semantics of `end` pointers in cuIO to match standard library (#7179) @vuule +- Adding unit tests for `fixed_point` with extremely large `scale`s (#7178) @codereport +- Fast path single column sort (#7167) @davidwendt +- Fix -Werror=sign-compare errors in device code (#7164) @trxcllnt +- Refactor cudf::string_view host and device code (#7159) @davidwendt +- Enable logic for GPU auto-detection in cudfjni (#7155) @gerashegalov +- Java bindings for Fixed-point type support for Parquet (#7153) @razajafri +- Add Java interface for the new API 'explode' (#7151) @firestarman +- Replace offsets with iterators in cuIO utilities and CSV parser (#7150) @vuule +- Add gbenchmarks for reduction aggregations any() and all() (#7129) @davidwendt +- Update JNI for contiguous_split packed results (#7127) @jlowe +- Add JNI and Java bindings for list_contains (#7125) @kuhushukla +- Add Java unit tests for window aggregate 'collect' (#7121) @firestarman +- verify window operations on decimal with java tests (#7120) @sperlingxx +- Adds in JNI support for creating an list column from existing columns (#7112) @revans2 +- Build libcudf with -Wall (#7105) @trxcllnt +- Add column_device_view pointers to EncColumnDesc (#7097) @kaatish +- Add `pyorc` to dev environment (#7085) @galipremsagar +- JNI support for creating struct column from existing columns and fixed bug in struct with no children (#7084) @revans2 +- Fastpath single 
strings column in cudf::sort (#7075) @davidwendt +- Upgrade nvcomp to 1.2.1 (#7069) @rongou +- Refactor ORC `ProtobufReader` to make it more extendable (#7055) @vuule +- Add Java tests for decimal casts (#7051) @sperlingxx +- Auto-label PRs based on their content (#7044) @jolorunyomi +- Create sort gbenchmark for strings column (#7040) @davidwendt +- Refactor io memory fetches to use hostdevice_vector methods (#7035) @ChrisJar +- Spark Murmur3 hash functionality (#7024) @rwlee +- Fix libcudf strings logic where size_type is used to access INT32 column data (#7020) @davidwendt +- Adding decimal writing support to parquet (#7017) @hyperbolic2346 +- Add compression="infer" as default for dask_cudf.read_csv (#7013) @rjzamora +- Correct ORC docstring; other minor cuIO improvements (#7012) @vuule +- Reduce number of hostdevice_vector allocations in parquet reader (#7005) @devavret +- Check output size overflow on strings gather (#6997) @davidwendt +- Improve representation of `MultiIndex` (#6992) @galipremsagar +- Disable some pragma unroll statements in thrust sort.h (#6982) @davidwendt +- Minor `cudf::round` internal refactoring (#6976) @codereport +- Add Java bindings for URL conversion (#6972) @jlowe +- Enable strict_decimal_types in parquet reading (#6969) @sperlingxx +- Add in basic support to JNI for logical_cast (#6954) @revans2 +- Remove duplicate file array_tests.cpp (#6953) @karthikeyann +- Add null mask `fixed_point_column_wrapper` constructors (#6951) @codereport +- Update Java bindings version to 0.18-SNAPSHOT (#6949) @jlowe +- Use simplified `rmm::exec_policy` (#6939) @harrism +- Add null count test for apply_boolean_mask (#6903) @harrism +- Implement DataFrame.quantile for datetime and timedelta data types (#6902) @ChrisJar +- Remove **kwargs from string/categorical methods (#6750) @shwina +- Refactor rolling.cu to reduce compile time (#6512) @mythrocks +- Add static type checking via Mypy (#6381) @shwina +- Update to official libcu++ on Github (#6275) 
@trxcllnt + +# cuDF 0.17.0 (10 Dec 2020) + +## New Features + +- PR #6116 Add `filters` parameter to Python `read_orc` function for filtering +- PR #6848 Added Java bindings for writing parquet files with INT96 timestamps +- PR #6460 Add is_timestamp format check API +- PR #6647 Implement `cudf::round` floating point and integer types (`HALF_EVEN`) +- PR #6562 Implement `cudf::round` floating point and integer types (`HALF_UP`) +- PR #6685 Implement `cudf::round` `decimal32` & `decimal64` (`HALF_UP` and `HALF_EVEN`) +- PR #6711 Implement `cudf::cast` for `decimal32/64` to/from integer and floating point +- PR #6777 Implement `cudf::unary_operation` for `decimal32` & `decimal64` +- PR #6729 Implement `cudf::cast` for `decimal32/64` to/from different `type_id` +- PR #6792 Implement `cudf::clamp` for `decimal32` and `decimal64` +- PR #6845 Implement `cudf::copy_if_else` for `decimal32` and `decimal64` +- PR #6805 Implement `cudf::detail::copy_if` for `decimal32` and `decimal64` +- PR #6843 Implement `cudf::copy_range` for `decimal32` and `decimal64` +- PR #6528 Enable `fixed_point` binary operations +- PR #6460 Add is_timestamp format check API +- PR #6568 Add function to create hashed vocabulary file from raw vocabulary +- PR #6142 Add Python `read_orc_statistics` function for reading file- and stripe-level statistics +- PR #6581 Add JNI API to check if PTDS is enabled +- PR #6615 Add support for list and struct types to contiguous_split +- PR #6625 Add INT96 timestamp writing option to parquet writer +- PR #6592 Add `cudf.to_numeric` function +- PR #6598 Add strings::contains API with target column parameter +- PR #6638 Add support for `pipe` API +- PR #6737 New build process (Project Flash) +- PR #6652 Add support for struct columns in concatenate +- PR #6675 Add DecimalDtype to cuDF +- PR #6739 Add Java bindings for is_timestamp +- PR #6808 Add support for reading decimal32 and decimal64 from parquet +- PR #6781 Add serial murmur3 hashing +- PR #6811 First class 
support for unbounded window function bounds +- PR #6768 Add support for scatter() on list columns +- PR #6796 Add create_metadata_file in dask_cudf +- PR #6765 Cupy fallback for __array_function__ and __array_ufunc__ for cudf.Series +- PR #6817 Add support for scatter() on lists-of-struct columns +- PR #6805 Implement `cudf::detail::copy_if` for `decimal32` and `decimal64` +- PR #6483 Add `agg` function to aggregate dataframe using one or more operations +- PR #6726 Support selecting different hash functions in hash_partition +- PR #6619 Improve Dockerfile +- PR #6831 Added parquet chunked writing ability for list columns + +## Improvements + +- PR #6430 Add struct type support to `to_arrow` and `from_arrow` +- PR #6384 Add CSV fuzz tests with varying function parameters +- PR #6385 Add JSON fuzz tests with varying function parameters +- PR #6398 Remove function constructor macros in parquet reader +- PR #6432 Add dictionary support to `cudf::upper_bound` and `cudf::lower_bound` +- PR #6461 Replace index type-dispatch call with indexalator in cudf::scatter +- PR #6415 Support `datetime64` in row-wise op +- PR #6457 Replace index type-dispatch call with indexalator in `cudf::gather` +- PR #6413 Replace Python NVTX package with conda-forge source +- PR #6442 Remove deprecated `DataFrame.from_gpu_matrix`, `DataFrame.to_gpu_matrix`, `DataFrame.add_column` APIs and method parameters +- PR #6502 Add dictionary support to `cudf::merge` +- PR #6471 Replace index type-dispatch call with indexalator in cudf::strings::substring +- PR #6485 Add File IO to cuIO benchmarks +- PR #6504 Update Java bindings version to 0.17-SNAPSHOT +- PR #6875 Remove bounds check for `cudf::gather` +- PR #6489 Add `AVRO` fuzz tests with varying function parameters +- PR #6540 Add dictionary support to `cudf::unary_operation` +- PR #6537 Refactor ORC timezone +- PR #6527 Refactor DeviceColumnViewAccess to avoid JNI returning an array +- PR #6690 Explicitly set legacy or per-thread default stream 
in JNI +- PR #6545 Pin cmake policies to cmake 3.17 version +- PR #6556 Add dictionary support to `cudf::inner_join`, `cudf::left_join` and `cudf::full_join` +- PR #6557 Support nullable timestamp columns in time range window functions +- PR #6566 Remove `reinterpret_cast` conversions between pointer types in ORC +- PR #6544 Remove `fixed_point` precise round +- PR #6552 Use `assert_exceptions_equal` to assert exceptions in pytests +- PR #6555 Adapt JNI build to libcudf composition of multiple libraries +- PR #6559 Refactoring cooperative loading with single thread loading. +- PR #6564 Load JNI library dependencies with a thread pool +- PR #6571 Add ORC fuzz tests with varying function parameters +- PR #6578 Add in java column to row conversion +- PR #6573 Create `cudf::detail::byte_cast` for `cudf::byte_cast` +- PR #6597 Use thread-local to track CUDA device in JNI +- PR #6599 Replace `size()==0` with `empty()`, `is_empty()` +- PR #6514 Initial work for decimal type in Java/JNI +- PR #6605 Reduce HtoD copies in `cudf::concatenate` of string columns +- PR #6608 Improve subword tokenizer docs +- PR #6610 Add ability to set scalar values in `cudf.DataFrame` +- PR #6612 Update JNI to new RMM cuda_stream_view API +- PR #6646 Replace `cudaStream_t` with `rmm::cuda_stream_view` (part 1) +- PR #6648 Replace `cudaStream_t` with `rmm::cuda_stream_view` (part 2) +- PR #6744 Replace `cudaStream_t` with `rmm::cuda_stream_view` (part 3) +- PR #6579 Update scatter APIs to use reference wrapper / const scalar +- PR #6614 Add support for conversion to Pandas nullable dtypes and fix related issue in `cudf.to_json` +- PR #6622 Update `to_pandas` api docs +- PR #6623 Add operator overloading to column and clean up error messages +- PR #6644 Cover different CSV reader/writer options in benchmarks +- PR #6741 Cover different ORC and Parquet reader/writer options in benchmarks +- PR #6651 Add cudf::dictionary::make_dictionary_pair_iterator +- PR #6666 Add dictionary support to 
`cudf::reduce` +- PR #6635 Add cudf::test::dictionary_column_wrapper class +- PR #6702 Fix orc read corruption on boolean column +- PR #6676 Add dictionary support to `cudf::quantile` +- PR #6673 Parameterize avro and json benchmark +- PR #6609 Support fixed-point decimal for HostColumnVector +- PR #6703 Add list column statistics writing to Parquet writer +- PR #6662 `RangeIndex` supports `step` parameter +- PR #6712 Remove `reinterpret_cast` conversions between pointer types in Avro +- PR #6705 Add nested type support to Java table serialization +- PR #6709 Raise informative error while converting a pandas dataframe with duplicate columns +- PR #6727 Remove 2nd type-dispatcher call from cudf::reduce +- PR #6749 Update nested JNI builder so we can do it incrementally +- PR #6748 Add Java API to concatenate serialized tables to ContiguousTable +- PR #6764 Add dictionary support to `cudf::minmax` +- PR #6734 Binary operations support for decimal type in cudf Java +- PR #6761 Add Java/JNI bindings for round +- PR #6776 Use `void` return type for kernel wrapper functions instead of returning `cudaError_t` +- PR #6786 Add nested type support to ColumnVector#getDeviceMemorySize +- PR #6780 Move `cudf::cast` tests to separate test file +- PR #6809 size_type overflow checking when concatenating columns +- PR #6789 Rename `unary_op` to `unary_operator` +- PR #6770 Support building decimal columns with Table.TestBuilder +- PR #6815 Add wildcard path support to `read_parquet` +- PR #6800 Push DeviceScalar to cython-only +- PR #6822 Split out `cudf::distinct_count` from `drop_duplicates.cu` +- PR #6813 Enable `expand=False` in `.str.split` and `.str.rsplit` +- PR #6829 Enable workaround to write categorical columns in csv +- PR #6819 Use CMake 3.19 for RMM when building cuDF jar +- PR #6833 Use settings.xml if existing for internal build +- PR #6839 Handle index when dispatching __array_function__ and __array_ufunc__ to cupy for cudf.Series +- PR #6835 Move template param to 
member var to improve compile of hash/groupby.cu +- PR #6837 Avoid gather when copying strings view from start of strings column +- PR #6859 Move align_ptr_for_type() from cuda.cuh to alignment.hpp +- PR #6807 Refactor `std::array` usage in row group index writing in ORC +- PR #6914 Enable groupby `list` aggregation for strings +- PR #6908 Parquet option for strictly decimal reading + +## Bug Fixes + +- PR #6446 Fix integer parsing in CSV and JSON for values outside of int64 range +- PR #6506 Fix DateTime type value truncation while writing to csv +- PR #6509 Disable JITIFY log printing +- PR #6517 Handle index equality in `Series` and `DataFrame` equality checks +- PR #6519 Fix end-of-string marking boundary condition in subword-tokenizer +- PR #6543 Handle `np.nan` values in `isna`/`isnull`/`notna`/`notnull` +- PR #6549 Fix memory_usage calls for list columns +- PR #6575 Fix JNI RMM initialize with no pool allocator limit +- PR #6636 Fix orc boolean column corruption issue +- PR #6582 Add missing `device_scalar` stream parameters +- PR #6596 Fix memory usage calculation +- PR #6595 Fix JNI build, broken by to_arrow() signature change +- PR #6601 Fix timezone offset when reading ORC files +- PR #6603 Use correct stream in hash_join. 
+- PR #6616 Block `fixed_point` `cudf::concatenate` with different scales +- PR #6607 Fix integer overflow in ORC encoder +- PR #6617 Fix JNI native dependency load order +- PR #6621 Fix subword tokenizer metadata for token count equal to max_sequence_length +- PR #6629 Fix JNI CMake +- PR #6633 Fix Java HostColumnVector unnecessarily loading native dependencies +- PR #6643 Fix csv writer handling embedded comma delimiter +- PR #6640 Add error message for unsupported `axis` parameter in DataFrame APIs +- PR #6686 Fix output size for orc read for skip_rows option +- PR #6710 Fix an out-of-bounds indexing error in gather() for lists +- PR #6670 Fix a bug where PTX parser fails to correctly parse a python lambda generated UDF +- PR #6687 Fix issue where index name of caller object is being modified in csv writer +- PR #6735 Fix hash join where row hash values would end up equal to the reserved empty key value +- PR #6696 Fix release_assert. +- PR #6692 Fix handling of empty column name in csv writer +- PR #6693 Fix issue related to `na_values` input in `read_csv` +- PR #6701 Fix issue when `numpy.str_` is given as input to string parameters in io APIs +- PR #6704 Fix leak warnings in JNI unit tests +- PR #6713 Fix missing call to cudaStreamSynchronize in get_value +- PR #6708 Apply `na_rep` to column names in csv writer +- PR #6720 Fix implementation of `dtype` parameter in `cudf.read_csv` +- PR #6721 Add missing serialization methods for ListColumn +- PR #6722 Fix index=False bug in dask_cudf.read_parquet +- PR #6766 Fix race conditions in parquet +- PR #6728 Fix cudf python docs and associated build warnings +- PR #6732 Fix cuDF benchmarks build with static Arrow lib and fix rapids-compose cuDF JNI build +- PR #6742 Fix concat bug in dask_cudf Series/Index creation +- PR #6632 Fix DataFrame initialization from list of dicts +- PR #6767 Fix sort order of parameters in `test_scalar_invalid_implicit_conversion` pytest +- PR #6771 Fix index handling in parquet reader 
and writer +- PR #6787 Update java reduction APIs to reflect C++ changes +- PR #6790 Fix result representation in groupby.apply +- PR #6794 Fix AVRO reader issues with empty input +- PR #6798 Fix `read_avro` docs +- PR #6824 Fix JNI build +- PR #6826 Fix resource management in Java ColumnBuilder +- PR #6830 Fix categorical scalar insertion +- PR #6844 Fix uint32_t undefined errors +- PR #6854 Fix the parameter order of writeParquetBufferBegin +- PR #6855 Fix `.str.replace_with_backrefs` docs examples +- PR #6853 Fix contiguous split of null string columns +- PR #6860 Move codecov upload to build script +- PR #6861 Fix compile error in type_dispatch_benchmark.cu +- PR #6864 Handle contiguous_split corner case for nested string columns with no children +- PR #6869 Avoid dependency resolution failure in latest version of pip by explicitly specifying versions for dask and distributed +- PR #6806 Force install of local conda artifacts +- PR #6887 Fix typo and `0-d` numpy array handling in binary operation +- PR #6898 Fix missing clone overrides on derived aggregations +- PR #6899 Update JNI to new gather boundary check API + + +# cuDF 0.16.0 (21 Oct 2020) + +## New Features + +- PR #5779 Add DataFrame.pivot() and DataFrame.unstack() +- PR #5975 Add strings `filter_characters` API +- PR #5843 Add `filters` parameter to Python `read_parquet` function for filtering row groups +- PR #5974 Use libcudf instead of cupy for `arange` or column creation from a scalar. +- PR #5494 Add Abstract Syntax Tree (AST) evaluator. 
+- PR #6076 Add durations type support for csv writer, reader +- PR #5874 Add `COLLECT` groupby aggregation +- PR #6330 Add ability to query if PTDS is enabled +- PR #6119 Add support for `dayofweek` property in `DateTimeIndex` and `DatetimeProperties` +- PR #6171 Java and Jni support for Struct columns +- PR #6125 Add support for `Series.mode` and `DataFrame.mode` +- PR #6271 Add support to deep-copy struct columns from struct column-view +- PR #6262 Add nth_element series aggregation with null handling +- PR #6316 Add StructColumn to Python API +- PR #6247 Add `minmax` reduction function +- PR #6232 `Json` and `Avro` benchmarking in python +- PR #6139 Add column conversion to big endian byte list. +- PR #6220 Add `list_topics()` to supply list of underlying Kafka connection topics +- PR #6254 Add `cudf::make_dictionary_from_scalar` factory function +- PR #6262 Add nth_element series aggregation with null handling +- PR #6277 Add support for LEAD/LAG window functions for fixed-width types +- PR #6318 Add support for reading Struct and map types from Parquet files +- PR #6315 Native code for string-map lookups, for cudf-java +- PR #6302 Add custom dataframe accessors +- PR #6301 Add JNI bindings to nvcomp +- PR #6328 Java and JNI bindings for getMapValue/map_lookup +- PR #6371 Use ColumnViewAccess on Host side +- PR #6392 add hash based groupby mean aggregation +- PR #6511 Add LogicalType to Parquet reader +- PR #6297 cuDF Python Scalars +- PR #6723 Support creating decimal vectors from scalar + +## Improvements + +- PR #6393 Fix some misspelled words +- PR #6292 Remove individual size tracking from JNI tracking resource adaptor +- PR #5946 Add cython and python support for libcudf `to_arrow` and `from_arrow` +- PR #5919 Remove max_strings and max_chars from nvtext::subword_tokenize +- PR #5956 Add/Update tests for cuStreamz +- PR #5953 Use stable sort when doing a sort groupby +- PR #5973 Link to the Code of Conduct in CONTRIBUTING.md +- PR #6354 Perform shallow 
clone of external projects +- PR #6388 Add documentation for building `libboost_filesystem.a` from source +- PR #5917 Just use `None` for `strides` in `Buffer` +- PR #6015 Upgrade CUB/Thrust to the latest commit +- PR #5971 Add cuStreamz README for basic installation and use +- PR #6024 Expose selecting multiple ORC stripes to read from Python +- PR #6155 Use the CUB submodule in Thrust instead of fetching CUB separately +- PR #6321 Add option in JNI code to use `arena_memory_resource` +- PR #6002 Add Java bindings for md5 +- PR #6311 Switch Thrust to use the NVIDIA/thrust repo +- PR #6060 Add support for all types in `Series.describe` and `DataFrame.describe` +- PR #6051 Add builder API for cuIO `parquet_writer_options` and `parquet_reader_options` +- PR #6067 Added compute codes for aarch64 devices +- PR #5861 `fixed_point` Column Optimization (store `scale` in `data_type`) +- PR #6083 Small cleanup +- PR #6355 Make sure PTDS mode is compatible between libcudf and JNI +- PR #6120 Consolidate functionality in NestedHostColumnVector and HostColumnVector +- PR #6092 Add `name` and `dtype` field to `Index.copy` +- PR #5984 Support gather() on CUDF struct columns +- PR #6103 Small refactor of `print_differences` +- PR #6124 Fix gcc-9 compilation errors on tests +- PR #6122 Add builder API for cuIO `csv_writer_options` and `csv_reader_options` +- PR #6141 Fix typo in custreamz README that was a result of recent changes +- PR #6162 Reduce output parameters in cuio csv and json reader internals +- PR #6146 Added element/validity pair constructors for fixed_width and string wrappers +- PR #6143 General improvements for java arrow IPC. 
+- PR #6138 Add builder API for cuIO `orc_writer_options` and `orc_reader_options` +- PR #6152 Change dictionary indices to uint32 +- PR #6099 Add fluent builder apis to `json_reader_options` and `avro_reader_options` +- PR #6163 Use `Column.full` instead of `scalar_broadcast_to` or `cupy.zeros` +- PR #6176 Fix cmake warnings for GoogleTest, GoogleBenchmark, and Arrow external projects +- PR #6149 Update to Arrow v1.0.1 +- PR #6421 Use `pandas.testing` in `cudf` +- PR #6357 Use `pandas.testing` in `dask-cudf` +- PR #6201 Expose libcudf test utilities headers for external project use. +- PR #6174 Data profile support in random data generator; Expand cuIO benchmarks +- PR #6189 Avoid deprecated pyarrow.compat for parquet +- PR #6184 Add cuda 11 dev environment.yml +- PR #6186 Update JNI to look for cub in new location +- PR #6194 Remove unnecessary memory-resource parameter in `cudf::contains` API +- PR #6195 Update JNI to use parquet options builder +- PR #6190 Avoid reading full csv files for metadata in dask_cudf +- PR #6197 Remove librmm dependency for libcudf +- PR #6205 Add dictionary support to cudf::contains +- PR #6213 Reduce subscript usage in cuio in favor of pointer dereferencing +- PR #6230 Support any unsigned int type for dictionary indices +- PR #6202 Add additional parameter support to `DataFrame.drop` +- PR #6214 Small clean up to use more algorithms +- PR #6209 Remove CXX11 ABI handling from CMake +- PR #6223 Remove CXX11 ABI flag from JNI build +- PR #6114 Implement Fuzz tests for cuIO +- PR #6231 Adds `inplace`, `append`, `verify_integrity` fields to `DataFrame.set_index` +- PR #6215 Add cmake command-line setting for spdlog logging level +- PR #6242 Added cudf::detail::host_span and device_span +- PR #6240 Don't shallow copy index in as_index() unless necessary +- PR #6204 Add dockerfile and script to build cuDF jar +- PR #6248 Optimize groupby-agg in dask_cudf +- PR #6243 Move `equals()` logic to `Frame` +- PR #6245 Split up replace.cu into 
multiple source files +- PR #6218 increase visibility/consistency for cuio reader writer private member variable names. +- PR #6268 Add file tags to libcudf doxygen +- PR #6265 Update JNI to use ORC options builder +- PR #6273 Update JNI to use ORC options builder +- PR #6293 Replace shuffle warp reduce with cub calls +- PR #6287 Make java aggregate API follow C++ API +- PR #6303 Use cudf test dtypes so timedelta tests are deterministic +- PR #6329 Update and clean-up gpuCI scripts +- PR #6299 Add lead and lag to java +- PR #6327 Add dictionary specialization to `cudf::replace_nulls` +- PR #6306 Remove cpw macros from page encode kernels +- PR #6375 Parallelize Cython compilation in addition to Cythonization +- PR #6303 Use cudf test dtypes so timedelta tests are deterministic +- PR #6326 Simplify internal csv/json kernel parameters +- PR #6308 Add dictionary support to cudf::scatter with scalar +- PR #6367 Add JNI bindings for byte casting +- PR #6312 Conda recipe dependency cleanup +- PR #6346 Remove macros from CompactProtocolWriter +- PR #6347 Add dictionary support to cudf::copy_range +- PR #6352 Add specific Topic support for Kafka "list_topics()" metadata requests +- PR #6332 Add support to return csv as string when `path=None` in `to_csv` +- PR #6358 Add Parquet fuzz tests with varying function parameters +- PR #6369 Add dictionary support to `cudf::find_and_replace` +- PR #6373 Add dictionary support to `cudf::clamp` +- PR #6377 Update ci/local/README.md +- PR #6383 Removed `move.pxd`, use standard library `move` +- PR #6400 Removed unused variables +- PR #6409 Allow CuPy 8.x +- PR #6407 Add RMM_LOGGING_LEVEL flag to Java docker build +- PR #6425 Factor out csv parse_options creation to pure function +- PR #6438 Fetch nvcomp v1.1.0 for JNI build +- PR #6459 Add `map` method to series +- PR #6379 Add list hashing functionality to MD5 +- PR #6498 Add helper method to ColumnBuilder with some nits +- PR #6336 Add `join` functionality in cudf concat +- PR #6653 
Replaced SHFL_XOR calls with cub::WarpReduce +- PR #6751 Rework ColumnViewAccess and its usage +- PR #6698 Remove macros from ORC reader and writer +- PR #6782 Replace cuio macros with constexpr and inline functions + +## Bug Fixes + +- PR #6073 Fix issue related to `.loc` in case of `DatetimeIndex` +- PR #6081 Fix issue where fsspec thinks it has a protocol string +- PR #6100 Fix issue in `Series.factorize` to correctly pick `na_sentinel` value +- PR #6106 Fix datetime limit in csv due to 32-bit arithmetic +- PR #6113 Fix to_timestamp to initialize default year to 1970 +- PR #6110 Handle `format` for other input types in `to_datetime` +- PR #6118 Fix Java build for ORC read args change and update package version +- PR #6121 Replace calls to get_default_resource with get_current_device_resource +- PR #6128 Add support for numpy RandomState handling in `sample` +- PR #6134 Fix CUDA C/C++ debug builds +- PR #6137 Fix issue where `np.nan` is being return instead of `NAT` for datetime/duration types +- PR #6298 Fix gcc-9 compilation error in dictionary/remove_keys.cu +- PR #6172 Fix slice issue with empty column +- PR #6342 Fix array out-of-bound errors in Orc writer +- PR #6154 Warnings on row-wise op only when non-numeric columns are found. 
+- PR #6150 Fix issue related to inferring `datetime64` format with UTC timezone in string data +- PR #6179 `make_elements` copies to `iterator` without adjusting `size` +- PR #6387 Remove extra `std::move` call in java/src/main/native/src/map_lookup.cu +- PR #6182 Fix cmake build of arrow +- PR #6288 Fix gcc-9 compilation error with `ColumnVectorJni.cpp` +- PR #6173 Fix normalize_characters offset logic on sliced strings column +- PR #6159 Fix issue related to empty `Dataframe` with columns input to `DataFrame.append` +- PR #6199 Fix index preservation for dask_cudf parquet +- PR #6207 Remove shared libs from Java sources jar +- PR #6217 Fixed missing bounds checking when storing validity in parquet reader +- PR #6212 Update codeowners file +- PR #6389 Fix RMM logging level so that it can be turned off from the command line +- PR #6157 Fix issue related to `Series.concat` to concat a non-empty and empty series. +- PR #6226 Add in some JNI checks for null handles +- PR #6183 Fix issues related to `Series.acos` for consistent output regardless of dtype +- PR #6234 Add float infinity parsing in csv reader +- PR #6251 Replace remaining calls to RMM `get_default_resource` +- PR #6257 Support truncated fractions in `cudf::strings::to_timestamp` +- PR #6259 Fix compilation error with GCC 8 +- PR #6258 Pin libcudf conda recipe to boost 1.72.0 +- PR #6264 Remove include statement for missing rmm/mr/device/default_memory_resource.hpp file +- PR #6296 Handle double quote and escape character in json +- PR #6294 Fix read parquet key error when reading empty pandas DataFrame with cudf +- PR #6285 Removed unsafe `reinterpret_cast` and implicit pointer-to-bool casts +- PR #6281 Fix unreachable code warning in datetime.cuh +- PR #6286 Fix `read_csv` `int32` overflow +- PR #6466 Fix ORC reader issue with decimal type +- PR #6310 Replace a misspelled reference to `master` branch with `main` branch in a comment in changelog.sh +- PR #6289 Revert #6206 +- PR #6291 Fix issue related 
to row-wise operations in `cudf.DataFrame` +- PR #6304 Fix span_tests.cu includes +- PR #6331 Avoids materializing `RangeIndex` during frame concatenation (when not needed) +- PR #6278 Add filter tests for struct columns +- PR #6344 Fix rolling-window count for null input +- PR #6353 Rename `skip_rows` parameter to `skiprows` in `read_parquet`, `read_avro` and `read_orc` +- PR #6361 Detect overflow in hash join +- PR #6386 Removed c-style pointer casts and redundant `reinterpret_cast`s in cudf::io +- PR #6397 Fix `build.sh` when `PARALLEL_LEVEL` environment variable isn't set +- PR #6366 Fix Warp Reduce calls in cuio statistics calculation to account for NaNs +- PR #6345 Fix ambiguous constructor compile error with devtoolset +- PR #6335 Fix conda commands for outdated python version +- PR #6372 Fix issue related to reading a nullable boolean column in `read_parquet` when `engine=pyarrow` +- PR #6378 Fix index handling in `fillna` and incorrect pytests +- PR #6380 Avoid problematic column-index check in dask_cudf.read_parquet test +- PR #6403 Fix error handling in notebook tests +- PR #6408 Avoid empty offset list in hash_partition output +- PR #6402 Update JNI build to pull fixed nvcomp commit +- PR #6410 Fix uses of dangerous default values in Python code +- PR #6424 Check for null data in close for ColumnBuilder +- PR #6426 Fix `RuntimeError` when `np.bool_` is passed as `header` in `to_csv` +- PR #6443 Make java apis getList and getStruct public +- PR #6445 Add `dlpack` to run section of libcudf conda recipe to fix downstream build issues +- PR #6450 Make java Column Builder row agnostic +- PR #6309 Make all CI `.sh` scripts have a consistent set of permissions +- PR #6491 Remove repo URL from Java build-info +- PR #6462 Bug fixes for ColumnBuilder +- PR #6497 Fixes a data corruption issue reading list columns from Parquet files with multiple row groups. 
+ + + +# cuDF 0.15.0 (26 Aug 2020) + +## New Features + +- PR #5292 Add unsigned int type columns to libcudf +- PR #5287 Add `index.join` support +- PR #5222 Adding clip feature support to DataFrame and Series +- PR #5318 Support/leverage DataFrame.shuffle in dask_cudf +- PR #4546 Support pandas 1.0+ +- PR #5331 Add `cudf::drop_nans` +- PR #5327 Add `cudf::cross_join` feature +- PR #5204 Concatenate strings columns using row separator as strings column +- PR #5342 Add support for `StringMethods.__getitem__` +- PR #5358 Add zero-copy `column_view` cast for compatible types +- PR #3504 Add External Kafka Datasource +- PR #5356 Use `size_type` instead of `scalar` in `cudf::repeat`. +- PR #5397 Add internal implementation of nested loop equijoins. +- PR #5303 Add slice_strings functionality using delimiter string +- PR #5394 Enable cast and binops with duration types (builds on PR 5359) +- PR #5301 Add Java bindings for `zfill` +- PR #5411 Enable metadata collection for chunked parquet writer +- PR #5359 Add duration types +- PR #5364 Validate array interface during buffer construction +- PR #5418 Add support for `DataFrame.info` +- PR #5425 Add Python `Groupby.rolling()` +- PR #5434 Add nvtext function generate_character_grams +- PR #5442 Add support for `cudf.isclose` +- PR #5444 Remove usage of deprecated RMM APIs and headers. 
+- PR #5463 Add `.str.byte_count` python api and cython(bindings) +- PR #5488 Add plumbings for `.str.replace_tokens` +- PR #5502 Add Unsigned int types support in dlpack +- PR #5497 Add `.str.isinteger` & `.str.isfloat` +- PR #5511 Port of clx subword tokenizer to cudf +- PR #5528 Add unsigned int reading and writing support to parquet +- PR #5510 Add support for `cudf.Index` to create Indexes +- PR #5618 Add Kafka as a cudf datasource +- PR #5668 Adding support for `cudf.testing` +- PR #5460 Add support to write to remote filesystems +- PR #5454 Add support for `DataFrame.append`, `Index.append`, `Index.difference` and `Index.empty` +- PR #5536 Parquet reader - add support for multiple sources +- PR #5654 Adding support for `cudf.DataFrame.sample` and `cudf.Series.sample` +- PR #5607 Add Java bindings for duration types +- PR #5612 Add `is_hex` strings API +- PR #5625 String conversion to and from duration types +- PR #5659 Added support for rapids-compose for Java bindings and other enhancements +- PR #5637 Parameterize Null comparator behaviour in Joins +- PR #5623 Add `is_ipv4` strings API +- PR #5723 Parquet reader - add support for nested LIST columns +- PR #5669 Add support for reading JSON files with missing or out-of-order fields +- PR #5674 Support JIT backend on PowerPC64 +- PR #5629 Add `ListColumn` and `ListDtype` +- PR #5658 Add `filter_tokens` nvtext API +- PR #5666 Add `filter_characters_of_type` strings API +- PR #5778 Add support for `cudf::table` to `arrow::Table` and `arrow::Table` to `cudf::table` +- PR #5673 Always build and test with per-thread default stream enabled in the GPU CI build +- PR #5438 Add MD5 hash support +- PR #5704 Initial `fixed_point` Column Support +- PR #5716 Add `double_type_dispatcher` to libcudf +- PR #5739 Add `nvtext::detokenize` API +- PR #5645 Enforce pd.NA and Pandas nullable dtype parity +- PR #5729 Create nvtext normalize_characters API from the subword_tokenize internal function +- PR #5572 Add `cudf::encode` 
API. +- PR #5767 Add `nvtext::porter_stemmer_measure` and `nvtext::is_letter` APIs +- PR #5753 Add `cudf::lists::extract_list_element` API +- PR #5568 Add support for `Series.keys()` and `DataFrame.keys()` +- PR #5782 Add Kafka support to custreamz +- PR #5642 Add `GroupBy.groups()` +- PR #5811 Add `nvtext::edit_distance` API +- PR #5789 Add groupby support for duration types +- PR #5810 Make Cython subdirs packages and simplify package_data +- PR #6005 Add support for Ampere +- PR #5807 Initial support for struct columns +- PR #5817 Enable more `fixed_point` unit tests by introducing "scale-less" constructor +- PR #5822 Add `cudf_kafka` to `custreamz` run time conda dependency and fix bash syntax issue +- PR #5903 Add duration support for Parquet reader, writer +- PR #5845 Add support for `mask_to_bools` +- PR #5851 Add support for `Index.sort_values` +- PR #5904 Add slice/split support for LIST columns +- PR #5857 Add dtypes information page in python docs +- PR #5859 Add conversion from `fixed_point` to `bool` +- PR #5781 Add duration types support in cudf(python/cython) +- PR #5815 LIST Support for ColumnVector +- PR #5931 Support for `add_calendrical_months` API +- PR #5992 Add support for `.dt.strftime` +- PR #6075 Parquet writer - add support for nested LIST columns + +## Improvements + +- PR #5492 compile_udf: compile straight to PTX instead of using @jit +- PR #5605 Automatically flush RMM allocate/free logs in JNI +- PR #5632 Switch JNI code to use `pool_memory_resource` instead of CNMeM +- PR #5486 Link Boost libraries statically in the Java build +- PR #5479 Link Arrow libraries statically +- PR #5414 Use new release of Thrust/CUB in the JNI build +- PR #5403 Update required CMake version to 3.14 in contribution guide +- PR #5245 Add column reduction benchmark +- PR #5315 Use CMake `FetchContent` to obtain `cub` and `thrust` +- PR #5398 Use CMake `FetchContent` to obtain `jitify` and `libcudacxx` +- PR #5268 Rely on NumPy arrays for out-of-band pickling 
+- PR #5288 Drop `auto_pickle` decorator #5288 +- PR #5231 Type `Buffer` as `uint8` +- PR #5305 Add support for `numpy`/`cupy` array in `DataFrame` construction +- PR #5308 Coerce frames to `Buffer`s in deserialization +- PR #5309 Handle host frames in serialization +- PR #5312 Test serializing `Series` after `slice` +- PR #5248 Support interleave_columns for string types +- PR #5332 Remove outdated dask-xgboost docs +- PR #5349 Improve libcudf documentation CSS style +- PR #5317 Optimize fixed_point rounding shift for integers +- PR #5386 Remove `cub` from `include_dirs` in `setup.py` +- PR #5373 Remove legacy nvstrings/nvcategory/nvtext +- PR #5362 Remove dependency on `rmm._DevicePointer` +- PR #5302 Add missing comparison operators to `fixed_point` type +- PR #5824 Mark host frames as not needing to be writeable +- PR #5354 Split Dask deserialization methods by dask/cuda +- PR #5363 Handle `0-dim` inputs while broadcasting to a column +- PR #5396 Remove legacy tests env variable from build.sh +- PR #5374 Port nvtext character_tokenize API to libcudf +- PR #5389 Expose typed accessors for Java HostMemoryBuffer +- PR #5379 Avoid chaining `Buffer`s +- PR #5387 Port nvtext replace_tokens API to libcudf +- PR #5381 Change numpy usages to cupy in `10min.ipynb` +- PR #5408 Update pyarrow and arrow-cpp to 0.17.1 +- PR #5366 Add benchmarks for cuIO writers +- PR #5913 Call cudaMemcpyAsync/cudaMemsetAsync in JNI +- PR #5405 Add Error message to `StringColumn.unary_operator` +- PR #5424 Add python plumbing for `.str.character_tokenize` +- PR #5420 Aligning signature of `Series.value_counts` to Pandas +- PR #5535 Update document for XGBoost usage with dask-cuda +- PR #5431 Adding support for unsigned int +- PR #5426 Refactor strings code to minimize calls to regex +- PR #5433 Add support for column inputs in `strings::starts_with` and `strings::ends_with` +- PR #5427 Add Java bindings for unsigned data types +- PR #5429 Improve text wrapping in libcudf documentation +- PR 
#5443 Remove unused `is_simple` trait +- PR #5441 Update Java HostMemoryBuffer to only load native libs when necessary +- PR #5452 Add support for strings conversion using negative timestamps +- PR #5437 Improve libcudf join documentation +- PR #5458 Install meta packages for dependencies +- PR #5467 Move doc customization scripts to Jenkins +- PR #5468 Add cudf::unique_count(table_view) +- PR #5482 Use rmm::device_uvector in place of rmm::device_vector in copy_if +- PR #5483 Add NVTX range calls to dictionary APIs +- PR #5477 Add `is_index_type` trait +- PR #5487 Use sorted lists instead of sets for pytest parameterization +- PR #5491 allow build libcudf in custom dir +- PR #5501 Adding only unsigned types support for categorical column codes +- PR #5570 Add Index APIs such as `Int64Index`, `UInt64Index` and others +- PR #5503 Change `unique_count` to `distinct_count` +- PR #5514 `convert_datetime.cu` Small Cleanup +- PR #5496 Rename .cu tests (zero cuda kernels) to .cpp files +- PR #5518 split iterator and gather tests to speedup build tests +- PR #5526 Change `type_id` to enum class +- PR #5559 Java APIs for missing date/time operators +- PR #5582 Add support for axis and other parameters to `DataFrame.sort_index` and fix other bunch of issues. 
+- PR #5562 Add missing join type for java +- PR #5584 Refactor `CompactProtocolReader::InitSchema` +- PR #5591 Add `__arrow_array__` protocol and raise a descriptive error message +- PR #5635 Add cuIO reader benchmarks for CSV, ORC and Parquet +- PR #5601 Instantiate Table instances in `Frame._concat` to avoid `DF.insert()` overhead +- PR #5602 Add support for concatenation of `Series` & `DataFrame` in `cudf.concat` when `axis=0` +- PR #5603 Refactor JIT `parser.cpp` +- PR #5643 Update `isort` to 5.0.4 +- PR #5648 OO interface for hash join with explicit `build/probe` semantic +- PR #5662 Make Java ColumnVector(long nativePointer) constructor public +- PR #5681 Pin black, flake8 and isort +- PR #5679 Use `pickle5` to test older Python versions +- PR #5684 Use `pickle5` in `Serializable` (when available) +- PR #5419 Support rolling, groupby_rolling for durations +- PR #5687 Change strings::split_record to return a lists column +- PR #5708 Add support for `dummy_na` in `get_dummies` +- PR #5709 Update java build to help cu-spacial with java bindings +- PR #5713 Remove old NVTX utilities +- PR #5726 Replace use of `assert_frame_equal` in tests with `assert_eq` +- PR #5720 Replace owning raw pointers with std::unique_ptr +- PR #5702 Add inherited methods to python docs and other docs fixes +- PR #5733 Add support for `size` property in `DataFrame`/ `Series` / `Index`/ `MultiIndex` +- PR #5735 Force timestamp creation only with duration +- PR #5743 Reduce number of test cases in concatenate benchmark +- PR #5748 Disable `tolist` API in `Series` & `Index` and add `tolist` dispatch in `dask-cudf` +- PR #5744 Reduce number of test cases in reduction benchmark +- PR #5756 Switch JNI code to use the RMM owning wrapper +- PR #5725 Integrate Gbenchmarks into CI +- PR #5752 Add cuDF internals documentation (ColumnAccessor) +- PR #5759 Fix documentation describing JIT cache default location +- PR #5780 Add Java bindings for pad +- PR #5775 Update dask_cudf.read_parquet to align 
with upstream improvements +- PR #5785 Enable computing views of ListColumns +- PR #5791 Get nullable_pd_dtype from kwargs if provided in assert_eq +- PR #5786 JNI Header Cleanup for cuSpatial +- PR #5800 Expose arrow datasource instead of directly taking a RandomAccessFile +- PR #5795 Clarify documentation on Boost dependency +- PR #5803 Add in Java support for the repeat command +- PR #5806 Expose the error message from native exception when throwing an OOM exception +- PR #5825 Enable ORC statistics generation by default +- PR #5771 Enable gather/slicing/joins with ListColumns in Python +- PR #5834 Add support for dictionary column in concatenate +- PR #5832 Make dictionary_wrapper constructor from a value explicit +- PR #5833 Pin `dask` and `distributed` version to `2.22.0` +- PR #5856 Bump Pandas support to >=1.0,<1.2 +- PR #5855 Java interface to limit RMM maximum pool size +- PR #5853 Disable `fixed_point` for use in `copy_if` +- PR #5854 Raise informative error in `DataFrame.iterrows` and `DataFrame.itertuples` +- PR #5864 Replace cnmem with pool_memory_resource in test/benchmark fixtures +- PR #5863 Explicitly require `ucx-py` on CI +- PR #5879 Added support of sub-types and object wrappers in concat() +- PR #5884 Use S3 bucket directly for benchmark plugin +- PR #5881 Add in JVM extractListElement and stringSplitRecord +- PR #5885 Add in java support for merge sort +- PR #5894 Small code improvement / cleanup +- PR #5899 Add in gather support for Java +- PR #5906 Add macros for showing line of failures in unit tests +- PR #5933 Add in APIs to read/write arrow IPC formatted data from java +- PR #3918 Update cuDF internals doc +- PR #5970 Map data to pandas through arrow, always +- PR #6012 Remove `cudf._cuda` and replace usages with `rmm._cuda` +- PR #6045 Parametrize parquet_reader_list tests +- PR #6053 Import traits.hpp for cudftestutils consumers + +## Bug Fixes + +- PR #6034 Specify `--basetemp` for `py.test` run +- PR #5793 Fix leak in 
mutable_table_device_view by deleting _descendant_storage in table_device_view_base::destroy +- PR #5525 Make sure to allocate bitmasks of string columns only once +- PR #5336 Initialize conversion tables on a per-context basis +- PR #5283 Fix strings::ipv4_to_integers overflow to negative +- PR #5269 Explicitly require NumPy +- PR #5271 Fix issue when different dtype values are passed to `.cat.add_categories` +- PR #5333 Fix `DataFrame.loc` issue with list like argument +- PR #5299 Update package version for Java bindings +- PR #5300 Add support to ignore `None` in `cudf.concat` input +- PR #5334 Fix pickling sizeof test +- PR #5337 Fix broken alias from DataFrame.{at,iat} to {loc, iloc} +- PR #5347 Fix APPLY_BOOLEAN_MASK_BENCH segfault +- PR #5368 Fix loc indexing issue with `datetime` type index +- PR #5367 Fix API for `cudf::repeat` in `cudf::cross_join` +- PR #5377 Handle array of cupy scalars in to_column +- PR #5326 Fix `DataFrame.__init__` for list of scalar inputs and related dask issue +- PR #5383 Fix cython `type_id` enum mismatch +- PR #5982 Fix gcc-9 compile errors under CUDA 11 +- PR #5382 Fix CategoricalDtype equality comparisons +- PR #5989 Fix gcc-9 warnings on narrowing conversion +- PR #5385 Fix index issues in `DataFrame.from_gpu_matrix` +- PR #5390 Fix Java data type IDs and string interleave test +- PR #5392 Fix documentation links +- PR #5978 Fix option to turn off NVTX +- PR #5410 Fix compile warning by disallowing bool column type for slice_strings +- PR #5404 Fix issue with column creation when chunked arrays are passed +- PR #5409 Use the correct memory resource when creating empty null masks +- PR #5399 Fix cpp compiler warnings of unreachable code +- PR #5439 Fix nvtext ngrams_tokenize performance for multi-byte UTF8 +- PR #5446 Fix compile error caused by out-of-date PR merge (4990) +- PR #5983 Fix JNI gcc-9 compile error under CUDA 11 +- PR #5423 Fix any() reduction ignore nulls +- PR #5459 Fix str.translate to convert table 
characters to UTF-8 +- PR #5480 Fix merge sort docs +- PR #5465 Fix benchmark out of memory errors due to multiple initialization +- PR #5473 Fix RLEv2 patched base in ORC reader +- PR #5472 Fix str concat issue with indexed series +- PR #5478 Fix `loc` and `iloc` doc +- PR #5484 Ensure flat index after groupby if nlevels == 1 +- PR #5489 Fix drop_nulls/boolean_mask corruption for large columns +- PR #5504 Remove some java assertions that are not needed +- PR #5516 Update gpuCI image in local build script +- PR #5529 Fix issue with negative timestamp in orc writer +- PR #5523 Handle `dtype` of `Buffer` objects when not passed explicitly +- PR #5534 Fix the java build around type_id +- PR #5564 Fix CudfEngine.read_metadata API in dask_cudf +- PR #5537 Fix issue related to using `set_index` on a string series +- PR #5561 Fix `copy_bitmask` issue with offset +- PR #5609 Fix loc and iloc issue with column like input +- PR #5578 Fix getattr logic in GroupBy +- PR #5490 Fix python column view +- PR #5613 Fix assigning an equal length object into a masked out Series +- PR #5608 Fix issue related to string types being represented as binary types +- PR #5619 Fix issue related to typecasting when using a `CategoricalDtype` +- PR #5649 Fix issue when empty Dataframe with index are passed to `cudf.concat` +- PR #5644 Fix issue related to Dataframe init when passing in `columns` +- PR #5340 Disable iteration in cudf objects and add support for `DataFrame` initialization with list of `Series` +- PR #5663 Move Duration types under Timestamps in doxygen Modules page +- PR #5664 Update conda upload versions for new supported CUDA/Python +- PR #5656 Fix issue with incorrect docker image being used in local build script +- PR #5671 Fix chunksize issue with `DataFrame.to_csv` +- PR #5672 Fix crash in parquet writer while writing large string data +- PR #5675 Allow lists_column_wrappers to be constructed from incomplete hierarchies. 
+- PR #5691 Raise error on incompatible mixed-type input for a column +- PR #5692 Fix compilation issue with gcc 7.4.0 and CUDA 10.1 +- PR #5693 Add fix missing from PR 5656 to update local docker image to py3.7 +- PR #5703 Small fix for dataframe constructor with cuda array interface objects that don't have `descr` field +- PR #5727 Fix `Index.__repr__` to allow representation of null values +- PR #5719 Fix Frame._concat() with categorical columns +- PR #5736 Disable unsigned type in ORC writer benchmarks +- PR #5745 Update JNI cast for inability to cast timestamp and integer types +- PR #5750 Add RMM_ROOT/include to the spdlog search path in JNI build +- PR #5763 Update Java slf4j version to match Spark 3.0 +- PR #5816 Always preserve list column hierarchies across operations. +- PR #5766 Fix issue related to `iloc` and slicing a `DataFrame` +- PR #5827 Revert fallback for `tolist` being absent +- PR #5774 Add fallback for when `tolist` is absent +- PR #5319 Disallow SUM and specialize MEAN of timestamp types +- PR #5797 Fix a missing data issue in some Parquet files +- PR #5787 Fix column create from dictionary column view +- PR #5764 Remove repetition of install instructions +- PR #5926 Fix SeriesGroupBy.nunique() to return a Series +- PR #5813 Fix normalizer exception with all-null strings column +- PR #5820 Fix ListColumn.to_arrow for all null case +- PR #5837 Bash syntax error in prebuild.sh preventing `cudf_kafka` and `libcudf_kafka` from being uploaded to Anaconda +- PR #5841 Added custreamz functions that were missing in interface layer +- PR #5844 Fix `.str.cat` when objects with different index are passed +- PR #5849 Modify custreamz api to integrate seamlessly with python streamz +- PR #5866 cudf_kafka python version inconsistencies in Anaconda packages +- PR #5872 libcudf_kafka r_path is causing docker build failures on centos7 +- PR #5869 Fix bug in parquet writer in writing string column with offset +- PR #5910 Propagate `CUDA` insufficient driver 
error to the user +- PR #5914 Link CUDA against libcudf_kafka +- PR #5895 Do not break kafka client consumption loop on local client timeout +- PR #5915 Fix reference count on Java DeviceMemoryBuffer after contiguousSplit +- PR #5941 Fix issue related to `string` to `datetime64` column typecast +- PR #5927 Fix return type of `MultiIndex.argsort` +- PR #5942 Fix JIT cache multiprocess test failure +- PR #5929 Revised assertEquals for List Columns in java tests +- PR #5947 Fix null count for child device column vector +- PR #5951 Fix mkdir error in benchmark build +- PR #5949 Find Arrow include directory for JNI builds +- PR #5964 Fix API doc page title tag +- PR #5981 Handle `nat` in `fillna` for datetime and timedelta types +- PR #6016 Fix benchmark fixture segfault +- PR #6003 Fix concurrent JSON reads crash +- PR #6032 Change black version to 19.10b0 in .pre-commit-config.yaml +- PR #6041 Fix Java memory resource handler to rethrow original exception object +- PR #6057 Fix issue in parquet reader with reading columns out of file-order +- PR #6098 Patch Thrust to workaround CUDA_CUB_RET_IF_FAIL macro clearing CUDA errors + + +# cuDF 0.14.0 (03 Jun 2020) + +## New Features + +- PR #5042 Use RMM for Numba +- PR #4472 Add new `partition` API to replace `scatter_to_tables`. 
+- PR #4626 LogBase binops +- PR #4750 Normalize NANs and Zeroes (JNI Bindings) +- PR #4689 Compute last day of the month for a given date +- PR #4771 Added in an option to statically link against cudart +- PR #4788 Add cudf::day_of_year API +- PR #4789 Disallow timestamp sum and diffs via binary ops +- PR #4815 Add JNI total memory allocated API +- PR #4906 Add Java bindings for interleave_columns +- PR #4900 Add `get_element` to obtain scalar from a column given an index +- PR #4938 Add Java bindings for strip +- PR #4923 Add Java and JNI bindings for string split +- PR #4972 Add list_view (cudf::LIST) type +- PR #4990 Add lists_column_view, list_column_wrapper, lists support for concatenate +- PR #5073 gather support for cudf::LIST columns +- PR #5004 Added a null considering min/max binary op +- PR #4992 Add Java bindings for converting nans to nulls +- PR #4975 Add Java bindings for first and last aggregate expressions based on nth +- PR #5036 Add positive remainder binary op functionality +- PR #5055 Add atan2 binary op +- PR #5099 Add git commit hook for clang-format +- PR #5072 Adding cython binding to `get_element` +- PR #5092 Add `cudf::replace_nans` +- PR #4881 Support row_number in rolling_window +- PR #5068 Add Java bindings for arctan2 +- PR #5132 Support out-of-band buffers in Python pickling +- PR #5139 Add ``Serializable`` ABC for Python +- PR #5149 Add Java bindings for PMOD +- PR #5153 Add Java bindings for extract +- PR #5196 Add Java bindings for NULL_EQUALS, NULL_MAX and NULL_MIN +- PR #5192 Add support for `cudf.to_datetime` +- PR #5203 Add Java bindings for is_integer and is_float +- PR #5205 Add ci test for libcudf, libnvstrings headers existence check in meta.yml +- PR #5239 Support for custom cuIO datasource classes +- PR #5293 Add Java bindings for replace_with_backrefs + +## Improvements + +- PR #5235 Make DataFrame.clean_renderable_dataframe() and DataFrame.get_renderable_dataframe non-public methods +- PR #4995 Add CMake option for 
per-thread default stream +- PR #5033 Fix Numba deprecations warnings with Numba 0.49+ +- PR #4950 Fix import errors with Numba 0.49+ +- PR #4825 Update the iloc exp in dataframe.py +- PR #4450 Parquet writer: add parameter to retrieve the raw file metadata +- PR #4531 Add doc note on conda `channel_priority` +- PR #4479 Adding cuda 10.2 support via conda environment file addition +- PR #4486 Remove explicit template parameter from detail::scatter. +- PR #4471 Consolidate partitioning functionality into a single header. +- PR #4483 Add support fill() on dictionary columns +- PR #4498 Adds in support for chunked writers to java +- PR #4073 Enable contiguous split java test +- PR #4527 Add JNI and java bindings for matches_re +- PR #4606 Fix `scan` unit test and upgrade to more appropriate algorithms +- PR #4527 Add JNI and java bindings for `matches_re` +- PR #4532 Parquet reader: add support for multiple pandas index columns +- PR #4599 Add Java and JNI bindings for string replace +- PR #4655 Raise error for list like dtypes in cudf +- PR #4548 Remove string_view is_null method +- PR #4645 Add Alias for `kurtosis` as `kurt` +- PR #4703 Optimize strings concatenate for many columns +- PR #4769 Remove legacy code from libcudf +- PR #4668 Add Java bindings for log2/log10 unary ops and log_base binary op +- PR #4616 Enable different RMM allocation modes in unit tests +- PR #4520 Fix several single char -> single char case mapping values. Add support for single -> multi char mappings. 
+- PR #4700 Expose events and more stream functionality in java +- PR #4699 Make Java's MemoryBuffer public and add MemoryBuffer.slice +- PR #4691 Fix compiler argument syntax for ccache +- PR #4792 Port `gather`, `scatter`, and `type_dispatcher` benchmarks to libcudf++ +- PR #3581 Remove `bool8` +- PR #4692 Add GPU and CUDA validations +- PR #4705 quantile cython bindings +- PR #4627 Remove legacy Cython +- PR #4688 Add Java count aggregation to include null values +- PR #4331 Improved test for double that considers an epsilon +- PR #4731 Avoid redundant host->device copies when reading the entire CSV/JSON file +- PR #4739 Add missing aggregations for cudf::experimental::reduce +- PR #4738 Remove stop-gaps in StringMethods and enable related tests +- PR #4745 Fix `fsspec` related issue and upgrade `fsspec` version +- PR #4779 Allow reading arbitrary stripes/rowgroup lists in CPP columnar readers +- PR #4766 Update to use header-only NVTX v3 and remove need to link against nvtx. +- PR #4716 Remove direct calls to RMM_ALLOC/RMM_FREE +- PR #4765 Add in java support for sequence +- PR #4772 Cleanup `dask_cudf` `to_parquet` and enable `"_metadata"` creation +- PR #4733 Fix `isin` docs for `DataFrame`, `Series`, `Index`, and add `DataFrame.isin` support +- PR #4767 Remove linking against `gtest_main` and `gmock_main` in unit tests +- PR #4660 Port `cudf::partition` api to python/cython +- PR #4799 Remove null_count() and has_nulls() from column_device_view +- PR #4778 Remove `scatter_to_tables` from libcudf, cython and python +- PR #4783 Add support for child columns to mutable_column_device_view +- PR #4802 Refactor `cudf::transpose` to increase performance. 
+- PR #4776 Improve doxygen comments for libcudf string/timestamp conversion formats +- PR #4793 Add `cudf._cuda` to setup.py +- PR #4790 Replace the use of deprecated rmm APIs in the test environment +- PR #4809 Improve libcudf doc rendering and add a new main page +- PR #4811 Add precision to subsecond specifier in timestamp/string conversion format +- PR #4543 Add `inplace` parameter support for `Series.replace` & `DataFrame.replace` +- PR #4816 Remove java API use of deprecated RMM APIs +- PR #4817 Fix `fixed_point` documentation +- PR #4844 Change Doxygen color to RAPIDS purple and documentation improvement +- PR #4840 Add docs for `T`, `empty` & `values` +- PR #4841 Remove unused `single_lane_block_popc_reduce` function +- PR #4842 Added Java bindings for titlizing a String column +- PR #4847 Replace legacy NVTX calls with "standalone" NVTX bindings calls +- PR #4851 Performance improvements relating to `concat` +- PR #4852 Add NVTX range calls to strings and nvtext APIs +- PR #4849 Update Java bindings to use new NVTX API +- PR #4845 Add CUDF_FUNC_RANGE to top-level cuIO function APIs +- PR #4848 Side step `unique_count` calculation in `scatter_by_map` +- PR #4863 Create is_integer/is_float functions for checking characters before calling to_integers/to_floats +- PR #4864 Add support for `__array__` method in cuDF +- PR #4853 Added CUDA_TRY to multiple places in libcudf code +- PR #4870 Add chunked parquet file writing from python +- PR #4865 Add docs and clarify limitations of `applymap` +- PR #4867 Parquet reader: coalesce adjacent column chunk reads +- PR #4871 Add in the build information when building the java jar file +- PR #4869 Expose contiguous table when deserializing from Java +- PR #4878 Remove obsolete string_from_host utility +- PR #4873 Prevent mutable_view() from invoking null count +- PR #4806 Modify doc and correct cupy array conversions in `10min-cudf-cupy.ipynb` +- PR #4877 Fix `DataFrame.mask` and align `mask` & `where` behavior with 
pandas +- PR #4884 Add more NVTX annotations in cuDF Python +- PR #4902 Use ContextDecorator instead of contextmanager for nvtx.annotate +- PR #4894 Add annotations for the `.columns` property and setter +- PR #4901 Improve unit tests for casting Java numeric types to string +- PR #4888 Handle dropping of nan's & nulls using `skipna` parameter in Statistical reduction ops +- PR #4903 Improve internal documentation of cudf-io compression/decompression kernels +- PR #4905 Get decorated function name as message when annotating +- PR #4907 Reuse EventAttributes across NVTX annotations +- PR #4912 Drop old `valid` check in `element_indexing` +- PR #4924 Properly handle npartition argument in rearrange_by_hash +- PR #4918 Adding support for `cupy.ndarray` in `series.loc` +- PR #4909 Added ability to transform a column using cuda method in Java bindings +- PR #3259 Add .clang-format file & format all files +- PR #4943 Fix-up error handling in GPU detection +- PR #4917 Add support for casting unsupported `dtypes` of same kind +- PR #4928 Misc performance improvements for `scatter_by_map` +- PR #4927 Use stack for memory in `deviceGetName` +- PR #4933 Enable nop annotate +- PR #4929 Java methods ensure calling thread's CUDA device matches RMM device +- PR #4956 Dropping `find_first_value` and `find_last_value` +- PR #4962 Add missing parameters to `DataFrame.replace` & `Series.replace` +- PR #4960 Return the result of `to_json` +- PR #4963 Use `cudaDeviceAttr` in `getDeviceAttribute` +- PR #4953 add documentation for supported NVIDIA GPUs and CUDA versions for cuDF +- PR #4967 Add more comments to top-level gpuinflate and debrotli kernels +- PR #4968 Add CODE_OF_CONDUCT.md +- PR #4980 Change Java HostMemoryBuffer default to prefer pinned memory +- PR #4994 clang-format "cpp/tests" directory +- PR #4993 Remove Java memory prediction code +- PR #4985 Add null_count to Python Column ctors and use already computed null_count when possible +- PR #4998 Clean up dispatch of 
aggregation methods in result_cache +- PR #5000 Performance improvements in `isin` and dask_cudf backend +- PR #5002 Fix Column.__reduce__ to accept `null_count` +- PR #5006 Add Java bindings for strip, lstrip and rstrip +- PR #5047 Add Cython binding for libcudf++ CSV reader +- PR #5027 Move nvstrings standalone docs pages to libcudf doxygen pages +- PR #4947 Add support for `CategoricalColumn` to be type-casted with different categories +- PR #4822 Add constructor to `pq_chunked_state` to enable using RAII idiom +- PR #5024 CSV reader input stage optimizations +- PR #5061 Add support for writing parquet to python file-like objects +- PR #5034 Use loc to apply boolmask to frame efficiently when constructing query result +- PR #5039 Make `annotate` picklable +- PR #5045 Remove call to `unique()` in concat when `axis=1` +- PR #5023 Object oriented join and column agnostic typecasting +- PR #5049 Add grouping of libcudf apis into doxygen modules +- PR #5069 Remove duplicate documentation from detail headers +- PR #5075 Add simple row-group aggregation mechanism in dask_cudf read_parquet +- PR #5084 Improve downcasting in `Series.label_encoding()` to reduce memory usage +- PR #5085 Print more precise numerical strings in unit tests +- PR #5028 Add Docker 19 support to local gpuci build +- PR #5093 Add `.cat.as_known` related test in `dask_cudf` +- PR #5100 Add documentation on libcudf doxygen guidelines +- PR #5106 Add detail API for `cudf::concatenate` with tables +- PR #5104 Add missing `.inl` files to clang-format and git commit hook +- PR #5112 Adding `htoi` and `ip2int` support to `StringMethods` +- PR #5101 Add POSITION_INDEPENDENT_CODE flag to static cudftestutil library +- PR #5109 Update CONTRIBUTING.md for `clang-format` pre-commit hook +- PR #5054 Change String typecasting to be inline with Pandas +- PR #5123 Display more useful info on `clang-format` CI Failure +- PR #5058 Adding cython binding for CSV writer +- PR #5156 Raise error when applying boolean 
mask containing null values. +- PR #5137 Add java bindings for getSizeInBytes in DType +- PR #5194 Update Series.fillna to reflect dtype behavior +- PR #5159 Add `make_meta_object` in `dask_cudf` backend and add `str.split` test +- PR #5147 Use logging_resource_adaptor from RMM in the JNI code +- PR #5184 Fix style checks +- PR #5198 Add detail headers for strings converter functions +- PR #5199 Add index support in `DataFrame.query` +- PR #5227 Refactor `detail::gather` API to make use of scoped enumerators +- PR #5218 Reduce memory usage when categorifying column with null values. +- PR #5209 Add `nan_as_null` support to `cudf.from_pandas` +- PR #5207 Break up backref_re.cu into multiple source files to improve compile time +- PR #5155 Fix cudf documentation misspellings +- PR #5208 Port search and join benchmark to libcudf++ +- PR #5214 Move docs build script into repository +- PR #5219 Add per context cache for JIT kernels +- PR #5250 Improve `to_csv()` support for writing to buffers +- PR #5233 Remove experimental namespace used during libcudf++ refactor +- PR #5213 Documentation enhancements to `cudf` python APIs +- PR #5251 Fix more mispellings in cpp comments and strings +- PR #5261 Add short git commit to conda package name +- PR #5254 Deprecate nvstrings, nvcategory and nvtext +- PR #5270 Add support to check for "NaT" and "None" strings while typecasting to `datetime64` +- PR #5298 Remove unused native deps from java library +- PR #5216 Make documentation uniform for params + +## Bug Fixes + +- PR #5221 Fix the use of user-provided resource on temporary values +- PR #5181 Allocate null count using the default resource in `copy_if` +- PR #5141 Use user-provided resource correctly in `unary_operation()` and `shift()` +- PR #5064 Fix `hash()` and `construct_join_output_df()` to use user-provided memory resource correctly +- PR #4386 Update Java package to 0.14 +- PR #4466 Fix merge key column sorting +- PR #4402 Fix `cudf::strings::join_strings` logic with 
all-null strings and null narep +- PR #4610 Fix validity bug in string scalar factory +- PR #4570 Fixing loc ordering issue in dataframe +- PR #4612 Fix invalid index handling in cudf:dictionary:add-keys call to gather +- PR #4614 Fix cuda-memcheck errors found in column_tests.cu and copying/utility_tests.cu +- PR #4614 Fix cuda-memcheck errors found in `column_tests.cu` and `copying/utility_tests.cu` +- PR #4639 Fix java column of empty strings issue +- PR #4613 Fix issue related to downcasting in `.loc` +- PR #4615 Fix potential OOB write in ORC writer compression stage +- PR #4587 Fix non-regex libcudf contains methods to return true when target is an empty string +- PR #4617 Fix memory leak in aggregation object destructor +- PR #4633 String concatenation fix in `DataFrame.rename` +- PR #4609 Fix to handle `Series.factorize` when index is set +- PR #4659 Fix strings::replace_re handling empty regex pattern +- PR #4652 Fix misaligned error when computing regex device structs +- PR #4651 Fix hashing benchmark missing includes +- PR #4672 Fix docs for `value_counts` and update test cases +- PR #4672 Fix `__setitem__` handling list of column names +- PR #4673 Fix regex infinite loop while parsing invalid quantifier pattern +- PR #4679 Fix comments for make_dictionary_column factory functions +- PR #4711 Fix column leaks in Java unit test +- PR #4721 Fix string binop to update nulls appropriately +- PR #4722 Fix strings::pad when using pad::both with odd width +- PR #4743 Fix loc issue with Multiindex on DataFrame and Series +- PR #4725 Fix issue java with not setting GPU on background thread +- PR #4701 Fix issue related to mixed input types in `as_column` +- PR #4748 Fix strings::all_characters_of_type to allow verify-types mask +- PR #4747 Fix random failures of decompression gtests +- PR #4749 Setting `nan_as_null=True` while creating a column in DataFrame creation +- PR #4761 Fix issues with `nan_as_null` in certain case +- PR #4650 Fix type mismatch & result 
format issue in `searchsorted` +- PR #4755 Fix Java build to deal with new quantiles API +- PR #4720 Fix issue related to `dtype` param not being adhered in case of cuda arrays +- PR #4756 Fix regex error checking for valid quantifier condition +- PR #4777 Fix data pointer for column slices of zero length +- PR #4770 Fix readonly flag in `Column. __cuda_array_interface__` +- PR #4800 Fix dataframe slicing with strides +- PR #4796 Fix groupby apply for operations that fail on empty groups +- PR #4801 gitignore `_cuda/*.cpp` files +- PR #4805 Fix hash_object_dispatch definitions in dask_cudf +- PR #4813 Fix `GenericIndex` printing +- PR #4804 Fix issue related `repartition` during hash based repartition +- PR #4814 Raise error if `to_csv` does not get `filename/path` +- PR #4821 Port apply_boolean_mask_benchmark to new cudf::column types +- PR #4826 Move memory resource from RmmTestEnvironment to the custom gtest main() scope +- PR #4839 Update Java bindings for timestamp cast formatting changes +- PR #4797 Fix string timestamp to datetime conversion with `ms` and `ns` +- PR #4854 Fix several cases of incorrect downcasting of operands in binops +- PR #4834 Fix bug in transform in handling single line UDFs +- PR #4857 Change JIT cache default directory to $HOME/.cudf +- PR #4807 Fix `categories` duplication in `dask_cudf` +- PR #4846 Fix CSV parsing with byte_range parameter and string columns +- PR #4883 Fix series get/set to match pandas +- PR #4861 Fix to_integers illegal-memory-access with all-empty strings column +- PR #4860 Fix issues in HostMemoryBufferTest, and testNormalizeNANsAndZeros +- PR #4879 Fix output for `cudf.concat` with `axis=1` for pandas parity +- PR #4838 Fix to support empty inputs to `replace` method +- PR #4859 JSON reader: fix data type inference for string columns +- PR #4868 Temporary fix to skip validation on Dask related runs +- PR #4872 Fix broken column wrapper constructors in merge benchmark +- PR #4875 Fix cudf::strings::from_integer 
logic converting min integer to string +- PR #4876 Mark Java cleaner objects as being cleaned even if exception is thrown +- PR #4780 Handle nulls in Statistical column operations +- PR #4886 Minimize regex-find calls in multi-replace cudf::strings::replace_re function +- PR #4887 Remove `developer.rst` and any links +- PR #4915 Fix to `reset_index` inplace in MultiIndex and other places +- PR #4899 Fix series inplace handling +- PR #4940 Fix boolean mask issue with large sized Dataframe +- PR #4889 Fix multi-index merging +- PR #4922 Fix cudf::strings:split logic for many columns +- PR #4949 Fix scatter, gather benchmark constructor call +- PR #4958 Fix strings::replace perf for long strings +- PR #4965 Raise Error when there are duplicate columns sent to `cudf.concat` +- PR #4983 Fix from_cudf in dask_cudf +- PR #4996 Parquet writer: fix potentially zero-sized string dictionary +- PR #5009 Fix pickling for string and categorical columns +- PR #4984 Fix groupby nth aggregation negative n and exclude nulls +- PR #5011 Fix DataFrame loc issue with boolean masking +- PR #4977 Fix compilation of cuDF benchmarks with build.sh +- PR #5018 Fix crash when JIT cache dir inaccessible. Fix inter version cache clash for custom cache path. 
+- PR #5005 Fix CSV reader error when only one of the row selection parameters is set +- PR #5022 Add timestamp header to transform +- PR #5021 Fix bug with unsigned right shift and scalar lhs +- PR #5020 Fix `conda install pre_commit` not found when setting up dev environment +- PR #5030 Fix Groupby sort=True +- PR #5029 Change temporary dir to working dir for cudf io tests +- PR #5040 Fix `make_scalar_iterator()` and `make_pair_iterator(scalar)` to not copy values to host +- PR #5041 Fix invalid java test for shift right unsigned +- PR #5043 Remove invalid examples page libcudf doxygen +- PR #5060 Fix unsigned char limits issue in JIT by updating Jitify +- PR #5070 Fix libcudf++ csv reader support for hex dtypes, doublequotes and empty columns +- PR #5057 Fix metadata_out parameter not reaching parquet `write_all` +- PR #5076 Fix JNI code for null_policy enum change +- PR #5031 grouped_time_range_rolling_window assumes ASC sort order +- PR #5032 grouped_time_range_rolling_window should permit invocation without specifying grouping_keys +- PR #5103 Fix `read_csv` issue with names and header +- PR #5090 Fix losing nulls while creating DataFrame from dictionary +- PR #5089 Return false for sign-only string in libcudf is_float and is_integer +- PR #5124 `DataFrame.rename` support for renaming indexes w/ default for `index` +- PR #5108 Fix float-to-string convert for -0.0 +- PR #5111 Fix header not being included in legacy jit transform. 
+- PR #5115 Fix hex-to-integer logic when string has prefix '0x' +- PR #5118 Fix naming for java string length operators +- PR #5129 Fix missed reference in tests from 5118 +- PR #5122 Fix `clang-format` `custrings` bug +- PR #5138 Install `contextvars` backport on Python 3.6 +- PR #5145 Fix an issue with calling an aggregation operation on `SeriesGroupBy` +- PR #5148 Fix JNI build for GCC 8 +- PR #5162 Fix issues related to empty `Dataframe` in `as_gpu_matrix` & `astype` +- PR #5167 Fix regex extract match to return empty string +- PR #5163 Fix parquet INT96 timestamps before the epoch +- PR #5165 Fix potentially missing last row in libcudf++ csv reader +- PR #5185 Fix flake8 configuration and issues from new flake8 version +- PR #5193 Fix OOB read in csv reader +- PR #5191 Fix the use of the device memory resource +- PR #5212 Fix memory leak in `dlpack.pyx:from_dlpack()` +- PR #5224 Add new headers from 5198 to libcudf/meta.yaml +- PR #5228 Fix datetime64 scalar dtype handling for unsupported time units +- PR #5256 ORC reader: fix loading individual timestamp columns +- PR #5285 Fix DEBUG compilation failure due to `fixed_point.hpp` + + +# cuDF 0.13.0 (31 Mar 2020) + +## New Features + +- PR #4360 Added Java bindings for bitwise shift operators +- PR #3577 Add initial dictionary support to column classes +- PR #3777 Add support for dictionary column in gather +- PR #3693 add string support, skipna to scan operation +- PR #3662 Define and implement `shift`. +- PR #3861 Added Series.sum feature for String +- PR #4069 Added cast of numeric columns from/to String +- PR #3681 Add cudf::experimental::boolean_mask_scatter +- PR #4040 Add support for n-way merge of sorted tables +- PR #4053 Multi-column quantiles. 
+- PR #4100 Add set_keys function for dictionary columns +- PR #3894 Add remove_keys functions for dictionary columns +- PR #4107 Add groupby nunique aggregation +- PR #4235 Port nvtx.pyx to use non-legacy libcudf APIs +- PR #4153 Support Dask serialization protocol on cuDF objects +- PR #4127 Add python API for n-way sorted merge (merge_sorted) +- PR #4164 Add Buffer "constructor-kwargs" header +- PR #4172 Add groupby nth aggregation +- PR #4159 Add COUNT aggregation that includes null values +- PR #4190 Add libcudf++ transpose Cython implementation +- PR #4063 Define and implement string capitalize and title API +- PR #4217 Add libcudf++ quantiles Cython implementation +- PR #4216 Add cudf.Scalar Python type +- PR #3782 Add `fixed_point` class to support DecimalType +- PR #4272 Add stable sorted order +- PR #4129 Add libcudf++ interleave_columns and tile Cython implementation +- PR #4262 Port unaryops.pyx to use libcudf++ APIs +- PR #4276 Port avro.pyx to libcudf++ +- PR #4259 Ability to create Java host buffers from memory-mapped files +- PR #4240 Add groupby::groups() +- PR #4294 Add Series rank and Dataframe rank +- PR #4304 Add new NVTX infrastructure and add ranges to all top-level compute APIs. +- PR #4319 Add repartition_by_hash API to dask_cudf +- PR #4315 ShiftLeft, ShiftRight, ShiftRightUnsigned binops +- PR #4321 Expose Python Semi and Anti Joins +- PR #4291 Add Java callback support for RMM events +- PR #4298 Port orc.pyx to libcudf++ +- PR #4344 Port concat.pyx to libcudf++ +- PR #4329 Add support for dictionary columns in scatter +- PR #4352 Add factory function make_column_from_scalar +- PR #4381 Add Java support for copying buffers with asynchronous streams +- PR #4288 Add libcudf++ shift Cython implementation +- PR #4338 Add cudf::sequence() for generating an incrementing list of numeric values +- PR #4456 Add argmin/max and string min/max to sort groupby +- PR #4564 Added Java bindings for clamp operator. 
+- PR #4602 Add Cython bindings for functions in `datetime.hpp` +- PR #4670 Add java and JNI bindings for contains_re +- PR #4363 Grouped Rolling Window support +- PR #4798 Add UDF support to grouped rolling window +- PR #3917 Add dictionary add_keys function +- PR #3842 ORC writer: add support for column statistics +- PR #4088 Added asString() on ColumnVector in Java that takes a format string +- PR #4484 Port CSV writer to libcudf++ + +## Improvements + +- PR #4641 Add replace example in dataframe.py and update 10min.ipynb +- PR #4140 Add cudf series examples and corr() method for dataframe in dataframe.py +- PR #4187 exposed getNativeView method in Java bindings +- PR #3525 build.sh option to disable nvtx +- PR #3748 Optimize hash_partition using shared memory +- PR #3808 Optimize hash_partition using shared memory and cub block scan +- PR #3698 Add count_(un)set_bits functions taking multiple ranges and updated slice to compute null counts at once. +- PR #3909 Move java backend to libcudf++ +- PR #3971 Adding `as_table` to convert Column to Table in python +- PR #3910 Adding sinh, cosh, tanh, asinh, acosh, atanh cube root and rint unary support. +- PR #3972 Add Java bindings for left_semi_join and left_anti_join +- PR #3975 Simplify and generalize data handling in `Buffer` +- PR #3985 Update RMM include files and remove extraneously included header files. 
+- PR #3601 Port UDF functionality for rolling windows to libcudf++ +- PR #3911 Adding null boolean handling for copy_if_else +- PR #4003 Drop old `to_device` utility wrapper function +- PR #4002 Adding to_frame and fix for categorical column issue +- PR #4009 build script update to enable cudf build without installing +- PR #3897 Port cuIO JSON reader to cudf::column types +- PR #4008 Eliminate extra copy in column constructor +- PR #4013 Add cython definition for io readers cudf/io/io_types.hpp +- PR #4028 Port json.pyx to use new libcudf APIs +- PR #4014 ORC/Parquet: add count parameter to stripe/rowgroup-based reader API +- PR #3880 Add aggregation infrastructure support for cudf::reduce +- PR #4059 Add aggregation infrastructure support for cudf::scan +- PR #4021 Change quantiles signature for clarity. +- PR #4057 Handle offsets in cython Column class +- PR #4045 Reorganize `libxx` directory +- PR #4029 Port stream_compaction.pyx to use libcudf++ APIs +- PR #4031 Docs build scripts and instructions update +- PR #4062 Improve how java classifiers are produced +- PR #4038 JNI and Java support for is_nan and is_not_nan +- PR #3786 Adding string support to rolling_windows +- PR #4067 Removed unused `CATEGORY` type ID. +- PR #3891 Port NVStrings (r)split_record to contiguous_(r)split_record +- PR #4070 Port NVText normalize_spaces to use libcudf strings column +- PR #4072 Allow round_robin_partition to single partition +- PR #4064 Add cudaGetDeviceCount to JNI layer +- PR #4075 Port nvtext ngrams-tokenize to libcudf++ +- PR #4087 Add support for writing large Parquet files in a chunked manner. 
+- PR #3716 Update cudf.to_parquet to use new GPU accelerated Parquet writer +- PR #4083 Use two partitions in test_groupby_multiindex_reset_index +- PR #4071 Add Java bindings for round robin partition +- PR #4079 Simply use `mask.size` to create the array view +- PR #4092 Keep mask on GPU for bit unpacking +- PR #4081 Copy from `Buffer`'s pointer directly to host +- PR #4105 Change threshold of using optimized hash partition code +- PR #4101 Redux serialize `Buffer` directly with `__cuda_array_interface__` +- PR #4098 Remove legacy calls from libcudf strings column code +- PR #4044 Port join.pyx to use libcudf++ APIs +- PR #4111 Use `Buffer`'s to serialize `StringColumn` +- PR #4567 Optimize `__reduce__` in `StringColumn` +- PR #4590 Register a few more types for Dask serialization +- PR #4113 Get `len` of `StringColumn`s without `nvstrings` +- PR #4147 Remove workaround for UNKNOWN_NULL_COUNT in contiguous_split. +- PR #4130 Renames in-place `cudf::experimental::fill` to `cudf::experimental::fill_in_place` +- PR #4136 Add `Index.names` property +- PR #4139 Port rolling.pyx to new libcudf APIs +- PR #4143 Renames in-place `cudf::experimental::copy_range` to `cudf::experimental::copy_range_in_place` +- PR #4144 Release GIL when calling libcudf++ functions +- PR #4082 Rework MultiColumns in cuDF +- PR #4149 Use "type-serialized" for pickled types like Dask +- PR #4174 Port hash groupby to libcudf++ +- PR #4171 Split java host and device vectors to make a vector truly immutable +- PR #4167 Port `search` to libcudf++ (support multi-column searchsorted) +- PR #4163 Assert Dask CUDA serializers have `Buffer` frames +- PR #4165 List serializable classes once +- PR #4168 IO readers: do not create null mask for non-nullable columns +- PR #4177 Use `uint8` type for host array copy of `Buffer` +- PR #4183 Update Google Test Execution +- PR #4182 Rename cuDF serialize functions to be more generic +- PR #4176 Add option to parallelize setup.py's cythonize +- PR #4191 Porting 
sort.pyx to use new libcudf APIs +- PR #4196 reduce CHANGELOG.md merge conflicts +- PR #4197 Added notebook testing to gpuCI gpu build +- PR #4220 Port strings wrap functionality. +- PR #4204 Port nvtext create-ngrams function +- PR #4219 Port dlpack.pyx to use new libcudf APIs +- PR #4225 Remove stale notebooks +- PR #4233 Porting replace.pyx to use new libcudf APIs +- PR #4223 Fix a few of the Cython warnings +- PR #4224 Optimize concatenate for many columns +- PR #4234 Add BUILD_LEGACY_TESTS cmake option +- PR #4231 Support for custom cuIO data_sink classes. +- PR #4251 Add class to docs in `dask-cudf` `derived_from` +- PR #4261 libxx Cython reorganization +- PR #4274 Support negative position values in slice_strings +- PR #4282 Porting nvstrings conversion functions from new libcudf++ to Python/Cython +- PR #4290 Port Parquet to use new libcudf APIs +- PR #4299 Convert cudf::shift to column-based api +- PR #4301 Add support for writing large ORC files in a chunked manner +- PR #4306 Use libcudf++ `unary.pyx` cast instead of legacy cast +- PR #4295 Port reduce.pyx to libcudf++ API +- PR #4305 Move gpuarrow.pyx and related libarrow_cuda files into `_libxx` +- PR #4244 Port nvstrings Substring Gather/Scatter functions to cuDF Python/Cython +- PR #4280 Port nvstrings Numeric Handling functions to cuDF Python/Cython +- PR #4278 Port filling.pyx to libcudf++ API +- PR #4328 Add memory threshold callbacks for Java RMM event handler +- PR #4336 Move a bunch of internal nvstrings code to use native StringColumns +- PR #4166 Port `is_sorted.pyx` to use libcudf++ APIs +- PR #4351 Remove a bunch of internal usage of Numba; set rmm as cupy allocator +- PR #4333 nvstrings case/capitalization cython bindings +- PR #4345 Removed an undesirable backwards include from /include to /src in cuIO writers.hpp +- PR #4367 Port copying.pyx to use new libcudf +- PR #4362 Move pq_chunked_state struct into its own header to match how orc writer is doing it. 
+- PR #4339 Port libcudf strings `wrap` api to cython/python +- PR #4236 Update dask_cudf.io.to_parquet to use cudf to_parquet +- PR #4311 Port nvstrings String Manipulations functions to cuDF Python/Cython +- PR #4373 Port nvstrings Regular Expressions functions to cuDF Python/Cython +- PR #4308 Replace dask_cudf sort_values and improve set_index +- PR #4407 Enable `.str.slice` & `.str.get` and `.str.zfill` unit-tests +- PR #4412 Require Dask + Distributed 2.12.0+ +- PR #4377 Support loading avro files that contain nested arrays +- PR #4436 Enable `.str.cat` and fix `.str.split` on python side +- PR #4405 Port nvstrings (Sub)string Comparisons functions to cuDF Python/Cython +- PR #4316 Add Java and JNI bindings for substring expression +- PR #4314 Add Java and JNI bindings for string contains +- PR #4461 Port nvstrings Miscellaneous functions to cuDF Python/Cython +- PR #4495 Port nvtext to cuDF Python/Cython +- PR #4503 Port binaryop.pyx to libcudf++ API +- PR #4499 Adding changes to handle include `keep_index` and `RangeIndex` +- PR #4533 Import `tlz` for optional `cytoolz` support +- PR #4493 Skip legacy testing in CI +- PR #4346 Port groupby Cython/Python to use libcudf++ API +- PR #4524 Updating `__setitem__` for DataFrame to use scalar scatter +- PR #4611 Fix to use direct slicing in iloc for multiindex than using gather under `_get_row_major` +- PR #4534 Disable deprecation warnings as errors. +- PR #4542 Remove RMM init/finalize in cudf test fixture. +- PR #4506 Check for multi-dimensional data in column/Series creation +- PR #4549 Add option to disable deprecation warnings. 
+- PR #4516 Add negative value support for `.str.get` +- PR #4563 Remove copying to host for metadata generation in `generate_pandas_metadata` +- PR #4554 Removed raw RMM allocation from `column_device_view` +- PR #4619 Remove usage of `nvstrings` in `data_array_view` +- PR #4654 Upgrade version of `numba` required to `>=0.48.0` +- PR #4035 Port NVText tokenize function to libcudf++ +- PR #4042 Port cudf/io/functions.hpp to Cython for use in IO bindings +- PR #4058 Port hash.pyx to use libcudf++ APIs +- PR #4133 Mask cleanup and fixes: use `int32` dtype, ensure 64 byte padding, handle offsets + +## Bug Fixes + +- PR #3888 Drop `ptr=None` from `DeviceBuffer` call +- PR #3976 Fix string serialization and memory_usage method to be consistent +- PR #3902 Fix conversion of large size GPU array to dataframe +- PR #3953 Fix overflow in column_buffer when computing the device buffer size +- PR #3959 Add missing hash-dispatch function for cudf.Series +- PR #3970 Fix for Series Pickle +- PR #3964 Restore legacy NVStrings and NVCategory dependencies in Java jar +- PR #3982 Fix java unary op enum and add missing ops +- PR #3999 Fix issue serializing empty string columns (java) +- PR #3979 Add `name` to Series serialize and deserialize +- PR #4005 Fix null mask allocation bug in gather_bitmask +- PR #4000 Fix dask_cudf sort_values performance for single partitions +- PR #4007 Fix for copy_bitmask issue with uninitialized device_buffer +- PR #4037 Fix JNI quantile compile issue +- PR #4054 Fixed JNI to deal with reduction API changes +- PR #4052 Fix for round-robin when num_partitions divides nrows. +- PR #4061 Add NDEBUG guard on `constexpr_assert`. 
+- PR #4049 Fix `cudf::split` issue returning one less than expected column vectors +- PR #4065 Parquet writer: fix for out-of-range dictionary indices +- PR #4066 Fixed mismatch with dtype enums +- PR #4078 Fix joins for when column_in_common input parameter is empty +- PR #4080 Fix multi-index dask test with sort issue +- PR #4084 Update Java for removal of CATEGORY type +- PR #4086 ORC reader: fix potentially incorrect timestamp decoding in the last rowgroup +- PR #4089 Fix dask groupby multiindex test case issues in join +- PR #4097 Fix strings concatenate logic with column offsets +- PR #4076 All null string entries should have null data buffer +- PR #4109 Use rmm::device_vector instead of thrust::device_vector +- PR #4113 Use `.nvstrings` in `StringColumn.sum(...)` +- PR #4116 Fix a bug in contiguous_split() where tables with mixed column types could corrupt string output +- PR #4125 Fix type enum to account for added Dictionary type in `types.hpp` +- PR #4132 Fix `hash_partition` null mask allocation +- PR #4137 Update Java for mutating fill and rolling window changes +- PR #4184 Add missing except+ to Cython bindings +- PR #4141 Fix NVStrings test_convert failure in 10.2 build +- PR #4156 Make fill/copy_range no-op on empty columns +- PR #4158 Fix merge issue with empty table return if one of the two tables are empty +- PR #4162 Properly handle no index metadata generation for to_parquet +- PR #4175 Fix `__sizeof__` calculation in `StringColumn` +- PR #4155 Update groupby group_offsets size and fix unnecessary device dispatch. 
+- PR #4186 Fix from_timestamps 12-hour specifiers support +- PR #4198 Fix constructing `RangeIndex` from `range` +- PR #4192 Parquet writer: fix OOB read when computing string hash +- PR #4201 Fix java window tests +- PR #4199 Fix potential race condition in memcpy_block +- PR #4221 Fix series dict alignment to not drop index name +- PR #4218 Fix `get_aggregation` definition with `except *` +- PR #4215 Fix performance regression in strings::detail::concatenate +- PR #4214 Alter ValueError exception for GPU accelerated Parquet writer to properly report `categorical` columns are not supported. +- PR #4232 Fix handling empty tuples of children in string columns +- PR #4222 Fix no-return compile error in binop-null-test +- PR #4242 Fix for rolling tests CI failure +- PR #4245 Fix race condition in parquet reader +- PR #4253 Fix dictionary decode and set_keys with column offset +- PR #4258 Fix dask-cudf losing index name in `reset_index` +- PR #4268 Fix java build for hash aggregate +- PR #4275 Fix bug in searching nullable values in non-nullable search space in `upper_bound` +- PR #4273 Fix losing `StringIndex` name in dask `_meta_nonempty` +- PR #4279 Fix converting `np.float64` to Scalar +- PR #4285 Add init files for cython pkgs and fix `setup.py` +- PR #4287 Parquet reader: fix empty string potentially read as null +- PR #4310 Fix empty values case in groupby +- PR #4297 Fix specification of package_data in setup.py +- PR #4302 Fix `_is_local_filesystem` check +- PR #4303 Parquet reader: fix empty columns missing from table +- PR #4317 Fix fill() when using string_scalar with an empty string +- PR #4324 Fix slice_strings for out-of-range start position value +- PR #4115 Serialize an empty column table with non zero rows +- PR #4327 Preemptive dispatch fix for changes in dask#5973 +- PR #4379 Correct regex reclass count variable to number of pairs instead of the number of literals +- PR #4364 Fix libcudf zfill strings to ignore '+/-' chars +- PR #4358 Fix 
strings::concat where narep is an empty string +- PR #4369 Fix race condition in gpuinflate +- PR #4390 Disable ScatterValid and ScatterNull legacy tests +- PR #4399 Make scalar destructor virtual. +- PR #4398 Fixes the failure in groupby in MIN/MAX on strings when some groups are empty +- PR #4406 Fix sorted merge issue with null values and ascending=False +- PR #4445 Fix string issue for parquet reader and support `keep_index` for `scatter_to_tables` +- PR #4423 Tighten up Dask serialization checks +- PR #4537 Use `elif` in Dask deserialize check +- PR #4682 Include frame lengths in Dask serialized header +- PR #4438 Fix repl-template error for replace_with_backrefs +- PR #4434 Fix join_strings logic with all-null strings and non-null narep +- PR #4465 Fix use_pandas_index having no effect in libcudf++ parquet reader +- PR #4464 Update Cmake to always link in libnvToolsExt +- PR #4467 Fix dropna issue for a DataFrame having np.nan +- PR #4480 Fix string_scalar.value to return an empty string_view for empty string-scalar +- PR #4474 Fix to not materialize RangeIndex in copy_categories +- PR #4496 Skip tests which require 2+ GPUs +- PR #4494 Update Java memory event handler for new RMM resource API +- PR #4505 Fix 0 length buffers during serialization +- PR #4482 Fix `.str.rsplit`, `.str.split`, `.str.find`, `.str.rfind`, `.str.index`, `.str.rindex` and enable related tests +- PR #4513 Backport scalar virtual destructor fix +- PR #4519 Remove `n` validation for `nlargest` & `nsmallest` and add negative support for `n` +- PR #4596 Fix `_popn` issue with performance +- PR #4526 Fix index slicing issue for index in case of an empty dataframe +- PR #4538 Fix cudf::strings::slice_strings(step=-1) for empty strings +- PR #4557 Disable compile-errors on deprecation warnings, for JNI +- PR #4669 Fix `dask_cudf` categorical nonempty meta handling +- PR #4576 Fix typo in `serialize.py` +- PR #4571 Load JNI native dependencies for Scalar class +- PR #4598 Fix to handle 
`pd.DataFrame` in `DataFrame.__init__` +- PR #4594 Fix exec dangling pointer issue in legacy groupby +- PR #4591 Fix issue when reading consecutive rowgroups +- PR #4600 Fix missing include in benchmark_fixture.hpp +- PR #4588 Fix ordering issue in `MultiIndex` +- PR #4632 Fix handling of empty inputs to concatenate +- PR #4630 Remove dangling reference to RMM exec policy in drop duplicates tests. +- PR #4625 Fix hash-based repartition bug in dask_cudf +- PR #4662 Fix to handle `keep_index` in `partition_by_hash` +- PR #4683 Fix Slicing issue with categorical column in DataFrame +- PR #4676 Fix bug in `_shuffle_group` for repartition +- PR #4681 Fix `test_repr` tests that were generating a `RangeIndex` for column names +- PR #4729 Fix `fsspec` versioning to prevent dask test failures +- PR #4145 Support empty index case in DataFrame._from_table +- PR #4108 Fix dtype bugs in dask_cudf metadata (metadata_nonempty overhaul) +- PR #4138 Really fix strings concatenate logic with column offsets +- PR #4119 Fix binary ops slowdown using jitify -remove-unused-globals + + +# cuDF 0.12.0 (04 Feb 2020) + +## New Features + +- PR #3759 Updated 10 Minutes with clarification on how `dask_cudf` uses `cudf` API +- PR #3224 Define and implement new join APIs. +- PR #3284 Add gpu-accelerated parquet writer +- PR #3254 Python redesign for libcudf++ +- PR #3336 Add `from_dlpack` and `to_dlpack` +- PR #3555 Add column names support to libcudf++ io readers and writers +- PR #3527 Add string functionality for merge API +- PR #3610 Add memory_usage to DataFrame and Series APIs +- PR #3557 Add contiguous_split() function. +- PR #3619 Support CuPy 7 +- PR #3604 Add nvtext ngrams-tokenize function +- PR #3403 Define and implement new stack + tile APIs +- PR #3627 Adding cudf::sort and cudf::sort_by_key +- PR #3597 Implement new sort based groupby +- PR #3776 Add column equivalence comparator (using epsilon for float equality) +- PR #3667 Define and implement round-robin partition API. 
+- PR #3690 Add bools_to_mask +- PR #3761 Introduce a Frame class and make Index, DataFrame and Series subclasses +- PR #3538 Define and implement left semi join and left anti join +- PR #3683 Added support for multiple delimiters in `nvtext.token_count()` +- PR #3792 Adding is_nan and is_notnan +- PR #3594 Adding clamp support to libcudf++ + +## Improvements + +- PR #3124 Add support for grand-children in cudf column classes +- PR #3292 Port NVStrings regex contains function +- PR #3409 Port NVStrings regex replace function +- PR #3417 Port NVStrings regex findall function +- PR #3351 Add warning when filepath resolves to multiple files in cudf readers +- PR #3370 Port NVStrings strip functions +- PR #3453 Port NVStrings IPv4 convert functions to cudf strings column +- PR #3441 Port NVStrings url encode/decode to cudf strings column +- PR #3364 Port NVStrings split functions +- PR #3463 Port NVStrings partition/rpartition to cudf strings column +- PR #3502 ORC reader: add option to read DECIMALs as INT64 +- PR #3461 Add a new overload to allocate_like() that takes explicit type and size params. 
+- PR #3590 Specialize hash functions for floating point +- PR #3569 Use `np.asarray` in `StringColumn.deserialize` +- PR #3553 Support Python NoneType in numeric binops +- PR #3511 Support DataFrame / Series mixed arithmetic +- PR #3567 Include `strides` in `__cuda_array_interface__` +- PR #3608 Update OPS codeowner group name +- PR #3431 Port NVStrings translate to cudf strings column +- PR #3507 Define and implement new binary operation APIs +- PR #3620 Add stream parameter to unary ops detail API +- PR #3593 Adding begin/end for mutable_column_device_view +- PR #3587 Merge CHECK_STREAM & CUDA_CHECK_LAST to CHECK_CUDA +- PR #3733 Rework `hash_partition` API +- PR #3655 Use move with make_pair to avoid copy construction +- PR #3402 Define and implement new quantiles APIs +- PR #3612 Add ability to customize the JIT kernel cache path +- PR #3647 Remove PatchedNumbaDeviceArray with CuPy 6.6.0 +- PR #3641 Remove duplicate definitions of CUDA_DEVICE_CALLABLE +- PR #3640 Enable memory_usage in dask_cudf (also adds pd.Index from_pandas) +- PR #3654 Update Jitify submodule ref to include gcc-8 fix +- PR #3639 Define and implement `nans_to_nulls` +- PR #3561 Rework contains implementation in search +- PR #3616 Add aggregation infrastructure for argmax/argmin. +- PR #3673 Parquet reader: improve rounding of timestamp conversion to seconds +- PR #3699 Stringify libcudacxx headers for binary op JIT +- PR #3697 Improve column insert performance for wide frames +- PR #3653 Make `gather_bitmask_kernel` more reusable. +- PR #3710 Remove multiple CMake configuration steps from root build script +- PR #3657 Define and implement compiled binops for string column comparisons +- PR #3520 Change read_parquet defaults and add warnings +- PR #3780 Java APIs for selecting a GPU +- PR #3796 Improve on round-robin with the case when number partitions greater than number of rows. 
+- PR #3805 Avoid CuPy 7.1.0 for now +- PR #3758 detail::scatter variant with map iterator support +- PR #3882 Fail loudly when creating a StringColumn from nvstrings with > MAX_VAL(int32) bytes +- PR #3823 Add header file for detail search functions +- PR #2438 Build GBench Benchmarks in CI +- PR #3713 Adding aggregation support to rolling_window +- PR #3875 Add abstract sink for IO writers, used by ORC and Parquet writers for now +- PR #3916 Refactor gather bindings + +## Bug Fixes + +- PR #3618 Update 10 minutes to cudf and cupy to hide warning that were being shown in the docs +- PR #3550 Update Java package to 0.12 +- PR #3549 Fix index name issue with iloc with RangeIndex +- PR #3562 Fix 4GB limit for gzipped-compressed csv files +- PR #2981 enable build.sh to build all targets without installation +- PR #3563 Use `__cuda_array_interface__` for serialization +- PR #3564 Fix cuda memory access error in gather_bitmask_kernel +- PR #3548 Replaced CUDA_RT_CALL with CUDA_TRY +- PR #3486 Pandas > 0.25 compatibility +- PR #3622 Fix new warnings and errors when building with gcc-8 +- PR #3588 Remove avro reader column order reversal +- PR #3629 Fix hash map test failure +- PR #3637 Fix sorted set_index operations in dask_cudf +- PR #3663 Fix libcudf++ ORC reader microseconds and milliseconds conversion +- PR #3668 Fixing CHECK_CUDA debug build issue +- PR #3684 Fix ends_with logic for matching string case +- PR #3691 Fix create_offsets to handle offset correctly +- PR #3687 Fixed bug while passing input GPU memory pointer in `nvtext.scatter_count()` +- PR #3701 Fix hash_partition hashing all columns instead of columns_to_hash +- PR #3694 Allow for null columns parameter in `csv_writer` +- PR #3706 Removed extra type-dispatcher call from merge +- PR #3704 Changed the default delimiter to `whitespace` for nvtext methods. 
+- PR #3741 Construct DataFrame from dict-of-Series with alignment +- PR #3724 Update rmm version to match release +- PR #3743 Fix for `None` data in `__array_interface__` +- PR #3731 Fix performance of zero sized dataframe slice +- PR #3709 Fix inner_join incorrect result issue +- PR #3734 Update numba to 0.46 in conda files +- PR #3738 Update libxx cython types.hpp path +- PR #3672 Fix to_host issue with column_view having offset +- PR #3730 CSV reader: Set invalid float values to NaN/null +- PR #3670 Floor when casting between timestamps of different precisions +- PR #3728 Fix apply_boolean_mask issue with non-null string column +- PR #3769 Don't look for a `name` attribute in column +- PR #3783 Bind cuDF operators to Dask Dataframe +- PR #3775 Fix segfault when reading compressed CSV files larger than 4GB +- PR #3799 Align indices of Series inputs when adding as columns to DataFrame +- PR #3803 Keep name when unpickling Index objects +- PR #3804 Fix cuda crash in AVRO reader +- PR #3766 Remove references to cudf::type_id::CATEGORY from IO code +- PR #3817 Don't always deepcopy an index +- PR #3821 Fix OOB read in gpuinflate prefetcher +- PR #3829 Parquet writer: fix empty dataframe causing cuda launch errors +- PR #3835 Fix memory leak in Cython when dealing with nulls in string columns +- PR #3866 Remove unnecessary if check in NVStrings.create_offsets +- PR #3858 Fixes the broken debug build after #3728 +- PR #3850 Fix merge typecast scope issue and resulting memory leak +- PR #3855 Fix MultiColumn recreation with reset_index +- PR #3869 Fixed size calculation in NVStrings::byte_count() +- PR #3868 Fix apply_grouped moving average example +- PR #3900 Properly link `NVStrings` and `NVCategory` into tests +- PR #3868 Fix apply_grouped moving average example +- PR #3871 Fix `split_out` error +- PR #3886 Fix string column materialization from column view +- PR #3893 Parquet reader: fix segfault reading empty parquet file +- PR #3931 Dask-cudf groupby `.agg` 
multicolumn handling fix +- PR #4017 Fix memory leaks in `GDF_STRING` cython handling and `nans_to_nulls` cython + + +# cuDF 0.11.0 (11 Dec 2019) + +## New Features + +- PR #2905 Added `Series.median()` and null support for `Series.quantile()` +- PR #2930 JSON Reader: Support ARROW_RANDOM_FILE input +- PR #2956 Add `cudf::stack` and `cudf::tile` +- PR #2980 Added nvtext is_vowel/is_consonant functions +- PR #2987 Add `inplace` arg to `DataFrame.reset_index` and `Series` +- PR #3011 Added libcudf++ transition guide +- PR #3129 Add strings column factory from `std::vector`s +- PR #3054 Add parquet reader support for decimal data types +- PR #3022 adds DataFrame.astype for cuDF dataframes +- PR #2962 Add isnull(), notnull() and related functions +- PR #3025 Move search files to legacy +- PR #3068 Add `scalar` class +- PR #3094 Adding `any` and `all` support from libcudf +- PR #3130 Define and implement new `column_wrapper` +- PR #3143 Define and implement new copying APIs `slice` and `split` +- PR #3161 Move merge files to legacy +- PR #3079 Added support to write ORC files given a local path +- PR #3192 Add dtype param to cast `DataFrame` on init +- PR #3213 Port cuIO to libcudf++ +- PR #3222 Add nvtext character tokenizer +- PR #3223 Java expose underlying buffers +- PR #3300 Add `DataFrame.insert` +- PR #3263 Define and implement new `valid_if` +- PR #3278 Add `to_host` utility to copy `column_view` to host +- PR #3087 Add new cudf::experimental bool8 wrapper +- PR #3219 Construct column from column_view +- PR #3250 Define and implement new merge APIs +- PR #3144 Define and implement new hashing APIs `hash` and `hash_partition` +- PR #3229 Define and implement new search APIs +- PR #3308 java add API for memory usage callbacks +- PR #2691 Row-wise reduction and scan operations via CuPy +- PR #3291 Add normalize_nans_and_zeros +- PR #3187 Define and implement new replace APIs +- PR #3356 Add vertical concatenation for table/columns +- PR #3344 java split API +- PR 
#2791 Add `groupby.std()` +- PR #3368 Enable dropna argument in dask_cudf groupby +- PR #3298 add null replacement iterator for column_device_view +- PR #3297 Define and implement new groupby API. +- PR #3396 Update device_atomics with new bool8 and timestamp specializations +- PR #3411 Java host memory management API +- PR #3393 Implement df.cov and enable covariance/correlation in dask_cudf +- PR #3401 Add dask_cudf ORC writer (to_orc) +- PR #3331 Add copy_if_else +- PR #3427 Define and Implement new multi-search API +- PR #3442 Add Bool-index + Multi column + DataFrame support for set-item +- PR #3172 Define and implement new fill/repeat/copy_range APIs +- PR #3490 Add pair iterators for columns +- PR #3497 Add DataFrame.drop(..., inplace=False) argument +- PR #3469 Add string functionality for replace API +- PR #3273 Define and implement new reduction APIs + +## Improvements + +- PR #2904 Move gpu decompressors to cudf::io namespace +- PR #2977 Moved old C++ test utilities to legacy directory. 
+- PR #2965 Fix slow orc reader perf with large uncompressed blocks +- PR #2995 Move JIT type utilities to legacy directory +- PR #2927 Add ``Table`` and ``TableView`` extension classes that wrap legacy cudf::table +- PR #3005 Renames `cudf::exp` namespace to `cudf::experimental` +- PR #3008 Make safe versions of `is_null` and `is_valid` in `column_device_view` +- PR #3026 Move fill and repeat files to legacy +- PR #3027 Move copying.hpp and related source to legacy folder +- PR #3014 Snappy decompression optimizations +- PR #3032 Use `asarray` to coerce indices to a NumPy array +- PR #2996 IO Readers: Replace `cuio::device_buffer` with `rmm::device_buffer` +- PR #3051 Specialized hash function for strings column +- PR #3065 Select and Concat for cudf::experimental::table +- PR #3080 Move `valid_if.cuh` to `legacy/` +- PR #3052 Moved replace.hpp functionality to legacy +- PR #3091 Move join files to legacy +- PR #3092 Implicitly init RMM if Java allocates before init +- PR #3029 Update gdf_ numeric types with stdint and move to cudf namespace +- PR #3052 Moved replace.hpp functionality to legacy +- PR #2955 Add cmake option to only build for present GPU architecture +- PR #3070 Move functions.h and related source to legacy +- PR #2951 Allow set_index to handle a list of column names +- PR #3093 Move groupby files to legacy +- PR #2988 Removing GIS functionality (now part of cuSpatial library) +- PR #3067 Java method to return size of device memory buffer +- PR #3083 Improved some binary operation tests to include null testing. 
+- PR #3084 Update to arrow-cpp and pyarrow 0.15.0 +- PR #3071 Move cuIO to legacy +- PR #3126 Round 2 of snappy decompression optimizations +- PR #3046 Define and implement new copying APIs `empty_like` and `allocate_like` +- PR #3128 Support MultiIndex in DataFrame.join +- PR #2971 Added initial gather and scatter methods for strings_column_view +- PR #3133 Port NVStrings to cudf column: count_characters and count_bytes +- PR #2991 Added strings column functions concatenate and join_strings +- PR #3028 Define and implement new `gather` APIs. +- PR #3135 Add nvtx utilities to cudf::nvtx namespace +- PR #3021 Java host side concat of serialized buffers +- PR #3138 Move unary files to legacy +- PR #3170 Port NVStrings substring functions to cudf strings column +- PR #3159 Port NVStrings is-chars-types function to cudf strings column +- PR #3154 Make `table_view_base.column()` const and add `mutable_table_view.column()` +- PR #3175 Set cmake cuda version variables +- PR #3171 Move deprecated error macros to legacy +- PR #3191 Port NVStrings integer convert ops to cudf column +- PR #3189 Port NVStrings find ops to cudf column +- PR #3352 Port NVStrings convert float functions to cudf strings column +- PR #3193 Add cuPy as a formal dependency +- PR #3195 Support for zero columned `table_view` +- PR #3165 Java device memory size for string category +- PR #3205 Move transform files to legacy +- PR #3202 Rename and move error.hpp to public headers +- PR #2878 Use upstream merge code in dask_cudf +- PR #3217 Port NVStrings upper and lower case conversion functions +- PR #3350 Port NVStrings booleans convert functions +- PR #3231 Add `column::release()` to give up ownership of contents. 
+- PR #3157 Use enum class rather than enum for mask_allocation_policy +- PR #3232 Port NVStrings datetime conversion to cudf strings column +- PR #3136 Define and implement new transpose API +- PR #3237 Define and implement new transform APIs +- PR #3245 Move binaryop files to legacy +- PR #3241 Move stream_compaction files to legacy +- PR #3166 Move reductions to legacy +- PR #3261 Small cleanup: remove `== true` +- PR #3271 Update rmm API based on `rmm.reinitialize(...)` change +- PR #3266 Remove optional checks for CuPy +- PR #3268 Adding null ordering per column feature when sorting +- PR #3239 Adding floating point specialization to comparators for NaNs +- PR #3270 Move predicates files to legacy +- PR #3281 Add to_host specialization for strings in column test utilities +- PR #3282 Add `num_bitmask_words` +- PR #3252 Add new factory methods to include passing an existing null mask +- PR #3288 Make `bit.cuh` utilities usable from host code. +- PR #3287 Move rolling windows files to legacy +- PR #3182 Define and implement new unary APIs `is_null` and `is_not_null` +- PR #3314 Drop `cython` from run requirements +- PR #3301 Add tests for empty column wrapper. 
+- PR #3294 Update to arrow-cpp and pyarrow 0.15.1 +- PR #3310 Add `row_hasher` and `element_hasher` utilities +- PR #3272 Support non-default streams when creating/destroying hash maps +- PR #3286 Clean up the starter code on README +- PR #3332 Port NVStrings replace to cudf strings column +- PR #3354 Define and implement new `scatter` APIs +- PR #3322 Port NVStrings pad operations to cudf strings column +- PR #3345 Add cache member for number of characters in string_view class +- PR #3299 Define and implement new `is_sorted` APIs +- PR #3328 Partition by stripes in dask_cudf ORC reader +- PR #3243 Use upstream join code in dask_cudf +- PR #3371 Add `select` method to `table_view` +- PR #3309 Add java and JNI bindings for search bounds +- PR #3305 Define and implement new rolling window APIs +- PR #3380 Concatenate columns of strings +- PR #3382 Add fill function for strings column +- PR #3391 Move device_atomics_tests.cu files to legacy +- PR #3303 Define and implement new stream compaction APIs `copy_if`, `drop_nulls`, + `apply_boolean_mask`, `drop_duplicate` and `unique_count`. +- PR #3387 Strings column gather function +- PR #3440 Strings column scatter function +- PR #3389 Move quantiles.hpp + group_quantiles.hpp files to legacy +- PR #3397 Port unary cast to libcudf++ +- PR #3398 Move reshape.hpp files to legacy +- PR #3395 Port NVStrings regex extract to cudf strings column +- PR #3423 Port NVStrings htoi to cudf strings column +- PR #3425 Strings column copy_if_else implementation +- PR #3422 Move utilities to legacy +- PR #3201 Define and implement new datetime_ops APIs +- PR #3421 Port NVStrings find_multiple to cudf strings column +- PR #3448 Port scatter_to_tables to libcudf++ +- PR #3458 Update strings sections in the transition guide +- PR #3462 Add `make_empty_column` and update `empty_like`. +- PR #3465 Port `aggregation` traits and utilities. 
+- PR #3214 Define and implement new unary operations APIs +- PR #3475 Add `bitmask_to_host` column utility +- PR #3487 Add is_boolean trait and random timestamp generator for testing +- PR #3492 Small cleanup (remove std::abs) and comment +- PR #3407 Allow multiple row-groups per task in dask_cudf read_parquet +- PR #3512 Remove unused CUDA conda labels +- PR #3500 cudf::fill()/cudf::repeat() support for strings columns. +- PR #3438 Update scalar and scalar_device_view to better support strings +- PR #3414 Add copy_range function for strings column +- PR #3685 Add string support to contiguous_split. +- PR #3471 Add scalar/column, column/scalar and scalar/scalar overloads to copy_if_else. +- PR #3451 Add support for implicit typecasting of join columns + +## Bug Fixes + +- PR #2895 Fixed dask_cudf group_split behavior to handle upstream rearrange_by_divisions +- PR #3048 Support for zero columned tables +- PR #3030 Fix snappy decoding regression in PR #3014 +- PR #3041 Fixed exp to experimental namespace name change issue +- PR #3056 Add additional cmake hint for finding local build of RMM files +- PR #3060 Move copying.hpp includes to legacy +- PR #3139 Fixed java RMM auto initialization +- PR #3141 Java fix for relocated IO headers +- PR #3149 Rename column_wrapper.cuh to column_wrapper.hpp +- PR #3168 Fix mutable_column_device_view head const_cast +- PR #3199 Update JNI includes for legacy moves +- PR #3204 ORC writer: Fix ByteRLE encoding of NULLs +- PR #2994 Fix split_out-support bug with hash_object_dispatch +- PR #3212 Fix string to date casting when format is not specified +- PR #3218 Fixes `row_lexicographic_comparator` issue with handling two tables +- PR #3228 Default initialize RMM when Java native dependencies are loaded +- PR #3012 replacing instances of `to_gpu_array` with `mem` +- PR #3236 Fix Numba 0.46+/CuPy 6.3 interface compatibility +- PR #3276 Update JNI includes for legacy moves +- PR #3256 Fix orc writer crash with multiple string columns +- 
PR #3211 Fix breaking change caused by rapidsai/rmm#167 +- PR #3265 Fix dangling pointer in `is_sorted` +- PR #3267 ORC writer: fix incorrect ByteRLE encoding of long literal runs +- PR #3277 Fix invalid reference to deleted temporary in `is_sorted`. +- PR #3274 ORC writer: fix integer RLEv2 mode2 unsigned base value encoding +- PR #3279 Fix shutdown hang issues with pinned memory pool init executor +- PR #3280 Invalid children check in mutable_column_device_view +- PR #3289 fix java memory usage API for empty columns +- PR #3293 Fix loading of csv files zipped on MacOS (disabled zip min version check) +- PR #3295 Fix storing invalid RMM exec policies. +- PR #3307 Add pd.RangeIndex to from_pandas to fix dask_cudf meta_nonempty bug +- PR #3313 Fix public headers including non-public headers +- PR #3318 Revert arrow to 0.15.0 temporarily to unblock downstream projects CI +- PR #3317 Fix index-argument bug in dask_cudf parquet reader +- PR #3323 Fix `insert` non-assert test case +- PR #3341 Fix `Series` constructor converting NoneType to "None" +- PR #3326 Fix and test for detail::gather map iterator type inference +- PR #3334 Remove zero-size exception check from make_strings_column factories +- PR #3333 Fix compilation issues with `constexpr` functions not marked `__device__` +- PR #3340 Make all benchmarks use cudf base fixture to initialize RMM pool +- PR #3337 Fix Java to pad validity buffers to 64-byte boundary +- PR #3362 Fix `find_and_replace` upcasting series for python scalars and lists +- PR #3357 Disabling `column_view` iterators for non fixed-width types +- PR #3383 Fix : properly compute null counts for rolling_window. 
+- PR #3386 Removing external includes from `column_view.hpp` +- PR #3369 Add write_partition to dask_cudf to fix to_parquet bug +- PR #3388 Support getitem with bools when DataFrame has a MultiIndex +- PR #3408 Fix String and Column (De-)Serialization +- PR #3372 Fix dask-distributed scatter_by_map bug +- PR #3419 Fix a bug in parse_into_parts (incomplete input causing walking past the end of string). +- PR #3413 Fix dask_cudf read_csv file-list bug +- PR #3416 Fix memory leak in ColumnVector when pulling strings off the GPU +- PR #3424 Fix benchmark build by adding libcudacxx to benchmark's CMakeLists.txt +- PR #3435 Fix diff and shift for empty series +- PR #3439 Fix index-name bug in StringColumn concat +- PR #3445 Fix ORC Writer default stripe size +- PR #3459 Fix printing of invalid entries +- PR #3466 Fix gather null mask allocation for invalid index +- PR #3468 Fix memory leak issue in `drop_duplicates` +- PR #3474 Fix small doc error in capitalize Docs +- PR #3491 Fix more doc errors in NVStrings +- PR #3478 Fix as_index deep copy via Index.rename inplace arg +- PR #3476 Fix ORC reader timezone conversion +- PR #3188 Repr slices up large DataFrames +- PR #3519 Fix strings column concatenate handling zero-sized columns +- PR #3530 Fix copy_if_else test case fail issue +- PR #3523 Fix lgenfe issue with debug build +- PR #3532 Fix potential use-after-free in cudf parquet reader +- PR #3540 Fix unary_op null_mask bug and add missing test cases +- PR #3559 Use HighLevelGraph api in DataFrame constructor (Fix upstream compatibility) +- PR #3572 Fix CI Issue with hypothesis tests that are flaky + + +# cuDF 0.10.0 (16 Oct 2019) + +## New Features + +- PR #2423 Added `groupby.quantile()` +- PR #2522 Add Java bindings for NVStrings backed upper and lower case mutators +- PR #2605 Added Sort based groupby in libcudf +- PR #2607 Add Java bindings for parsing JSON +- PR #2629 Add dropna= parameter to groupby +- PR #2585 ORC & Parquet Readers: Remove millisecond 
timestamp restriction +- PR #2507 Add GPU-accelerated ORC Writer +- PR #2559 Add Series.tolist() +- PR #2653 Add Java bindings for rolling window operations +- PR #2480 Merge `custreamz` codebase into `cudf` repo +- PR #2674 Add __contains__ for Index/Series/Column +- PR #2635 Add support to read from remote and cloud sources like s3, gcs, hdfs +- PR #2722 Add Java bindings for NVTX ranges +- PR #2702 Add make_bool to dataset generation functions +- PR #2394 Move `rapidsai/custrings` into `cudf` +- PR #2734 Final sync of custrings source into cudf +- PR #2724 Add libcudf support for __contains__ +- PR #2777 Add python bindings for porter stemmer measure functionality +- PR #2781 Add issorted to is_monotonic +- PR #2685 Add cudf::scatter_to_tables and cython binding +- PR #2743 Add Java bindings for NVStrings timestamp2long as part of String ColumnVector casting +- PR #2785 Add nvstrings Python docs +- PR #2786 Add benchmarks option to root build.sh +- PR #2802 Add `cudf::repeat()` and `cudf.Series.repeat()` +- PR #2773 Add Fisher's unbiased kurtosis and skew for Series/DataFrame +- PR #2748 Parquet Reader: Add option to specify loading of PANDAS index +- PR #2807 Add scatter_by_map to DataFrame python API +- PR #2836 Add nvstrings.code_points method +- PR #2844 Add Series/DataFrame notnull +- PR #2858 Add GTest type list utilities +- PR #2870 Add support for grouping by Series of arbitrary length +- PR #2719 Series covariance and Pearson correlation +- PR #2207 Beginning of libcudf overhaul: introduce new column and table types +- PR #2869 Add `cudf.CategoricalDtype` +- PR #2838 CSV Reader: Support ARROW_RANDOM_FILE input +- PR #2655 CuPy-based Series and Dataframe .values property +- PR #2803 Added `edit_distance_matrix()` function to calculate pairwise edit distance for each string on a given nvstrings object. 
+- PR #2811 Start of cudf strings column work based on 2207 +- PR #2872 Add Java pinned memory pool allocator +- PR #2969 Add findAndReplaceAll to ColumnVector +- PR #2814 Add Datetimeindex.weekday +- PR #2999 Add timestamp conversion support for string categories +- PR #2918 Add cudf::column timestamp wrapper types + +## Improvements + +- PR #2578 Update legacy_groupby to use libcudf group_by_without_aggregation +- PR #2581 Removed `managed` allocator from hash map classes. +- PR #2571 Remove unnecessary managed memory from gdf_column_concat +- PR #2648 Cython/Python reorg +- PR #2588 Update Series.append documentation +- PR #2632 Replace dask-cudf set_index code with upstream +- PR #2682 Add cudf.set_allocator() function for easier allocator init +- PR #2642 Improve null printing and testing +- PR #2747 Add missing Cython headers / cudftestutil lib to conda package for cuspatial build +- PR #2706 Compute CSV format in device code to speedup performance +- PR #2673 Add support for np.longlong type +- PR #2703 move dask serialization dispatch into cudf +- PR #2728 Add YYMMDD to version tag for nightly conda packages +- PR #2729 Handle file-handle input in to_csv +- PR #2741 CSV Reader: Move kernel functions into its own file +- PR #2766 Improve nvstrings python cmake flexibility +- PR #2756 Add out_time_unit option to csv reader, support timestamp resolutions +- PR #2771 Stopgap alias for to_gpu_matrix() +- PR #2783 Support mapping input columns to function arguments in apply kernels +- PR #2645 libcudf unique_count for Series.nunique +- PR #2817 Dask-cudf: `read_parquet` support for remote filesystems +- PR #2823 improve java data movement debugging +- PR #2806 CSV Reader: Clean-up row offset operations +- PR #2640 Add dask wait/persist example to 10 minute guide +- PR #2828 Optimizations of kernel launch configuration for `DataFrame.apply_rows` and `DataFrame.apply_chunks` +- PR #2831 Add `column` argument to `DataFrame.drop` +- PR #2775 Various optimizations to 
improve __getitem__ and __setitem__ performance +- PR #2810 cudf::allocate_like can optionally always allocate a mask. +- PR #2833 Parquet reader: align page data allocation sizes to 4-bytes to satisfy cuda-memcheck +- PR #2832 Using the new Python bindings for UCX +- PR #2856 Update group_split_cudf to use scatter_by_map +- PR #2890 Optionally keep serialized table data on the host. +- PR #2778 Doc: Updated and fixed some docstrings that were formatted incorrectly. +- PR #2830 Use YYMMDD tag in custreamz nightly build +- PR #2875 Java: Remove synchronized from register methods in MemoryCleaner +- PR #2887 Minor snappy decompression optimization +- PR #2899 Use new RMM API based on Cython +- PR #2788 Guide to Python UDFs +- PR #2919 Change java API to use operators in groupby namespace +- PR #2909 CSV Reader: Avoid row offsets host vector default init +- PR #2834 DataFrame supports setting columns via attribute syntax `df.x = col` +- PR #3147 DataFrame can be initialized from rows via list of tuples +- PR #3539 Restrict CuPy to 6 + +## Bug Fixes + +- PR #2584 ORC Reader: fix parsing of `DECIMAL` index positions +- PR #2619 Fix groupby serialization/deserialization +- PR #2614 Update Java version to match +- PR #2601 Fixes nlargest(1) issue in Series and Dataframe +- PR #2610 Fix a bug in index serialization (properly pass DeviceNDArray) +- PR #2621 Fixes the floordiv issue of not promoting float type when rhs is 0 +- PR #2611 Types Test: fix static casting from negative int to string +- PR #2618 IO Readers: Fix datasource memory map failure for multiple reads +- PR #2628 groupby_without_aggregation non-nullable input table produces non-nullable output +- PR #2615 fix string category partitioning in java API +- PR #2641 fix string category and timeunit concat in the java API +- PR #2649 Fix groupby issue resulting from column_empty bug +- PR #2658 Fix astype() for null categorical columns +- PR #2660 fix column string category and timeunit concat in the java API +- 
PR #2664 ORC reader: fix `skip_rows` larger than first stripe +- PR #2654 Allow Java gdfOrderBy to work with string categories +- PR #2669 AVRO reader: fix non-deterministic output +- PR #2668 Update Java bindings to specify timestamp units for ORC and Parquet readers +- PR #2679 AVRO reader: fix cuda errors when decoding compressed streams +- PR #2692 Add concatenation for data-frame with different headers (empty and non-empty) +- PR #2651 Remove nvidia driver installation from ci/cpu/build.sh +- PR #2697 Ensure csv reader sets datetime column time units +- PR #2698 Return RangeIndex from contiguous slice of RangeIndex +- PR #2672 Fix null and integer handling in round +- PR #2704 Parquet Reader: Fix crash when loading string column with nulls +- PR #2725 Fix Jitify issue with running on Turing using CUDA version < 10 +- PR #2731 Fix building of benchmarks +- PR #2738 Fix java to find new NVStrings locations +- PR #2736 Pin Jitify branch to v0.10 version +- PR #2742 IO Readers: Fix possible silent failures when creating `NvStrings` instance +- PR #2753 Fix java quantile API calls +- PR #2762 Fix validity processing for time in java +- PR #2796 Fix handling string slicing and other nvstrings delegated methods with dask +- PR #2769 Fix link to API docs in README.md +- PR #2772 Handle multiindex pandas Series #2772 +- PR #2749 Fix apply_rows/apply_chunks pessimistic null mask to use in_cols null masks only +- PR #2752 CSV Reader: Fix exception when there's no rows to process +- PR #2716 Added Exception for `StringMethods` in string methods +- PR #2787 Fix Broadcasting `None` to `cudf-series` +- PR #2794 Fix async race in NVCategory::get_value and get_value_bounds +- PR #2795 Fix java build/cast error +- PR #2496 Fix improper merge of two dataframes when names differ +- PR #2824 Fix issue with incorrect result when Numeric Series replace is called several times +- PR #2751 Replace value with null +- PR #2765 Fix Java inequality comparisons for string category +- PR 
#2818 Fix java join API to use new C++ join API +- PR #2841 Fix nvstrings.slice and slice_from for range (0,0) +- PR #2837 Fix join benchmark +- PR #2809 Add hash_df and group_split dispatch functions for dask +- PR #2843 Parquet reader: fix skip_rows when not aligned with page or row_group boundaries +- PR #2851 Deleted existing dask-cudf/record.txt +- PR #2854 Fix column creation from ephemeral objects exposing __cuda_array_interface__ +- PR #2860 Fix boolean indexing when the result is a single row +- PR #2859 Fix tail method issue for string columns +- PR #2852 Fixed `cumsum()` and `cumprod()` on boolean series. +- PR #2865 DaskIO: Fix `read_csv` and `read_orc` when input is list of files +- PR #2750 Fixed casting values to cudf::bool8 so non-zero values always cast to true +- PR #2873 Fixed dask_cudf read_partition bug by generating ParquetDatasetPiece +- PR #2850 Fixes dask_cudf.read_parquet on partitioned datasets +- PR #2896 Properly handle `axis` string keywords in `concat` +- PR #2926 Update rounding algorithm to avoid using fmod +- PR #2968 Fix Java dependency loading when using NVTX +- PR #2963 Fix ORC writer uncompressed block indexing +- PR #2928 CSV Reader: Fix using `byte_range` for large datasets +- PR #2983 Fix sm_70+ race condition in gpu_unsnap +- PR #2964 ORC Writer: Segfault when writing mixed numeric and string columns +- PR #3007 Java: Remove unit test that frees RMM invalid pointer +- PR #3009 Fix orc reader RLEv2 patch position regression from PR #2507 +- PR #3002 Fix CUDA invalid configuration errors reported after loading an ORC file without data +- PR #3035 Update update-version.sh for new docs locations +- PR #3038 Fix uninitialized stream parameter in device_table deleter +- PR #3064 Fixes groupby performance issue +- PR #3061 Add rmmInitialize to nvstrings gtests +- PR #3058 Fix UDF doc markdown formatting +- PR #3059 Add nvstrings python build instructions to contributing.md + + +# cuDF 0.9.0 (21 Aug 2019) + +## New Features + +- PR 
#1993 Add CUDA-accelerated series aggregations: mean, var, std +- PR #2111 IO Readers: Support memory buffer, file-like object, and URL inputs +- PR #2012 Add `reindex()` to DataFrame and Series +- PR #2097 Add GPU-accelerated AVRO reader +- PR #2098 Support binary ops on DFs and Series with mismatched indices +- PR #2160 Merge `dask-cudf` codebase into `cudf` repo +- PR #2149 CSV Reader: Add `hex` dtype for explicit hexadecimal parsing +- PR #2156 Add `upper_bound()` and `lower_bound()` for libcudf tables and `searchsorted()` for cuDF Series +- PR #2158 CSV Reader: Support single, non-list/dict argument for `dtype` +- PR #2177 CSV Reader: Add `parse_dates` parameter for explicit date inference +- PR #1744 cudf::apply_boolean_mask and cudf::drop_nulls support for cudf::table inputs (multi-column) +- PR #2196 Add `DataFrame.dropna()` +- PR #2197 CSV Writer: add `chunksize` parameter for `to_csv` +- PR #2215 `type_dispatcher` benchmark +- PR #2179 Add Java quantiles +- PR #2157 Add __array_function__ to DataFrame and Series +- PR #2212 Java support for ORC reader +- PR #2224 Add DataFrame isna, isnull, notna functions +- PR #2236 Add Series.drop_duplicates +- PR #2105 Add hash-based join benchmark +- PR #2316 Add unique, nunique, and value_counts for datetime columns +- PR #2337 Add Java support for slicing a ColumnVector +- PR #2049 Add cudf::merge (sorted merge) +- PR #2368 Full cudf+dask Parquet Support +- PR #2380 New cudf::is_sorted checks whether cudf::table is sorted +- PR #2356 Java column vector standard deviation support +- PR #2221 MultiIndex full indexing - Support iloc and wildcards for loc +- PR #2429 Java support for getting length of strings in a ColumnVector +- PR #2415 Add `value_counts` for series of any type +- PR #2446 Add __array_function__ for index +- PR #2437 ORC reader: Add 'use_np_dtypes' option +- PR #2382 Add CategoricalAccessor add, remove, rename, and ordering methods +- PR #2464 Native implement `__cuda_array_interface__` for 
Series/Index/Column objects +- PR #2425 Rolling window now accepts array-based user-defined functions +- PR #2442 Add __setitem__ +- PR #2449 Java support for getting byte count of strings in a ColumnVector +- PR #2492 Add groupby.size() method +- PR #2358 Add cudf::nans_to_nulls: convert floating point column into bitmask +- PR #2489 Add drop argument to set_index +- PR #2491 Add Java bindings for ORC reader 'use_np_dtypes' option +- PR #2213 Support s/ms/us/ns DatetimeColumn time unit resolutions +- PR #2536 Add _constructor properties to Series and DataFrame + +## Improvements + +- PR #2103 Move old `column` and `bitmask` files into `legacy/` directory +- PR #2109 added name to Python column classes +- PR #1947 Cleanup serialization code +- PR #2125 More aggregate in java API +- PR #2127 Add in java Scalar tests +- PR #2088 Refactor of Python groupby code +- PR #2130 Java serialization and deserialization of tables. +- PR #2131 Chunk rows logic added to csv_writer +- PR #2129 Add functions in the Java API to support nullable column filtering +- PR #2165 made changes to get_dummies api for it to be available in MethodCache +- PR #2171 Add CodeCov integration, fix doc version, make --skip-tests work when invoking with source +- PR #2184 handle remote orc files for dask-cudf +- PR #2186 Add `getitem` and `getattr` style access to Rolling objects +- PR #2168 Use cudf.Column for CategoricalColumn's categories instead of a tuple +- PR #2193 DOC: cudf::type_dispatcher documentation for specializing dispatched functors +- PR #2199 Better java support for appending strings +- PR #2176 Added column dtype support for datetime, int8, int16 to csv_writer +- PR #2209 Matching `get_dummies` & `select_dtypes` behavior to pandas +- PR #2217 Updated Java bindings to use the new groupby API +- PR #2214 DOC: Update doc instructions to build/install `cudf` and `dask-cudf` +- PR #2220 Update Java bindings for reduction rename +- PR #2232 Move CodeCov upload from build script to 
Jenkins +- PR #2225 refactor to use libcudf for gathering columns in dataframes +- PR #2293 Improve join performance (faster compute_join_output_size) +- PR #2300 Create separate dask codeowners for dask-cudf codebase +- PR #2304 gdf_group_by_without_aggregations returns gdf_column +- PR #2309 Java readers: remove redundant copy of result pointers +- PR #2307 Add `black` and `isort` to style checker script +- PR #2345 Restore removal of old groupby implementation +- PR #2342 Improve `astype()` to operate all ways +- PR #2329 using libcudf cudf::copy for column deep copy +- PR #2344 DOC: docs on code formatting for contributors +- PR #2376 Add inoperative axis= and win_type= arguments to Rolling() +- PR #2378 remove dask for (de-)serialization of cudf objects +- PR #2353 Bump Arrow and Dask versions +- PR #2377 Replace `standard_python_slice` with just `slice.indices()` +- PR #2373 cudf.DataFrame enhancements & Series.values support +- PR #2392 Remove dlpack submodule; make cuDF's Cython API externally accessible +- PR #2430 Updated Java bindings to use the new unary API +- PR #2406 Moved all existing `table` related files to a `legacy/` directory +- PR #2350 Performance related changes to get_dummies +- PR #2420 Remove `cudautils.astype` and replace with `typecast.apply_cast` +- PR #2456 Small improvement to typecast utility +- PR #2458 Fix handling of thirdparty packages in `isort` config +- PR #2459 IO Readers: Consolidate all readers to use `datasource` class +- PR #2475 Exposed type_dispatcher.hpp, nvcategory_util.hpp and wrapper_types.hpp in the include folder +- PR #2484 Enabled building libcudf as a static library +- PR #2453 Streamline CUDA_REL environment variable +- PR #2483 Bundle Boost filesystem dependency in the Java jar +- PR #2486 Java API hash functions +- PR #2481 Adds the ignore_null_keys option to the java api +- PR #2490 Java api: support multiple aggregates for the same column +- PR #2510 Java api: uses table based apply_boolean_mask +- PR 
#2432 Use pandas formatting for console, html, and latex output +- PR #2573 Bump numba version to 0.45.1 +- PR #2606 Fix references to notebooks-contrib + +## Bug Fixes + +- PR #2086 Fixed quantile api behavior mismatch in series & dataframe +- PR #2128 Add offset param to host buffer readers in java API. +- PR #2145 Work around binops validity checks for java +- PR #2146 Work around unary_math validity checks for java +- PR #2151 Fixes bug in cudf::copy_range where null_count was invalid +- PR #2139 matching to pandas describe behavior & fixing nan values issue +- PR #2161 Implicitly convert unsigned to signed integer types in binops +- PR #2154 CSV Reader: Fix bools misdetected as strings dtype +- PR #2178 Fix bug in rolling bindings where a view of an ephemeral column was being taken +- PR #2180 Fix issue with isort reordering `importorskip` below imports depending on them +- PR #2187 fix to honor dtype when numpy arrays are passed to columnops.as_column +- PR #2190 Fix issue in astype conversion of string column to 'str' +- PR #2208 Fix issue with calling `head()` on one row dataframe +- PR #2229 Propagate exceptions from Cython cdef functions +- PR #2234 Fix issue with local build script not properly building +- PR #2223 Fix CUDA invalid configuration errors reported after loading small compressed ORC files +- PR #2162 Setting is_unique and is_monotonic-related attributes +- PR #2244 Fix ORC RLEv2 delta mode decoding with nonzero residual delta width +- PR #2297 Work around `var/std` unsupported only at debug build +- PR #2302 Fixed java serialization corner case +- PR #2355 Handle float16 in binary operations +- PR #2311 Fix copy behaviour for GenericIndex +- PR #2349 Fix issues with String filter in java API +- PR #2323 Fix groupby on categoricals +- PR #2328 Ensure order is preserved in CategoricalAccessor._set_categories +- PR #2202 Fix issue with unary ops mishandling empty input +- PR #2326 Fix for bug in DLPack when reading multiple columns +- PR #2324 
Fix cudf Docker build +- PR #2325 Fix ORC RLEv2 patched base mode decoding with nonzero patch width +- PR #2235 Fix get_dummies to be compatible with dask +- PR #2332 Zero initialize gdf_dtype_extra_info +- PR #2355 Handle float16 in binary operations +- PR #2360 Fix missing dtype handling in cudf.Series & columnops.as_column +- PR #2364 Fix quantile api and other trivial issues around it +- PR #2361 Fixed issue with `codes` of CategoricalIndex +- PR #2357 Fixed inconsistent type of index created with from_pandas vs direct construction +- PR #2389 Fixed Rolling __getattr__ and __getitem__ for offset based windows +- PR #2402 Fixed bug in valid mask computation in cudf::copy_if (apply_boolean_mask) +- PR #2401 Fix to a scalar datetime(of type Days) issue +- PR #2386 Correctly allocate output valids in groupby +- PR #2411 Fixed failures on binary op on single element string column +- PR #2422 Fix Pandas logical binary operation incompatibilities +- PR #2447 Fix CodeCov posting build statuses temporarily +- PR #2450 Fix erroneous null handling in `cudf.DataFrame`'s `apply_rows` +- PR #2470 Fix issues with empty strings and string categories (Java) +- PR #2471 Fix String Column Validity. 
+- PR #2481 Fix java validity buffer serialization +- PR #2485 Updated bytes calculation to use size_t to avoid overflow in column concat +- PR #2461 Fix groupby multiple aggregations same column +- PR #2514 Fix cudf::drop_nulls threshold handling in Cython +- PR #2516 Fix utilities include paths and meta.yaml header paths +- PR #2517 Fix device memory leak in to_dlpack tensor deleter +- PR #2431 Fix local build generated file ownerships +- PR #2511 Added import of orc, refactored exception handlers to not squash fatal exceptions +- PR #2527 Fix index and column input handling in dask_cudf read_parquet +- PR #2466 Fix `dataframe.query` returning null rows erroneously +- PR #2548 Orc reader: fix non-deterministic data decoding at chunk boundaries +- PR #2557 fix cudautils import in string.py +- PR #2521 Fix casting datetimes from/to the same resolution +- PR #2545 Fix MultiIndexes with datetime levels +- PR #2560 Remove duplicate `dlpack` definition in conda recipe +- PR #2567 Fix ColumnVector.fromScalar issues while dealing with null scalars +- PR #2565 Orc reader: fix incorrect data decoding of int64 data types +- PR #2577 Fix search benchmark compilation error by adding necessary header +- PR #2604 Fix a bug in copying.pyx:_normalize_types that upcasted int32 to int64 + + +# cuDF 0.8.0 (27 June 2019) + +## New Features + +- PR #1524 Add GPU-accelerated JSON Lines parser with limited feature set +- PR #1569 Add support for Json objects to the JSON Lines reader +- PR #1622 Add Series.loc +- PR #1654 Add cudf::apply_boolean_mask: faster replacement for gdf_apply_stencil +- PR #1487 cython gather/scatter +- PR #1310 Implemented the slice/split functionality. 
+- PR #1630 Add Python layer to the GPU-accelerated JSON reader +- PR #1745 Add rounding of numeric columns via Numba +- PR #1772 JSON reader: add support for BytesIO and StringIO input +- PR #1527 Support GDF_BOOL8 in readers and writers +- PR #1819 Logical operators (AND, OR, NOT) for libcudf and cuDF +- PR #1813 ORC Reader: Add support for stripe selection +- PR #1828 JSON Reader: add support for bool8 columns +- PR #1833 Add column iterator with/without nulls +- PR #1665 Add the point-in-polygon GIS function +- PR #1863 Series and Dataframe methods for all and any +- PR #1908 cudf::copy_range and cudf::fill for copying/assigning an index or range to a constant +- PR #1921 Add additional formats for typecasting to/from strings +- PR #1807 Add Series.dropna() +- PR #1987 Allow user defined functions in the form of ptx code to be passed to binops +- PR #1948 Add operator functions like `Series.add()` to DataFrame and Series +- PR #1954 Add skip test argument to GPU build script +- PR #2018 Add bindings for new groupby C++ API +- PR #1984 Add rolling window operations Series.rolling() and DataFrame.rolling() +- PR #1542 Python method and bindings for to_csv +- PR #1995 Add Java API +- PR #1998 Add google benchmark to cudf +- PR #1845 Add cudf::drop_duplicates, DataFrame.drop_duplicates +- PR #1652 Added `Series.where()` feature +- PR #2074 Java Aggregates, logical ops, and better RMM support +- PR #2140 Add a `cudf::transform` function +- PR #2068 Concatenation of different typed columns + +## Improvements + +- PR #1538 Replacing LesserRTTI with inequality_comparator +- PR #1703 C++: Added non-aggregating `insert` to `concurrent_unordered_map` with specializations to store pairs with a single atomicCAS when possible. 
+- PR #1422 C++: Added a RAII wrapper for CUDA streams +- PR #1701 Added `unique` method for stringColumns +- PR #1713 Add documentation for Dask-XGBoost +- PR #1666 CSV Reader: Improve performance for files with large number of columns +- PR #1725 Enable the ability to use a single column groupby as its own index +- PR #1759 Add an example showing simultaneous rolling averages to `apply_grouped` documentation +- PR #1746 C++: Remove unused code: `windowed_ops.cu`, `sorting.cu`, `hash_ops.cu` +- PR #1748 C++: Add `bool` nullability flag to `device_table` row operators +- PR #1764 Improve Numerical column: `mean_var` and `mean` +- PR #1767 Speed up Python unit tests +- PR #1770 Added build.sh script, updated CI scripts and documentation +- PR #1739 ORC Reader: Add more pytest coverage +- PR #1696 Added null support in `Series.replace()`. +- PR #1390 Added some basic utility functions for `gdf_column`'s +- PR #1791 Added general column comparison code for testing +- PR #1795 Add printing of git submodule info to `print_env.sh` +- PR #1796 Removing old sort based group by code and gdf_filter +- PR #1811 Added functions for copying/allocating `cudf::table`s +- PR #1838 Improve columnops.column_empty so that it returns typed columns instead of a generic Column +- PR #1890 Add utils.get_dummies- a pandas-like wrapper around one_hot-encoding +- PR #1823 CSV Reader: default the column type to string for empty dataframes +- PR #1827 Create bindings for scalar-vector binops, and update one_hot_encoding to use them +- PR #1817 Operators now support different sized dataframes as long as they don't share different sized columns +- PR #1855 Transition replace_nulls to new C++ API and update corresponding Cython/Python code +- PR #1858 Add `std::initializer_list` constructor to `column_wrapper` +- PR #1846 C++ type-erased gdf_equal_columns test util; fix gdf_equal_columns logic error +- PR #1390 Added some basic utility functions for `gdf_column`s +- PR #1391 Tidy up 
bit-resolution-operation and bitmask class code +- PR #1882 Add iloc functionality to MultiIndex dataframes +- PR #1884 Rolling windows: general enhancements and better coverage for unit tests +- PR #1886 support GDF_STRING_CATEGORY columns in apply_boolean_mask, drop_nulls and other libcudf functions +- PR #1896 Improve performance of groupby with levels specified in dask-cudf +- PR #1915 Improve iloc performance for non-contiguous row selection +- PR #1859 Convert read_json into a C++ API +- PR #1919 Rename libcudf namespace gdf to namespace cudf +- PR #1850 Support left_on and right_on for DataFrame merge operator +- PR #1930 Specialize constructor for `cudf::bool8` to cast argument to `bool` +- PR #1938 Add default constructor for `column_wrapper` +- PR #1930 Specialize constructor for `cudf::bool8` to cast argument to `bool` +- PR #1952 consolidate libcudf public API headers in include/cudf +- PR #1949 Improved selection with boolmask using libcudf `apply_boolean_mask` +- PR #1956 Add support for nulls in `query()` +- PR #1973 Update `std::tuple` to `std::pair` in top-most libcudf APIs and C++ transition guide +- PR #1981 Convert read_csv into a C++ API +- PR #1868 ORC Reader: Support row index for speed up on small/medium datasets +- PR #1964 Added support for list-like types in Series.str.cat +- PR #2005 Use HTML5 details tag in bug report issue template +- PR #2003 Removed few redundant unit-tests from test_string.py::test_string_cat +- PR #1944 Groupby design improvements +- PR #2017 Convert `read_orc()` into a C++ API +- PR #2011 Convert `read_parquet()` into a C++ API +- PR #1756 Add documentation "10 Minutes to cuDF and dask_cuDF" +- PR #2034 Adding support for string columns concatenation using "add" binary operator +- PR #2042 Replace old "10 Minutes" guide with new guide for docs build process +- PR #2036 Make library of common test utils to speed up tests compilation +- PR #2022 Facilitating get_dummies to be a high level api too +- PR #2050 
Namespace IO readers and add back free-form `read_xxx` functions +- PR #2104 Add a functional ``sort=`` keyword argument to groupby +- PR #2108 Add `find_and_replace` for StringColumn for replacing single values +- PR #1803 cuDF/CuPy interoperability documentation + +## Bug Fixes + +- PR #1465 Fix for test_orc.py and test_sparse_df.py test failures +- PR #1583 Fix underlying issue in `as_index()` that was causing `Series.quantile()` to fail +- PR #1680 Add errors= keyword to drop() to fix cudf-dask bug +- PR #1651 Fix `query` function on empty dataframe +- PR #1616 Fix CategoricalColumn to access categories by index instead of iteration +- PR #1660 Fix bug in `loc` when indexing with a column name (a string) +- PR #1683 ORC reader: fix timestamp conversion to UTC +- PR #1613 Improve CategoricalColumn.fillna(-1) performance +- PR #1642 Fix failure of CSV_TEST gdf_csv_test.SkiprowsNrows on multiuser systems +- PR #1709 Fix handling of `datetime64[ms]` in `dataframe.select_dtypes` +- PR #1704 CSV Reader: Add support for the plus sign in number fields +- PR #1687 CSV reader: return an empty dataframe for zero size input +- PR #1757 Concatenating columns with null columns +- PR #1755 Add col_level keyword argument to melt +- PR #1758 Fix df.set_index() when setting index from an empty column +- PR #1749 ORC reader: fix long strings of NULL values resulting in incorrect data +- PR #1742 Parquet Reader: Fix index column name to match PANDAS compat +- PR #1782 Update libcudf doc version +- PR #1783 Update conda dependencies +- PR #1786 Maintain the original series name in series.unique output +- PR #1760 CSV Reader: fix segfault when dtype list only includes columns from usecols list +- PR #1831 build.sh: Assuming python is in PATH instead of using PYTHON env var +- PR #1839 Raise an error instead of segfaulting when transposing a DataFrame with StringColumns +- PR #1840 Retain index correctly during merge left_on right_on +- PR #1825 cuDF: Multiaggregation Groupby 
Failures +- PR #1789 CSV Reader: Fix missing support for specifying `int8` and `int16` dtypes +- PR #1857 Cython Bindings: Handle `bool` columns while calling `column_view_from_NDArrays` +- PR #1849 Allow DataFrame support methods to pass arguments to the methods +- PR #1847 Fixed #1375 by moving the nvstring check into the wrapper function +- PR #1864 Fixing cudf reduction for POWER platform +- PR #1869 Parquet reader: fix Dask timestamps not matching with Pandas (convert to milliseconds) +- PR #1876 add dtype=bool for `any`, `all` to treat integer column correctly +- PR #1875 CSV reader: take NaN values into account in dtype detection +- PR #1873 Add column dtype checking for the all/any methods +- PR #1902 Bug with string iteration in _apply_basic_agg +- PR #1887 Fix for initialization issue in pq_read_arg,orc_read_arg +- PR #1867 JSON reader: add support for null/empty fields, including the 'null' literal +- PR #1891 Fix bug #1750 in string column comparison +- PR #1909 Support of `to_pandas()` of boolean series with null values +- PR #1923 Use prefix removal when two aggs are called on a SeriesGroupBy +- PR #1914 Zero initialize gdf_column local variables +- PR #1959 Add support for comparing boolean Series to scalar +- PR #1966 Ignore index fix in series append +- PR #1967 Compute index __sizeof__ only once for DataFrame __sizeof__ +- PR #1977 Support CUDA installation in default system directories +- PR #1982 Fixes incorrect index name after join operation +- PR #1985 Implement `GDF_PYMOD`, a special modulo that follows python's sign rules +- PR #1991 Parquet reader: fix decoding of NULLs +- PR #1990 Fixes a rendering bug in the `apply_grouped` documentation +- PR #1978 Fix for values being filled in an empty dataframe +- PR #2001 Correctly create MultiColumn from Pandas MultiColumn +- PR #2006 Handle empty dataframe groupby construction for dask +- PR #1965 Parquet Reader: Fix duplicate index column when it's already in `use_cols` +- PR #2033 Add pip to 
conda environment files to fix warning +- PR #2028 CSV Reader: Fix reading of uncompressed files without a recognized file extension +- PR #2073 Fix an issue when gathering columns with NVCategory and nulls +- PR #2053 cudf::apply_boolean_mask return empty column for empty boolean mask +- PR #2066 exclude `IteratorTest.mean_var_output` test from debug build +- PR #2069 Fix JNI code to use read_csv and read_parquet APIs +- PR #2071 Fix bug with unfound transitive dependencies for GTests in Ubuntu 18.04 +- PR #2089 Configure Sphinx to render params correctly +- PR #2091 Fix another bug with unfound transitive dependencies for `cudftestutils` in Ubuntu 18.04 +- PR #2115 Just apply `--disable-new-dtags` instead of trying to define all the transitive dependencies +- PR #2106 Fix errors in JitCache tests caused by sharing of device memory between processes +- PR #2120 Fix errors in JitCache tests caused by running multiple threads on the same data +- PR #2102 Fix memory leak in groupby +- PR #2113 fixed typo in to_csv code example + + +# cudf 0.7.2 (16 May 2019) + +## New Features + +- PR #1735 Added overload for atomicAdd on int64. Streamlined implementation of custom atomic overloads. +- PR #1741 Add MultiIndex concatenation + +## Bug Fixes + +- PR #1718 Fix issue with SeriesGroupBy MultiIndex in dask-cudf +- PR #1734 Python: fix performance regression for groupby count() aggregations +- PR #1768 Cython: fix handling read only schema buffers in gpuarrow reader + + +# cudf 0.7.1 (11 May 2019) + +## New Features + +- PR #1702 Lazy load MultiIndex to return groupby performance to near optimal. 
+ +## Bug Fixes + +- PR #1708 Fix handling of `datetime64[ms]` in `dataframe.select_dtypes` + + +# cuDF 0.7.0 (10 May 2019) + +## New Features + +- PR #982 Implement gdf_group_by_without_aggregations and gdf_unique_indices functions +- PR #1142 Add `GDF_BOOL` column type +- PR #1194 Implement overloads for CUDA atomic operations +- PR #1292 Implemented Bitwise binary ops AND, OR, XOR (&, |, ^) +- PR #1235 Add GPU-accelerated Parquet Reader +- PR #1335 Added local_dict arg in `DataFrame.query()`. +- PR #1282 Add Series and DataFrame.describe() +- PR #1356 Rolling windows +- PR #1381 Add DataFrame._get_numeric_data +- PR #1388 Add CODEOWNERS file to auto-request reviews based on where changes are made +- PR #1396 Add DataFrame.drop method +- PR #1413 Add DataFrame.melt method +- PR #1412 Add DataFrame.pop() +- PR #1419 Initial CSV writer function +- PR #1441 Add Series level cumulative ops (cumsum, cummin, cummax, cumprod) +- PR #1420 Add script to build and test on a local gpuCI image +- PR #1440 Add DatetimeColumn.min(), DatetimeColumn.max() +- PR #1455 Add Series.Shift via Numba kernel +- PR #1441 Add Series level cumulative ops (cumsum, cummin, cummax, cumprod) +- PR #1461 Add Python coverage test to gpu build +- PR #1445 Parquet Reader: Add selective reading of rows and row group +- PR #1532 Parquet Reader: Add support for INT96 timestamps +- PR #1516 Add Series and DataFrame.ndim +- PR #1556 Add libcudf C++ transition guide +- PR #1466 Add GPU-accelerated ORC Reader +- PR #1565 Add build script for nightly doc builds +- PR #1508 Add Series isna, isnull, and notna +- PR #1456 Add Series.diff() via Numba kernel +- PR #1588 Add Index `astype` typecasting +- PR #1301 MultiIndex support +- PR #1599 Level keyword supported in groupby +- PR #929 Add support operations to dataframe +- PR #1609 Groupby accept list of Series +- PR #1658 Support `group_keys=True` keyword in groupby method + +## Improvements + +- PR #1531 Refactor closures as private functions in gpuarrow 
+- PR #1404 Parquet reader page data decoding speedup +- PR #1076 Use `type_dispatcher` in join, quantiles, filter, segmented sort, radix sort and hash_groupby +- PR #1202 Simplify README.md +- PR #1149 CSV Reader: Change convertStrToValue() functions to `__device__` only +- PR #1238 Improve performance of the CUDA trie used in the CSV reader +- PR #1245 Use file cache for JIT kernels +- PR #1278 Update CONTRIBUTING for new conda environment yml naming conventions +- PR #1163 Refactored UnaryOps. Reduced API to two functions: `gdf_unary_math` and `gdf_cast`. Added `abs`, `-`, and `~` ops. Changed bindings to Cython +- PR #1284 Update docs version +- PR #1287 add exclude argument to cudf.select_dtype function +- PR #1286 Refactor some of the CSV Reader kernels into generic utility functions +- PR #1291 fillna in `Series.to_gpu_array()` and `Series.to_array()` can accept the scalar too now. +- PR #1005 generic `reduction` and `scan` support +- PR #1349 Replace modernGPU sort join with thrust. +- PR #1363 Add a dataframe.mean(...) 
that raises NotImplementedError to satisfy `dask.dataframe.utils.is_dataframe_like` +- PR #1319 CSV Reader: Use column wrapper for gdf_column output alloc/dealloc +- PR #1376 Change series quantile default to linear +- PR #1399 Replace CFFI bindings for NVTX functions with Cython bindings +- PR #1389 Refactored `set_null_count()` +- PR #1386 Added macros `GDF_TRY()`, `CUDF_TRY()` and `ASSERT_CUDF_SUCCEEDED()` +- PR #1435 Rework CMake and conda recipes to depend on installed libraries +- PR #1391 Tidy up bit-resolution-operation and bitmask class code +- PR #1439 Add cmake variable to enable compiling CUDA code with -lineinfo +- PR #1462 Add ability to read parquet files from arrow::io::RandomAccessFile +- PR #1453 Convert CSV Reader CFFI to Cython +- PR #1479 Convert Parquet Reader CFFI to Cython +- PR #1397 Add a utility function for producing an overflow-safe kernel launch grid configuration +- PR #1382 Add GPU parsing of nested brackets to cuIO parsing utilities +- PR #1481 Add cudf::table constructor to allocate a set of `gdf_column`s +- PR #1484 Convert GroupBy CFFI to Cython +- PR #1463 Allow and default melt keyword argument var_name to be None +- PR #1486 Parquet Reader: Use device_buffer rather than device_ptr +- PR #1525 Add cudatoolkit conda dependency +- PR #1520 Renamed `src/dataframe` to `src/table` and moved `table.hpp`. Made `types.hpp` to be type declarations only. +- PR #1492 Convert transpose CFFI to Cython +- PR #1495 Convert binary and unary ops CFFI to Cython +- PR #1503 Convert sorting and hashing ops CFFI to Cython +- PR #1522 Use latest release version in update-version CI script +- PR #1533 Remove stale join CFFI, fix memory leaks in join Cython +- PR #1521 Added `row_bitmask` to compute bitmask for rows of a table. Merged `valids_ops.cu` and `bitmask_ops.cu` +- PR #1553 Overload `hash_row` to avoid using initial hash values. 
Updated `gdf_hash` to select between overloads +- PR #1585 Updated `cudf::table` to maintain own copy of wrapped `gdf_column*`s +- PR #1559 Add `except +` to all Cython function definitions to catch C++ exceptions properly +- PR #1617 `has_nulls` and `column_dtypes` for `cudf::table` +- PR #1590 Remove CFFI from the build / install process entirely +- PR #1536 Convert gpuarrow CFFI to Cython +- PR #1655 Add `Column._pointer` as a way to access underlying `gdf_column*` of a `Column` +- PR #1655 Update readme conda install instructions for cudf version 0.6 and 0.7 + + +## Bug Fixes + +- PR #1233 Fix dtypes issue while adding the column to `str` dataframe. +- PR #1254 CSV Reader: fix data type detection for floating-point numbers in scientific notation +- PR #1289 Fix looping over each value instead of each category in concatenation +- PR #1293 Fix Inaccurate error message in join.pyx +- PR #1308 Add atomicCAS overload for `int8_t`, `int16_t` +- PR #1317 Fix catch polymorphic exception by reference in ipc.cu +- PR #1325 Fix dtype of null bitmasks to int8 +- PR #1326 Update build documentation to use -DCMAKE_CXX11_ABI=ON +- PR #1334 Add "na_position" argument to CategoricalColumn sort_by_values +- PR #1321 Fix out of bounds warning when checking Bzip2 header +- PR #1359 Add atomicAnd/Or/Xor for integers +- PR #1354 Fix `fillna()` behaviour when replacing values with different dtypes +- PR #1347 Fixed core dump issue while passing dict_dtypes without column names in `cudf.read_csv()` +- PR #1379 Fixed build failure caused due to error: 'col_dtype' may be used uninitialized +- PR #1392 Update cudf Dockerfile and package_versions.sh +- PR #1385 Added INT8 type to `_schema_to_dtype` for use in GpuArrowReader +- PR #1393 Fixed a bug in `gdf_count_nonzero_mask()` for the case of 0 bits to count +- PR #1395 Update CONTRIBUTING to use the environment variable CUDF_HOME +- PR #1416 Fix bug at gdf_quantile_exact and gdf_quantile_appox +- PR #1421 Fix remove creation of series 
multiple times during `add_column()` +- PR #1405 CSV Reader: Fix memory leaks on read_csv() failure +- PR #1328 Fix CategoricalColumn to_arrow() null mask +- PR #1433 Fix NVStrings/categories includes +- PR #1432 Update NVStrings to 0.7.* to coincide with 0.7 development +- PR #1483 Modify CSV reader to avoid cropping blank quoted characters in non-string fields +- PR #1446 Merge 1275 hotfix from master into branch-0.7 +- PR #1447 Fix legacy groupby apply docstring +- PR #1451 Fix hash join estimated result size is not correct +- PR #1454 Fix local build script improperly change directory permissions +- PR #1490 Require Dask 1.1.0+ for `is_dataframe_like` test or skip otherwise. +- PR #1491 Use more specific directories & groups in CODEOWNERS +- PR #1497 Fix Thrust issue on CentOS caused by missing default constructor of host_vector elements +- PR #1498 Add missing include guard to device_atomics.cuh and separated DEVICE_ATOMICS_TEST +- PR #1506 Fix csv-write call to updated NVStrings method +- PR #1510 Added nvstrings `fillna()` function +- PR #1507 Parquet Reader: Default string data to GDF_STRING +- PR #1535 Fix doc issue to ensure correct labelling of cudf.series +- PR #1537 Fix `undefined reference` link error in HashPartitionTest +- PR #1548 Fix ci/local/build.sh README from using an incorrect image example +- PR #1551 CSV Reader: Fix integer column name indexing +- PR #1586 Fix broken `scalar_wrapper::operator==` +- PR #1591 ORC/Parquet Reader: Fix missing import for FileNotFoundError exception +- PR #1573 Parquet Reader: Fix crash due to clash with ORC reader datasource +- PR #1607 Revert change of `column.to_dense_buffer` always return by copy for performance concerns +- PR #1618 ORC reader: fix assert & data output when nrows/skiprows isn't aligned to stripe boundaries +- PR #1631 Fix failure of TYPES_TEST on some gcc-7 based systems. 
+- PR #1641 CSV Reader: Fix skip_blank_lines behavior with Windows line terminators (\r\n)
+- PR #1648 ORC reader: fix non-deterministic output when skiprows is non-zero
+- PR #1676 Fix groupby `as_index` behaviour with `MultiIndex`
+- PR #1659 Fix bug caused by empty groupbys and multiindex slicing throwing exceptions
+- PR #1656 Correct Groupby failure in dask when un-aggregable columns are left in dataframe.
+- PR #1689 Fix groupby performance regression
+- PR #1694 Add Cython as a runtime dependency since it's required in `setup.py`
+
+
+# cuDF 0.6.1 (25 Mar 2019)
+
+## Bug Fixes
+
+- PR #1275 Fix CentOS exception in DataFrame.hash_partition from using value "returned" by a void function
+
+
+# cuDF 0.6.0 (22 Mar 2019)
+
+## New Features
+
+- PR #760 Raise `FileNotFoundError` instead of `GDF_FILE_ERROR` in `read_csv` if the file does not exist
+- PR #539 Add Python bindings for replace function
+- PR #823 Add Doxygen configuration to enable building HTML documentation for libcudf C/C++ API
+- PR #807 CSV Reader: Add byte_range parameter to specify the range in the input file to be read
+- PR #857 Add Tail method for Series/DataFrame and update Head method to use iloc
+- PR #858 Add series feature hashing support
+- PR #871 CSV Reader: Add support for NA values, including user specified strings
+- PR #893 Adds PyArrow based parquet readers / writers to Python, fix category dtype handling, fix arrow ingest buffer size issues
+- PR #867 CSV Reader: Add support for ignoring blank lines and comment lines
+- PR #887 Add Series digitize method
+- PR #895 Add Series groupby
+- PR #898 Add DataFrame.groupby(level=0) support
+- PR #920 Add feather, JSON, HDF5 readers / writers from PyArrow / Pandas
+- PR #888 CSV Reader: Add prefix parameter for column names, used when parsing without a header
+- PR #913 Add DLPack support: convert between cuDF DataFrame and DLTensor
+- PR #939 Add ORC reader from PyArrow
+- PR #918 Add Series.groupby(level=0) support
+- PR #906 Add binary 
and comparison ops to DataFrame +- PR #958 Support unary and binary ops on indexes +- PR #964 Add `rename` method to `DataFrame`, `Series`, and `Index` +- PR #985 Add `Series.to_frame` method +- PR #985 Add `drop=` keyword to reset_index method +- PR #994 Remove references to pygdf +- PR #990 Add external series groupby support +- PR #988 Add top-level merge function to cuDF +- PR #992 Add comparison binaryops to DateTime columns +- PR #996 Replace relative path imports with absolute paths in tests +- PR #995 CSV Reader: Add index_col parameter to specify the column name or index to be used as row labels +- PR #1004 Add `from_gpu_matrix` method to DataFrame +- PR #997 Add property index setter +- PR #1007 Replace relative path imports with absolute paths in cudf +- PR #1013 select columns with df.columns +- PR #1016 Rename Series.unique_count() to nunique() to match pandas API +- PR #947 Prefixsum to handle nulls and float types +- PR #1029 Remove rest of relative path imports +- PR #1021 Add filtered selection with assignment for Dataframes +- PR #872 Adding NVCategory support to cudf apis +- PR #1052 Add left/right_index and left/right_on keywords to merge +- PR #1091 Add `indicator=` and `suffixes=` keywords to merge +- PR #1107 Add unsupported keywords to Series.fillna +- PR #1032 Add string support to cuDF python +- PR #1136 Removed `gdf_concat` +- PR #1153 Added function for getting the padded allocation size for valid bitmask +- PR #1148 Add cudf.sqrt for dataframes and Series +- PR #1159 Add Python bindings for libcudf dlpack functions +- PR #1155 Add __array_ufunc__ for DataFrame and Series for sqrt +- PR #1168 to_frame for series accepts a name argument + + +## Improvements + +- PR #1218 Add dask-cudf page to API docs +- PR #892 Add support for heterogeneous types in binary ops with JIT +- PR #730 Improve performance of `gdf_table` constructor +- PR #561 Add Doxygen style comments to Join CUDA functions +- PR #813 unified libcudf API functions by 
replacing gpu_ with gdf_ +- PR #822 Add support for `__cuda_array_interface__` for ingest +- PR #756 Consolidate common helper functions from unordered map and multimap +- PR #753 Improve performance of groupby sum and average, especially for cases with few groups. +- PR #836 Add ingest support for arrow chunked arrays in Column, Series, DataFrame creation +- PR #763 Format doxygen comments for csv_read_arg struct +- PR #532 CSV Reader: Use type dispatcher instead of switch block +- PR #694 Unit test utilities improvements +- PR #878 Add better indexing to Groupby +- PR #554 Add `empty` method and `is_monotonic` attribute to `Index` +- PR #1040 Fixed up Doxygen comment tags +- PR #909 CSV Reader: Avoid host->device->host copy for header row data +- PR #916 Improved unit testing and error checking for `gdf_column_concat` +- PR #941 Replace `numpy` call in `Series.hash_encode` with `numba` +- PR #942 Added increment/decrement operators for wrapper types +- PR #943 Updated `count_nonzero_mask` to return `num_rows` when the mask is null +- PR #952 Added trait to map C++ type to `gdf_dtype` +- PR #966 Updated RMM submodule. 
+- PR #998 Add IO reader/writer modules to API docs, fix for missing cudf.Series docs
+- PR #1017 concatenate along columns for Series and DataFrames
+- PR #1002 Support indexing a dataframe with another boolean dataframe
+- PR #1018 Better concatenation for Series and Dataframes
+- PR #1036 Use Numpydoc style docstrings
+- PR #1047 Adding gdf_dtype_extra_info to gdf_column_view_augmented
+- PR #1054 Added default ctor to SerialTrieNode to overcome Thrust issue in CentOS7 + CUDA10
+- PR #1024 CSV Reader: Add support for hexadecimal integers in integral-type columns
+- PR #1033 Update `fillna()` to use libcudf function `gdf_replace_nulls`
+- PR #1066 Added inplace assignment for columns and select_dtypes for dataframes
+- PR #1026 CSV Reader: Change the meaning and type of the quoting parameter to match Pandas
+- PR #1100 Adds `CUDF_EXPECTS` error-checking macro
+- PR #1092 Fix select_dtype docstring
+- PR #1111 Added cudf::table
+- PR #1108 Sorting for datetime columns
+- PR #1120 Return a `Series` (not a `Column`) from `Series.cat.set_categories()`
+- PR #1128 CSV Reader: The last data row does not need to be line terminated
+- PR #1183 Bump Arrow version to 0.12.1
+- PR #1208 Default to CXX11_ABI=ON
+- PR #1252 Fix NVStrings dependencies for cuda 9.2 and 10.0
+- PR #2037 Optimize the existing `gather` and `scatter` routines in `libcudf`
+
+## Bug Fixes
+
+- PR #821 Fix flake8 issues revealed by flake8 update
+- PR #808 Resolved renamed `d_columns_valids` variable name
+- PR #820 CSV Reader: fix the issue where reader adds additional rows when file uses \r\n as a line terminator
+- PR #780 CSV Reader: Fix scientific notation parsing and null values for empty quotes
+- PR #815 CSV Reader: Fix data parsing when tabs are present in the input CSV file
+- PR #850 Fix bug where left joins where the left df has 0 rows causes a crash
+- PR #861 Fix memory leak by preserving the boolean mask index
+- PR #875 Handle unnamed indexes in to/from arrow functions
+- PR #877 Fix 
ingest of 1 row arrow tables in from arrow function
+- PR #876 Added missing `<type_traits>` include
+- PR #889 Deleted test_rmm.py which has now moved to RMM repo
+- PR #866 Merge v0.5.1 numpy ABI hotfix into 0.6
+- PR #917 value_counts return int type on empty columns
+- PR #611 Renamed `gdf_reduce_optimal_output_size()` -> `gdf_reduction_get_intermediate_output_size()`
+- PR #923 fix index for negative slicing for cudf dataframe and series
+- PR #927 CSV Reader: Fix category GDF_CATEGORY hashes not being computed properly
+- PR #921 CSV Reader: Fix parsing errors with delim_whitespace, quotations in the header row, unnamed columns
+- PR #933 Fix handling objects of all nulls in series creation
+- PR #940 CSV Reader: Fix an issue where the last data row is missing when using byte_range
+- PR #945 CSV Reader: Fix incorrect datetime64 when milliseconds or space separator are used
+- PR #959 Groupby: Problem with column name lookup
+- PR #950 Converting dataframe/recarray with non-contiguous arrays
+- PR #963 CSV Reader: Fix another issue with missing data rows when using byte_range
+- PR #999 Fix 0 sized kernel launches and empty sort_index exception
+- PR #993 Fix dtype in selecting 0 rows from objects
+- PR #1009 Fix performance regression in `to_pandas` method on DataFrame
+- PR #1008 Remove custom dask communication approach
+- PR #1001 CSV Reader: Fix a memory access error when reading a large (>2GB) file with date columns
+- PR #1019 Binary Ops: Fix error when one input column has null mask but other doesn't
+- PR #1014 CSV Reader: Fix false positives in bool value detection
+- PR #1034 CSV Reader: Fix parsing floating point precision and leading zero exponents
+- PR #1044 CSV Reader: Fix a segfault when byte range aligns with a page
+- PR #1058 Added support for `DataFrame.loc[scalar]`
+- PR #1060 Fix column creation with all valid nan values
+- PR #1073 CSV Reader: Fix an issue where a column name includes the return character
+- PR #1090 Updating Doxygen Comments
+- PR 
#1080 Fix dtypes returned from loc / iloc because of lists +- PR #1102 CSV Reader: Minor fixes and memory usage improvements +- PR #1174: Fix release script typo +- PR #1137 Add prebuild script for CI +- PR #1118 Enhanced the `DataFrame.from_records()` feature +- PR #1129 Fix join performance with index parameter from using numpy array +- PR #1145 Issue with .agg call on multi-column dataframes +- PR #908 Some testing code cleanup +- PR #1167 Fix issue with null_count not being set after inplace fillna() +- PR #1184 Fix iloc performance regression +- PR #1185 Support left_on/right_on and also on=str in merge +- PR #1200 Fix allocating bitmasks with numba instead of rmm in allocate_mask function +- PR #1213 Fix bug with csv reader requesting subset of columns using wrong datatype +- PR #1223 gpuCI: Fix label on rapidsai channel on gpu build scripts +- PR #1242 Add explicit Thrust exec policy to fix NVCATEGORY_TEST segfault on some platforms +- PR #1246 Fix categorical tests that failed due to bad implicit type conversion +- PR #1255 Fix overwriting conda package main label uploads +- PR #1259 Add dlpack includes to pip build + + +# cuDF 0.5.1 (05 Feb 2019) + +## Bug Fixes + +- PR #842 Avoid using numpy via cimport to prevent ABI issues in Cython compilation + + +# cuDF 0.5.0 (28 Jan 2019) + +## New Features + +- PR #722 Add bzip2 decompression support to `read_csv()` +- PR #693 add ZLIB-based GZIP/ZIP support to `read_csv_strings()` +- PR #411 added null support to gdf_order_by (new API) and cudf_table::sort +- PR #525 Added GitHub Issue templates for bugs, documentation, new features, and questions +- PR #501 CSV Reader: Add support for user-specified decimal point and thousands separator to read_csv_strings() +- PR #455 CSV Reader: Add support for user-specified decimal point and thousands separator to read_csv() +- PR #439 add `DataFrame.drop` method similar to pandas +- PR #356 add `DataFrame.transpose` method and `DataFrame.T` property similar to pandas +- PR 
#505 CSV Reader: Add support for user-specified boolean values +- PR #350 Implemented Series replace function +- PR #490 Added print_env.sh script to gather relevant environment details when reporting cuDF issues +- PR #474 add ZLIB-based GZIP/ZIP support to `read_csv()` +- PR #547 Added melt similar to `pandas.melt()` +- PR #491 Add CI test script to check for updates to CHANGELOG.md in PRs +- PR #550 Add CI test script to check for style issues in PRs +- PR #558 Add CI scripts for cpu-based conda and gpu-based test builds +- PR #524 Add Boolean Indexing +- PR #564 Update python `sort_values` method to use updated libcudf `gdf_order_by` API +- PR #509 CSV Reader: Input CSV file can now be passed in as a text or a binary buffer +- PR #607 Add `__iter__` and iteritems to DataFrame class +- PR #643 added a new api gdf_replace_nulls that allows a user to replace nulls in a column + +## Improvements + +- PR #426 Removed sort-based groupby and refactored existing groupby APIs. Also improves C++/CUDA compile time. +- PR #461 Add `CUDF_HOME` variable in README.md to replace relative pathing. +- PR #472 RMM: Created centralized rmm::device_vector alias and rmm::exec_policy +- PR #500 Improved the concurrent hash map class to support partitioned (multi-pass) hash table building. 
+- PR #454 Improve CSV reader docs and examples +- PR #465 Added templated C++ API for RMM to avoid explicit cast to `void**` +- PR #513 `.gitignore` tweaks +- PR #521 Add `assert_eq` function for testing +- PR #502 Simplify Dockerfile for local dev, eliminate old conda/pip envs +- PR #549 Adds `-rdynamic` compiler flag to nvcc for Debug builds +- PR #472 RMM: Created centralized rmm::device_vector alias and rmm::exec_policy +- PR #577 Added external C++ API for scatter/gather functions +- PR #500 Improved the concurrent hash map class to support partitioned (multi-pass) hash table building +- PR #583 Updated `gdf_size_type` to `int` +- PR #500 Improved the concurrent hash map class to support partitioned (multi-pass) hash table building +- PR #617 Added .dockerignore file. Prevents adding stale cmake cache files to the docker container +- PR #658 Reduced `JOIN_TEST` time by isolating overflow test of hash table size computation +- PR #664 Added Debugging instructions to README +- PR #651 Remove noqa marks in `__init__.py` files +- PR #671 CSV Reader: uncompressed buffer input can be parsed without explicitly specifying compression as None +- PR #684 Make RMM a submodule +- PR #718 Ensure sum, product, min, max methods pandas compatibility on empty datasets +- PR #720 Refactored Index classes to make them more Pandas-like, added CategoricalIndex +- PR #749 Improve to_arrow and from_arrow Pandas compatibility +- PR #766 Remove TravisCI references, remove unused variables from CMake, fix ARROW_VERSION in Cmake +- PR #773 Add build-args back to Dockerfile and handle dependencies based on environment yml file +- PR #781 Move thirdparty submodules to root and symlink in /cpp +- PR #843 Fix broken cudf/python API examples, add new methods to the API index + +## Bug Fixes + +- PR #569 CSV Reader: Fix days being off-by-one when parsing some dates +- PR #531 CSV Reader: Fix incorrect parsing of quoted numbers +- PR #465 Added templated C++ API for RMM to avoid explicit cast 
to `void**` +- PR #473 Added missing include +- PR #478 CSV Reader: Add api support for auto column detection, header, mangle_dupe_cols, usecols +- PR #495 Updated README to correct where cffi pytest should be executed +- PR #501 Fix the intermittent segfault caused by the `thousands` and `compression` parameters in the csv reader +- PR #502 Simplify Dockerfile for local dev, eliminate old conda/pip envs +- PR #512 fix bug for `on` parameter in `DataFrame.merge` to allow for None or single column name +- PR #511 Updated python/cudf/bindings/join.pyx to fix cudf merge printing out dtypes +- PR #513 `.gitignore` tweaks +- PR #521 Add `assert_eq` function for testing +- PR #537 Fix CMAKE_CUDA_STANDARD_REQURIED typo in CMakeLists.txt +- PR #447 Fix silent failure in initializing DataFrame from generator +- PR #545 Temporarily disable csv reader thousands test to prevent segfault (test re-enabled in PR #501) +- PR #559 Fix Assertion error while using `applymap` to change the output dtype +- PR #575 Update `print_env.sh` script to better handle missing commands +- PR #612 Prevent an exception from occurring with true division on integer series. +- PR #630 Fix deprecation warning for `pd.core.common.is_categorical_dtype` +- PR #622 Fix Series.append() behaviour when appending values with different numeric dtype +- PR #603 Fix error while creating an empty column using None. 
+- PR #673 Fix array of strings not being caught in from_pandas +- PR #644 Fix return type and column support of dataframe.quantile() +- PR #634 Fix create `DataFrame.from_pandas()` with numeric column names +- PR #654 Add resolution check for GDF_TIMESTAMP in Join +- PR #648 Enforce one-to-one copy required when using `numba>=0.42.0` +- PR #645 Fix cmake build type handling not setting debug options when CMAKE_BUILD_TYPE=="Debug" +- PR #669 Fix GIL deadlock when launching multiple python threads that make Cython calls +- PR #665 Reworked the hash map to add a way to report the destination partition for a key +- PR #670 CMAKE: Fix env include path taking precedence over libcudf source headers +- PR #674 Check for gdf supported column types +- PR #677 Fix 'gdf_csv_test_Dates' gtest failure due to missing nrows parameter +- PR #604 Fix the parsing errors while reading a csv file using `sep` instead of `delimiter`. +- PR #686 Fix converting nulls to NaT values when converting Series to Pandas/Numpy +- PR #689 CSV Reader: Fix behavior with skiprows+header to match pandas implementation +- PR #691 Fixes Join on empty input DFs +- PR #706 CSV Reader: Fix broken dtype inference when whitespace is in data +- PR #717 CSV reader: fix behavior when parsing a csv file with no data rows +- PR #724 CSV Reader: fix build issue due to parameter type mismatch in a std::max call +- PR #734 Prevents reading undefined memory in gpu_expand_mask_bits numba kernel +- PR #747 CSV Reader: fix an issue where CUDA allocations fail with some large input files +- PR #750 Fix race condition for handling NVStrings in CMake +- PR #719 Fix merge column ordering +- PR #770 Fix issue where RMM submodule pointed to wrong branch and pin other to correct branches +- PR #778 Fix hard coded ABI off setting +- PR #784 Update RMM submodule commit-ish and pip paths +- PR #794 Update `rmm::exec_policy` usage to fix segmentation faults when used as temporary allocator. 
+- PR #800 Point git submodules to branches of forks instead of exact commits + + +# cuDF 0.4.0 (05 Dec 2018) + +## New Features + +- PR #398 add pandas-compatible `DataFrame.shape()` and `Series.shape()` +- PR #394 New documentation feature "10 Minutes to cuDF" +- PR #361 CSV Reader: Add support for strings with delimiters + +## Improvements + + - PR #436 Improvements for type_dispatcher and wrapper structs + - PR #429 Add CHANGELOG.md (this file) + - PR #266 use faster CUDA-accelerated DataFrame column/Series concatenation. + - PR #379 new C++ `type_dispatcher` reduces code complexity in supporting many data types. + - PR #349 Improve performance for creating columns from memoryview objects + - PR #445 Update reductions to use type_dispatcher. Adds integer types support to sum_of_squares. + - PR #448 Improve installation instructions in README.md + - PR #456 Change default CMake build to Release, and added option for disabling compilation of tests + +## Bug Fixes + + - PR #444 Fix csv_test CUDA too many resources requested fail. + - PR #396 added missing output buffer in validity tests for groupbys. + - PR #408 Dockerfile updates for source reorganization + - PR #437 Add cffi to Dockerfile conda env, fixes "cannot import name 'librmm'" + - PR #417 Fix `map_test` failure with CUDA 10 + - PR #414 Fix CMake installation include file paths + - PR #418 Properly cast string dtypes to programmatic dtypes when instantiating columns + - PR #427 Fix and tests for Concatenation illegal memory access with nulls + + +# cuDF 0.3.0 (23 Nov 2018) + +## New Features + + - PR #336 CSV Reader string support + +## Improvements + + - PR #354 source code refactored for better organization. CMake build system overhaul. Beginning of transition to Cython bindings. 
+ - PR #290 Add support for typecasting to/from datetime dtype + - PR #323 Add handling pyarrow boolean arrays in input/out, add tests + - PR #325 GDF_VALIDITY_UNSUPPORTED now returned for algorithms that don't support non-empty valid bitmasks + - PR #381 Faster InputTooLarge Join test completes in ms rather than minutes. + - PR #373 .gitignore improvements + - PR #367 Doc cleanup & examples for DataFrame methods + - PR #333 Add Rapids Memory Manager documentation + - PR #321 Rapids Memory Manager adds file/line location logging and convenience macros + - PR #334 Implement DataFrame `__copy__` and `__deepcopy__` + - PR #271 Add NVTX ranges to pygdf + - PR #311 Document system requirements for conda install + +## Bug Fixes + + - PR #337 Retain index on `scale()` function + - PR #344 Fix test failure due to PyArrow 0.11 Boolean handling + - PR #364 Remove noexcept from managed_allocator; CMakeLists fix for NVstrings + - PR #357 Fix bug that made all series be considered booleans for indexing + - PR #351 replace conda env configuration for developers + - PRs #346 #360 Fix CSV reading of negative numbers + - PR #342 Fix CMake to use conda-installed nvstrings + - PR #341 Preserve categorical dtype after groupby aggregations + - PR #315 ReadTheDocs build update to fix missing libcuda.so + - PR #320 FIX out-of-bounds access error in reductions.cu + - PR #319 Fix out-of-bounds memory access in libcudf count_valid_bits + - PR #303 Fix printing empty dataframe + + +# cuDF 0.2.0 and cuDF 0.1.0 + +These were initial releases of cuDF based on previously separate pyGDF and libGDF libraries. diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..07537e7 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,398 @@ +# Contributing to cuDF + +Contributions to cuDF fall into the following categories: + +1. 
To report a bug, request a new feature, or report a problem with documentation, please file an + [issue](https://github.com/rapidsai/cudf/issues/new/choose) describing the problem or new feature + in detail. The RAPIDS team evaluates and triages issues, and schedules them for a release. If you + believe the issue needs priority attention, please comment on the issue to notify the team. +2. To propose and implement a new feature, please file a new feature request + [issue](https://github.com/rapidsai/cudf/issues/new/choose). Describe the intended feature and + discuss the design and implementation with the team and community. Once the team agrees that the + plan looks good, go ahead and implement it, using the [code contributions](#code-contributions) + guide below. +3. To implement a feature or bug fix for an existing issue, please follow the [code + contributions](#code-contributions) guide below. If you need more context on a particular issue, + please ask in a comment. + +As contributors and maintainers to this project, you are expected to abide by cuDF's code of +conduct. More information can be found at: +[Contributor Code of Conduct](https://docs.rapids.ai/resources/conduct/). + +## Code contributions + +### Your first issue + +1. Follow the guide at the bottom of this page for + [Setting up your build environment](#setting-up-your-build-environment). +2. Find an issue to work on. The best way is to look for the + [good first issue](https://github.com/rapidsai/cudf/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22) + or [help wanted](https://github.com/rapidsai/cudf/issues?q=is%3Aissue+is%3Aopen+label%3A%22help+wanted%22) + labels. +3. Comment on the issue stating that you are going to work on it. +4. Create a fork of the cudf repository and check out a branch with a name that + describes your planned work. For example, `fix-documentation`. +5. Write code to address the issue or implement the feature. +6. 
Add unit tests and unit benchmarks. +7. [Create your pull request](https://github.com/rapidsai/cudf/compare). To run continuous integration (CI) tests without requesting review, open a draft pull request. +8. Verify that CI passes all [status checks](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/collaborating-on-repositories-with-code-quality-features/about-status-checks). + Fix if needed. +9. Wait for other developers to review your code and update code as needed. +10. Once reviewed and approved, a RAPIDS developer will merge your pull request. + +If you are unsure about anything, don't hesitate to comment on issues and ask for clarification! + +### Seasoned developers + +Once you have gotten your feet wet and are more comfortable with the code, you can look at the +prioritized issues for our next release in our +[project boards](https://github.com/rapidsai/cudf/projects). + +**Note:** Always look at the release board that is +[currently under development](https://docs.rapids.ai/maintainers) for issues to work on. This is +where RAPIDS developers also focus their efforts. + +Look at the unassigned issues, and find an issue to which you are comfortable contributing. Start +with _Step 3_ above, commenting on the issue to let others know you are working on it. If you have +any questions related to the implementation of the issue, ask them in the issue instead of the PR. + +## Setting up your build environment + +The following instructions are for developers and contributors to cuDF development. These +instructions are tested on Ubuntu Linux LTS releases. Use these instructions to build cuDF from +source and contribute to its development. Other operating systems may be compatible, but are not +currently tested. + +Building cudf with the provided conda environment is recommended for users who wish to enable all +library features. The following instructions are for building with a conda environment. 
Dependencies +for a minimal build of libcudf without using conda are also listed below. + +### General requirements + +Compilers: + +* `gcc` version 9.3+ +* `nvcc` version 11.5+ +* `cmake` version 3.26.4+ + +CUDA/GPU: + +* CUDA 11.5+ +* NVIDIA driver 450.80.02+ +* Pascal architecture or better + +You can obtain CUDA from +[https://developer.nvidia.com/cuda-downloads](https://developer.nvidia.com/cuda-downloads). + +### Create the build environment + +- Clone the repository: + +```bash +CUDF_HOME=$(pwd)/cudf +git clone https://github.com/rapidsai/cudf.git $CUDF_HOME +cd $CUDF_HOME +``` + +#### Building with a conda environment + +**Note:** Using a conda environment is the easiest way to satisfy the library's dependencies. +Instructions for a minimal build environment without conda are included below. + +- Create the conda development environment: + +```bash +# create the conda environment (assuming in base `cudf` directory) +# note: RAPIDS currently doesn't support `channel_priority: strict`; +# use `channel_priority: flexible` instead +conda env create --name cudf_dev --file conda/environments/all_cuda-118_arch-x86_64.yaml +# activate the environment +conda activate cudf_dev +``` + +- **Note**: the conda environment files are updated frequently, so the + development environment may also need to be updated if dependency versions or + pinnings are changed. + +#### Building without a conda environment + +- libcudf has the following minimal dependencies (in addition to those listed in the [General + requirements](#general-requirements)). The packages listed below use Ubuntu package names: + + - `build-essential` + - `libssl-dev` + - `libz-dev` + - `libpython3-dev` (required if building cudf) + +### Build cuDF from source + +- A `build.sh` script is provided in `$CUDF_HOME`. Running the script with no additional arguments + will install the `libcudf`, `cudf` and `dask_cudf` libraries. By default, the libraries are + installed to the `$CONDA_PREFIX` directory. 
To install into a different location, set the location + in `$INSTALL_PREFIX`. Finally, note that the script depends on the `nvcc` executable being on your + path, or defined in `$CUDACXX`. + +```bash +cd $CUDF_HOME + +# Choose one of the following commands, depending on whether +# you want to build and install the libcudf C++ library only, +# or include the cudf and/or dask_cudf Python libraries: + +./build.sh # libcudf, cudf and dask_cudf +./build.sh libcudf # libcudf only +./build.sh libcudf cudf # libcudf and cudf only +``` + +- Other libraries like `cudf-kafka` and `custreamz` can be installed with this script. For the + complete list of libraries as well as details about the script usage, run the `help` command: + +```bash +./build.sh --help +``` + +### Build, install and test cuDF libraries for contributors + +The general workflow is provided below. Please also see the last section about +[code formatting](#code-formatting). + +#### `libcudf` (C++) + +- If you're only interested in building the library (and not the unit tests): + +```bash +cd $CUDF_HOME +./build.sh libcudf +``` + +- If, in addition, you want to build tests: + +```bash +./build.sh libcudf tests +``` + +- To run the tests: + +```bash +make test +``` + +#### `cudf` (Python) + +- First, build the `libcudf` C++ library following the steps above + +- To build and install in edit/develop `cudf` Python package: +```bash +cd $CUDF_HOME/python/cudf +python setup.py build_ext --inplace +python setup.py develop +``` + +- To run `cudf` tests: +```bash +cd $CUDF_HOME/python +pytest -v cudf/cudf/tests +``` + +#### `dask-cudf` (Python) + +- First, build the `libcudf` C++ and `cudf` Python libraries following the steps above + +- To install the `dask-cudf` Python package in editable/develop mode: +```bash +cd $CUDF_HOME/python/dask_cudf +python setup.py build_ext --inplace +python setup.py develop +``` + +- To run `dask_cudf` tests: +```bash +cd $CUDF_HOME/python +pytest -v dask_cudf +``` + +#### 
`libcudf_kafka` (C++) + +- If you're only interested in building the library (and not the unit tests): + +```bash +cd $CUDF_HOME +./build.sh libcudf_kafka +``` + +- If, in addition, you want to build tests: + +```bash +./build.sh libcudf_kafka tests +``` + +- To run the tests: + +```bash +make test +``` + +#### `cudf-kafka` (Python) + +- First, build the `libcudf` and `libcudf_kafka` libraries following the steps above + +- To install the `cudf-kafka` Python package in editable/develop mode: + +```bash +cd $CUDF_HOME/python/cudf_kafka +python setup.py build_ext --inplace +python setup.py develop +``` + +#### `custreamz` (Python) + +- First, build `libcudf`, `libcudf_kafka`, and `cudf_kafka` following the steps above + +- To install the `custreamz` Python package in editable/develop mode: + +```bash +cd $CUDF_HOME/python/custreamz +python setup.py build_ext --inplace +python setup.py develop +``` + +- To run `custreamz` tests : + +```bash +cd $CUDF_HOME/python +pytest -v custreamz +``` + +#### `cudf` (Java): + +- First, build the `libcudf` C++ library following the steps above + +- Then, refer to the [Java README](java/README.md) + +Done! You are ready to develop for the cuDF project. Please review the project's +[code formatting guidelines](#code-formatting). + +## Debugging cuDF + +### Building in debug mode from source + +Follow the instructions to [build from source](#build-cudf-from-source) and add `-g` to the +`./build.sh` command. + +For example: + +```bash +./build.sh libcudf -g +``` + +This builds `libcudf` in debug mode which enables some `assert` safety checks and includes symbols +in the library for debugging. + +All other steps for installing `libcudf` into your environment are the same. + +### Debugging with `cuda-gdb` and `cuda-memcheck` + +When you have a debug build of `libcudf` installed, debugging with the `cuda-gdb` and +`cuda-memcheck` is easy. 
+
+If you are debugging a Python script, run the following:
+
+```bash
+cuda-gdb -ex r --args python <program name>.py
+```
+
+```bash
+cuda-memcheck python <program name>.py
+```
+
+### Device debug symbols
+
+The device debug symbols are not automatically added with the cmake `Debug` build type because it
+causes a runtime delay of several minutes when loading the libcudf.so library.
+
+Therefore, it is recommended to add device debug symbols only to specific files by setting the `-G`
+compile option locally in your `cpp/CMakeLists.txt` for that file. Here is an example of adding the
+`-G` option to the compile command for `src/copying/copy.cu` source file:
+
+```cmake
+set_source_files_properties(src/copying/copy.cu PROPERTIES COMPILE_OPTIONS "-G")
+```
+
+This will add the device debug symbols for this object file in `libcudf.so`. You can then use
+`cuda-gdb` to debug into the kernels in that source file.
+
+## Code Formatting
+
+### Using pre-commit hooks
+
+cuDF uses [pre-commit](https://pre-commit.com/) to execute all code linters and formatters. These
+tools ensure a consistent code format throughout the project. Using pre-commit ensures that linter
+versions and options are aligned for all developers. Additionally, there is a CI check in place to
+enforce that committed code follows our standards.
+
+To use `pre-commit`, install via `conda` or `pip`:
+
+```bash
+conda install -c conda-forge pre-commit
+```
+
+```bash
+pip install pre-commit
+```
+
+Then run pre-commit hooks before committing code:
+
+```bash
+pre-commit run
+```
+
+By default, pre-commit runs on staged files (only changes and additions that will be committed).
+To run pre-commit checks on all files, execute:
+
+```bash
+pre-commit run --all-files
+```
+
+Optionally, you may set up the pre-commit hooks to run automatically when you make a git commit. This can be done by running:
+
+```bash
+pre-commit install
+```
+
+Now code linters and formatters will be run each time you commit changes. 
+ +You can skip these checks with `git commit --no-verify` or with the short version `git commit -n`. + +### Summary of pre-commit hooks + +The following section describes some of the core pre-commit hooks used by the repository. +See `.pre-commit-config.yaml` for a full list. + +C++/CUDA is formatted with [`clang-format`](https://clang.llvm.org/docs/ClangFormat.html). + +[`doxygen`](https://doxygen.nl/) is used as documentation generator and also as a documentation linter. +In order to run doxygen as a linter on C++/CUDA code, run + +```bash +./ci/checks/doxygen.sh +``` + +Python code runs several linters including [Black](https://black.readthedocs.io/en/stable/), +[isort](https://pycqa.github.io/isort/), and [flake8](https://flake8.pycqa.org/en/latest/). + +cuDF also uses [codespell](https://github.com/codespell-project/codespell) to find spelling +mistakes, and this check is run as a pre-commit hook. To apply the suggested spelling fixes, +you can run `codespell -i 3 -w .` from the repository root directory. +This will bring up an interactive prompt to select which spelling fixes to apply. + +## Developer Guidelines + +The [C++ Developer Guide](cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md) includes details on contributing to libcudf C++ code. + +The [Python Developer Guide](https://docs.rapids.ai/api/cudf/stable/developer_guide/index.html) includes details on contributing to cuDF Python code. + + +## Attribution + +Portions adopted from https://github.com/pytorch/pytorch/blob/master/CONTRIBUTING.md +Portions adopted from https://github.com/dask/dask/blob/master/docs/source/develop.rst diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..18bcb43 --- /dev/null +++ b/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. 
+ + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2018 NVIDIA Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/README.md b/README.md new file mode 100644 index 0000000..64c980d --- /dev/null +++ b/README.md @@ -0,0 +1,97 @@ +#
 cuDF - GPU DataFrames
+ +**NOTE:** For the latest stable [README.md](https://github.com/rapidsai/cudf/blob/main/README.md) ensure you are on the `main` branch. + +## Resources + +- [cuDF Reference Documentation](https://docs.rapids.ai/api/cudf/stable/): Python API reference, tutorials, and topic guides. +- [libcudf Reference Documentation](https://docs.rapids.ai/api/libcudf/stable/): C/C++ CUDA library API reference. +- [Getting Started](https://rapids.ai/start.html): Instructions for installing cuDF. +- [RAPIDS Community](https://rapids.ai/community.html): Get help, contribute, and collaborate. +- [GitHub repository](https://github.com/rapidsai/cudf): Download the cuDF source code. +- [Issue tracker](https://github.com/rapidsai/cudf/issues): Report issues or request features. + +## Overview + +Built based on the [Apache Arrow](http://arrow.apache.org/) columnar memory format, cuDF is a GPU DataFrame library for loading, joining, aggregating, filtering, and otherwise manipulating data. + +cuDF provides a pandas-like API that will be familiar to data engineers & data scientists, so they can use it to easily accelerate their workflows without going into the details of CUDA programming. 
+ +For example, the following snippet downloads a CSV, then uses the GPU to parse it into rows and columns and run calculations: +```python +import cudf, requests +from io import StringIO + +url = "https://github.com/plotly/datasets/raw/master/tips.csv" +content = requests.get(url).content.decode('utf-8') + +tips_df = cudf.read_csv(StringIO(content)) +tips_df['tip_percentage'] = tips_df['tip'] / tips_df['total_bill'] * 100 + +# display average tip by dining party size +print(tips_df.groupby('size').tip_percentage.mean()) +``` + +Output: +``` +size +1 21.729201548727808 +2 16.571919173482897 +3 15.215685473711837 +4 14.594900639351332 +5 14.149548965142023 +6 15.622920072028379 +Name: tip_percentage, dtype: float64 +``` + +For additional examples, browse our complete [API documentation](https://docs.rapids.ai/api/cudf/stable/), or check out our more detailed [notebooks](https://github.com/rapidsai/notebooks-contrib). + +## Quick Start + +Please see the [Demo Docker Repository](https://hub.docker.com/r/rapidsai/rapidsai/), choosing a tag based on the NVIDIA CUDA version you're running. This provides a ready to run Docker container with example notebooks and data, showcasing how you can utilize cuDF. + +## Installation + + +### CUDA/GPU requirements + +* CUDA 11.2+ +* NVIDIA driver 450.80.02+ +* Pascal architecture or better (Compute Capability >=6.0) + +### Conda + +cuDF can be installed with conda (via [miniconda](https://conda.io/miniconda.html) or the full [Anaconda distribution](https://www.anaconda.com/download)) from the `rapidsai` channel: + +```bash +conda install -c rapidsai -c conda-forge -c nvidia \ + cudf=23.10 python=3.10 cuda-version=11.8 +``` + +We also provide [nightly Conda packages](https://anaconda.org/rapidsai-nightly) built from the HEAD +of our latest development branch. + +Note: cuDF is supported only on Linux, and with Python versions 3.9 and later. 
+ +See the [Get RAPIDS version picker](https://rapids.ai/start.html) for more OS and version info. + +## Build/Install from Source +See build [instructions](CONTRIBUTING.md#setting-up-your-build-environment). + +## Contributing + +Please see our [guide for contributing to cuDF](CONTRIBUTING.md). + +## Contact + +Find out more details on the [RAPIDS site](https://rapids.ai/community.html) + +##
 Open GPU Data Science
+
+The RAPIDS suite of open source software libraries aims to enable execution of end-to-end data science and analytics pipelines entirely on GPUs. It relies on NVIDIA® CUDA® primitives for low-level compute optimization, but exposes that GPU parallelism and high-bandwidth memory speed through user-friendly Python interfaces.
+

+ +### Apache Arrow on GPU + +The GPU version of [Apache Arrow](https://arrow.apache.org/) is a common API that enables efficient interchange of tabular data between processes running on the GPU. End-to-end computation on the GPU avoids unnecessary copying and converting of data off the GPU, reducing compute time and cost for high-performance analytics common in artificial intelligence workloads. As the name implies, cuDF uses the Apache Arrow columnar data format on the GPU. Currently, a subset of the features in Apache Arrow are supported. diff --git a/build.sh b/build.sh new file mode 100755 index 0000000..2ad6971 --- /dev/null +++ b/build.sh @@ -0,0 +1,383 @@ +#!/bin/bash + +# Copyright (c) 2019-2023, NVIDIA CORPORATION. + +# cuDF build script + +# This script is used to build the component(s) in this repo from +# source, and can be called with various options to customize the +# build as needed (see the help output for details) +# Abort script on first error +set -e + +NUMARGS=$# +ARGS=$* + +# NOTE: ensure all dir changes are relative to the location of this +# script, and that this script resides in the repo dir! 
+REPODIR=$(cd $(dirname $0); pwd) + +VALIDARGS="clean libcudf cudf cudfjar dask_cudf benchmarks tests libcudf_kafka cudf_kafka custreamz -v -g -n -l --allgpuarch --disable_nvtx --opensource_nvcomp --show_depr_warn --ptds -h --build_metrics --incl_cache_stats" +HELP="$0 [clean] [libcudf] [cudf] [cudfjar] [dask_cudf] [benchmarks] [tests] [libcudf_kafka] [cudf_kafka] [custreamz] [-v] [-g] [-n] [-h] [--cmake-args=\\\"\\\"] + clean - remove all existing build artifacts and configuration (start + over) + libcudf - build the cudf C++ code only + cudf - build the cudf Python package + cudfjar - build cudf JAR with static libcudf using devtoolset toolchain + dask_cudf - build the dask_cudf Python package + benchmarks - build benchmarks + tests - build tests + libcudf_kafka - build the libcudf_kafka C++ code only + cudf_kafka - build the cudf_kafka Python package + custreamz - build the custreamz Python package + -v - verbose build mode + -g - build for debug + -n - no install step (does not affect Python) + --allgpuarch - build for all supported GPU architectures + --disable_nvtx - disable inserting NVTX profiling ranges + --opensource_nvcomp - disable use of proprietary nvcomp extensions + --show_depr_warn - show cmake deprecation warnings + --ptds - enable per-thread default stream + --build_metrics - generate build metrics report for libcudf + --incl_cache_stats - include cache statistics in build metrics report + --cmake-args=\\\"\\\" - pass arbitrary list of CMake configuration options (escape all quotes in argument) + -h | --h[elp] - print this text + + default action (no args) is to build and install 'libcudf' then 'cudf' + then 'dask_cudf' targets +" +LIB_BUILD_DIR=${LIB_BUILD_DIR:=${REPODIR}/cpp/build} +KAFKA_LIB_BUILD_DIR=${KAFKA_LIB_BUILD_DIR:=${REPODIR}/cpp/libcudf_kafka/build} +CUDF_KAFKA_BUILD_DIR=${REPODIR}/python/cudf_kafka/build +CUDF_BUILD_DIR=${REPODIR}/python/cudf/build +DASK_CUDF_BUILD_DIR=${REPODIR}/python/dask_cudf/build 
+CUSTREAMZ_BUILD_DIR=${REPODIR}/python/custreamz/build +CUDF_JAR_JAVA_BUILD_DIR="$REPODIR/java/target" + +BUILD_DIRS="${LIB_BUILD_DIR} ${CUDF_BUILD_DIR} ${DASK_CUDF_BUILD_DIR} ${KAFKA_LIB_BUILD_DIR} ${CUDF_KAFKA_BUILD_DIR} ${CUSTREAMZ_BUILD_DIR} ${CUDF_JAR_JAVA_BUILD_DIR}" + +# Set defaults for vars modified by flags to this script +VERBOSE_FLAG="" +BUILD_TYPE=Release +INSTALL_TARGET=install +BUILD_BENCHMARKS=OFF +BUILD_ALL_GPU_ARCH=0 +BUILD_NVTX=ON +BUILD_TESTS=OFF +BUILD_DISABLE_DEPRECATION_WARNINGS=ON +BUILD_PER_THREAD_DEFAULT_STREAM=OFF +BUILD_REPORT_METRICS=OFF +BUILD_REPORT_INCL_CACHE_STATS=OFF +USE_PROPRIETARY_NVCOMP=ON + +# Set defaults for vars that may not have been defined externally +# FIXME: if INSTALL_PREFIX is not set, check PREFIX, then check +# CONDA_PREFIX, but there is no fallback from there! +INSTALL_PREFIX=${INSTALL_PREFIX:=${PREFIX:=${CONDA_PREFIX}}} +PARALLEL_LEVEL=${PARALLEL_LEVEL:=$(nproc)} + +function hasArg { + (( ${NUMARGS} != 0 )) && (echo " ${ARGS} " | grep -q " $1 ") +} + +function cmakeArgs { + # Check for multiple cmake args options + if [[ $(echo $ARGS | { grep -Eo "\-\-cmake\-args" || true; } | wc -l ) -gt 1 ]]; then + echo "Multiple --cmake-args options were provided, please provide only one: ${ARGS}" + exit 1 + fi + + # Check for cmake args option + if [[ -n $(echo $ARGS | { grep -E "\-\-cmake\-args" || true; } ) ]]; then + # There are possible weird edge cases that may cause this regex filter to output nothing and fail silently + # the true pipe will catch any weird edge cases that may happen and will cause the program to fall back + # on the invalid option error + EXTRA_CMAKE_ARGS=$(echo $ARGS | { grep -Eo "\-\-cmake\-args=\".+\"" || true; }) + if [[ -n ${EXTRA_CMAKE_ARGS} ]]; then + # Remove the full EXTRA_CMAKE_ARGS argument from list of args so that it passes validArgs function + ARGS=${ARGS//$EXTRA_CMAKE_ARGS/} + # Filter the full argument down to just the extra string that will be added to cmake call + 
EXTRA_CMAKE_ARGS=$(echo $EXTRA_CMAKE_ARGS | grep -Eo "\".+\"" | sed -e 's/^"//' -e 's/"$//') + fi + fi +} + +function buildAll { + ((${NUMARGS} == 0 )) || !(echo " ${ARGS} " | grep -q " [^-]\+ ") +} + +function buildLibCudfJniInDocker { + local cudaVersion="11.5.0" + local imageName="cudf-build:${cudaVersion}-devel-centos7" + local CMAKE_GENERATOR="${CMAKE_GENERATOR:-Ninja}" + local workspaceDir="/rapids" + local localMavenRepo=${LOCAL_MAVEN_REPO:-"$HOME/.m2/repository"} + local workspaceRepoDir="$workspaceDir/cudf" + local workspaceMavenRepoDir="$workspaceDir/.m2/repository" + local workspaceCcacheDir="$workspaceDir/.ccache" + mkdir -p "$CUDF_JAR_JAVA_BUILD_DIR/libcudf-cmake-build" + mkdir -p "$HOME/.ccache" "$HOME/.m2" + nvidia-docker build \ + -f java/ci/Dockerfile.centos7 \ + --build-arg CUDA_VERSION=${cudaVersion} \ + -t $imageName . + nvidia-docker run -it -u $(id -u):$(id -g) --rm \ + -e PARALLEL_LEVEL \ + -e CCACHE_DISABLE \ + -e CCACHE_DIR="$workspaceCcacheDir" \ + -v "/etc/group:/etc/group:ro" \ + -v "/etc/passwd:/etc/passwd:ro" \ + -v "/etc/shadow:/etc/shadow:ro" \ + -v "/etc/sudoers.d:/etc/sudoers.d:ro" \ + -v "$HOME/.ccache:$workspaceCcacheDir:rw" \ + -v "$REPODIR:$workspaceRepoDir:rw" \ + -v "$localMavenRepo:$workspaceMavenRepoDir:rw" \ + --workdir "$workspaceRepoDir/java/target/libcudf-cmake-build" \ + ${imageName} \ + scl enable devtoolset-9 \ + "cmake $workspaceRepoDir/cpp \ + -G${CMAKE_GENERATOR} \ + -DCMAKE_C_COMPILER_LAUNCHER=ccache \ + -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ + -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache \ + -DCMAKE_CXX_LINKER_LAUNCHER=ccache \ + -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \ + -DCUDA_STATIC_RUNTIME=ON \ + -DCMAKE_CUDA_ARCHITECTURES=${CUDF_CMAKE_CUDA_ARCHITECTURES} \ + -DCMAKE_INSTALL_PREFIX=/usr/local/rapids \ + -DUSE_NVTX=ON \ + -DCUDF_USE_PROPRIETARY_NVCOMP=ON \ + -DCUDF_USE_ARROW_STATIC=ON \ + -DCUDF_ENABLE_ARROW_S3=OFF \ + -DBUILD_TESTS=OFF \ + -DCUDF_USE_PER_THREAD_DEFAULT_STREAM=ON \ + -DRMM_LOGGING_LEVEL=OFF \ + 
-DBUILD_SHARED_LIBS=OFF && \ + cmake --build . --parallel ${PARALLEL_LEVEL} && \ + cd $workspaceRepoDir/java && \ + mvn ${MVN_PHASES:-"package"} \ + -Dmaven.repo.local=$workspaceMavenRepoDir \ + -DskipTests=${SKIP_TESTS:-false} \ + -Dparallel.level=${PARALLEL_LEVEL} \ + -Dcmake.ccache.opts='-DCMAKE_C_COMPILER_LAUNCHER=ccache \ + -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ + -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache \ + -DCMAKE_CXX_LINKER_LAUNCHER=ccache' \ + -DCUDF_CPP_BUILD_DIR=$workspaceRepoDir/java/target/libcudf-cmake-build \ + -DCUDA_STATIC_RUNTIME=ON \ + -DCUDF_USE_PER_THREAD_DEFAULT_STREAM=ON \ + -DUSE_GDS=ON \ + -DGPU_ARCHS=${CUDF_CMAKE_CUDA_ARCHITECTURES} \ + -DCUDF_JNI_LIBCUDF_STATIC=ON \ + -Dtest=*,!CuFileTest,!CudaFatalTest,!ColumnViewNonEmptyNullsTest" +} + +if hasArg -h || hasArg --h || hasArg --help; then + echo "${HELP}" + exit 0 +fi + +# Check for valid usage +if (( ${NUMARGS} != 0 )); then + # Check for cmake args + cmakeArgs + for a in ${ARGS}; do + if ! (echo " ${VALIDARGS} " | grep -q " ${a} "); then + echo "Invalid option or formatting, check --help: ${a}" + exit 1 + fi + done +fi + +# Process flags +if hasArg -v; then + VERBOSE_FLAG="-v" +fi +if hasArg -g; then + BUILD_TYPE=Debug +fi +if hasArg -n; then + INSTALL_TARGET="" + LIBCUDF_BUILD_DIR=${LIB_BUILD_DIR} +fi +if hasArg --allgpuarch; then + BUILD_ALL_GPU_ARCH=1 +fi +if hasArg benchmarks; then + BUILD_BENCHMARKS=ON +fi +if hasArg tests; then + BUILD_TESTS=ON +fi +if hasArg --disable_nvtx; then + BUILD_NVTX="OFF" +fi +if hasArg --opensource_nvcomp; then + USE_PROPRIETARY_NVCOMP="OFF" +fi +if hasArg --show_depr_warn; then + BUILD_DISABLE_DEPRECATION_WARNINGS=OFF +fi +if hasArg --ptds; then + BUILD_PER_THREAD_DEFAULT_STREAM=ON +fi +if hasArg --build_metrics; then + BUILD_REPORT_METRICS=ON +fi + +if hasArg --incl_cache_stats; then + BUILD_REPORT_INCL_CACHE_STATS=ON +fi + +# Append `-DFIND_CUDF_CPP=ON` to EXTRA_CMAKE_ARGS unless a user specified the option. 
+if [[ "${EXTRA_CMAKE_ARGS}" != *"DFIND_CUDF_CPP"* ]]; then + EXTRA_CMAKE_ARGS="${EXTRA_CMAKE_ARGS} -DFIND_CUDF_CPP=ON" +fi + + +# If clean given, run it prior to any other steps +if hasArg clean; then + # If the dirs to clean are mounted dirs in a container, the + # contents should be removed but the mounted dirs will remain. + # The find removes all contents but leaves the dirs, the rmdir + # attempts to remove the dirs but can fail safely. + for bd in ${BUILD_DIRS}; do + if [ -d ${bd} ]; then + find ${bd} -mindepth 1 -delete + rmdir ${bd} || true + fi + done + + # Cleaning up python artifacts + find ${REPODIR}/python/ | grep -E "(__pycache__|\.pyc|\.pyo|\.so|\_skbuild$)" | xargs rm -rf + +fi + + +################################################################################ +# Configure, build, and install libcudf + +if buildAll || hasArg libcudf || hasArg cudf || hasArg cudfjar; then + if (( ${BUILD_ALL_GPU_ARCH} == 0 )); then + CUDF_CMAKE_CUDA_ARCHITECTURES="${CUDF_CMAKE_CUDA_ARCHITECTURES:-NATIVE}" + if [[ "$CUDF_CMAKE_CUDA_ARCHITECTURES" == "NATIVE" ]]; then + echo "Building for the architecture of the GPU in the system..." + else + echo "Building for the GPU architecture(s) $CUDF_CMAKE_CUDA_ARCHITECTURES ..." + fi + else + CUDF_CMAKE_CUDA_ARCHITECTURES="RAPIDS" + echo "Building for *ALL* supported GPU architectures..." 
+ fi +fi + +if buildAll || hasArg libcudf; then + # get the current count before the compile starts + if [[ "$BUILD_REPORT_INCL_CACHE_STATS" == "ON" && -x "$(command -v sccache)" ]]; then + # zero the sccache statistics + sccache --zero-stats + fi + + cmake -S $REPODIR/cpp -B ${LIB_BUILD_DIR} \ + -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} \ + -DCMAKE_CUDA_ARCHITECTURES=${CUDF_CMAKE_CUDA_ARCHITECTURES} \ + -DUSE_NVTX=${BUILD_NVTX} \ + -DCUDF_USE_PROPRIETARY_NVCOMP=${USE_PROPRIETARY_NVCOMP} \ + -DBUILD_TESTS=${BUILD_TESTS} \ + -DBUILD_BENCHMARKS=${BUILD_BENCHMARKS} \ + -DDISABLE_DEPRECATION_WARNINGS=${BUILD_DISABLE_DEPRECATION_WARNINGS} \ + -DCUDF_USE_PER_THREAD_DEFAULT_STREAM=${BUILD_PER_THREAD_DEFAULT_STREAM} \ + -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \ + ${EXTRA_CMAKE_ARGS} + + cd ${LIB_BUILD_DIR} + + compile_start=$(date +%s) + cmake --build . -j${PARALLEL_LEVEL} ${VERBOSE_FLAG} + compile_end=$(date +%s) + compile_total=$(( compile_end - compile_start )) + + # Record build times + if [[ "$BUILD_REPORT_METRICS" == "ON" && -f "${LIB_BUILD_DIR}/.ninja_log" ]]; then + echo "Formatting build metrics" + MSG="" + # get some sccache stats after the compile + if [[ "$BUILD_REPORT_INCL_CACHE_STATS" == "ON" && -x "$(command -v sccache)" ]]; then + COMPILE_REQUESTS=$(sccache -s | grep "Compile requests \+ [0-9]\+$" | awk '{ print $NF }') + CACHE_HITS=$(sccache -s | grep "Cache hits \+ [0-9]\+$" | awk '{ print $NF }') + HIT_RATE=$(echo - | awk "{printf \"%.2f\n\", $CACHE_HITS / $COMPILE_REQUESTS * 100}") + MSG="${MSG}
cache hit rate ${HIT_RATE} %" + fi + MSG="${MSG}
parallel setting: $PARALLEL_LEVEL" + MSG="${MSG}
parallel build time: $compile_total seconds" + if [[ -f "${LIB_BUILD_DIR}/libcudf.so" ]]; then + LIBCUDF_FS=$(ls -lh ${LIB_BUILD_DIR}/libcudf.so | awk '{print $5}') + MSG="${MSG}
libcudf.so size: $LIBCUDF_FS" + fi + BMR_DIR=${RAPIDS_ARTIFACTS_DIR:-"${LIB_BUILD_DIR}"} + echo "Metrics output dir: [$BMR_DIR]" + mkdir -p ${BMR_DIR} + MSG_OUTFILE="$(mktemp)" + echo "$MSG" > "${MSG_OUTFILE}" + python ${REPODIR}/cpp/scripts/sort_ninja_log.py ${LIB_BUILD_DIR}/.ninja_log --fmt html --msg "${MSG_OUTFILE}" > ${BMR_DIR}/ninja_log.html + cp ${LIB_BUILD_DIR}/.ninja_log ${BMR_DIR}/ninja.log + fi + + if [[ ${INSTALL_TARGET} != "" ]]; then + cmake --build . -j${PARALLEL_LEVEL} --target install ${VERBOSE_FLAG} + fi +fi + +# Build and install the cudf Python package +if buildAll || hasArg cudf; then + + cd ${REPODIR}/python/cudf + SKBUILD_CONFIGURE_OPTIONS="-DCMAKE_PREFIX_PATH=${INSTALL_PREFIX} -DCMAKE_LIBRARY_PATH=${LIBCUDF_BUILD_DIR} -DCMAKE_CUDA_ARCHITECTURES=${CUDF_CMAKE_CUDA_ARCHITECTURES} ${EXTRA_CMAKE_ARGS}" \ + SKBUILD_BUILD_OPTIONS="-j${PARALLEL_LEVEL:-1}" \ + python -m pip install --no-build-isolation --no-deps . +fi + + +# Build and install the dask_cudf Python package +if buildAll || hasArg dask_cudf; then + + cd ${REPODIR}/python/dask_cudf + python -m pip install --no-build-isolation --no-deps . +fi + +if hasArg cudfjar; then + buildLibCudfJniInDocker +fi + +# Build libcudf_kafka library +if hasArg libcudf_kafka; then + cmake -S $REPODIR/cpp/libcudf_kafka -B ${KAFKA_LIB_BUILD_DIR} \ + -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} \ + -DBUILD_TESTS=${BUILD_TESTS} \ + -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \ + ${EXTRA_CMAKE_ARGS} + + + cd ${KAFKA_LIB_BUILD_DIR} + cmake --build . -j${PARALLEL_LEVEL} ${VERBOSE_FLAG} + + if [[ ${INSTALL_TARGET} != "" ]]; then + cmake --build . -j${PARALLEL_LEVEL} --target install ${VERBOSE_FLAG} + fi +fi + +# build cudf_kafka Python package +if hasArg cudf_kafka; then + cd ${REPODIR}/python/cudf_kafka + SKBUILD_CONFIGURE_OPTIONS="-DCMAKE_LIBRARY_PATH=${LIBCUDF_BUILD_DIR}" \ + SKBUILD_BUILD_OPTIONS="-j${PARALLEL_LEVEL:-1}" \ + python -m pip install --no-build-isolation --no-deps . 
+fi + +# build custreamz Python package +if hasArg custreamz; then + cd ${REPODIR}/python/custreamz + SKBUILD_CONFIGURE_OPTIONS="-DCMAKE_LIBRARY_PATH=${LIBCUDF_BUILD_DIR}" \ + SKBUILD_BUILD_OPTIONS="-j${PARALLEL_LEVEL:-1}" \ + python -m pip install --no-build-isolation --no-deps . +fi diff --git a/ci/build_cpp.sh b/ci/build_cpp.sh new file mode 100755 index 0000000..8b757fe --- /dev/null +++ b/ci/build_cpp.sh @@ -0,0 +1,18 @@ +#!/bin/bash +# Copyright (c) 2022-2023, NVIDIA CORPORATION. + +set -euo pipefail + +source rapids-env-update + +export CMAKE_GENERATOR=Ninja + +rapids-print-env + +rapids-logger "Begin cpp build" + +# With boa installed conda build forward to boa +rapids-conda-retry mambabuild \ + conda/recipes/libcudf + +rapids-upload-conda-to-s3 cpp diff --git a/ci/build_docs.sh b/ci/build_docs.sh new file mode 100755 index 0000000..9149b5e --- /dev/null +++ b/ci/build_docs.sh @@ -0,0 +1,57 @@ +#!/bin/bash +# Copyright (c) 2023, NVIDIA CORPORATION. + +set -euo pipefail + +rapids-logger "Create test conda environment" +. /opt/conda/etc/profile.d/conda.sh + +rapids-dependency-file-generator \ + --output conda \ + --file_key docs \ + --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION}" | tee env.yaml + +rapids-mamba-retry env create --force -f env.yaml -n docs +conda activate docs + +rapids-print-env + +rapids-logger "Downloading artifacts from previous jobs" +CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp) +PYTHON_CHANNEL=$(rapids-download-conda-from-s3 python) + +rapids-mamba-retry install \ + --channel "${CPP_CHANNEL}" \ + --channel "${PYTHON_CHANNEL}" \ + libcudf cudf dask-cudf + +export RAPIDS_VERSION_NUMBER="23.10" +export RAPIDS_DOCS_DIR="$(mktemp -d)" + +rapids-logger "Build CPP docs" +pushd cpp/doxygen +aws s3 cp s3://rapidsai-docs/librmm/html/${RAPIDS_VERSION_NUMBER}/rmm.tag . 
|| echo "Failed to download rmm Doxygen tag" +doxygen Doxyfile +mkdir -p "${RAPIDS_DOCS_DIR}/libcudf/html" +mv html/* "${RAPIDS_DOCS_DIR}/libcudf/html" +popd + +rapids-logger "Build Python docs" +pushd docs/cudf +make dirhtml +make text +mkdir -p "${RAPIDS_DOCS_DIR}/cudf/"{html,txt} +mv build/dirhtml/* "${RAPIDS_DOCS_DIR}/cudf/html" +mv build/text/* "${RAPIDS_DOCS_DIR}/cudf/txt" +popd + +rapids-logger "Build dask-cuDF Sphinx docs" +pushd docs/dask_cudf +make dirhtml +make text +mkdir -p "${RAPIDS_DOCS_DIR}/dask-cudf/"{html,txt} +mv build/dirhtml/* "${RAPIDS_DOCS_DIR}/dask-cudf/html" +mv build/text/* "${RAPIDS_DOCS_DIR}/dask-cudf/txt" +popd + +rapids-upload-docs diff --git a/ci/build_python.sh b/ci/build_python.sh new file mode 100755 index 0000000..61f160b --- /dev/null +++ b/ci/build_python.sh @@ -0,0 +1,43 @@ +#!/bin/bash +# Copyright (c) 2022-2023, NVIDIA CORPORATION. + +set -euo pipefail + +source rapids-env-update + +export CMAKE_GENERATOR=Ninja + +rapids-print-env + +rapids-logger "Begin py build" + +CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp) + +# TODO: Remove `--no-test` flag once importing on a CPU +# node works correctly +# With boa installed conda build forwards to the boa builder +rapids-conda-retry mambabuild \ + --no-test \ + --channel "${CPP_CHANNEL}" \ + conda/recipes/cudf + +rapids-conda-retry mambabuild \ + --no-test \ + --channel "${CPP_CHANNEL}" \ + --channel "${RAPIDS_CONDA_BLD_OUTPUT_DIR}" \ + conda/recipes/dask-cudf + +rapids-conda-retry mambabuild \ + --no-test \ + --channel "${CPP_CHANNEL}" \ + --channel "${RAPIDS_CONDA_BLD_OUTPUT_DIR}" \ + conda/recipes/cudf_kafka + +rapids-conda-retry mambabuild \ + --no-test \ + --channel "${CPP_CHANNEL}" \ + --channel "${RAPIDS_CONDA_BLD_OUTPUT_DIR}" \ + conda/recipes/custreamz + + +rapids-upload-conda-to-s3 python diff --git a/ci/build_wheel.sh b/ci/build_wheel.sh new file mode 100755 index 0000000..a1d52c5 --- /dev/null +++ b/ci/build_wheel.sh @@ -0,0 +1,55 @@ +#!/bin/bash +# Copyright (c) 2023, 
NVIDIA CORPORATION. + +set -euo pipefail + +package_name=$1 +package_dir=$2 + +source rapids-configure-sccache +source rapids-date-string + +# Use gha-tools rapids-pip-wheel-version to generate wheel version then +# update the necessary files +version_override="$(rapids-pip-wheel-version ${RAPIDS_DATE_STRING})" + +RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" + +# This is the version of the suffix with a preceding hyphen. It's used +# everywhere except in the final wheel name. +PACKAGE_CUDA_SUFFIX="-${RAPIDS_PY_CUDA_SUFFIX}" + +# Patch project metadata files to include the CUDA version suffix and version override. +pyproject_file="${package_dir}/pyproject.toml" + +sed -i "s/^version = .*/version = \"${version_override}\"/g" ${pyproject_file} +sed -i "s/^name = .*/name = \"${package_name}${PACKAGE_CUDA_SUFFIX}\"/g" ${pyproject_file} + +# For nightlies we want to ensure that we're pulling in alphas as well. The +# easiest way to do so is to augment the spec with a constraint containing a +# min alpha version that doesn't affect the version bounds but does allow usage +# of alpha versions for that dependency without --pre +alpha_spec='' +if ! 
rapids-is-release-build; then + alpha_spec=',>=0.0.0a0' +fi + +if [[ ${package_name} == "dask_cudf" ]]; then + sed -r -i "s/cudf==(.*)\"/cudf${PACKAGE_CUDA_SUFFIX}==\1${alpha_spec}\"/g" ${pyproject_file} +else + sed -r -i "s/rmm(.*)\"/rmm${PACKAGE_CUDA_SUFFIX}\1${alpha_spec}\"/g" ${pyproject_file} + # ptxcompiler and cubinlinker aren't version constrained + sed -r -i "s/ptxcompiler\"/ptxcompiler${PACKAGE_CUDA_SUFFIX}\"/g" ${pyproject_file} + sed -r -i "s/cubinlinker\"/cubinlinker${PACKAGE_CUDA_SUFFIX}\"/g" ${pyproject_file} +fi + +if [[ $PACKAGE_CUDA_SUFFIX == "-cu12" ]]; then + sed -i "s/cuda-python[<=>\.,0-9a]*/cuda-python>=12.0,<13.0a0/g" ${pyproject_file} + sed -i "s/cupy-cuda11x/cupy-cuda12x/g" ${pyproject_file} + sed -i "/ptxcompiler/d" ${pyproject_file} + sed -i "/cubinlinker/d" ${pyproject_file} +fi + +cd "${package_dir}" + +python -m pip wheel . -w dist -vvv --no-deps --disable-pip-version-check diff --git a/ci/build_wheel_cudf.sh b/ci/build_wheel_cudf.sh new file mode 100755 index 0000000..1b2285b --- /dev/null +++ b/ci/build_wheel_cudf.sh @@ -0,0 +1,26 @@ +#!/bin/bash +# Copyright (c) 2023, NVIDIA CORPORATION. 
+ +set -euo pipefail + +package_dir="python/cudf" + +export SKBUILD_CONFIGURE_OPTIONS="-DCUDF_BUILD_WHEELS=ON -DDETECT_CONDA_ENV=OFF" + +# Force a build using the latest version of the code before this PR +CUDF_BUILD_BRANCH=${1:-""} +WHEEL_NAME="cudf" +if [[ "${CUDF_BUILD_BRANCH}" == "main" ]]; then + MAIN_COMMIT=$(git merge-base HEAD origin/branch-23.10-xdf) + git checkout $MAIN_COMMIT + WHEEL_NAME="${WHEEL_NAME}_${CUDF_BUILD_BRANCH}" +fi + +./ci/build_wheel.sh ${WHEEL_NAME} ${package_dir} + +mkdir -p ${package_dir}/final_dist +python -m auditwheel repair -w ${package_dir}/final_dist ${package_dir}/dist/* + + +RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" +RAPIDS_PY_WHEEL_NAME="${WHEEL_NAME}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 ${package_dir}/final_dist diff --git a/ci/build_wheel_dask_cudf.sh b/ci/build_wheel_dask_cudf.sh new file mode 100755 index 0000000..47e35c4 --- /dev/null +++ b/ci/build_wheel_dask_cudf.sh @@ -0,0 +1,11 @@ +#!/bin/bash +# Copyright (c) 2023, NVIDIA CORPORATION. + +set -euo pipefail + +package_dir="python/dask_cudf" + +./ci/build_wheel.sh dask_cudf ${package_dir} + +RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" +RAPIDS_PY_WHEEL_NAME="dask_cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 ${package_dir}/dist diff --git a/ci/check_style.sh b/ci/check_style.sh new file mode 100755 index 0000000..e96ad8b --- /dev/null +++ b/ci/check_style.sh @@ -0,0 +1,23 @@ +#!/bin/bash +# Copyright (c) 2020-2023, NVIDIA CORPORATION. + +set -euo pipefail + +rapids-logger "Create checks conda environment" +. 
/opt/conda/etc/profile.d/conda.sh + +rapids-dependency-file-generator \ + --output conda \ + --file_key checks \ + --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION}" | tee env.yaml + +rapids-mamba-retry env create --force -f env.yaml -n checks +conda activate checks + +FORMAT_FILE_URL=https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-23.10/cmake-format-rapids-cmake.json +export RAPIDS_CMAKE_FORMAT_FILE=/tmp/rapids_cmake_ci/cmake-formats-rapids-cmake.json +mkdir -p $(dirname ${RAPIDS_CMAKE_FORMAT_FILE}) +wget -O ${RAPIDS_CMAKE_FORMAT_FILE} ${FORMAT_FILE_URL} + +# Run pre-commit checks +pre-commit run --all-files --show-diff-on-failure diff --git a/ci/checks/copyright.py b/ci/checks/copyright.py new file mode 100644 index 0000000..dd89b09 --- /dev/null +++ b/ci/checks/copyright.py @@ -0,0 +1,277 @@ +# Copyright (c) 2019-2023, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import argparse +import datetime +import os +import re +import sys + +import git + +FilesToCheck = [ + re.compile(r"[.](cmake|cpp|cu|cuh|h|hpp|sh|pxd|py|pyx)$"), + re.compile(r"CMakeLists[.]txt$"), + re.compile(r"CMakeLists_standalone[.]txt$"), + re.compile(r"setup[.]cfg$"), + re.compile(r"meta[.]yaml$"), +] +ExemptFiles = [ + re.compile(r"cpp/include/cudf_test/cxxopts.hpp"), +] + +# this will break starting at year 10000, which is probably OK :) +CheckSimple = re.compile( + r"Copyright *(?:\(c\))? *(\d{4}),? 
*NVIDIA C(?:ORPORATION|orporation)" +) +CheckDouble = re.compile( + r"Copyright *(?:\(c\))? *(\d{4})-(\d{4}),? *NVIDIA C(?:ORPORATION|orporation)" # noqa: E501 +) + + +def checkThisFile(f): + if isinstance(f, git.Diff): + if f.deleted_file or f.b_blob.size == 0: + return False + f = f.b_path + elif not os.path.exists(f) or os.stat(f).st_size == 0: + # This check covers things like symlinks which point to files that DNE + return False + for exempt in ExemptFiles: + if exempt.search(f): + return False + for checker in FilesToCheck: + if checker.search(f): + return True + return False + + +def modifiedFiles(): + """Get a set of all modified files, as Diff objects. + + The files returned have been modified in git since the merge base of HEAD + and the upstream of the target branch. We return the Diff objects so that + we can read only the staged changes. + """ + repo = git.Repo() + # Use the environment variable TARGET_BRANCH or RAPIDS_BASE_BRANCH (defined in CI) if possible + target_branch = os.environ.get("TARGET_BRANCH", os.environ.get("RAPIDS_BASE_BRANCH")) + if target_branch is None: + # Fall back to the closest branch if not on CI + target_branch = repo.git.describe( + all=True, tags=True, match="branch-*", abbrev=0 + ).lstrip("heads/") + + upstream_target_branch = None + if target_branch in repo.heads: + # Use the tracking branch of the local reference if it exists. This + # returns None if no tracking branch is set. + upstream_target_branch = repo.heads[target_branch].tracking_branch() + if upstream_target_branch is None: + # Fall back to the remote with the newest target_branch. This code + # path is used on CI because the only local branch reference is + # current-pr-branch, and thus target_branch is not in repo.heads. + # This also happens if no tracking branch is defined for the local + # target_branch. We use the remote with the latest commit if + # multiple remotes are defined. 
+ candidate_branches = [ + remote.refs[target_branch] for remote in repo.remotes + if target_branch in remote.refs + ] + if len(candidate_branches) > 0: + upstream_target_branch = sorted( + candidate_branches, + key=lambda branch: branch.commit.committed_datetime, + )[-1] + else: + # If no remotes are defined, try to use the local version of the + # target_branch. If this fails, the repo configuration must be very + # strange and we can fix this script on a case-by-case basis. + upstream_target_branch = repo.heads[target_branch] + merge_base = repo.merge_base("HEAD", upstream_target_branch.commit)[0] + diff = merge_base.diff() + changed_files = {f for f in diff if f.b_path is not None} + return changed_files + + +def getCopyrightYears(line): + res = CheckSimple.search(line) + if res: + return int(res.group(1)), int(res.group(1)) + res = CheckDouble.search(line) + if res: + return int(res.group(1)), int(res.group(2)) + return None, None + + +def replaceCurrentYear(line, start, end): + # first turn a simple regex into double (if applicable). 
then update years + res = CheckSimple.sub(r"Copyright (c) \1-\1, NVIDIA CORPORATION", line) + res = CheckDouble.sub( + rf"Copyright (c) {start:04d}-{end:04d}, NVIDIA CORPORATION", + res, + ) + return res + + +def checkCopyright(f, update_current_year): + """Checks for copyright headers and their years.""" + errs = [] + thisYear = datetime.datetime.now().year + lineNum = 0 + crFound = False + yearMatched = False + + if isinstance(f, git.Diff): + path = f.b_path + lines = f.b_blob.data_stream.read().decode().splitlines(keepends=True) + else: + path = f + with open(f, encoding="utf-8") as fp: + lines = fp.readlines() + + for line in lines: + lineNum += 1 + start, end = getCopyrightYears(line) + if start is None: + continue + crFound = True + if start > end: + e = [ + path, + lineNum, + "First year after second year in the copyright " + "header (manual fix required)", + None, + ] + errs.append(e) + elif thisYear < start or thisYear > end: + e = [ + path, + lineNum, + "Current year not included in the copyright header", + None, + ] + if thisYear < start: + e[-1] = replaceCurrentYear(line, thisYear, end) + if thisYear > end: + e[-1] = replaceCurrentYear(line, start, thisYear) + errs.append(e) + else: + yearMatched = True + # copyright header itself not found + if not crFound: + e = [ + path, + 0, + "Copyright header missing or formatted incorrectly " + "(manual fix required)", + None, + ] + errs.append(e) + # even if the year matches a copyright header, make the check pass + if yearMatched: + errs = [] + + if update_current_year: + errs_update = [x for x in errs if x[-1] is not None] + if len(errs_update) > 0: + lines_changed = ", ".join(str(x[1]) for x in errs_update) + print(f"File: {path}. 
Changing line(s) {lines_changed}") + for _, lineNum, __, replacement in errs_update: + lines[lineNum - 1] = replacement + with open(path, "w", encoding="utf-8") as out_file: + out_file.writelines(lines) + + return errs + + +def getAllFilesUnderDir(root, pathFilter=None): + retList = [] + for dirpath, dirnames, filenames in os.walk(root): + for fn in filenames: + filePath = os.path.join(dirpath, fn) + if pathFilter(filePath): + retList.append(filePath) + return retList + + +def checkCopyright_main(): + """ + Checks for copyright headers in all the modified files. In case of local + repo, this script will just look for uncommitted files and in case of CI + it compares between branches "$PR_TARGET_BRANCH" and "current-pr-branch" + """ + retVal = 0 + + argparser = argparse.ArgumentParser( + "Checks for a consistent copyright header in git's modified files" + ) + argparser.add_argument( + "--update-current-year", + dest="update_current_year", + action="store_true", + required=False, + help="If set, " + "update the current year if a header is already " + "present and well formatted.", + ) + argparser.add_argument( + "--git-modified-only", + dest="git_modified_only", + action="store_true", + required=False, + help="If set, " + "only files seen as modified by git will be " + "processed.", + ) + + args, dirs = argparser.parse_known_args() + + if args.git_modified_only: + files = [f for f in modifiedFiles() if checkThisFile(f)] + else: + files = [] + for d in [os.path.abspath(d) for d in dirs]: + if not os.path.isdir(d): + raise ValueError(f"{d} is not a directory.") + files += getAllFilesUnderDir(d, pathFilter=checkThisFile) + + errors = [] + for f in files: + errors += checkCopyright(f, args.update_current_year) + + if len(errors) > 0: + if any(e[-1] is None for e in errors): + print("Copyright headers incomplete in some of the files!") + for e in errors: + print(" %s:%d Issue: %s" % (e[0], e[1], e[2])) + print("") + n_fixable = sum(1 for e in errors if e[-1] is not None) 
+ path_parts = os.path.abspath(__file__).split(os.sep) + file_from_repo = os.sep.join(path_parts[path_parts.index("ci") :]) + if n_fixable > 0 and not args.update_current_year: + print( + f"You can run `python {file_from_repo} --git-modified-only " + "--update-current-year` and stage the results in git to " + f"fix {n_fixable} of these errors.\n" + ) + retVal = 1 + + return retVal + + +if __name__ == "__main__": + sys.exit(checkCopyright_main()) diff --git a/ci/checks/doxygen.sh b/ci/checks/doxygen.sh new file mode 100755 index 0000000..d932fa0 --- /dev/null +++ b/ci/checks/doxygen.sh @@ -0,0 +1,35 @@ +#!/bin/bash +# Copyright (c) 2022-2023, NVIDIA CORPORATION. +############################### +# cuDF doxygen warnings check # +############################### + +# skip if doxygen is not installed +if ! [ -x "$(command -v doxygen)" ]; then + echo -e "warning: doxygen is not installed" + exit 0 +fi + +# Utility to return version as number for comparison +function version { echo "$@" | awk -F. '{ printf("%d%03d%03d%03d\n", $1,$2,$3,$4); }'; } + +# doxygen supported version 1.9.1 +DOXYGEN_VERSION=`doxygen --version` +if [ ! $(version "$DOXYGEN_VERSION") -eq $(version "1.9.1") ] ; then + echo -e "warning: Unsupported doxygen version $DOXYGEN_VERSION" + echo -e "Expecting doxygen version 1.9.1" + exit 0 +fi + +# Run doxygen, ignore missing tag files error +TAG_ERROR1="error: Tag file '.*.tag' does not exist or is not a file. Skipping it..." +TAG_ERROR2="error: cannot open tag file .*.tag for writing" +DOXYGEN_STDERR=`cd cpp/doxygen && { cat Doxyfile ; echo QUIET = YES; echo GENERATE_HTML = NO; } | doxygen - 2>&1 | sed "/\($TAG_ERROR1\|$TAG_ERROR2\)/d"` +RETVAL=$? + +if [ "$RETVAL" != "0" ] || [ ! 
-z "$DOXYGEN_STDERR" ]; then + echo -e "$DOXYGEN_STDERR" + RETVAL=1 #because return value is not generated by doxygen 1.8.20 +fi + +exit $RETVAL diff --git a/ci/cudf_pandas_scripts/pandas-tests/diff.sh b/ci/cudf_pandas_scripts/pandas-tests/diff.sh new file mode 100755 index 0000000..37adabd --- /dev/null +++ b/ci/cudf_pandas_scripts/pandas-tests/diff.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. +# All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# Download the summarized results of running the Pandas tests on both the main +# branch and the PR branch: + +# Hard-coded needs to match the version deduced by rapids-upload-artifacts-dir +MAIN_ARTIFACT=$(rapids-s3-path)cuda12_$(arch)_py310.main-results.json +PR_ARTIFACT=$(rapids-s3-path)cuda12_$(arch)_py310.pr-results.json +aws s3 cp $MAIN_ARTIFACT main-results.json +aws s3 cp $PR_ARTIFACT pr-results.json + +# Compute the diff and prepare job summary: +python -m pip install pandas tabulate +python ci/cudf_pandas_scripts/pandas-tests/job-summary.py main-results.json pr-results.json | tee summary.txt >> "$GITHUB_STEP_SUMMARY" + +COMMENT=$(head -1 summary.txt) + +echo "$COMMENT" + +# Magic name that the custom-job.yaml workflow reads and re-exports +echo "job_output=${COMMENT}" >> "${GITHUB_OUTPUT}" diff --git a/ci/cudf_pandas_scripts/pandas-tests/job-summary.py b/ci/cudf_pandas_scripts/pandas-tests/job-summary.py new file mode 100644 index 0000000..1e83e51 --- /dev/null +++ b/ci/cudf_pandas_scripts/pandas-tests/job-summary.py @@ -0,0 +1,100 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. +# All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 + +import json +import sys + +import pandas as pd + + +def get_total_and_passed(results): + total_failed = 0 + total_errored = 0 + total_passed = 0 + for module_name, row in results.items(): + total_failed += row.get("failed", 0) + total_errored += row.get("errored", 0) + total_passed += row.get("passed", 0) + total_tests = total_failed + total_errored + total_passed + return total_tests, total_passed + + +main_json = sys.argv[1] +pr_json = sys.argv[2] + +# read the results of summarize-test-results.py --summary +with open(main_json) as f: + main_results = json.load(f) +main_total, main_passed = get_total_and_passed(main_results) + +with open(pr_json) as f: + pr_results = json.load(f) +pr_total, pr_passed = get_total_and_passed(pr_results) + +passing_percentage = pr_passed / pr_total * 100 +pass_rate_change = abs(pr_passed - main_passed) / main_passed * 100 +rate_change_type = "a decrease" if pr_passed < main_passed else "an increase" + +comment = ( + "Merging this PR would result in " + f"{pr_passed}/{pr_total} ({passing_percentage:.2f}%) " + "Pandas tests passing, " + f"{rate_change_type} in the test pass rate by " + f"{pass_rate_change:.2f}%. " + f"Trunk stats: {main_passed}/{main_total}." 
+) + + +def emoji_passed(x): + if x > 0: + return f"{x}✅" + elif x < 0: + return f"{x}❌" + else: + return f"{x}" + + +def emoji_failed(x): + if x > 0: + return f"{x}❌" + elif x < 0: + return f"{x}✅" + else: + return f"{x}" + + +# convert pr_results to a pandas DataFrame and then a markdown table +pr_df = pd.DataFrame.from_dict(pr_results, orient="index").sort_index() +main_df = pd.DataFrame.from_dict(main_results, orient="index").sort_index() +diff_df = pr_df - main_df + +pr_df = pr_df[["total", "passed", "failed", "skipped"]] +diff_df = diff_df[["total", "passed", "failed", "skipped"]] +diff_df.columns = diff_df.columns + "_diff" +diff_df["passed_diff"] = diff_df["passed_diff"].map(emoji_passed) +diff_df["failed_diff"] = diff_df["failed_diff"].map(emoji_failed) +diff_df["skipped_diff"] = diff_df["skipped_diff"].map(emoji_failed) + +df = pd.concat([pr_df, diff_df], axis=1) +df = df.rename_axis("Test module") + +df = df.rename( + columns={ + "total": "Total tests", + "passed": "Passed tests", + "failed": "Failed tests", + "skipped": "Skipped tests", + "total_diff": "Total delta", + "passed_diff": "Passed delta", + "failed_diff": "Failed delta", + "skipped_diff": "Skipped delta", + } +) +df = df.sort_values(by=["Failed tests", "Skipped tests"], ascending=False) + +print(comment) +print() +print("Here are the results of running the Pandas tests against this PR:") +print() +print(df.to_markdown()) diff --git a/ci/cudf_pandas_scripts/pandas-tests/run.sh b/ci/cudf_pandas_scripts/pandas-tests/run.sh new file mode 100755 index 0000000..920625b --- /dev/null +++ b/ci/cudf_pandas_scripts/pandas-tests/run.sh @@ -0,0 +1,37 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. +# All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 + +PANDAS_TESTS_BRANCH=${1} + +rapids-logger "Running Pandas tests using $PANDAS_TESTS_BRANCH branch" +rapids-logger "PR number: $RAPIDS_REF_NAME" + + +COMMIT=$(git rev-parse HEAD) +WHEEL_NAME="cudf" +if [[ "${PANDAS_TESTS_BRANCH}" == "main" ]]; then + COMMIT=$(git merge-base HEAD origin/branch-23.10-xdf) + WHEEL_NAME="${WHEEL_NAME}_${PANDAS_TESTS_BRANCH}" +fi + +RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" +RAPIDS_PY_WHEEL_NAME="${WHEEL_NAME}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./local-cudf-dep +python -m pip install $(ls ./local-cudf-dep/cudf*.whl)[test,pandas_tests] + +git checkout $COMMIT + +bash python/cudf/cudf/pandas/scripts/run-pandas-tests.sh \ + -n 10 \ + --tb=line \ + --skip-slow \ + --max-worker-restart=3 \ + --import-mode=importlib \ + --report-log=${PANDAS_TESTS_BRANCH}.json 2>&1 + +# summarize the results and save them to artifacts: +python python/cudf/cudf/pandas/scripts/summarize-test-results.py --output json pandas-testing/${PANDAS_TESTS_BRANCH}.json > pandas-testing/${PANDAS_TESTS_BRANCH}-results.json +RAPIDS_ARTIFACTS_DIR=${RAPIDS_ARTIFACTS_DIR:-"${PWD}/artifacts"} +mkdir -p "${RAPIDS_ARTIFACTS_DIR}" +mv pandas-testing/${PANDAS_TESTS_BRANCH}-results.json ${RAPIDS_ARTIFACTS_DIR}/ diff --git a/ci/cudf_pandas_scripts/run_tests.sh b/ci/cudf_pandas_scripts/run_tests.sh new file mode 100755 index 0000000..cc578b5 --- /dev/null +++ b/ci/cudf_pandas_scripts/run_tests.sh @@ -0,0 +1,39 @@ +#!/bin/bash +# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. +# All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 + +set -eoxu pipefail + +# Function to display script usage +function display_usage { + echo "Usage: $0 [--no-cudf]" +} + +# Default value for the --no-cudf option +no_cudf=false + +# Parse command-line arguments +while [[ $# -gt 0 ]]; do + case "$1" in + --no-cudf) + no_cudf=true + shift + ;; + *) + echo "Error: Unknown option $1" + display_usage + exit 1 + ;; + esac +done + +if [ "$no_cudf" = true ]; then + echo "Skipping cudf install" +else + RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" + RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./local-cudf-dep + python -m pip install $(ls ./local-cudf-dep/cudf*.whl)[test,cudf_pandas_tests] +fi + +python -m pytest -p cudf.pandas ./python/cudf/cudf_pandas_tests/ diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh new file mode 100755 index 0000000..eac64fe --- /dev/null +++ b/ci/release/update-version.sh @@ -0,0 +1,125 @@ +#!/bin/bash +# Copyright (c) 2020-2023, NVIDIA CORPORATION. +######################## +# cuDF Version Updater # +######################## + +## Usage +# bash update-version.sh + + +# Format is YY.MM.PP - no leading 'v' or trailing 'a' +NEXT_FULL_TAG=$1 + +# Get current version +CURRENT_TAG=$(git tag --merged HEAD | grep -xE '^v.*' | sort --version-sort | tail -n 1 | tr -d 'v') +CURRENT_MAJOR=$(echo $CURRENT_TAG | awk '{split($0, a, "."); print a[1]}') +CURRENT_MINOR=$(echo $CURRENT_TAG | awk '{split($0, a, "."); print a[2]}') +CURRENT_PATCH=$(echo $CURRENT_TAG | awk '{split($0, a, "."); print a[3]}') +CURRENT_SHORT_TAG=${CURRENT_MAJOR}.${CURRENT_MINOR} + +#Get . 
for next version +NEXT_MAJOR=$(echo $NEXT_FULL_TAG | awk '{split($0, a, "."); print a[1]}') +NEXT_MINOR=$(echo $NEXT_FULL_TAG | awk '{split($0, a, "."); print a[2]}') +NEXT_PATCH=$(echo $NEXT_FULL_TAG | awk '{split($0, a, "."); print a[3]}') +NEXT_SHORT_TAG=${NEXT_MAJOR}.${NEXT_MINOR} +NEXT_UCX_PY_VERSION="$(curl -sL https://version.gpuci.io/rapids/${NEXT_SHORT_TAG}).*" + +# Need to distutils-normalize the versions for some use cases +CURRENT_SHORT_TAG_PEP440=$(python -c "from setuptools.extern import packaging; print(packaging.version.Version('${CURRENT_SHORT_TAG}'))") +NEXT_SHORT_TAG_PEP440=$(python -c "from setuptools.extern import packaging; print(packaging.version.Version('${NEXT_SHORT_TAG}'))") +PATCH_PEP440=$(python -c "from setuptools.extern import packaging; print(packaging.version.Version('${NEXT_PATCH}'))") +echo "current is ${CURRENT_SHORT_TAG_PEP440}, next is ${NEXT_SHORT_TAG_PEP440}" + +echo "Preparing release $CURRENT_TAG => $NEXT_FULL_TAG" + +# Inplace sed replace; workaround for Linux and Mac +function sed_runner() { + sed -i.bak ''"$1"'' $2 && rm -f ${2}.bak +} + +# cpp update +sed_runner 's/'"VERSION ${CURRENT_SHORT_TAG}.*"'/'"VERSION ${NEXT_FULL_TAG}"'/g' cpp/CMakeLists.txt + +# Python CMakeLists updates +sed_runner 's/'"cudf_version .*)"'/'"cudf_version ${NEXT_FULL_TAG})"'/g' python/cudf/CMakeLists.txt + +# cpp libcudf_kafka update +sed_runner 's/'"VERSION ${CURRENT_SHORT_TAG}.*"'/'"VERSION ${NEXT_FULL_TAG}"'/g' cpp/libcudf_kafka/CMakeLists.txt + +# cpp cudf_jni update +sed_runner 's/'"VERSION ${CURRENT_SHORT_TAG}.*"'/'"VERSION ${NEXT_FULL_TAG}"'/g' java/src/main/native/CMakeLists.txt + +# Python __init__.py updates +sed_runner "s/__version__ = .*/__version__ = \"${NEXT_FULL_TAG}\"/g" python/cudf/cudf/__init__.py +sed_runner "s/__version__ = .*/__version__ = \"${NEXT_FULL_TAG}\"/g" python/dask_cudf/dask_cudf/__init__.py +sed_runner "s/__version__ = .*/__version__ = \"${NEXT_FULL_TAG}\"/g" python/cudf_kafka/cudf_kafka/__init__.py +sed_runner 
"s/__version__ = .*/__version__ = \"${NEXT_FULL_TAG}\"/g" python/custreamz/custreamz/__init__.py + +# Python pyproject.toml updates +sed_runner "s/^version = .*/version = \"${NEXT_FULL_TAG}\"/g" python/cudf/pyproject.toml +sed_runner "s/^version = .*/version = \"${NEXT_FULL_TAG}\"/g" python/dask_cudf/pyproject.toml +sed_runner "s/^version = .*/version = \"${NEXT_FULL_TAG}\"/g" python/cudf_kafka/pyproject.toml +sed_runner "s/^version = .*/version = \"${NEXT_FULL_TAG}\"/g" python/custreamz/pyproject.toml + +# Wheel testing script +sed_runner "s/branch-.*/branch-${NEXT_SHORT_TAG}/g" ci/test_wheel_dask_cudf.sh + +# rapids-cmake version +sed_runner 's/'"branch-.*\/RAPIDS.cmake"'/'"branch-${NEXT_SHORT_TAG}\/RAPIDS.cmake"'/g' fetch_rapids.cmake + +# cmake-format rapids-cmake definitions +sed_runner 's/'"branch-.*\/cmake-format-rapids-cmake.json"'/'"branch-${NEXT_SHORT_TAG}\/cmake-format-rapids-cmake.json"'/g' ci/check_style.sh + +# doxyfile update +sed_runner 's/PROJECT_NUMBER = .*/PROJECT_NUMBER = '${NEXT_FULL_TAG}'/g' cpp/doxygen/Doxyfile + +# sphinx docs update +sed_runner 's/version = .*/version = '"'${NEXT_SHORT_TAG}'"'/g' docs/cudf/source/conf.py +sed_runner 's/release = .*/release = '"'${NEXT_FULL_TAG}'"'/g' docs/cudf/source/conf.py +sed_runner 's/version = .*/version = '"'${NEXT_SHORT_TAG}'"'/g' docs/dask_cudf/source/conf.py +sed_runner 's/release = .*/release = '"'${NEXT_FULL_TAG}'"'/g' docs/dask_cudf/source/conf.py + +DEPENDENCIES=( + cudf + cudf_kafka + custreamz + dask-cuda + dask-cudf + kvikio + libkvikio + librmm + rmm +) +for DEP in "${DEPENDENCIES[@]}"; do + for FILE in dependencies.yaml conda/environments/*.yaml; do + sed_runner "/-.* ${DEP}==/ s/==.*/==${NEXT_SHORT_TAG_PEP440}.*/g" ${FILE} + done + for FILE in python/*/pyproject.toml; do + sed_runner "/\"${DEP}==/ s/==.*\"/==${NEXT_SHORT_TAG_PEP440}.*\"/g" ${FILE} + done +done + +# Doxyfile update +sed_runner "s|\(TAGFILES.*librmm/\).*|\1${NEXT_SHORT_TAG}|" cpp/doxygen/Doxyfile + +# README.md update 
+sed_runner "s/version == ${CURRENT_SHORT_TAG}/version == ${NEXT_SHORT_TAG}/g" README.md +sed_runner "s/cudf=${CURRENT_SHORT_TAG}/cudf=${NEXT_SHORT_TAG}/g" README.md + +# Libcudf examples update +sed_runner "s/CUDF_TAG branch-${CURRENT_SHORT_TAG}/CUDF_TAG branch-${NEXT_SHORT_TAG}/" cpp/examples/basic/CMakeLists.txt +sed_runner "s/CUDF_TAG branch-${CURRENT_SHORT_TAG}/CUDF_TAG branch-${NEXT_SHORT_TAG}/" cpp/examples/strings/CMakeLists.txt + +# CI files +for FILE in .github/workflows/*.yaml; do + sed_runner "/shared-workflows/ s/@.*/@branch-${NEXT_SHORT_TAG}/g" "${FILE}" + sed_runner "s/dask-cuda.git@branch-[^\"\s]\+/dask-cuda.git@branch-${NEXT_SHORT_TAG}/g" ${FILE}; +done +sed_runner "s/RAPIDS_VERSION_NUMBER=\".*/RAPIDS_VERSION_NUMBER=\"${NEXT_SHORT_TAG}\"/g" ci/build_docs.sh + +# Java files +NEXT_FULL_JAVA_TAG="${NEXT_SHORT_TAG}.${PATCH_PEP440}-SNAPSHOT" +sed_runner "s|.*-SNAPSHOT|${NEXT_FULL_JAVA_TAG}|g" java/pom.xml +sed_runner "s/branch-.*/branch-${NEXT_SHORT_TAG}/g" java/ci/README.md +sed_runner "s/cudf-.*-SNAPSHOT/cudf-${NEXT_FULL_JAVA_TAG}/g" java/ci/README.md diff --git a/ci/test_cpp.sh b/ci/test_cpp.sh new file mode 100755 index 0000000..30172b7 --- /dev/null +++ b/ci/test_cpp.sh @@ -0,0 +1,45 @@ +#!/bin/bash +# Copyright (c) 2022-2023, NVIDIA CORPORATION. + +source "$(dirname "$0")/test_cpp_common.sh" + +EXITCODE=0 +trap "EXITCODE=1" ERR +set +e + +# Run libcudf and libcudf_kafka gtests from libcudf-tests package +export GTEST_OUTPUT=xml:${RAPIDS_TESTS_DIR}/ + +pushd $CONDA_PREFIX/bin/gtests/libcudf/ +rapids-logger "Run libcudf gtests" +ctest -j20 --output-on-failure +SUITEERROR=$? +popd + +if (( ${SUITEERROR} == 0 )); then + pushd $CONDA_PREFIX/bin/gtests/libcudf_kafka/ + rapids-logger "Run libcudf_kafka gtests" + ctest -j20 --output-on-failure + SUITEERROR=$? 
+ popd +fi + +# Ensure that benchmarks are runnable +pushd $CONDA_PREFIX/bin/benchmarks/libcudf/ +rapids-logger "Run tests of libcudf benchmarks" + +if (( ${SUITEERROR} == 0 )); then + # Run a small Google benchmark + ./MERGE_BENCH --benchmark_filter=/2/ + SUITEERROR=$? +fi + +if (( ${SUITEERROR} == 0 )); then + # Run a small nvbench benchmark + ./STRINGS_NVBENCH --run-once --benchmark 0 --devices 0 + SUITEERROR=$? +fi +popd + +rapids-logger "Test script exiting with value: $EXITCODE" +exit ${EXITCODE} diff --git a/ci/test_cpp_common.sh b/ci/test_cpp_common.sh new file mode 100644 index 0000000..c7c095d --- /dev/null +++ b/ci/test_cpp_common.sh @@ -0,0 +1,32 @@ +#!/bin/bash +# Copyright (c) 2022-2023, NVIDIA CORPORATION. + +set -euo pipefail + +. /opt/conda/etc/profile.d/conda.sh + +rapids-logger "Generate C++ testing dependencies" +rapids-dependency-file-generator \ + --output conda \ + --file_key test_cpp \ + --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch)" | tee env.yaml + +rapids-mamba-retry env create --force -f env.yaml -n test + +# Temporarily allow unbound variables for conda activation. +set +u +conda activate test +set -u + +CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp) +RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${PWD}/test-results"}/ +mkdir -p "${RAPIDS_TESTS_DIR}" + +rapids-print-env + +rapids-mamba-retry install \ + --channel "${CPP_CHANNEL}" \ + libcudf libcudf_kafka libcudf-tests + +rapids-logger "Check GPU usage" +nvidia-smi diff --git a/ci/test_cpp_memcheck.sh b/ci/test_cpp_memcheck.sh new file mode 100755 index 0000000..0e85268 --- /dev/null +++ b/ci/test_cpp_memcheck.sh @@ -0,0 +1,25 @@ +#!/bin/bash +# Copyright (c) 2023, NVIDIA CORPORATION. 
+ +source "$(dirname "$0")/test_cpp_common.sh" + +EXITCODE=0 +trap "EXITCODE=1" ERR +set +e + +# Run gtests with compute-sanitizer +rapids-logger "Memcheck gtests with rmm_mode=cuda" +export GTEST_CUDF_RMM_MODE=cuda +COMPUTE_SANITIZER_CMD="compute-sanitizer --tool memcheck" +for gt in "$CONDA_PREFIX"/bin/gtests/libcudf/*_TEST ; do + test_name=$(basename ${gt}) + if [[ "$test_name" == "ERROR_TEST" ]] || [[ "$test_name" == "STREAM_IDENTIFICATION_TEST" ]]; then + continue + fi + echo "Running compute-sanitizer on $test_name" + ${COMPUTE_SANITIZER_CMD} ${gt} --gtest_output=xml:"${RAPIDS_TESTS_DIR}${test_name}.xml" +done +unset GTEST_CUDF_RMM_MODE + +rapids-logger "Test script exiting with value: $EXITCODE" +exit ${EXITCODE} diff --git a/ci/test_java.sh b/ci/test_java.sh new file mode 100755 index 0000000..e4df625 --- /dev/null +++ b/ci/test_java.sh @@ -0,0 +1,45 @@ +#!/bin/bash +# Copyright (c) 2022-2023, NVIDIA CORPORATION. + +set -euo pipefail + +. /opt/conda/etc/profile.d/conda.sh + +rapids-logger "Generate Java testing dependencies" +rapids-dependency-file-generator \ + --output conda \ + --file_key test_java \ + --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch)" | tee env.yaml + +rapids-mamba-retry env create --force -f env.yaml -n test + +export CMAKE_GENERATOR=Ninja + +# Temporarily allow unbound variables for conda activation. 
+set +u +conda activate test +set -u + +rapids-print-env + +rapids-logger "Downloading artifacts from previous jobs" +CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp) + +rapids-mamba-retry install \ + --channel "${CPP_CHANNEL}" \ + libcudf + +rapids-logger "Check GPU usage" +nvidia-smi + +EXITCODE=0 +trap "EXITCODE=1" ERR +set +e + +rapids-logger "Run Java tests" +pushd java +mvn test -B -DCUDF_JNI_ENABLE_PROFILING=OFF +popd + +rapids-logger "Test script exiting with value: $EXITCODE" +exit ${EXITCODE} diff --git a/ci/test_notebooks.sh b/ci/test_notebooks.sh new file mode 100755 index 0000000..348428f --- /dev/null +++ b/ci/test_notebooks.sh @@ -0,0 +1,61 @@ +#!/bin/bash +# Copyright (c) 2020-2023, NVIDIA CORPORATION. + +set -euo pipefail + +. /opt/conda/etc/profile.d/conda.sh + +rapids-logger "Generate notebook testing dependencies" +rapids-dependency-file-generator \ + --output conda \ + --file_key test_notebooks \ + --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION}" | tee env.yaml + +rapids-mamba-retry env create --force -f env.yaml -n test + +# Temporarily allow unbound variables for conda activation. +set +u +conda activate test +set -u + +rapids-print-env + +rapids-logger "Downloading artifacts from previous jobs" +CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp) +PYTHON_CHANNEL=$(rapids-download-conda-from-s3 python) + +rapids-mamba-retry install \ + --channel "${CPP_CHANNEL}" \ + --channel "${PYTHON_CHANNEL}" \ + cudf libcudf + +NBTEST="$(realpath "$(dirname "$0")/utils/nbtest.sh")" +pushd notebooks + +# Add notebooks that should be skipped here +# (space-separated list of filenames without paths) +SKIPNBS="performance-comparisons.ipynb" + +EXITCODE=0 +trap "EXITCODE=1" ERR +set +e +for nb in $(find . 
-name "*.ipynb"); do + nbBasename=$(basename ${nb}) + # Skip all notebooks that use dask (in the code or even in their name) + if ((echo ${nb} | grep -qi dask) || \ + (grep -q dask ${nb})); then + echo "--------------------------------------------------------------------------------" + echo "SKIPPING: ${nb} (suspected Dask usage, not currently automatable)" + echo "--------------------------------------------------------------------------------" + elif (echo " ${SKIPNBS} " | grep -q " ${nbBasename} "); then + echo "--------------------------------------------------------------------------------" + echo "SKIPPING: ${nb} (listed in skip list)" + echo "--------------------------------------------------------------------------------" + else + nvidia-smi + ${NBTEST} ${nbBasename} + fi +done + +rapids-logger "Test script exiting with value: $EXITCODE" +exit ${EXITCODE} diff --git a/ci/test_python_common.sh b/ci/test_python_common.sh new file mode 100755 index 0000000..0e922c1 --- /dev/null +++ b/ci/test_python_common.sh @@ -0,0 +1,36 @@ +#!/bin/bash +# Copyright (c) 2022-2023, NVIDIA CORPORATION. + +# Common setup steps shared by Python test jobs + +set -euo pipefail + +. /opt/conda/etc/profile.d/conda.sh + +rapids-logger "Generate Python testing dependencies" +rapids-dependency-file-generator \ + --output conda \ + --file_key test_python \ + --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION}" | tee env.yaml + +rapids-mamba-retry env create --force -f env.yaml -n test + +# Temporarily allow unbound variables for conda activation. 
+set +u +conda activate test +set -u + +rapids-logger "Downloading artifacts from previous jobs" +CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp) +PYTHON_CHANNEL=$(rapids-download-conda-from-s3 python) + +RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${PWD}/test-results"} +RAPIDS_COVERAGE_DIR=${RAPIDS_COVERAGE_DIR:-"${PWD}/coverage-results"} +mkdir -p "${RAPIDS_TESTS_DIR}" "${RAPIDS_COVERAGE_DIR}" + +rapids-print-env + +rapids-mamba-retry install \ + --channel "${CPP_CHANNEL}" \ + --channel "${PYTHON_CHANNEL}" \ + cudf libcudf diff --git a/ci/test_python_cudf.sh b/ci/test_python_cudf.sh new file mode 100755 index 0000000..bb33d84 --- /dev/null +++ b/ci/test_python_cudf.sh @@ -0,0 +1,62 @@ +#!/bin/bash +# Copyright (c) 2022-2023, NVIDIA CORPORATION. + +# Common setup steps shared by Python test jobs +source "$(dirname "$0")/test_python_common.sh" + +rapids-logger "Check GPU usage" +nvidia-smi + +EXITCODE=0 +trap "EXITCODE=1" ERR +set +e + +rapids-logger "pytest cudf" +pushd python/cudf/cudf +# It is essential to cd into python/cudf/cudf as `pytest-xdist` + `coverage` seem to work only at this directory level. +pytest \ + --cache-clear \ + --ignore="benchmarks" \ + --junitxml="${RAPIDS_TESTS_DIR}/junit-cudf.xml" \ + --numprocesses=8 \ + --dist=loadscope \ + --cov-config=../.coveragerc \ + --cov=cudf \ + --cov-report=xml:"${RAPIDS_COVERAGE_DIR}/cudf-coverage.xml" \ + --cov-report=term \ + tests +popd + +# Run benchmarks with both cudf and pandas to ensure compatibility is maintained. +# Benchmarks are run in DEBUG_ONLY mode, meaning that only small data sizes are used. +# Therefore, these runs only verify that benchmarks are valid. +# They do not generate meaningful performance measurements. 
+pushd python/cudf +rapids-logger "pytest for cudf benchmarks" +CUDF_BENCHMARKS_DEBUG_ONLY=ON \ +pytest \ + --cache-clear \ + --numprocesses=8 \ + --dist=loadscope \ + --cov-config=.coveragerc \ + --cov=cudf \ + --cov-report=xml:"${RAPIDS_COVERAGE_DIR}/cudf-benchmark-coverage.xml" \ + --cov-report=term \ + benchmarks + +rapids-logger "pytest for cudf benchmarks using pandas" +CUDF_BENCHMARKS_USE_PANDAS=ON \ +CUDF_BENCHMARKS_DEBUG_ONLY=ON \ +pytest \ + --cache-clear \ + --numprocesses=8 \ + --dist=loadscope \ + --cov-config=.coveragerc \ + --cov=cudf \ + --cov-report=xml:"${RAPIDS_COVERAGE_DIR}/cudf-benchmark-pandas-coverage.xml" \ + --cov-report=term \ + benchmarks +popd + +rapids-logger "Test script exiting with value: $EXITCODE" +exit ${EXITCODE} diff --git a/ci/test_python_other.sh b/ci/test_python_other.sh new file mode 100755 index 0000000..25c1d68 --- /dev/null +++ b/ci/test_python_other.sh @@ -0,0 +1,48 @@ +#!/bin/bash +# Copyright (c) 2022-2023, NVIDIA CORPORATION. + +# Common setup steps shared by Python test jobs +source "$(dirname "$0")/test_python_common.sh" + +rapids-mamba-retry install \ + --channel "${CPP_CHANNEL}" \ + --channel "${PYTHON_CHANNEL}" \ + dask-cudf cudf_kafka custreamz + +rapids-logger "Check GPU usage" +nvidia-smi + +EXITCODE=0 +trap "EXITCODE=1" ERR +set +e + +rapids-logger "pytest dask_cudf" +pushd python/dask_cudf/dask_cudf +pytest \ + --cache-clear \ + --junitxml="${RAPIDS_TESTS_DIR}/junit-dask-cudf.xml" \ + --numprocesses=8 \ + --dist=loadscope \ + --cov-config=../.coveragerc \ + --cov=dask_cudf \ + --cov-report=xml:"${RAPIDS_COVERAGE_DIR}/dask-cudf-coverage.xml" \ + --cov-report=term \ + . 
+popd + +rapids-logger "pytest custreamz" +pushd python/custreamz/custreamz +pytest \ + --cache-clear \ + --junitxml="${RAPIDS_TESTS_DIR}/junit-custreamz.xml" \ + --numprocesses=8 \ + --dist=loadscope \ + --cov-config=../.coveragerc \ + --cov=custreamz \ + --cov-report=xml:"${RAPIDS_COVERAGE_DIR}/custreamz-coverage.xml" \ + --cov-report=term \ + tests +popd + +rapids-logger "Test script exiting with value: $EXITCODE" +exit ${EXITCODE} diff --git a/ci/test_wheel_cudf.sh b/ci/test_wheel_cudf.sh new file mode 100755 index 0000000..83e24ab --- /dev/null +++ b/ci/test_wheel_cudf.sh @@ -0,0 +1,17 @@ +#!/bin/bash +# Copyright (c) 2023, NVIDIA CORPORATION. + +set -eou pipefail + +RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" +RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./dist + +# echo to expand wildcard before adding `[extra]` requires for pip +python -m pip install $(echo ./dist/cudf*.whl)[test] + +# Run smoke tests for aarch64 pull requests +if [[ "$(arch)" == "aarch64" && ${RAPIDS_BUILD_TYPE} == "pull-request" ]]; then + python ./ci/wheel_smoke_test_cudf.py +else + python -m pytest -n 8 ./python/cudf/cudf/tests +fi diff --git a/ci/test_wheel_dask_cudf.sh b/ci/test_wheel_dask_cudf.sh new file mode 100755 index 0000000..a0a6fbe --- /dev/null +++ b/ci/test_wheel_dask_cudf.sh @@ -0,0 +1,19 @@ +#!/bin/bash +# Copyright (c) 2023, NVIDIA CORPORATION. 
+ +set -eou pipefail + +RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" +RAPIDS_PY_WHEEL_NAME="dask_cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./dist + +# Download the cudf built in the previous step +RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./local-cudf-dep +python -m pip install --no-deps ./local-cudf-dep/cudf*.whl + +# Always install latest dask for testing +python -m pip install git+https://github.com/dask/dask.git@2023.9.2 git+https://github.com/dask/distributed.git@2023.9.2 git+https://github.com/rapidsai/dask-cuda.git@branch-23.10 + +# echo to expand wildcard before adding `[extra]` requires for pip +python -m pip install $(echo ./dist/dask_cudf*.whl)[test] + +python -m pytest -n 8 ./python/dask_cudf/dask_cudf/ diff --git a/ci/utils/nbtest.sh b/ci/utils/nbtest.sh new file mode 100755 index 0000000..2a94e2d --- /dev/null +++ b/ci/utils/nbtest.sh @@ -0,0 +1,53 @@ +#!/bin/bash +# Copyright (c) 2020-2022, NVIDIA CORPORATION. 
+ +MAGIC_OVERRIDE_CODE=" +def my_run_line_magic(*args, **kwargs): + g=globals() + l={} + for a in args: + try: + exec(str(a),g,l) + except Exception as e: + print('WARNING: %s\n While executing this magic function code:\n%s\n continuing...\n' % (e, a)) + else: + g.update(l) + +def my_run_cell_magic(*args, **kwargs): + my_run_line_magic(*args, **kwargs) + +get_ipython().run_line_magic=my_run_line_magic +get_ipython().run_cell_magic=my_run_cell_magic + +" + +NO_COLORS=--colors=NoColor +EXITCODE=0 +NBTMPDIR="$WORKSPACE/tmp" +mkdir -p ${NBTMPDIR} + +for nb in $*; do + NBFILENAME=$1 + NBNAME=${NBFILENAME%.*} + NBNAME=${NBNAME##*/} + NBTESTSCRIPT=${NBTMPDIR}/${NBNAME}-test.py + shift + + echo -------------------------------------------------------------------------------- + echo STARTING: ${NBNAME} + echo -------------------------------------------------------------------------------- + jupyter nbconvert --to script ${NBFILENAME} --output ${NBTMPDIR}/${NBNAME}-test + echo "${MAGIC_OVERRIDE_CODE}" > ${NBTMPDIR}/tmpfile + cat ${NBTESTSCRIPT} >> ${NBTMPDIR}/tmpfile + mv ${NBTMPDIR}/tmpfile ${NBTESTSCRIPT} + + echo "Running \"ipython ${NO_COLORS} ${NBTESTSCRIPT}\" on $(date)" + echo + time bash -c "ipython ${NO_COLORS} ${NBTESTSCRIPT}; EC=\$?; echo -------------------------------------------------------------------------------- ; echo DONE: ${NBNAME}; exit \$EC" + NBEXITCODE=$? + echo EXIT CODE: ${NBEXITCODE} + echo + EXITCODE=$((EXITCODE | ${NBEXITCODE})) +done + +exit ${EXITCODE} diff --git a/ci/utils/nbtestlog2junitxml.py b/ci/utils/nbtestlog2junitxml.py new file mode 100644 index 0000000..14384af --- /dev/null +++ b/ci/utils/nbtestlog2junitxml.py @@ -0,0 +1,163 @@ +# Copyright (c) 2020-2022, NVIDIA CORPORATION. 
+# Generate a junit-xml file from parsing a nbtest log + +import re +from xml.etree.ElementTree import Element, ElementTree +from os import path +import string +from enum import Enum + + +startingPatt = re.compile(r"^STARTING: ([\w\.\-]+)$") +skippingPatt = re.compile(r"^SKIPPING: ([\w\.\-]+)\s*(\(([\w\.\-\ \,]+)\))?\s*$") +exitCodePatt = re.compile(r"^EXIT CODE: (\d+)$") +folderPatt = re.compile(r"^FOLDER: ([\w\.\-]+)$") +timePatt = re.compile(r"^real\s+([\d\.ms]+)$") +linePatt = re.compile("^" + ("-" * 80) + "$") + + +def getFileBaseName(filePathName): + return path.splitext(path.basename(filePathName))[0] + + +def makeTestCaseElement(attrDict): + return Element("testcase", attrib=attrDict) + + +def makeSystemOutElement(outputLines): + e = Element("system-out") + e.text = "".join(filter(lambda c: c in string.printable, outputLines)) + return e + + +def makeFailureElement(outputLines): + e = Element("failure", message="failed") + e.text = "".join(filter(lambda c: c in string.printable, outputLines)) + return e + + +def setFileNameAttr(attrDict, fileName): + attrDict.update(file=fileName, + classname="", + line="", + name="", + time="" + ) + +def setClassNameAttr(attrDict, className): + attrDict["classname"] = className + + +def setTestNameAttr(attrDict, testName): + attrDict["name"] = testName + + +def setTimeAttr(attrDict, timeVal): + (mins, seconds) = timeVal.split("m") + seconds = float(seconds.strip("s")) + (60 * int(mins)) + attrDict["time"] = str(seconds) + + +def incrNumAttr(element, attr): + newVal = int(element.attrib.get(attr)) + 1 + element.attrib[attr] = str(newVal) + + +def parseLog(logFile, testSuiteElement): + # Example attrs: + # errors="0" failures="0" hostname="a437d6835edf" name="pytest" skipped="2" tests="6" time="6.174" timestamp="2019-11-18T19:49:47.946307" + + with open(logFile) as lf: + testSuiteElement.attrib["tests"] = "0" + testSuiteElement.attrib["errors"] = "0" + testSuiteElement.attrib["failures"] = "0" + 
testSuiteElement.attrib["skipped"] = "0" + testSuiteElement.attrib["time"] = "0" + testSuiteElement.attrib["timestamp"] = "" + + attrDict = {} + #setFileNameAttr(attrDict, logFile) + setFileNameAttr(attrDict, "nbtest") + + parserStateEnum = Enum("parserStateEnum", + "newTest startingLine finishLine exitCode") + parserState = parserStateEnum.newTest + + testOutput = "" + + for line in lf.readlines(): + if parserState == parserStateEnum.newTest: + m = folderPatt.match(line) + if m: + setClassNameAttr(attrDict, m.group(1)) + continue + + m = skippingPatt.match(line) + if m: + setTestNameAttr(attrDict, getFileBaseName(m.group(1))) + setTimeAttr(attrDict, "0m0s") + skippedElement = makeTestCaseElement(attrDict) + message = m.group(3) or "" + skippedElement.append(Element("skipped", message=message, type="")) + testSuiteElement.append(skippedElement) + incrNumAttr(testSuiteElement, "skipped") + incrNumAttr(testSuiteElement, "tests") + continue + + m = startingPatt.match(line) + if m: + parserState = parserStateEnum.startingLine + testOutput = "" + setTestNameAttr(attrDict, m.group(1)) + setTimeAttr(attrDict, "0m0s") + continue + + continue + + elif parserState == parserStateEnum.startingLine: + if linePatt.match(line): + parserState = parserStateEnum.finishLine + testOutput = "" + continue + + elif parserState == parserStateEnum.finishLine: + if linePatt.match(line): + parserState = parserStateEnum.exitCode + else: + testOutput += line + continue + + elif parserState == parserStateEnum.exitCode: + m = exitCodePatt.match(line) + if m: + testCaseElement = makeTestCaseElement(attrDict) + if m.group(1) != "0": + failureElement = makeFailureElement(testOutput) + testCaseElement.append(failureElement) + incrNumAttr(testSuiteElement, "failures") + else: + systemOutElement = makeSystemOutElement(testOutput) + testCaseElement.append(systemOutElement) + + testSuiteElement.append(testCaseElement) + parserState = parserStateEnum.newTest + testOutput = "" + 
incrNumAttr(testSuiteElement, "tests") + continue + + m = timePatt.match(line) + if m: + setTimeAttr(attrDict, m.group(1)) + continue + + continue + + +if __name__ == "__main__": + import sys + + testSuitesElement = Element("testsuites") + testSuiteElement = Element("testsuite", name="nbtest", hostname="") + parseLog(sys.argv[1], testSuiteElement) + testSuitesElement.append(testSuiteElement) + ElementTree(testSuitesElement).write(sys.argv[1]+".xml", xml_declaration=True) diff --git a/ci/wheel_smoke_test_cudf.py b/ci/wheel_smoke_test_cudf.py new file mode 100644 index 0000000..a11a970 --- /dev/null +++ b/ci/wheel_smoke_test_cudf.py @@ -0,0 +1,13 @@ +# Copyright (c) 2022-2023, NVIDIA CORPORATION. + +import cudf +import pyarrow as pa + +if __name__ == '__main__': + n_legs = pa.array([2, 4, 5, 100]) + animals = pa.array(["Flamingo", "Horse", "Brittle stars", "Centipede"]) + names = ["n_legs", "animals"] + foo = pa.table([n_legs, animals], names=names) + df = cudf.DataFrame.from_arrow(foo) + assert df.loc[df["animals"] == "Centipede"]["n_legs"].iloc[0] == 100 + assert df.loc[df["animals"] == "Flamingo"]["n_legs"].iloc[0] == 2 diff --git a/codecov.yml b/codecov.yml new file mode 100644 index 0000000..344d4f3 --- /dev/null +++ b/codecov.yml @@ -0,0 +1,11 @@ +#Configuration File for CodeCov +coverage: + status: + project: off + patch: + default: + target: auto + threshold: 5% + +github_checks: + annotations: true diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml new file mode 100644 index 0000000..27a3a84 --- /dev/null +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -0,0 +1,103 @@ +# This file is generated by `rapids-dependency-file-generator`. +# To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. 
+channels: +- rapidsai +- rapidsai-nightly +- dask/label/dev +- pytorch +- conda-forge +- nvidia +dependencies: +- aiobotocore>=2.2.0 +- benchmark==1.8.0 +- boto3>=1.21.21 +- botocore>=1.24.21 +- c-compiler +- cachetools +- cmake>=3.26.4 +- cramjam +- cubinlinker +- cuda-nvtx=11.8 +- cuda-python>=11.7.1,<12.0a0 +- cuda-sanitizer-api=11.8.86 +- cuda-version=11.8 +- cudatoolkit +- cupy>=12.0.0 +- cxx-compiler +- cython>=3.0.0 +- dask-core==2023.9.2 +- dask-cuda==23.10.* +- dask==2023.9.2 +- distributed==2023.9.2 +- dlpack>=0.5,<0.6.0a0 +- doxygen=1.9.1 +- fastavro>=0.22.9 +- fmt>=9.1.0,<10 +- fsspec>=0.6.0 +- gcc_linux-64=11.* +- gmock>=1.13.0 +- gtest>=1.13.0 +- hypothesis +- identify>=2.5.20 +- ipython +- libarrow==12.0.1.* +- libcufile-dev=1.4.0.31 +- libcufile=1.4.0.31 +- libcurand-dev=10.3.0.86 +- libcurand=10.3.0.86 +- libkvikio==23.10.* +- librdkafka>=1.9.0,<1.10.0a0 +- librmm==23.10.* +- make +- mimesis>=4.1.0 +- moto>=4.0.8 +- msgpack-python +- myst-nb +- nbsphinx +- ninja +- notebook +- numba>=0.57,<0.58 +- numpy>=1.21,<1.25 +- numpydoc +- nvcc_linux-64=11.8 +- nvcomp==2.6.1 +- nvtx>=0.2.1 +- packaging +- pandas>=1.3,<1.6.0dev0 +- pandoc +- pip +- pre-commit +- protobuf>=4.21,<5 +- ptxcompiler +- pyarrow==12.0.1.* +- pydata-sphinx-theme +- pyorc +- pytest +- pytest-benchmark +- pytest-cases +- pytest-cov +- pytest-xdist +- python-confluent-kafka>=1.9.0,<1.10.0a0 +- python-snappy>=0.6.0 +- python>=3.9,<3.11 +- pytorch<1.12.0 +- rich +- rmm==23.10.* +- s3fs>=2022.3.0 +- scikit-build>=0.13.1 +- scipy +- spdlog>=1.11.0,<1.12 +- sphinx +- sphinx-autobuild +- sphinx-copybutton +- sphinx-markdown-tables +- sphinxcontrib-websupport +- streamz +- sysroot_linux-64==2.17 +- tokenizers==0.13.1 +- transformers==4.24.0 +- typing_extensions>=4.0.0 +- zlib>=1.2.13 +- pip: + - git+https://github.com/python-streamz/streamz.git@master +name: all_cuda-118_arch-x86_64 diff --git a/conda/environments/all_cuda-120_arch-x86_64.yaml 
b/conda/environments/all_cuda-120_arch-x86_64.yaml new file mode 100644 index 0000000..eb229f1 --- /dev/null +++ b/conda/environments/all_cuda-120_arch-x86_64.yaml @@ -0,0 +1,100 @@ +# This file is generated by `rapids-dependency-file-generator`. +# To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. +channels: +- rapidsai +- rapidsai-nightly +- dask/label/dev +- pytorch +- conda-forge +- nvidia +dependencies: +- aiobotocore>=2.2.0 +- benchmark==1.8.0 +- boto3>=1.21.21 +- botocore>=1.24.21 +- c-compiler +- cachetools +- cmake>=3.26.4 +- cramjam +- cuda-cudart-dev +- cuda-nvcc +- cuda-nvrtc-dev +- cuda-nvtx-dev +- cuda-python>=12.0,<13.0a0 +- cuda-sanitizer-api +- cuda-version=12.0 +- cupy>=12.0.0 +- cxx-compiler +- cython>=3.0.0 +- dask-core==2023.9.2 +- dask-cuda==23.10.* +- dask==2023.9.2 +- distributed==2023.9.2 +- dlpack>=0.5,<0.6.0a0 +- doxygen=1.9.1 +- fastavro>=0.22.9 +- fmt>=9.1.0,<10 +- fsspec>=0.6.0 +- gcc_linux-64=11.* +- gmock>=1.13.0 +- gtest>=1.13.0 +- hypothesis +- identify>=2.5.20 +- ipython +- libarrow==12.0.1.* +- libcufile-dev +- libcurand-dev +- libkvikio==23.10.* +- librdkafka>=1.9.0,<1.10.0a0 +- librmm==23.10.* +- make +- mimesis>=4.1.0 +- moto>=4.0.8 +- msgpack-python +- myst-nb +- nbsphinx +- ninja +- notebook +- numba>=0.57,<0.58 +- numpy>=1.21,<1.25 +- numpydoc +- nvcomp==2.6.1 +- nvtx>=0.2.1 +- packaging +- pandas>=1.3,<1.6.0dev0 +- pandoc +- pip +- pre-commit +- protobuf>=4.21,<5 +- pyarrow==12.0.1.* +- pydata-sphinx-theme +- pyorc +- pytest +- pytest-benchmark +- pytest-cases +- pytest-cov +- pytest-xdist +- python-confluent-kafka>=1.9.0,<1.10.0a0 +- python-snappy>=0.6.0 +- python>=3.9,<3.11 +- pytorch<1.12.0 +- rich +- rmm==23.10.* +- s3fs>=2022.3.0 +- scikit-build>=0.13.1 +- scipy +- spdlog>=1.11.0,<1.12 +- sphinx +- sphinx-autobuild +- sphinx-copybutton +- sphinx-markdown-tables +- sphinxcontrib-websupport +- streamz +- sysroot_linux-64==2.17 +- tokenizers==0.13.1 +- transformers==4.24.0 +- 
typing_extensions>=4.0.0 +- zlib>=1.2.13 +- pip: + - git+https://github.com/python-streamz/streamz.git@master +name: all_cuda-120_arch-x86_64 diff --git a/conda/recipes/cudf/build.sh b/conda/recipes/cudf/build.sh new file mode 100644 index 0000000..43d0464 --- /dev/null +++ b/conda/recipes/cudf/build.sh @@ -0,0 +1,4 @@ +# Copyright (c) 2018-2022, NVIDIA CORPORATION. + +# This assumes the script is executed from the root of the repo directory +./build.sh cudf diff --git a/conda/recipes/cudf/conda_build_config.yaml b/conda/recipes/cudf/conda_build_config.yaml new file mode 100644 index 0000000..c98c270 --- /dev/null +++ b/conda/recipes/cudf/conda_build_config.yaml @@ -0,0 +1,17 @@ +c_compiler_version: + - 11 + +cxx_compiler_version: + - 11 + +sysroot_version: + - "2.17" + +cmake_version: + - ">=3.26.4" + +cuda_compiler: + - cuda-nvcc + +cuda11_compiler: + - nvcc diff --git a/conda/recipes/cudf/meta.yaml b/conda/recipes/cudf/meta.yaml new file mode 100644 index 0000000..d3e15f7 --- /dev/null +++ b/conda/recipes/cudf/meta.yaml @@ -0,0 +1,119 @@ +# Copyright (c) 2018-2023, NVIDIA CORPORATION. + +{% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') %} +{% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %} +{% set py_version = environ['CONDA_PY'] %} +{% set cuda_version = '.'.join(environ['RAPIDS_CUDA_VERSION'].split('.')[:2]) %} +{% set cuda_major = cuda_version.split('.')[0] %} +{% set date_string = environ['RAPIDS_DATE_STRING'] %} + +package: + name: cudf + version: {{ version }} + +source: + git_url: ../../.. 
+ +build: + number: {{ GIT_DESCRIBE_NUMBER }} + string: cuda{{ cuda_major }}_py{{ py_version }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }} + script_env: + - AWS_ACCESS_KEY_ID + - AWS_SECRET_ACCESS_KEY + - AWS_SESSION_TOKEN + - CMAKE_C_COMPILER_LAUNCHER + - CMAKE_CUDA_COMPILER_LAUNCHER + - CMAKE_CXX_COMPILER_LAUNCHER + - CMAKE_GENERATOR + - PARALLEL_LEVEL + - SCCACHE_BUCKET + - SCCACHE_IDLE_TIMEOUT + - SCCACHE_REGION + - SCCACHE_S3_KEY_PREFIX=cudf-aarch64 # [aarch64] + - SCCACHE_S3_KEY_PREFIX=cudf-linux64 # [linux64] + - SCCACHE_S3_USE_SSL + - SCCACHE_S3_NO_CREDENTIALS + ignore_run_exports: + # libcudf's run_exports pinning is looser than we would like + - libcudf + ignore_run_exports_from: + {% if cuda_major == "11" %} + - {{ compiler('cuda11') }} + {% endif %} + +requirements: + build: + - cmake {{ cmake_version }} + - ninja + - {{ compiler('c') }} + - {{ compiler('cxx') }} + {% if cuda_major == "11" %} + - {{ compiler('cuda11') }} ={{ cuda_version }} + {% else %} + - {{ compiler('cuda') }} + {% endif %} + - cuda-version ={{ cuda_version }} + - sysroot_{{ target_platform }} {{ sysroot_version }} + host: + - protobuf ==4.21.* + - python + - cython >=3.0.0 + - scikit-build >=0.13.1 + - setuptools + - dlpack >=0.5,<0.6.0a0 + - pyarrow =12 + - libcudf ={{ version }} + - rmm ={{ minor_version }} + {% if cuda_major == "11" %} + - cudatoolkit + {% else %} + - cuda-cudart-dev + - cuda-nvrtc + - libcufile-dev # [linux64] + {% endif %} + - cuda-version ={{ cuda_version }} + run: + - {{ pin_compatible('protobuf', min_pin='x.x', max_pin='x') }} + - python + - typing_extensions >=4.0.0 + - pandas >=1.3,<1.6.0dev0 + - cupy >=12.0.0 + # TODO: Pin to numba<0.58 until #14160 is resolved + - numba >=0.57,<0.58 + # TODO: Pin to numpy<1.25 until cudf requires pandas 2 + - numpy >=1.21,<1.25 + - {{ pin_compatible('pyarrow', max_pin='x.x.x') }} + - libcudf ={{ version }} + - {{ pin_compatible('rmm', max_pin='x.x') }} + - fsspec >=0.6.0 + {% if cuda_major == 
"11" %} + - cudatoolkit + - ptxcompiler >=0.7.0 + - cubinlinker # CUDA enhanced compatibility. + - cuda-python >=11.7.1,<12.0a0 + {% else %} + # Needed by Numba for CUDA support + - cuda-nvcc-impl + # TODO: Add nvjitlink here + # xref: https://github.com/rapidsai/cudf/issues/12822 + - cuda-nvrtc + - cuda-python >=12.0,<13.0a0 + {% endif %} + - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }} + - nvtx >=0.2.1 + - packaging + - cachetools + - rich + +test: + requires: + - cuda-version ={{ cuda_version }} + imports: + - cudf + +about: + home: https://rapids.ai/ + license: Apache-2.0 + license_family: APACHE + license_file: LICENSE + summary: cuDF GPU DataFrame core library diff --git a/conda/recipes/cudf_kafka/build.sh b/conda/recipes/cudf_kafka/build.sh new file mode 100644 index 0000000..f4bb6e1 --- /dev/null +++ b/conda/recipes/cudf_kafka/build.sh @@ -0,0 +1,16 @@ +# Copyright (c) 2020-2023, NVIDIA CORPORATION. + +# This assumes the script is executed from the root of the repo directory +# Need to set CUDA_HOME inside conda environments because the hacked together +# setup.py for cudf-kafka searches that way. +# TODO: Remove after https://github.com/rapidsai/cudf/pull/14292 updates +# cudf_kafka to use scikit-build +CUDA_MAJOR=${RAPIDS_CUDA_VERSION%%.*} +if [[ ${CUDA_MAJOR} == "12" ]]; then + target_name="x86_64-linux" + if [[ ! 
$(arch) == "x86_64" ]]; then + target_name="sbsa-linux" + fi + export CUDA_HOME="${PREFIX}/targets/${target_name}/" +fi +./build.sh -v cudf_kafka diff --git a/conda/recipes/cudf_kafka/conda_build_config.yaml b/conda/recipes/cudf_kafka/conda_build_config.yaml new file mode 100644 index 0000000..b63a136 --- /dev/null +++ b/conda/recipes/cudf_kafka/conda_build_config.yaml @@ -0,0 +1,11 @@ +c_compiler_version: + - 11 + +cxx_compiler_version: + - 11 + +sysroot_version: + - "2.17" + +cmake_version: + - ">=3.26.4" diff --git a/conda/recipes/cudf_kafka/meta.yaml b/conda/recipes/cudf_kafka/meta.yaml new file mode 100644 index 0000000..a79c23b --- /dev/null +++ b/conda/recipes/cudf_kafka/meta.yaml @@ -0,0 +1,79 @@ +# Copyright (c) 2020-2023, NVIDIA CORPORATION. + +{% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') %} +{% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %} +{% set py_version = environ['CONDA_PY'] %} +{% set cuda_version = '.'.join(environ['RAPIDS_CUDA_VERSION'].split('.')[:2]) %} +{% set cuda_major = cuda_version.split('.')[0] %} +{% set date_string = environ['RAPIDS_DATE_STRING'] %} + +package: + name: cudf_kafka + version: {{ version }} + +source: + git_url: ../../.. 
+ +build: + number: {{ GIT_DESCRIBE_NUMBER }} + string: cuda{{ cuda_major }}_py{{ py_version }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }} + script_env: + - AWS_ACCESS_KEY_ID + - AWS_SECRET_ACCESS_KEY + - AWS_SESSION_TOKEN + - CMAKE_C_COMPILER_LAUNCHER + - CMAKE_CUDA_COMPILER_LAUNCHER + - CMAKE_CXX_COMPILER_LAUNCHER + - CMAKE_GENERATOR + - PARALLEL_LEVEL + - SCCACHE_BUCKET + - SCCACHE_IDLE_TIMEOUT + - SCCACHE_REGION + - SCCACHE_S3_KEY_PREFIX=cudf-kafka-aarch64 # [aarch64] + - SCCACHE_S3_KEY_PREFIX=cudf-kafka-linux64 # [linux64] + - SCCACHE_S3_USE_SSL + - SCCACHE_S3_NO_CREDENTIALS + # TODO: Remove after https://github.com/rapidsai/cudf/pull/14292 updates + # cudf_kafka to use scikit-build + - RAPIDS_CUDA_VERSION + +requirements: + build: + - cmake {{ cmake_version }} + - {{ compiler('c') }} + - {{ compiler('cxx') }} + - ninja + - sysroot_{{ target_platform }} {{ sysroot_version }} + # TODO: Remove after https://github.com/rapidsai/cudf/pull/14292 updates + # cudf_kafka to use scikit-build + {% if cuda_major == "12" %} + - cuda-gdb + {% endif %} + host: + - python + - cython >=3.0.0 + - cuda-version ={{ cuda_version }} + - cudf ={{ version }} + - libcudf_kafka ={{ version }} + - setuptools + {% if cuda_major == "12" %} + - cuda-cudart-dev + {% endif %} + run: + - python + - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }} + - libcudf_kafka ={{ version }} + - cudf ={{ version }} + +test: + requires: + - cuda-version ={{ cuda_version }} + imports: + - cudf_kafka + +about: + home: https://rapids.ai/ + license: Apache-2.0 + license_family: APACHE + license_file: LICENSE + summary: libcudf_kafka library diff --git a/conda/recipes/custreamz/build.sh b/conda/recipes/custreamz/build.sh new file mode 100644 index 0000000..88fccf9 --- /dev/null +++ b/conda/recipes/custreamz/build.sh @@ -0,0 +1,4 @@ +# Copyright (c) 2020-2022, NVIDIA CORPORATION. 
+ +# This assumes the script is executed from the root of the repo directory +./build.sh -v custreamz diff --git a/conda/recipes/custreamz/meta.yaml b/conda/recipes/custreamz/meta.yaml new file mode 100644 index 0000000..233d51b --- /dev/null +++ b/conda/recipes/custreamz/meta.yaml @@ -0,0 +1,65 @@ +# Copyright (c) 2018-2023, NVIDIA CORPORATION. + +{% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') %} +{% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %} +{% set py_version = environ['CONDA_PY'] %} +{% set cuda_version = '.'.join(environ['RAPIDS_CUDA_VERSION'].split('.')[:2]) %} +{% set cuda_major = cuda_version.split('.')[0] %} +{% set date_string = environ['RAPIDS_DATE_STRING'] %} + +package: + name: custreamz + version: {{ version }} + +source: + git_url: ../../.. + +build: + number: {{ GIT_DESCRIBE_NUMBER }} + string: cuda{{ cuda_major }}_py{{ py_version }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }} + script_env: + - AWS_ACCESS_KEY_ID + - AWS_SECRET_ACCESS_KEY + - AWS_SESSION_TOKEN + - CMAKE_C_COMPILER_LAUNCHER + - CMAKE_CUDA_COMPILER_LAUNCHER + - CMAKE_CXX_COMPILER_LAUNCHER + - CMAKE_GENERATOR + - PARALLEL_LEVEL + - SCCACHE_BUCKET + - SCCACHE_IDLE_TIMEOUT + - SCCACHE_REGION + - SCCACHE_S3_KEY_PREFIX=custreamz-aarch64 # [aarch64] + - SCCACHE_S3_KEY_PREFIX=custreamz-linux64 # [linux64] + - SCCACHE_S3_USE_SSL + - SCCACHE_S3_NO_CREDENTIALS + +requirements: + host: + - python + - python-confluent-kafka >=1.9.0,<1.10.0a0 + - cudf_kafka ={{ version }} + - cuda-version ={{ cuda_version }} + run: + - python + - streamz + - cudf ={{ version }} + - cudf_kafka ={{ version }} + - dask ==2023.9.2 + - dask-core ==2023.9.2 + - distributed ==2023.9.2 + - python-confluent-kafka >=1.9.0,<1.10.0a0 + - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }} + +test: + requires: + - cuda-version ={{ cuda_version }} + imports: + - custreamz + +about: + home: https://rapids.ai/ + license: Apache-2.0 + 
license_family: APACHE + license_file: LICENSE + summary: cuStreamz library diff --git a/conda/recipes/dask-cudf/build.sh b/conda/recipes/dask-cudf/build.sh new file mode 100644 index 0000000..473f52c --- /dev/null +++ b/conda/recipes/dask-cudf/build.sh @@ -0,0 +1,4 @@ +# Copyright (c) 2018-2019, NVIDIA CORPORATION. + +# This assumes the script is executed from the root of the repo directory +./build.sh dask_cudf diff --git a/conda/recipes/dask-cudf/meta.yaml b/conda/recipes/dask-cudf/meta.yaml new file mode 100644 index 0000000..4c8af07 --- /dev/null +++ b/conda/recipes/dask-cudf/meta.yaml @@ -0,0 +1,65 @@ +# Copyright (c) 2018-2023, NVIDIA CORPORATION. + +{% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') %} +{% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %} +{% set py_version = environ['CONDA_PY'] %} +{% set cuda_version = '.'.join(environ['RAPIDS_CUDA_VERSION'].split('.')[:2]) %} +{% set cuda_major = cuda_version.split('.')[0] %} +{% set date_string = environ['RAPIDS_DATE_STRING'] %} + +package: + name: dask-cudf + version: {{ version }} + +source: + git_url: ../../.. 
+ +build: + number: {{ GIT_DESCRIBE_NUMBER }} + string: cuda{{ cuda_major }}_py{{ py_version }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }} + script_env: + - AWS_ACCESS_KEY_ID + - AWS_SECRET_ACCESS_KEY + - AWS_SESSION_TOKEN + - CMAKE_C_COMPILER_LAUNCHER + - CMAKE_CUDA_COMPILER_LAUNCHER + - CMAKE_CXX_COMPILER_LAUNCHER + - CMAKE_GENERATOR + - PARALLEL_LEVEL + - SCCACHE_BUCKET + - SCCACHE_IDLE_TIMEOUT + - SCCACHE_REGION + - SCCACHE_S3_KEY_PREFIX=dask-cudf-aarch64 # [aarch64] + - SCCACHE_S3_KEY_PREFIX=dask-cudf-linux64 # [linux64] + - SCCACHE_S3_USE_SSL + - SCCACHE_S3_NO_CREDENTIALS + +requirements: + host: + - python + - cudf ={{ version }} + - dask ==2023.9.2 + - dask-core ==2023.9.2 + - distributed ==2023.9.2 + - cuda-version ={{ cuda_version }} + run: + - python + - cudf ={{ version }} + - dask ==2023.9.2 + - dask-core ==2023.9.2 + - distributed ==2023.9.2 + - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }} + +test: + requires: + - cuda-version ={{ cuda_version }} + imports: + - dask_cudf + + +about: + home: https://rapids.ai/ + license: Apache-2.0 + license_family: APACHE + license_file: LICENSE + summary: dask-cudf library diff --git a/conda/recipes/dask-cudf/run_test.sh b/conda/recipes/dask-cudf/run_test.sh new file mode 100644 index 0000000..c79c014 --- /dev/null +++ b/conda/recipes/dask-cudf/run_test.sh @@ -0,0 +1,36 @@ +#!/bin/bash +# Copyright (c) 2020-2023, NVIDIA CORPORATION. + +set -e + +# Logger function for build status output +function logger() { + echo -e "\n>>>> $@\n" +} + +# Importing cudf on arm64 CPU only nodes is currently not working due to a +# difference in reported gpu devices between arm64 and amd64 +ARCH=$(arch) + +if [ "${ARCH}" = "aarch64" ]; then + logger "Skipping tests on arm64" + exit 0 +fi + +# Dask & Distributed option to install main(nightly) or `conda-forge` packages. 
+export INSTALL_DASK_MAIN=0
+
+# Dask version to install when `INSTALL_DASK_MAIN=0`
+export DASK_STABLE_VERSION="2023.9.2"
+
+# Install the conda-forge or nightly version of dask and distributed
+if [[ "${INSTALL_DASK_MAIN}" == 1 ]]; then
+  rapids-logger "rapids-mamba-retry install -c dask/label/dev 'dask/label/dev::dask' 'dask/label/dev::distributed'"
+  rapids-mamba-retry install -c dask/label/dev "dask/label/dev::dask" "dask/label/dev::distributed"
+else
+  rapids-logger "rapids-mamba-retry install conda-forge::dask==${DASK_STABLE_VERSION} conda-forge::distributed==${DASK_STABLE_VERSION} conda-forge::dask-core==${DASK_STABLE_VERSION} --force-reinstall"
+  rapids-mamba-retry install conda-forge::dask==${DASK_STABLE_VERSION} conda-forge::distributed==${DASK_STABLE_VERSION} conda-forge::dask-core==${DASK_STABLE_VERSION} --force-reinstall
+fi
+
+logger "python -c 'import dask_cudf'"
+python -c "import dask_cudf" diff --git a/conda/recipes/libcudf/build.sh b/conda/recipes/libcudf/build.sh new file mode 100644 index 0000000..47047f4 --- /dev/null +++ b/conda/recipes/libcudf/build.sh @@ -0,0 +1,9 @@ +#!/bin/bash +# Copyright (c) 2018-2023, NVIDIA CORPORATION.
+ +export cudf_ROOT="$(realpath ./cpp/build)" + +./build.sh -n -v \ + libcudf libcudf_kafka benchmarks tests \ + --build_metrics --incl_cache_stats \ + --cmake-args=\"-DCMAKE_INSTALL_LIBDIR=lib -DCUDF_ENABLE_ARROW_S3=ON -DNVBench_ENABLE_CUPTI=OFF\" diff --git a/conda/recipes/libcudf/conda_build_config.yaml b/conda/recipes/libcudf/conda_build_config.yaml new file mode 100644 index 0000000..25b3f19 --- /dev/null +++ b/conda/recipes/libcudf/conda_build_config.yaml @@ -0,0 +1,59 @@ +c_compiler_version: + - 11 + +cxx_compiler_version: + - 11 + +cuda_compiler: + - cuda-nvcc + +cuda11_compiler: + - nvcc + +sysroot_version: + - "2.17" + +cmake_version: + - ">=3.26.4" + +gbench_version: + - "==1.8.0" + +gtest_version: + - ">=1.13.0" + +libarrow_version: + - "=12" + +dlpack_version: + - ">=0.5,<0.6.0a0" + +librdkafka_version: + - ">=1.9.0,<1.10.0a0" + +fmt_version: + - ">=9.1.0,<10" + +spdlog_version: + - ">=1.11.0,<1.12" + +nvcomp_version: + - "=2.6.1" + +zlib_version: + - ">=1.2.13" +# The CTK libraries below are missing from the conda-forge::cudatoolkit package +# for CUDA 11. The "*_host_*" version specifiers correspond to `11.8` packages +# and the "*_run_*" version specifiers correspond to `11.x` packages. + +cuda11_libcufile_host_version: + - "1.4.0.31" + +cuda11_libcufile_run_version: + - ">=1.0.0.82,<=1.4.0.31" + +cuda11_libcurand_host_version: + - "=10.3.0.86" + +cuda11_libcurand_run_version: + - ">=10.2.5.43,<10.3.1" diff --git a/conda/recipes/libcudf/install_libcudf.sh b/conda/recipes/libcudf/install_libcudf.sh new file mode 100644 index 0000000..173f8cf --- /dev/null +++ b/conda/recipes/libcudf/install_libcudf.sh @@ -0,0 +1,4 @@ +#!/bin/bash +# Copyright (c) 2018-2022, NVIDIA CORPORATION. 
+ +cmake --install cpp/build diff --git a/conda/recipes/libcudf/install_libcudf_example.sh b/conda/recipes/libcudf/install_libcudf_example.sh new file mode 100644 index 0000000..e249688 --- /dev/null +++ b/conda/recipes/libcudf/install_libcudf_example.sh @@ -0,0 +1,4 @@ +#!/bin/bash +# Copyright (c) 2018-2022, NVIDIA CORPORATION. + +./cpp/examples/build.sh diff --git a/conda/recipes/libcudf/install_libcudf_kafka.sh b/conda/recipes/libcudf/install_libcudf_kafka.sh new file mode 100644 index 0000000..9eae251 --- /dev/null +++ b/conda/recipes/libcudf/install_libcudf_kafka.sh @@ -0,0 +1,4 @@ +#!/bin/bash +# Copyright (c) 2018-2022, NVIDIA CORPORATION. + +cmake --install cpp/libcudf_kafka/build diff --git a/conda/recipes/libcudf/install_libcudf_tests.sh b/conda/recipes/libcudf/install_libcudf_tests.sh new file mode 100644 index 0000000..069462e --- /dev/null +++ b/conda/recipes/libcudf/install_libcudf_tests.sh @@ -0,0 +1,5 @@ +#!/bin/bash +# Copyright (c) 2018-2022, NVIDIA CORPORATION. + +cmake --install cpp/build --component testing +cmake --install cpp/libcudf_kafka/build --component testing diff --git a/conda/recipes/libcudf/meta.yaml b/conda/recipes/libcudf/meta.yaml new file mode 100644 index 0000000..6270658 --- /dev/null +++ b/conda/recipes/libcudf/meta.yaml @@ -0,0 +1,227 @@ +# Copyright (c) 2018-2023, NVIDIA CORPORATION. + +{% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') %} +{% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %} +{% set cuda_version = '.'.join(environ['RAPIDS_CUDA_VERSION'].split('.')[:2]) %} +{% set cuda_major = cuda_version.split('.')[0] %} +{% set cuda_spec = ">=" + cuda_major ~ ",<" + (cuda_major | int + 1) ~ ".0a0" %} # i.e. >=11,<12.0a0 +{% set date_string = environ['RAPIDS_DATE_STRING'] %} + +package: + name: libcudf-split + +source: + git_url: ../../.. 
+ +build: + script_env: + - AWS_ACCESS_KEY_ID + - AWS_SECRET_ACCESS_KEY + - AWS_SESSION_TOKEN + - CMAKE_C_COMPILER_LAUNCHER + - CMAKE_CUDA_COMPILER_LAUNCHER + - CMAKE_CXX_COMPILER_LAUNCHER + - CMAKE_GENERATOR + - PARALLEL_LEVEL + - RAPIDS_ARTIFACTS_DIR + - SCCACHE_BUCKET + - SCCACHE_IDLE_TIMEOUT + - SCCACHE_REGION + - SCCACHE_S3_KEY_PREFIX=libcudf-aarch64 # [aarch64] + - SCCACHE_S3_KEY_PREFIX=libcudf-linux64 # [linux64] + - SCCACHE_S3_USE_SSL + - SCCACHE_S3_NO_CREDENTIALS + +requirements: + build: + - cmake {{ cmake_version }} + - {{ compiler('c') }} + - {{ compiler('cxx') }} + {% if cuda_major == "11" %} + - {{ compiler('cuda11') }} ={{ cuda_version }} + {% else %} + - {{ compiler('cuda') }} + {% endif %} + - cuda-version ={{ cuda_version }} + - ninja + - sysroot_{{ target_platform }} {{ sysroot_version }} + host: + - librmm ={{ minor_version }} + - libkvikio ={{ minor_version }} + {% if cuda_major == "11" %} + - cudatoolkit + - libcufile {{ cuda11_libcufile_host_version }} # [linux64] + - libcufile-dev {{ cuda11_libcufile_host_version }} # [linux64] + - libcurand {{ cuda11_libcurand_host_version }} + - libcurand-dev {{ cuda11_libcurand_host_version }} + - cuda-nvrtc ={{ cuda_version }} + - cuda-nvrtc-dev ={{ cuda_version }} + - cuda-nvtx ={{ cuda_version }} + {% else %} + - cuda-nvrtc-dev + - cuda-nvtx-dev + - libcufile-dev # [linux64] + - libcurand-dev + {% endif %} + - cuda-version ={{ cuda_version }} + - nvcomp {{ nvcomp_version }} + - libarrow {{ libarrow_version }} + - dlpack {{ dlpack_version }} + - librdkafka {{ librdkafka_version }} + - fmt {{ fmt_version }} + - spdlog {{ spdlog_version }} + - benchmark {{ gbench_version }} + - gtest {{ gtest_version }} + - gmock {{ gtest_version }} + - zlib {{ zlib_version }} + +outputs: + - name: libcudf + version: {{ version }} + script: install_libcudf.sh + build: + number: {{ GIT_DESCRIBE_NUMBER }} + string: cuda{{ cuda_major }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }} + run_exports: + - {{ 
pin_subpackage("libcudf", max_pin="x.x") }} + ignore_run_exports_from: + {% if cuda_major == "11" %} + - {{ compiler('cuda11') }} + {% endif %} + requirements: + build: + - cmake {{ cmake_version }} + run: + {% if cuda_major == "11" %} + - cudatoolkit + - libcufile {{ cuda11_libcufile_run_version }} # [linux64] + {% else %} + - cuda-nvrtc + - libcufile # [linux64] + {% endif %} + - cuda-version {{ cuda_spec }} + - nvcomp {{ nvcomp_version }} + - librmm ={{ minor_version }} + - libkvikio ={{ minor_version }} + - libarrow {{ libarrow_version }} + - dlpack {{ dlpack_version }} + - gtest {{ gtest_version }} + - gmock {{ gtest_version }} + test: + commands: + - test -f $PREFIX/lib/libcudf.so + - test -f $PREFIX/include/cudf/column/column.hpp + about: + home: https://rapids.ai/ + license: Apache-2.0 + license_family: APACHE + license_file: LICENSE + summary: libcudf library + - name: libcudf_kafka + version: {{ version }} + script: install_libcudf_kafka.sh + build: + number: {{ GIT_DESCRIBE_NUMBER }} + string: cuda{{ cuda_major }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }} + ignore_run_exports_from: + {% if cuda_major == "11" %} + - {{ compiler('cuda11') }} + {% endif %} + requirements: + build: + - cmake {{ cmake_version }} + host: + - librdkafka {{ librdkafka_version }} + - {{ pin_subpackage('libcudf', exact=True) }} + run: + - librdkafka {{ librdkafka_version }} + - {{ pin_subpackage('libcudf', exact=True) }} + test: + commands: + - test -f $PREFIX/lib/libcudf_kafka.so + about: + home: https://rapids.ai/ + license: Apache-2.0 + license_family: APACHE + license_file: LICENSE + summary: libcudf_kafka library + - name: libcudf-example + version: {{ version }} + script: install_libcudf_example.sh + build: + number: {{ GIT_DESCRIBE_NUMBER }} + string: cuda{{ cuda_major }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }} + ignore_run_exports_from: + {% if cuda_major == "11" %} + - {{ compiler('cuda11') }} + {% endif %} + 
requirements: + build: + - cmake {{ cmake_version }} + - {{ compiler('c') }} + - {{ compiler('cxx') }} + {% if cuda_major == "11" %} + - {{ compiler('cuda11') }} ={{ cuda_version }} + {% else %} + - {{ compiler('cuda') }} + {% endif %} + - cuda-version ={{ cuda_version }} + - ninja + - sysroot_{{ target_platform }} {{ sysroot_version }} + host: + - {{ pin_subpackage('libcudf', exact=True) }} + {% if cuda_major == "11" %} + - cuda-nvtx ={{ cuda_version }} + {% else %} + - cuda-nvtx-dev + {% endif %} + - cuda-version ={{ cuda_version }} + run: + - {{ pin_subpackage('libcudf', exact=True) }} + about: + home: https://rapids.ai/ + license: Apache-2.0 + license_family: APACHE + license_file: LICENSE + summary: libcudf_example library + - name: libcudf-tests + version: {{ version }} + script: install_libcudf_tests.sh + build: + number: {{ GIT_DESCRIBE_NUMBER }} + string: cuda{{ cuda_major }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }} + ignore_run_exports_from: + {% if cuda_major == "11" %} + - {{ compiler('cuda11') }} + {% endif %} + requirements: + build: + - cmake {{ cmake_version }} + host: + - {{ pin_subpackage('libcudf', exact=True) }} + - {{ pin_subpackage('libcudf_kafka', exact=True) }} + - cuda-version {{ cuda_spec }} + {% if cuda_major == "11" %} + - libcurand {{ cuda11_libcurand_run_version }} + {% else %} + - libcurand-dev + {% endif %} + - benchmark {{ gbench_version }} + - gtest {{ gtest_version }} + - gmock {{ gtest_version }} + run: + - {{ pin_subpackage('libcudf', exact=True) }} + - {{ pin_subpackage('libcudf_kafka', exact=True) }} + - cuda-version {{ cuda_spec }} + {% if cuda_major == "11" %} + - libcurand {{ cuda11_libcurand_run_version }} + {% endif %} + - benchmark {{ gbench_version }} + - gtest {{ gtest_version }} + - gmock {{ gtest_version }} + about: + home: https://rapids.ai/ + license: Apache-2.0 + license_family: APACHE + license_file: LICENSE + summary: libcudf test & benchmark executables diff --git a/cpp/.clang-tidy 
b/cpp/.clang-tidy new file mode 100644 index 0000000..d766d98 --- /dev/null +++ b/cpp/.clang-tidy @@ -0,0 +1,27 @@ +--- +Checks: + 'modernize-*, + -modernize-use-equals-default, + -modernize-concat-nested-namespaces, + -modernize-use-trailing-return-type' + + # -modernize-use-equals-default # auto-fix is broken (doesn't insert =default correctly) + # -modernize-concat-nested-namespaces # auto-fix is broken (can delete code) + # -modernize-use-trailing-return-type # just a preference + +WarningsAsErrors: '' +HeaderFilterRegex: '' +AnalyzeTemporaryDtors: false +FormatStyle: none +CheckOptions: + - key: modernize-loop-convert.MaxCopySize + value: '16' + - key: modernize-loop-convert.MinConfidence + value: reasonable + - key: modernize-pass-by-value.IncludeStyle + value: llvm + - key: modernize-replace-auto-ptr.IncludeStyle + value: llvm + - key: modernize-use-nullptr.NullMacros + value: 'NULL' +... diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt new file mode 100644 index 0000000..ec58c39 --- /dev/null +++ b/cpp/CMakeLists.txt @@ -0,0 +1,1099 @@ +# ============================================================================= +# Copyright (c) 2018-2023, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. 
+# ============================================================================= + +cmake_minimum_required(VERSION 3.26.4 FATAL_ERROR) + +include(../fetch_rapids.cmake) +include(rapids-cmake) +include(rapids-cpm) +include(rapids-cuda) +include(rapids-export) +include(rapids-find) + +rapids_cuda_init_architectures(CUDF) + +project( + CUDF + VERSION 23.10.00 + LANGUAGES C CXX CUDA +) +if(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA" AND CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 11.5) + message( + FATAL_ERROR + "libcudf requires CUDA Toolkit 11.5+ to compile (nvcc ${CMAKE_CUDA_COMPILER_VERSION} provided)" + ) +endif() + +# Needed because GoogleBenchmark changes the state of FindThreads.cmake, causing subsequent runs to +# have different values for the `Threads::Threads` target. Setting this flag ensures +# `Threads::Threads` is the same value in first run and subsequent runs. +set(THREADS_PREFER_PTHREAD_FLAG ON) + +# ################################################################################################## +# * build options --------------------------------------------------------------------------------- + +option(USE_NVTX "Build with NVTX support" ON) +option(BUILD_TESTS "Configure CMake to build tests" ON) +option(BUILD_BENCHMARKS "Configure CMake to build (google & nvbench) benchmarks" OFF) +option(BUILD_SHARED_LIBS "Build cuDF shared libraries" ON) +option(JITIFY_USE_CACHE "Use a file cache for JIT compiled kernels" ON) +option(CUDF_BUILD_TESTUTIL "Whether to build the test utilities contained in libcudf" ON) +mark_as_advanced(CUDF_BUILD_TESTUTIL) +option(CUDF_USE_PROPRIETARY_NVCOMP "Download and use NVCOMP with proprietary extensions" ON) +option(CUDF_USE_ARROW_STATIC "Build and statically link Arrow libraries" OFF) +option(CUDF_ENABLE_ARROW_ORC "Build the Arrow ORC adapter" OFF) +option(CUDF_ENABLE_ARROW_PYTHON "Find (or build) Arrow with Python support" OFF) +option(CUDF_ENABLE_ARROW_PARQUET "Find (or build) Arrow with Parquet support" OFF) 
+option(CUDF_ENABLE_ARROW_S3 "Build/Enable AWS S3 Arrow filesystem support" OFF) +option( + CUDF_USE_PER_THREAD_DEFAULT_STREAM + "Build cuDF with per-thread default stream, including passing the per-thread default + stream to external libraries." + OFF +) +# Option to add all symbols to the dynamic symbol table in the library file, allowing to retrieve +# human-readable stacktrace for debugging. +option( + CUDF_BUILD_STACKTRACE_DEBUG + "Replace the current optimization flags by the options '-rdynamic -Og -NDEBUG', useful for debugging with stacktrace retrieval" + OFF +) +option(DISABLE_DEPRECATION_WARNINGS "Disable warnings generated from deprecated declarations." OFF) +# Option to enable line info in CUDA device compilation to allow introspection when profiling / +# memchecking +option(CUDA_ENABLE_LINEINFO + "Enable the -lineinfo option for nvcc (useful for cuda-memcheck / profiler)" OFF +) +option(CUDA_WARNINGS_AS_ERRORS "Enable -Werror=all-warnings for all CUDA compilation" ON) +# cudart can be statically linked or dynamically linked. The python ecosystem wants dynamic linking +option(CUDA_STATIC_RUNTIME "Statically link the CUDA runtime" OFF) + +set(DEFAULT_CUDF_BUILD_STREAMS_TEST_UTIL ON) +if(CUDA_STATIC_RUNTIME OR NOT BUILD_SHARED_LIBS) + set(DEFAULT_CUDF_BUILD_STREAMS_TEST_UTIL OFF) +endif() +option( + CUDF_BUILD_STREAMS_TEST_UTIL + "Whether to build the utilities for stream testing contained in libcudf" + ${DEFAULT_CUDF_BUILD_STREAMS_TEST_UTIL} +) +mark_as_advanced(CUDF_BUILD_STREAMS_TEST_UTIL) + +option(USE_LIBARROW_FROM_PYARROW "Use the libarrow contained within pyarrow." 
OFF) +mark_as_advanced(USE_LIBARROW_FROM_PYARROW) + +message(VERBOSE "CUDF: Build with NVTX support: ${USE_NVTX}") +message(VERBOSE "CUDF: Configure CMake to build tests: ${BUILD_TESTS}") +message(VERBOSE "CUDF: Configure CMake to build (google & nvbench) benchmarks: ${BUILD_BENCHMARKS}") +message(VERBOSE "CUDF: Build cuDF shared libraries: ${BUILD_SHARED_LIBS}") +message(VERBOSE "CUDF: Use a file cache for JIT compiled kernels: ${JITIFY_USE_CACHE}") +message(VERBOSE "CUDF: Build and statically link Arrow libraries: ${CUDF_USE_ARROW_STATIC}") +message(VERBOSE "CUDF: Build and enable S3 filesystem support for Arrow: ${CUDF_ENABLE_ARROW_S3}") +message(VERBOSE "CUDF: Build with per-thread default stream: ${CUDF_USE_PER_THREAD_DEFAULT_STREAM}") +message( + VERBOSE + "CUDF: Replace the current optimization flags by the options '-rdynamic -Og' (useful for debugging with stacktrace retrieval): ${CUDF_BUILD_STACKTRACE_DEBUG}" +) +message( + VERBOSE + "CUDF: Disable warnings generated from deprecated declarations: ${DISABLE_DEPRECATION_WARNINGS}" +) +message( + VERBOSE + "CUDF: Enable the -lineinfo option for nvcc (useful for cuda-memcheck / profiler): ${CUDA_ENABLE_LINEINFO}" +) +message(VERBOSE "CUDF: Statically link the CUDA runtime: ${CUDA_STATIC_RUNTIME}") + +# Set a default build type if none was specified +rapids_cmake_build_type("Release") +set(CUDF_BUILD_TESTS ${BUILD_TESTS}) +set(CUDF_BUILD_BENCHMARKS ${BUILD_BENCHMARKS}) +if(BUILD_TESTS AND NOT CUDF_BUILD_TESTUTIL) + message( + FATAL_ERROR + "Tests cannot be built without building cudf test utils. 
Please set CUDF_BUILD_TESTUTIL=ON or BUILD_TESTS=OFF" + ) +endif() + +if(CUDF_BUILD_STACKTRACE_DEBUG AND NOT CMAKE_COMPILER_IS_GNUCXX) + message(FATAL_ERROR "CUDF_BUILD_STACKTRACE_DEBUG is only supported with GCC compiler") +endif() + +set(CUDF_CXX_FLAGS "") +set(CUDF_CUDA_FLAGS "") +set(CUDF_CXX_DEFINITIONS "") +set(CUDF_CUDA_DEFINITIONS "") + +# Set logging level +set(LIBCUDF_LOGGING_LEVEL + "INFO" + CACHE STRING "Choose the logging level." +) +set_property( + CACHE LIBCUDF_LOGGING_LEVEL PROPERTY STRINGS "TRACE" "DEBUG" "INFO" "WARN" "ERROR" "CRITICAL" + "OFF" +) +message(VERBOSE "CUDF: LIBCUDF_LOGGING_LEVEL = '${LIBCUDF_LOGGING_LEVEL}'.") + +if(NOT CUDF_GENERATED_INCLUDE_DIR) + set(CUDF_GENERATED_INCLUDE_DIR ${CUDF_BINARY_DIR}) +endif() + +# ################################################################################################## +# * conda environment ----------------------------------------------------------------------------- +rapids_cmake_support_conda_env(conda_env MODIFY_PREFIX_PATH) + +# ################################################################################################## +# * compiler options ------------------------------------------------------------------------------ +rapids_find_package( + CUDAToolkit REQUIRED + BUILD_EXPORT_SET cudf-exports + INSTALL_EXPORT_SET cudf-exports +) +include(cmake/Modules/ConfigureCUDA.cmake) # set other CUDA compilation flags + +# ################################################################################################## +# * dependencies ---------------------------------------------------------------------------------- + +# find zlib +rapids_find_package(ZLIB REQUIRED) + +if(CUDF_BUILD_TESTUTIL) + # find Threads (needed by cudftestutil) + rapids_find_package( + Threads REQUIRED + BUILD_EXPORT_SET cudf-exports + INSTALL_EXPORT_SET cudf-exports + ) +endif() + +# add third party dependencies using CPM +rapids_cpm_init() +# find jitify +include(cmake/thirdparty/get_jitify.cmake) +# find nvCOMP 
+include(cmake/thirdparty/get_nvcomp.cmake) +# find thrust/cub +include(cmake/thirdparty/get_thrust.cmake) +# find rmm +include(cmake/thirdparty/get_rmm.cmake) +# find arrow +include(cmake/thirdparty/get_arrow.cmake) +# find dlpack +include(cmake/thirdparty/get_dlpack.cmake) +# find libcu++ +include(cmake/thirdparty/get_libcudacxx.cmake) +# find cuCollections Should come after including thrust and libcudacxx +include(cmake/thirdparty/get_cucollections.cmake) +# find or install GoogleTest +if(CUDF_BUILD_TESTUTIL) + include(cmake/thirdparty/get_gtest.cmake) +endif() +# preprocess jitify-able kernels +include(cmake/Modules/JitifyPreprocessKernels.cmake) +# find cuFile +include(cmake/thirdparty/get_cufile.cmake) +# find KvikIO +include(cmake/thirdparty/get_kvikio.cmake) +# find fmt +include(cmake/thirdparty/get_fmt.cmake) +# find spdlog +include(cmake/thirdparty/get_spdlog.cmake) + +# Workaround until https://github.com/rapidsai/rapids-cmake/issues/176 is resolved +if(NOT BUILD_SHARED_LIBS) + include("${rapids-cmake-dir}/export/find_package_file.cmake") + list(APPEND METADATA_KINDS BUILD INSTALL) + list(APPEND dependencies KvikIO ZLIB nvcomp) + if(TARGET cufile::cuFile_interface) + list(APPEND dependencies cuFile) + endif() + + foreach(METADATA_KIND IN LISTS METADATA_KINDS) + foreach(dep IN LISTS dependencies) + rapids_export_package(${METADATA_KIND} ${dep} cudf-exports) + endforeach() + endforeach() + + if(TARGET conda_env) + install(TARGETS conda_env EXPORT cudf-exports) + endif() +endif() + +# ################################################################################################## +# * library targets ------------------------------------------------------------------------------- + +add_library( + cudf + src/aggregation/aggregation.cpp + src/aggregation/aggregation.cu + src/aggregation/result_cache.cpp + src/ast/expression_parser.cpp + src/ast/expressions.cpp + src/binaryop/binaryop.cpp + src/binaryop/compiled/ATan2.cu + 
src/binaryop/compiled/Add.cu + src/binaryop/compiled/BitwiseAnd.cu + src/binaryop/compiled/BitwiseOr.cu + src/binaryop/compiled/BitwiseXor.cu + src/binaryop/compiled/Div.cu + src/binaryop/compiled/FloorDiv.cu + src/binaryop/compiled/Greater.cu + src/binaryop/compiled/GreaterEqual.cu + src/binaryop/compiled/IntPow.cu + src/binaryop/compiled/Less.cu + src/binaryop/compiled/LessEqual.cu + src/binaryop/compiled/LogBase.cu + src/binaryop/compiled/LogicalAnd.cu + src/binaryop/compiled/LogicalOr.cu + src/binaryop/compiled/Mod.cu + src/binaryop/compiled/Mul.cu + src/binaryop/compiled/NullEquals.cu + src/binaryop/compiled/NullLogicalAnd.cu + src/binaryop/compiled/NullLogicalOr.cu + src/binaryop/compiled/NullMax.cu + src/binaryop/compiled/NullMin.cu + src/binaryop/compiled/PMod.cu + src/binaryop/compiled/Pow.cu + src/binaryop/compiled/PyMod.cu + src/binaryop/compiled/ShiftLeft.cu + src/binaryop/compiled/ShiftRight.cu + src/binaryop/compiled/ShiftRightUnsigned.cu + src/binaryop/compiled/Sub.cu + src/binaryop/compiled/TrueDiv.cu + src/binaryop/compiled/binary_ops.cu + src/binaryop/compiled/equality_ops.cu + src/binaryop/compiled/util.cpp + src/labeling/label_bins.cu + src/bitmask/null_mask.cu + src/bitmask/is_element_valid.cpp + src/column/column.cu + src/column/column_device_view.cu + src/column/column_factories.cpp + src/column/column_factories.cu + src/column/column_view.cpp + src/copying/concatenate.cu + src/copying/contiguous_split.cu + src/copying/copy.cpp + src/copying/copy.cu + src/copying/copy_range.cu + src/copying/gather.cu + src/copying/get_element.cu + src/copying/pack.cpp + src/copying/purge_nonempty_nulls.cu + src/copying/reverse.cu + src/copying/sample.cu + src/copying/scatter.cu + src/copying/shift.cu + src/copying/slice.cu + src/copying/split.cpp + src/copying/segmented_shift.cu + src/datetime/datetime_ops.cu + src/dictionary/add_keys.cu + src/dictionary/decode.cu + src/dictionary/detail/concatenate.cu + src/dictionary/detail/merge.cu + 
src/dictionary/dictionary_column_view.cpp + src/dictionary/dictionary_factories.cu + src/dictionary/encode.cu + src/dictionary/remove_keys.cu + src/dictionary/replace.cu + src/dictionary/search.cu + src/dictionary/set_keys.cu + src/filling/calendrical_month_sequence.cu + src/filling/fill.cu + src/filling/repeat.cu + src/filling/sequence.cu + src/groupby/groupby.cu + src/groupby/hash/groupby.cu + src/groupby/sort/aggregate.cpp + src/groupby/sort/group_argmax.cu + src/groupby/sort/group_argmin.cu + src/groupby/sort/group_collect.cu + src/groupby/sort/group_correlation.cu + src/groupby/sort/group_count.cu + src/groupby/sort/group_histogram.cu + src/groupby/sort/group_m2.cu + src/groupby/sort/group_max.cu + src/groupby/sort/group_min.cu + src/groupby/sort/group_merge_lists.cu + src/groupby/sort/group_merge_m2.cu + src/groupby/sort/group_nth_element.cu + src/groupby/sort/group_nunique.cu + src/groupby/sort/group_product.cu + src/groupby/sort/group_quantiles.cu + src/groupby/sort/group_std.cu + src/groupby/sort/group_sum.cu + src/groupby/sort/scan.cpp + src/groupby/sort/group_count_scan.cu + src/groupby/sort/group_max_scan.cu + src/groupby/sort/group_min_scan.cu + src/groupby/sort/group_rank_scan.cu + src/groupby/sort/group_replace_nulls.cu + src/groupby/sort/group_sum_scan.cu + src/groupby/sort/sort_helper.cu + src/hash/hashing.cu + src/hash/md5_hash.cu + src/hash/murmurhash3_x86_32.cu + src/hash/murmurhash3_x64_128.cu + src/hash/spark_murmurhash3_x86_32.cu + src/hash/xxhash_64.cu + src/interop/dlpack.cpp + src/interop/from_arrow.cu + src/interop/to_arrow.cu + src/interop/detail/arrow_allocator.cpp + src/io/avro/avro.cpp + src/io/avro/avro_gpu.cu + src/io/avro/reader_impl.cu + src/io/comp/brotli_dict.cpp + src/io/comp/cpu_unbz2.cpp + src/io/comp/debrotli.cu + src/io/comp/gpuinflate.cu + src/io/comp/nvcomp_adapter.cpp + src/io/comp/nvcomp_adapter.cu + src/io/comp/snap.cu + src/io/comp/statistics.cu + src/io/comp/uncomp.cpp + src/io/comp/unsnap.cu + src/io/csv/csv_gpu.cu 
+ src/io/csv/durations.cu + src/io/csv/reader_impl.cu + src/io/csv/writer_impl.cu + src/io/functions.cpp + src/io/json/byte_range_info.cu + src/io/json/json_column.cu + src/io/json/json_tree.cu + src/io/json/nested_json_gpu.cu + src/io/json/read_json.cu + src/io/json/legacy/json_gpu.cu + src/io/json/legacy/reader_impl.cu + src/io/json/write_json.cu + src/io/orc/aggregate_orc_metadata.cpp + src/io/orc/dict_enc.cu + src/io/orc/orc.cpp + src/io/orc/reader_impl.cu + src/io/orc/stats_enc.cu + src/io/orc/stripe_data.cu + src/io/orc/stripe_enc.cu + src/io/orc/stripe_init.cu + src/datetime/timezone.cpp + src/io/orc/writer_impl.cu + src/io/parquet/compact_protocol_reader.cpp + src/io/parquet/compact_protocol_writer.cpp + src/io/parquet/decode_preprocess.cu + src/io/parquet/page_data.cu + src/io/parquet/chunk_dict.cu + src/io/parquet/page_enc.cu + src/io/parquet/page_hdr.cu + src/io/parquet/page_delta_decode.cu + src/io/parquet/page_string_decode.cu + src/io/parquet/predicate_pushdown.cpp + src/io/parquet/reader.cpp + src/io/parquet/reader_impl.cpp + src/io/parquet/reader_impl_helpers.cpp + src/io/parquet/reader_impl_preprocess.cu + src/io/parquet/writer_impl.cu + src/io/statistics/orc_column_statistics.cu + src/io/statistics/parquet_column_statistics.cu + src/io/text/byte_range_info.cpp + src/io/text/data_chunk_source_factories.cpp + src/io/text/bgzip_data_chunk_source.cu + src/io/text/bgzip_utils.cpp + src/io/text/multibyte_split.cu + src/io/utilities/arrow_io_source.cpp + src/io/utilities/column_buffer.cpp + src/io/utilities/config_utils.cpp + src/io/utilities/data_casting.cu + src/io/utilities/data_sink.cpp + src/io/utilities/datasource.cpp + src/io/utilities/file_io_utilities.cpp + src/io/utilities/parsing_utils.cu + src/io/utilities/row_selection.cpp + src/io/utilities/type_inference.cu + src/io/utilities/trie.cu + src/jit/cache.cpp + src/jit/parser.cpp + src/jit/util.cpp + src/join/conditional_join.cu + src/join/cross_join.cu + src/join/hash_join.cu + src/join/join.cu 
+ src/join/join_utils.cu + src/join/mixed_join.cu + src/join/mixed_join_kernel.cu + src/join/mixed_join_kernel_nulls.cu + src/join/mixed_join_kernels_semi.cu + src/join/mixed_join_semi.cu + src/join/mixed_join_size_kernel.cu + src/join/mixed_join_size_kernel_nulls.cu + src/join/mixed_join_size_kernels_semi.cu + src/join/semi_join.cu + src/lists/contains.cu + src/lists/combine/concatenate_list_elements.cu + src/lists/combine/concatenate_rows.cu + src/lists/copying/concatenate.cu + src/lists/copying/copying.cu + src/lists/copying/gather.cu + src/lists/copying/segmented_gather.cu + src/lists/copying/scatter_helper.cu + src/lists/count_elements.cu + src/lists/dremel.cu + src/lists/explode.cu + src/lists/extract.cu + src/lists/interleave_columns.cu + src/lists/lists_column_factories.cu + src/lists/lists_column_view.cu + src/lists/reverse.cu + src/lists/segmented_sort.cu + src/lists/sequences.cu + src/lists/set_operations.cu + src/lists/stream_compaction/apply_boolean_mask.cu + src/lists/stream_compaction/distinct.cu + src/lists/utilities.cu + src/merge/merge.cu + src/partitioning/partitioning.cu + src/partitioning/round_robin.cu + src/quantiles/tdigest/tdigest.cu + src/quantiles/tdigest/tdigest_aggregation.cu + src/quantiles/tdigest/tdigest_column_view.cpp + src/quantiles/quantile.cu + src/quantiles/quantiles.cu + src/reductions/all.cu + src/reductions/any.cu + src/reductions/collect_ops.cu + src/reductions/histogram.cu + src/reductions/max.cu + src/reductions/mean.cu + src/reductions/min.cu + src/reductions/minmax.cu + src/reductions/nth_element.cu + src/reductions/product.cu + src/reductions/reductions.cpp + src/reductions/scan/rank_scan.cu + src/reductions/scan/scan.cpp + src/reductions/scan/scan_exclusive.cu + src/reductions/scan/scan_inclusive.cu + src/reductions/segmented/all.cu + src/reductions/segmented/any.cu + src/reductions/segmented/counts.cu + src/reductions/segmented/max.cu + src/reductions/segmented/mean.cu + src/reductions/segmented/min.cu + 
src/reductions/segmented/nunique.cu + src/reductions/segmented/product.cu + src/reductions/segmented/reductions.cpp + src/reductions/segmented/std.cu + src/reductions/segmented/sum.cu + src/reductions/segmented/sum_of_squares.cu + src/reductions/segmented/update_validity.cu + src/reductions/segmented/var.cu + src/reductions/std.cu + src/reductions/sum.cu + src/reductions/sum_of_squares.cu + src/reductions/var.cu + src/replace/clamp.cu + src/replace/nans.cu + src/replace/nulls.cu + src/replace/replace.cu + src/reshape/byte_cast.cu + src/reshape/interleave_columns.cu + src/reshape/tile.cu + src/rolling/detail/optimized_unbounded_window.cpp + src/rolling/detail/rolling_collect_list.cu + src/rolling/detail/rolling_fixed_window.cu + src/rolling/detail/rolling_variable_window.cu + src/rolling/grouped_rolling.cu + src/rolling/range_window_bounds.cpp + src/rolling/rolling.cu + src/round/round.cu + src/scalar/scalar.cpp + src/scalar/scalar_factories.cpp + src/search/contains_column.cu + src/search/contains_scalar.cu + src/search/contains_table.cu + src/search/search_ordered.cu + src/sort/is_sorted.cu + src/sort/rank.cu + src/sort/segmented_sort.cu + src/sort/sort_column.cu + src/sort/sort.cu + src/sort/stable_segmented_sort.cu + src/sort/stable_sort_column.cu + src/sort/stable_sort.cu + src/stream_compaction/apply_boolean_mask.cu + src/stream_compaction/distinct.cu + src/stream_compaction/distinct_count.cu + src/stream_compaction/distinct_helpers.cu + src/stream_compaction/drop_nans.cu + src/stream_compaction/drop_nulls.cu + src/stream_compaction/stable_distinct.cu + src/stream_compaction/unique.cu + src/stream_compaction/unique_count.cu + src/stream_compaction/unique_count_column.cu + src/strings/attributes.cu + src/strings/capitalize.cu + src/strings/case.cu + src/strings/char_types/char_cases.cu + src/strings/char_types/char_types.cu + src/strings/combine/concatenate.cu + src/strings/combine/join.cu + src/strings/combine/join_list_elements.cu + src/strings/contains.cu + 
src/strings/convert/convert_booleans.cu + src/strings/convert/convert_datetime.cu + src/strings/convert/convert_durations.cu + src/strings/convert/convert_fixed_point.cu + src/strings/convert/convert_floats.cu + src/strings/convert/convert_hex.cu + src/strings/convert/convert_integers.cu + src/strings/convert/convert_ipv4.cu + src/strings/convert/convert_urls.cu + src/strings/convert/convert_lists.cu + src/strings/copying/concatenate.cu + src/strings/copying/copying.cu + src/strings/copying/shift.cu + src/strings/count_matches.cu + src/strings/extract/extract.cu + src/strings/extract/extract_all.cu + src/strings/filling/fill.cu + src/strings/filter_chars.cu + src/strings/like.cu + src/strings/padding.cu + src/strings/json/json_path.cu + src/strings/regex/regcomp.cpp + src/strings/regex/regexec.cpp + src/strings/regex/regex_program.cpp + src/strings/repeat_strings.cu + src/strings/replace/backref_re.cu + src/strings/replace/multi.cu + src/strings/replace/multi_re.cu + src/strings/replace/replace.cu + src/strings/replace/replace_re.cu + src/strings/reverse.cu + src/strings/search/findall.cu + src/strings/search/find.cu + src/strings/search/find_multiple.cu + src/strings/slice.cu + src/strings/split/partition.cu + src/strings/split/split.cu + src/strings/split/split_re.cu + src/strings/split/split_record.cu + src/strings/strings_column_factories.cu + src/strings/strings_column_view.cpp + src/strings/strings_scalar_factories.cpp + src/strings/strip.cu + src/strings/translate.cu + src/strings/utilities.cu + src/strings/wrap.cu + src/structs/copying/concatenate.cu + src/structs/structs_column_factories.cu + src/structs/structs_column_view.cpp + src/structs/utilities.cpp + src/table/row_operators.cu + src/table/table.cpp + src/table/table_device_view.cu + src/table/table_view.cpp + src/text/detokenize.cu + src/text/edit_distance.cu + src/text/generate_ngrams.cu + src/text/jaccard.cu + src/text/minhash.cu + src/text/ngrams_tokenize.cu + src/text/normalize.cu + 
src/text/replace.cu + src/text/stemmer.cu + src/text/subword/bpe_tokenizer.cu + src/text/subword/data_normalizer.cu + src/text/subword/load_hash_file.cu + src/text/subword/load_merges_file.cu + src/text/subword/subword_tokenize.cu + src/text/subword/wordpiece_tokenizer.cu + src/text/tokenize.cu + src/text/vocabulary_tokenize.cu + src/transform/bools_to_mask.cu + src/transform/compute_column.cu + src/transform/encode.cu + src/transform/mask_to_bools.cu + src/transform/nans_to_nulls.cu + src/transform/one_hot_encode.cu + src/transform/row_bit_count.cu + src/transform/transform.cpp + src/transpose/transpose.cu + src/unary/cast_ops.cu + src/unary/math_ops.cu + src/unary/nan_ops.cu + src/unary/null_ops.cu + src/utilities/default_stream.cpp + src/utilities/linked_column.cpp + src/utilities/logger.cpp + src/utilities/stacktrace.cpp + src/utilities/stream_pool.cpp + src/utilities/traits.cpp + src/utilities/type_checks.cpp + src/utilities/type_dispatcher.cpp +) + +# Anything that includes jitify needs to be compiled with _FILE_OFFSET_BITS=64 due to a limitation +# in how conda builds glibc +set_source_files_properties( + src/binaryop/binaryop.cpp + src/jit/cache.cpp + src/rolling/detail/rolling_fixed_window.cu + src/rolling/detail/rolling_variable_window.cu + src/rolling/grouped_rolling.cu + src/rolling/rolling.cu + src/transform/transform.cpp + PROPERTIES COMPILE_DEFINITIONS "_FILE_OFFSET_BITS=64" +) + +set_target_properties( + cudf + PROPERTIES BUILD_RPATH "\$ORIGIN" + INSTALL_RPATH "\$ORIGIN" + # set target compile options + CXX_STANDARD 17 + CXX_STANDARD_REQUIRED ON + # For std:: support of __int128_t. 
Can be removed once using cuda::std + CXX_EXTENSIONS ON + CUDA_STANDARD 17 + CUDA_STANDARD_REQUIRED ON + POSITION_INDEPENDENT_CODE ON + INTERFACE_POSITION_INDEPENDENT_CODE ON +) + +target_compile_options( + cudf PRIVATE "$<$:${CUDF_CXX_FLAGS}>" + "$<$:${CUDF_CUDA_FLAGS}>" +) + +if(CUDF_BUILD_STACKTRACE_DEBUG) + # Remove any optimization level to avoid nvcc warning "incompatible redefinition for option + # 'optimize'". + string(REGEX REPLACE "(\-O[0123])" "" CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS}") + string(REGEX REPLACE "(\-O[0123])" "" CMAKE_CUDA_FLAGS_RELEASE "${CMAKE_CUDA_FLAGS_RELEASE}") + string(REGEX REPLACE "(\-O[0123])" "" CMAKE_CUDA_FLAGS_MINSIZEREL + "${CMAKE_CUDA_FLAGS_MINSIZEREL}" + ) + string(REGEX REPLACE "(\-O[0123])" "" CMAKE_CUDA_FLAGS_RELWITHDEBINFO + "${CMAKE_CUDA_FLAGS_RELWITHDEBINFO}" + ) + + add_library(cudf_backtrace INTERFACE) + target_compile_definitions(cudf_backtrace INTERFACE CUDF_BUILD_STACKTRACE_DEBUG) + target_compile_options( + cudf_backtrace INTERFACE "$<$:-Og>" + "$<$:-Xcompiler=-Og>" + ) + target_link_options( + cudf_backtrace INTERFACE "$<$:-rdynamic>" + "$<$:-Xlinker=-rdynamic>" + ) + target_link_libraries(cudf PRIVATE cudf_backtrace) +endif() + +# Specify include paths for the current target and dependents +target_include_directories( + cudf + PUBLIC "$" + "$" + "$" + "$" + PRIVATE "$" + INTERFACE "$" +) + +target_compile_definitions( + cudf PUBLIC "$<$:${CUDF_CXX_DEFINITIONS}>" + "$:${CUDF_CUDA_DEFINITIONS}>>" +) + +# Disable Jitify log printing. See https://github.com/NVIDIA/jitify/issues/79 +target_compile_definitions(cudf PRIVATE "JITIFY_PRINT_LOG=0") + +if(JITIFY_USE_CACHE) + # Instruct src/jit/cache what version of cudf we are building so it can compute a cal-ver cache + # directory. 
We isolate this definition to the single source so it doesn't effect compiling + # caching for all of libcudf + set_property( + SOURCE src/jit/cache.cpp + APPEND + PROPERTY COMPILE_DEFINITIONS "JITIFY_USE_CACHE" "CUDF_VERSION=${PROJECT_VERSION}" + ) +endif() + +# Per-thread default stream +if(CUDF_USE_PER_THREAD_DEFAULT_STREAM) + target_compile_definitions( + cudf PUBLIC CUDA_API_PER_THREAD_DEFAULT_STREAM CUDF_USE_PER_THREAD_DEFAULT_STREAM + ) +endif() + +# Disable NVTX if necessary +if(NOT USE_NVTX) + target_compile_definitions(cudf PUBLIC NVTX_DISABLE) +endif() + +# Define RMM logging level +target_compile_definitions(cudf PRIVATE "RMM_LOGGING_LEVEL=LIBCUDF_LOGGING_LEVEL") + +# Define spdlog level +target_compile_definitions(cudf PUBLIC "SPDLOG_ACTIVE_LEVEL=SPDLOG_LEVEL_${LIBCUDF_LOGGING_LEVEL}") + +# Compile stringified JIT sources first +add_dependencies(cudf jitify_preprocess_run) + +# Specify the target module library dependencies +target_link_libraries( + cudf + PUBLIC ${ARROW_LIBRARIES} libcudacxx::libcudacxx cudf::Thrust rmm::rmm + PRIVATE cuco::cuco ZLIB::ZLIB nvcomp::nvcomp kvikio::kvikio + $ +) + +# Add Conda library, and include paths if specified +if(TARGET conda_env) + target_link_libraries(cudf PRIVATE conda_env) +endif() + +if(CUDA_STATIC_RUNTIME) + # Tell CMake what CUDA language runtime to use + set_target_properties(cudf PROPERTIES CUDA_RUNTIME_LIBRARY Static) + # Make sure to export to consumers what runtime we used + target_link_libraries(cudf PUBLIC CUDA::cudart_static) +else() + # Tell CMake what CUDA language runtime to use + set_target_properties(cudf PROPERTIES CUDA_RUNTIME_LIBRARY Shared) + # Make sure to export to consumers what runtime we used + target_link_libraries(cudf PUBLIC CUDA::cudart) +endif() + +file( + WRITE "${CUDF_BINARY_DIR}/fatbin.ld" + [=[ +SECTIONS +{ + .nvFatBinSegment : { *(.nvFatBinSegment) } + .nv_fatbin : { *(.nv_fatbin) } +} +]=] +) +target_link_options(cudf PRIVATE "$") + +add_library(cudf::cudf ALIAS cudf) + +# 
################################################################################################## +# * tests and benchmarks -------------------------------------------------------------------------- +# ################################################################################################## + +# ################################################################################################## +# * build cudftestutil ---------------------------------------------------------------------------- + +if(CUDF_BUILD_TESTUTIL) + add_library( + cudftest_default_stream + # When compiled as a dynamic library allows us to use LD_PRELOAD injection of symbols. We + # currently leverage this for stream-related library validation and may make use of it for + # other similar features in the future. + tests/utilities/default_stream.cpp + ) + set_target_properties( + cudftest_default_stream + PROPERTIES BUILD_RPATH "\$ORIGIN" + INSTALL_RPATH "\$ORIGIN" + # set target compile options + CXX_STANDARD 17 + CXX_STANDARD_REQUIRED ON + CUDA_STANDARD 17 + CUDA_STANDARD_REQUIRED ON + POSITION_INDEPENDENT_CODE ON + INTERFACE_POSITION_INDEPENDENT_CODE ON + ) + target_link_libraries( + cudftest_default_stream + PUBLIC cudf + PRIVATE $ + ) + + add_library(cudf::cudftest_default_stream ALIAS cudftest_default_stream) + + # Needs to be static so that we support usage of static builds of gtest which doesn't compile with + # fPIC enabled and therefore can't be embedded into shared libraries. 
+ add_library( + cudftestutil STATIC + tests/io/metadata_utilities.cpp + tests/utilities/base_fixture.cpp + tests/utilities/column_utilities.cu + tests/utilities/table_utilities.cu + tests/utilities/tdigest_utilities.cu + ) + + set_target_properties( + cudftestutil + PROPERTIES BUILD_RPATH "\$ORIGIN" + INSTALL_RPATH "\$ORIGIN" + # set target compile options + CXX_STANDARD 17 + CXX_STANDARD_REQUIRED ON + CUDA_STANDARD 17 + CUDA_STANDARD_REQUIRED ON + POSITION_INDEPENDENT_CODE ON + INTERFACE_POSITION_INDEPENDENT_CODE ON + ) + + target_compile_options( + cudftestutil PUBLIC "$:${CUDF_CXX_FLAGS}>>" + "$:${CUDF_CUDA_FLAGS}>>" + ) + + target_link_libraries( + cudftestutil + PUBLIC GTest::gmock GTest::gtest Threads::Threads cudf cudftest_default_stream + PRIVATE $ + ) + + target_include_directories( + cudftestutil PUBLIC "$" + "$" + ) + add_library(cudf::cudftestutil ALIAS cudftestutil) + +endif() + +# * build cudf_identify_stream_usage -------------------------------------------------------------- + +if(CUDF_BUILD_STREAMS_TEST_UTIL) + if(CUDA_STATIC_RUNTIME) + message( + FATAL_ERROR + "Stream identification cannot be used with a static CUDA runtime. Please set CUDA_STATIC_RUNTIME=OFF or CUDF_BUILD_STREAMS_TEST_UTIL=OFF." + ) + endif() + + # Libraries for stream-related testing. We build the library twice, one with STREAM_MODE_TESTING + # on and one with it set to off. Each test will then be configured to use the appropriate library + # depending via ctest and whether it has been updated to expose public stream APIs. 
+ foreach(_mode cudf testing) + set(_tgt "cudf_identify_stream_usage_mode_${_mode}") + add_library( + ${_tgt} SHARED src/utilities/stacktrace.cpp tests/utilities/identify_stream_usage.cpp + ) + + set_target_properties( + ${_tgt} + PROPERTIES # set target compile options + CXX_STANDARD 17 + CXX_STANDARD_REQUIRED ON + POSITION_INDEPENDENT_CODE ON + ) + target_compile_options( + ${_tgt} PRIVATE "$:${CUDF_CXX_FLAGS}>>" + ) + target_include_directories(${_tgt} PRIVATE "$") + target_link_libraries(${_tgt} PUBLIC CUDA::cudart rmm::rmm) + if(CUDF_BUILD_STACKTRACE_DEBUG) + target_link_libraries(${_tgt} PRIVATE cudf_backtrace) + endif() + add_library(cudf::${_tgt} ALIAS ${_tgt}) + + if("${_mode}" STREQUAL "testing") + target_compile_definitions(${_tgt} PUBLIC STREAM_MODE_TESTING) + endif() + endforeach() +endif() + +# ################################################################################################## +# * add tests ------------------------------------------------------------------------------------- + +if(CUDF_BUILD_TESTS) + # include CTest module -- automatically calls enable_testing() + include(CTest) + + # ctest cuda memcheck + find_program(CUDA_SANITIZER compute-sanitizer) + set(MEMORYCHECK_COMMAND ${CUDA_SANITIZER}) + set(MEMORYCHECK_TYPE CudaSanitizer) + set(CUDA_SANITIZER_COMMAND_OPTIONS "--tool memcheck") + + # Always print verbose output when tests fail if run using `make test`. 
+ list(APPEND CMAKE_CTEST_ARGUMENTS "--output-on-failure") + add_subdirectory(tests) +endif() + +# ################################################################################################## +# * add benchmarks -------------------------------------------------------------------------------- + +if(CUDF_BUILD_BENCHMARKS) + # Find or install GoogleBench + include(${rapids-cmake-dir}/cpm/gbench.cmake) + rapids_cpm_gbench() + + # Find or install nvbench + include(cmake/thirdparty/get_nvbench.cmake) + + add_subdirectory(benchmarks) +endif() + +# ################################################################################################## +# * install targets ------------------------------------------------------------------------------- +rapids_cmake_install_lib_dir(lib_dir) +include(CPack) +include(GNUInstallDirs) + +set(CMAKE_INSTALL_DEFAULT_COMPONENT_NAME cudf) + +# install target for cudf_base and the proxy libcudf.so +install( + TARGETS cudf + DESTINATION ${lib_dir} + EXPORT cudf-exports +) + +set(_components_export_string) +if(TARGET cudftestutil) + install( + TARGETS cudftest_default_stream cudftestutil + DESTINATION ${lib_dir} + EXPORT cudf-testing-exports + ) + set(_components_export_string COMPONENTS testing COMPONENTS_EXPORT_SET cudf-testing-exports) +endif() + +install(DIRECTORY ${CUDF_SOURCE_DIR}/include/cudf ${CUDF_SOURCE_DIR}/include/cudf_test + ${CUDF_SOURCE_DIR}/include/nvtext DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} +) + +if(CUDF_BUILD_STREAMS_TEST_UTIL) + install(TARGETS cudf_identify_stream_usage_mode_cudf DESTINATION ${lib_dir}) + install(TARGETS cudf_identify_stream_usage_mode_testing DESTINATION ${lib_dir}) +endif() + +set(doc_string + [=[ +Provide targets for the cudf library. + +Built based on the Apache Arrow columnar memory format, cuDF is a GPU DataFrame +library for loading, joining, aggregating, filtering, and otherwise +manipulating data. 
+ +cuDF provides a pandas-like API that will be familiar to data engineers & +data scientists, so they can use it to easily accelerate their workflows +without going into the details of CUDA programming. + + +Imported Targets +^^^^^^^^^^^^^^^^ + +If cudf is found, this module defines the following IMPORTED GLOBAL +targets: + + cudf::cudf - The main cudf library. + +This module offers an optional testing component which defines the +following IMPORTED GLOBAL targets: + + cudf::cudftestutil - The main cudf testing library + ]=] +) + +set(common_code_string + [=[ +if(NOT TARGET cudf::Thrust) + thrust_create_target(cudf::Thrust FROM_OPTIONS) +endif() +]=] +) + +if(CUDF_ENABLE_ARROW_PARQUET) + string( + APPEND + install_code_string + [=[ + if(NOT Parquet_DIR) + set(Parquet_DIR "${Arrow_DIR}") + endif() + set(ArrowDataset_DIR "${Arrow_DIR}") + find_dependency(ArrowDataset) + ]=] + ) +endif() + +string( + APPEND + install_code_string + [=[ +if(testing IN_LIST cudf_FIND_COMPONENTS) + enable_language(CUDA) +endif() +]=] +) +string(APPEND install_code_string "${common_code_string}") + +rapids_export( + INSTALL cudf + EXPORT_SET cudf-exports ${_components_export_string} + GLOBAL_TARGETS cudf cudftestutil + NAMESPACE cudf:: + DOCUMENTATION doc_string + FINAL_CODE_BLOCK install_code_string +) + +# ################################################################################################## +# * build export ------------------------------------------------------------------------------- +set(build_code_string + [=[ +if(EXISTS "${CMAKE_CURRENT_LIST_DIR}/cudf-testing-dependencies.cmake") + include("${CMAKE_CURRENT_LIST_DIR}/cudf-testing-dependencies.cmake") +endif() +if(EXISTS "${CMAKE_CURRENT_LIST_DIR}/cudf-testing-targets.cmake") + include("${CMAKE_CURRENT_LIST_DIR}/cudf-testing-targets.cmake") +endif() +]=] +) + +string(APPEND build_code_string "${common_code_string}") + +rapids_export( + BUILD cudf + EXPORT_SET cudf-exports ${_components_export_string} + GLOBAL_TARGETS 
cudf cudftestutil + NAMESPACE cudf:: + DOCUMENTATION doc_string + FINAL_CODE_BLOCK build_code_string +) + +# ################################################################################################## +# * make documentation ---------------------------------------------------------------------------- + +# doc targets for cuDF +add_custom_command( + OUTPUT CUDF_DOXYGEN + WORKING_DIRECTORY ${CUDF_SOURCE_DIR}/doxygen + COMMAND doxygen Doxyfile + VERBATIM + COMMENT "Custom command for building cudf doxygen docs." +) + +add_custom_target( + docs_cudf + DEPENDS CUDF_DOXYGEN + COMMENT "Custom command for building cudf doxygen docs." +) + +# ################################################################################################## +# * make gdb helper scripts ------------------------------------------------------------------------ + +# build pretty-printer load script +if(Thrust_SOURCE_DIR AND rmm_SOURCE_DIR) + configure_file(scripts/load-pretty-printers.in load-pretty-printers @ONLY) +endif() diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt new file mode 100644 index 0000000..cd6b3cf --- /dev/null +++ b/cpp/benchmarks/CMakeLists.txt @@ -0,0 +1,336 @@ +# ============================================================================= +# Copyright (c) 2018-2023, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. 
+# ============================================================================= + +find_package(Threads REQUIRED) + +add_library(cudf_datagen STATIC common/generate_input.cu) +target_compile_features(cudf_datagen PUBLIC cxx_std_17 cuda_std_17) + +target_compile_options( + cudf_datagen PUBLIC "$<$:${CUDF_CXX_FLAGS}>" + "$<$:${CUDF_CUDA_FLAGS}>" +) + +target_link_libraries( + cudf_datagen + PUBLIC GTest::gmock GTest::gtest benchmark::benchmark nvbench::nvbench Threads::Threads cudf + cudftestutil + PRIVATE $ +) + +target_include_directories( + cudf_datagen + PUBLIC "$" "$" + "$" +) + +# ################################################################################################## +# * compiler function ----------------------------------------------------------------------------- + +# Use an OBJECT library so we only compile these helper source files only once +add_library( + cudf_benchmark_common OBJECT "${CUDF_SOURCE_DIR}/tests/utilities/base_fixture.cpp" + synchronization/synchronization.cpp io/cuio_common.cpp +) +target_link_libraries(cudf_benchmark_common PRIVATE cudf_datagen $) +add_custom_command( + OUTPUT CUDF_BENCHMARKS + COMMAND echo Running benchmarks + COMMAND mkdir -p results + VERBATIM + COMMENT "Running cudf benchmarks." + USES_TERMINAL +) + +# This function takes in a benchmark name and benchmark source and handles setting all of the +# associated properties and linking to build the benchmark +function(ConfigureBench CMAKE_BENCH_NAME) + add_executable(${CMAKE_BENCH_NAME} ${ARGN}) + set_target_properties( + ${CMAKE_BENCH_NAME} + PROPERTIES RUNTIME_OUTPUT_DIRECTORY "$" + INSTALL_RPATH "\$ORIGIN/../../../lib" + CXX_STANDARD 17 + CXX_STANDARD_REQUIRED ON + # For std:: support of __int128_t. 
Can be removed once using cuda::std + CXX_EXTENSIONS ON + CUDA_STANDARD 17 + CUDA_STANDARD_REQUIRED ON + ) + target_link_libraries( + ${CMAKE_BENCH_NAME} PRIVATE cudf_benchmark_common cudf_datagen benchmark::benchmark_main + $ + ) + add_custom_command( + OUTPUT CUDF_BENCHMARKS + COMMAND ${CMAKE_BENCH_NAME} --benchmark_out_format=json + --benchmark_out=results/${CMAKE_BENCH_NAME}.json + APPEND + COMMENT "Adding ${CMAKE_BENCH_NAME}" + ) + + install( + TARGETS ${CMAKE_BENCH_NAME} + COMPONENT testing + DESTINATION bin/benchmarks/libcudf + EXCLUDE_FROM_ALL + ) +endfunction() + +# This function takes in a benchmark name and benchmark source for nvbench benchmarks and handles +# setting all of the associated properties and linking to build the benchmark +function(ConfigureNVBench CMAKE_BENCH_NAME) + add_executable(${CMAKE_BENCH_NAME} ${ARGN} fixture/nvbench_main.cpp) + set_target_properties( + ${CMAKE_BENCH_NAME} + PROPERTIES RUNTIME_OUTPUT_DIRECTORY "$" + INSTALL_RPATH "\$ORIGIN/../../../lib" + ) + target_link_libraries( + ${CMAKE_BENCH_NAME} PRIVATE cudf_benchmark_common cudf_datagen nvbench::nvbench + $ + ) + install( + TARGETS ${CMAKE_BENCH_NAME} + COMPONENT testing + DESTINATION bin/benchmarks/libcudf + EXCLUDE_FROM_ALL + ) +endfunction() + +# ################################################################################################## +# * column benchmarks ----------------------------------------------------------------------------- +ConfigureBench(COLUMN_CONCAT_BENCH column/concatenate.cpp) + +# ################################################################################################## +# * gather benchmark ------------------------------------------------------------------------------ +ConfigureBench(GATHER_BENCH copying/gather.cu) + +# ################################################################################################## +# * scatter benchmark ----------------------------------------------------------------------------- 
+ConfigureBench(SCATTER_BENCH copying/scatter.cu) + +# ################################################################################################## +# * lists scatter benchmark ----------------------------------------------------------------------- +ConfigureBench(SCATTER_LISTS_BENCH lists/copying/scatter_lists.cu) + +# ################################################################################################## +# * Other list-related operations benchmark ------------------------------------------------------- +ConfigureNVBench(SET_OPS_NVBENCH lists/set_operations.cpp) + +# ################################################################################################## +# * contiguous_split benchmark ------------------------------------------------------------------- +ConfigureBench(CONTIGUOUS_SPLIT_BENCH copying/contiguous_split.cu) + +# ################################################################################################## +# * shift benchmark ------------------------------------------------------------------------------- +ConfigureBench(SHIFT_BENCH copying/shift.cu) + +# ################################################################################################## +# * copy-if-else benchmark +# ----------------------------------------------------------------------------- +ConfigureBench(COPY_IF_ELSE_BENCH copying/copy_if_else.cpp) + +# ################################################################################################## +# * transpose benchmark --------------------------------------------------------------------------- +ConfigureBench(TRANSPOSE_BENCH transpose/transpose.cpp) + +# ################################################################################################## +# * apply_boolean_mask benchmark ------------------------------------------------------------------ +ConfigureBench(APPLY_BOOLEAN_MASK_BENCH stream_compaction/apply_boolean_mask.cpp) + +# 
################################################################################################## +# * stream_compaction benchmark ------------------------------------------------------------------- +ConfigureNVBench( + STREAM_COMPACTION_NVBENCH + stream_compaction/distinct.cpp + stream_compaction/distinct_count.cpp + stream_compaction/stable_distinct.cpp + stream_compaction/unique.cpp + stream_compaction/unique_count.cpp +) + +# ################################################################################################## +# * join benchmark -------------------------------------------------------------------------------- +ConfigureBench(JOIN_BENCH join/left_join.cu join/conditional_join.cu) +ConfigureNVBench(JOIN_NVBENCH join/join.cu join/mixed_join.cu) + +# ################################################################################################## +# * iterator benchmark ---------------------------------------------------------------------------- +ConfigureBench(ITERATOR_BENCH iterator/iterator.cu) + +# ################################################################################################## +# * search benchmark ------------------------------------------------------------------------------ +ConfigureBench(SEARCH_BENCH search/search.cpp) +ConfigureNVBench(SEARCH_NVBENCH search/contains_scalar.cpp search/contains_table.cpp) + +# ################################################################################################## +# * sort benchmark -------------------------------------------------------------------------------- +ConfigureBench(SORT_BENCH sort/rank.cpp sort/sort.cpp sort/sort_strings.cpp) +ConfigureNVBench( + SORT_NVBENCH sort/rank_lists.cpp sort/rank_structs.cpp sort/segmented_sort.cpp + sort/sort_lists.cpp sort/sort_structs.cpp +) + +# ################################################################################################## +# * quantiles benchmark +# 
-------------------------------------------------------------------------------- +ConfigureBench(QUANTILES_BENCH quantiles/quantiles.cpp) + +# ################################################################################################## +# * type_dispatcher benchmark --------------------------------------------------------------------- +ConfigureBench(TYPE_DISPATCHER_BENCH type_dispatcher/type_dispatcher.cu) + +# ################################################################################################## +# * reduction benchmark --------------------------------------------------------------------------- +ConfigureBench( + REDUCTION_BENCH reduction/anyall.cpp reduction/dictionary.cpp reduction/minmax.cpp + reduction/reduce.cpp reduction/scan.cpp +) +ConfigureNVBench( + REDUCTION_NVBENCH reduction/rank.cpp reduction/scan_structs.cpp reduction/segmented_reduce.cpp +) + +# ################################################################################################## +# * reduction benchmark --------------------------------------------------------------------------- +ConfigureBench(REPLACE_BENCH replace/clamp.cpp replace/nans.cpp) + +# ################################################################################################## +# * filling benchmark ----------------------------------------------------------------------------- +ConfigureBench(FILL_BENCH filling/repeat.cpp) + +# ################################################################################################## +# * groupby benchmark ----------------------------------------------------------------------------- +ConfigureBench( + GROUPBY_BENCH groupby/group_sum.cpp groupby/group_nth.cpp groupby/group_shift.cpp + groupby/group_struct_values.cpp groupby/group_no_requests.cpp groupby/group_scan.cpp +) + +ConfigureNVBench( + GROUPBY_NVBENCH groupby/group_max.cpp groupby/group_nunique.cpp groupby/group_rank.cpp + groupby/group_struct_keys.cpp +) + +# 
################################################################################################## +# * hashing benchmark ----------------------------------------------------------------------------- +ConfigureBench(HASHING_BENCH hashing/partition.cpp) +ConfigureNVBench(HASHING_NVBENCH hashing/hash.cpp) + +# ################################################################################################## +# * merge benchmark ------------------------------------------------------------------------------- +ConfigureBench(MERGE_BENCH merge/merge.cpp) + +# ################################################################################################## +# * null_mask benchmark --------------------------------------------------------------------------- +ConfigureBench(NULLMASK_BENCH null_mask/set_null_mask.cpp) + +# ################################################################################################## +# * parquet writer benchmark ---------------------------------------------------------------------- +ConfigureNVBench( + PARQUET_WRITER_NVBENCH io/parquet/parquet_writer.cpp io/parquet/parquet_writer_chunks.cpp +) + +# ################################################################################################## +# * parquet reader benchmark ---------------------------------------------------------------------- +ConfigureNVBench( + PARQUET_READER_NVBENCH io/parquet/parquet_reader_input.cpp io/parquet/parquet_reader_options.cpp +) + +# ################################################################################################## +# * orc reader benchmark -------------------------------------------------------------------------- +ConfigureNVBench(ORC_READER_NVBENCH io/orc/orc_reader_input.cpp io/orc/orc_reader_options.cpp) + +# ################################################################################################## +# * csv reader benchmark -------------------------------------------------------------------------- 
+ConfigureNVBench(CSV_READER_NVBENCH io/csv/csv_reader_input.cpp io/csv/csv_reader_options.cpp) + +# ################################################################################################## +# * orc writer benchmark -------------------------------------------------------------------------- +ConfigureNVBench(ORC_WRITER_NVBENCH io/orc/orc_writer.cpp io/orc/orc_writer_chunks.cpp) + +# ################################################################################################## +# * csv writer benchmark -------------------------------------------------------------------------- +ConfigureNVBench(CSV_WRITER_NVBENCH io/csv/csv_writer.cpp) + +# ################################################################################################## +# * ast benchmark --------------------------------------------------------------------------------- +ConfigureBench(AST_BENCH ast/transform.cpp) + +# ################################################################################################## +# * binaryop benchmark ---------------------------------------------------------------------------- +ConfigureBench(BINARYOP_BENCH binaryop/binaryop.cpp binaryop/compiled_binaryop.cpp) + +# ################################################################################################## +# * nvtext benchmark ------------------------------------------------------------------- +ConfigureBench(TEXT_BENCH text/ngrams.cpp text/subword.cpp) + +ConfigureNVBench( + TEXT_NVBENCH text/edit_distance.cpp text/hash_ngrams.cpp text/jaccard.cpp text/minhash.cpp + text/normalize.cpp text/replace.cpp text/tokenize.cpp +) + +# ################################################################################################## +# * strings benchmark ------------------------------------------------------------------- +ConfigureBench( + STRINGS_BENCH + string/combine.cpp + string/convert_datetime.cpp + string/convert_durations.cpp + string/convert_fixed_point.cpp + string/convert_numerics.cpp + 
string/copy.cu + string/factory.cu + string/filter.cpp + string/find.cpp + string/repeat_strings.cpp + string/replace.cpp + string/slice.cpp + string/translate.cpp + string/url_decode.cu +) + +ConfigureNVBench( + STRINGS_NVBENCH + string/case.cpp + string/char_types.cpp + string/contains.cpp + string/count.cpp + string/extract.cpp + string/gather.cpp + string/join_strings.cpp + string/lengths.cpp + string/like.cpp + string/replace_re.cpp + string/reverse.cpp + string/split.cpp + string/split_re.cpp +) + +# ################################################################################################## +# * json benchmark ------------------------------------------------------------------- +ConfigureBench(JSON_BENCH string/json.cu) +ConfigureNVBench(FST_NVBENCH io/fst.cu) +ConfigureNVBench(JSON_READER_NVBENCH io/json/nested_json.cpp io/json/json_reader_input.cpp) +ConfigureNVBench(JSON_WRITER_NVBENCH io/json/json_writer.cpp) + +# ################################################################################################## +# * io benchmark --------------------------------------------------------------------- +ConfigureNVBench(MULTIBYTE_SPLIT_NVBENCH io/text/multibyte_split.cpp) +target_link_libraries(MULTIBYTE_SPLIT_NVBENCH PRIVATE ZLIB::ZLIB) + +add_custom_target( + run_benchmarks + DEPENDS CUDF_BENCHMARKS + COMMENT "Custom command for running cudf benchmarks." +) diff --git a/cpp/benchmarks/ast/transform.cpp b/cpp/benchmarks/ast/transform.cpp new file mode 100644 index 0000000..65a4453 --- /dev/null +++ b/cpp/benchmarks/ast/transform.cpp @@ -0,0 +1,134 @@ +/* + * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +#include +#include + +#include + +#include +#include +#include +#include +#include + +enum class TreeType { + IMBALANCED_LEFT // All operator expressions have a left child operator expression and a right + // child column reference +}; + +template +class AST : public cudf::benchmark {}; + +template +static void BM_ast_transform(benchmark::State& state) +{ + auto const table_size{static_cast(state.range(0))}; + auto const tree_levels{static_cast(state.range(1))}; + + // Create table data + auto const n_cols = reuse_columns ? 1 : tree_levels + 1; + auto const source_table = + create_sequence_table(cycle_dtypes({cudf::type_to_id()}, n_cols), + row_count{table_size}, + Nullable ? std::optional{0.5} : std::nullopt); + auto table = source_table->view(); + + // Create column references + auto column_refs = std::vector(); + std::transform(thrust::make_counting_iterator(0), + thrust::make_counting_iterator(n_cols), + std::back_inserter(column_refs), + [](auto const& column_id) { + return cudf::ast::column_reference(reuse_columns ? 0 : column_id); + }); + + // Create expression trees + + // Note that a std::list is required here because of its guarantees against reference invalidation + // when items are added or removed. References to items in a std::vector are not safe if the + // vector must re-allocate. 
+ auto expressions = std::list(); + + // Construct tree that chains additions like (((a + b) + c) + d) + auto const op = cudf::ast::ast_operator::ADD; + if (reuse_columns) { + expressions.push_back(cudf::ast::operation(op, column_refs.at(0), column_refs.at(0))); + for (cudf::size_type i = 0; i < tree_levels - 1; i++) { + expressions.push_back(cudf::ast::operation(op, expressions.back(), column_refs.at(0))); + } + } else { + expressions.push_back(cudf::ast::operation(op, column_refs.at(0), column_refs.at(1))); + std::transform(std::next(column_refs.cbegin(), 2), + column_refs.cend(), + std::back_inserter(expressions), + [&](auto const& column_ref) { + return cudf::ast::operation(op, expressions.back(), column_ref); + }); + } + + auto const& expression_tree_root = expressions.back(); + + // Execute benchmark + for (auto _ : state) { + cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0 + cudf::compute_column(table, expression_tree_root); + } + + // Use the number of bytes read from global memory + state.SetBytesProcessed(static_cast(state.iterations()) * state.range(0) * + (tree_levels + 1) * sizeof(key_type)); +} + +static void CustomRanges(benchmark::internal::Benchmark* b) +{ + auto row_counts = std::vector{100'000, 1'000'000, 10'000'000, 100'000'000}; + auto operation_counts = std::vector{1, 5, 10}; + for (auto const& row_count : row_counts) { + for (auto const& operation_count : operation_counts) { + b->Args({row_count, operation_count}); + } + } +} + +#define AST_TRANSFORM_BENCHMARK_DEFINE(name, key_type, tree_type, reuse_columns, nullable) \ + BENCHMARK_TEMPLATE_DEFINE_F(AST, name, key_type, tree_type, reuse_columns, nullable) \ + (::benchmark::State & st) \ + { \ + BM_ast_transform(st); \ + } \ + BENCHMARK_REGISTER_F(AST, name) \ + ->Apply(CustomRanges) \ + ->Unit(benchmark::kMillisecond) \ + ->UseManualTime(); + +AST_TRANSFORM_BENCHMARK_DEFINE( + ast_int32_imbalanced_unique, int32_t, TreeType::IMBALANCED_LEFT, false, false); 
+AST_TRANSFORM_BENCHMARK_DEFINE( + ast_int32_imbalanced_reuse, int32_t, TreeType::IMBALANCED_LEFT, true, false); +AST_TRANSFORM_BENCHMARK_DEFINE( + ast_double_imbalanced_unique, double, TreeType::IMBALANCED_LEFT, false, false); + +AST_TRANSFORM_BENCHMARK_DEFINE( + ast_int32_imbalanced_unique_nulls, int32_t, TreeType::IMBALANCED_LEFT, false, true); +AST_TRANSFORM_BENCHMARK_DEFINE( + ast_int32_imbalanced_reuse_nulls, int32_t, TreeType::IMBALANCED_LEFT, true, true); +AST_TRANSFORM_BENCHMARK_DEFINE( + ast_double_imbalanced_unique_nulls, double, TreeType::IMBALANCED_LEFT, false, true); diff --git a/cpp/benchmarks/binaryop/binaryop.cpp b/cpp/benchmarks/binaryop/binaryop.cpp new file mode 100644 index 0000000..fa98d9e --- /dev/null +++ b/cpp/benchmarks/binaryop/binaryop.cpp @@ -0,0 +1,115 @@ +/* + * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include + +#include +#include +#include + +#include +#include + +// This set of benchmarks is designed to be a comparison for the AST benchmarks + +enum class TreeType { + IMBALANCED_LEFT // All operator expressions have a left child operator expression and a right + // child column reference +}; + +template +class BINARYOP : public cudf::benchmark {}; + +template +static void BM_binaryop_transform(benchmark::State& state) +{ + auto const table_size{static_cast(state.range(0))}; + auto const tree_levels{static_cast(state.range(1))}; + + // Create table data + auto const n_cols = reuse_columns ? 1 : tree_levels + 1; + auto const source_table = create_sequence_table( + cycle_dtypes({cudf::type_to_id()}, n_cols), row_count{table_size}); + cudf::table_view table{*source_table}; + + // Execute benchmark + for (auto _ : state) { + cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0 + // Execute tree that chains additions like (((a + b) + c) + d) + auto const op = cudf::binary_operator::ADD; + auto const result_data_type = cudf::data_type(cudf::type_to_id()); + if (reuse_columns) { + auto result = cudf::binary_operation(table.column(0), table.column(0), op, result_data_type); + for (cudf::size_type i = 0; i < tree_levels - 1; i++) { + result = cudf::binary_operation(result->view(), table.column(0), op, result_data_type); + } + } else { + auto result = cudf::binary_operation(table.column(0), table.column(1), op, result_data_type); + std::for_each(std::next(table.begin(), 2), table.end(), [&](auto const& col) { + result = cudf::binary_operation(result->view(), col, op, result_data_type); + }); + } + } + + // Use the number of bytes read from global memory + state.SetBytesProcessed(static_cast(state.iterations()) * state.range(0) * + (tree_levels + 1) * sizeof(key_type)); +} + +#define BINARYOP_TRANSFORM_BENCHMARK_DEFINE(name, key_type, tree_type, reuse_columns) \ + BENCHMARK_TEMPLATE_DEFINE_F(BINARYOP, name, key_type, 
tree_type, reuse_columns) \ + (::benchmark::State & st) { BM_binaryop_transform(st); } + +BINARYOP_TRANSFORM_BENCHMARK_DEFINE(binaryop_int32_imbalanced_unique, + int32_t, + TreeType::IMBALANCED_LEFT, + false); +BINARYOP_TRANSFORM_BENCHMARK_DEFINE(binaryop_int32_imbalanced_reuse, + int32_t, + TreeType::IMBALANCED_LEFT, + true); +BINARYOP_TRANSFORM_BENCHMARK_DEFINE(binaryop_double_imbalanced_unique, + double, + TreeType::IMBALANCED_LEFT, + false); + +static void CustomRanges(benchmark::internal::Benchmark* b) +{ + auto row_counts = std::vector{100'000, 1'000'000, 10'000'000, 100'000'000}; + auto operation_counts = std::vector{1, 2, 5, 10}; + for (auto const& row_count : row_counts) { + for (auto const& operation_count : operation_counts) { + b->Args({row_count, operation_count}); + } + } +} + +BENCHMARK_REGISTER_F(BINARYOP, binaryop_int32_imbalanced_unique) + ->Apply(CustomRanges) + ->Unit(benchmark::kMillisecond) + ->UseManualTime(); + +BENCHMARK_REGISTER_F(BINARYOP, binaryop_int32_imbalanced_reuse) + ->Apply(CustomRanges) + ->Unit(benchmark::kMillisecond) + ->UseManualTime(); + +BENCHMARK_REGISTER_F(BINARYOP, binaryop_double_imbalanced_unique) + ->Apply(CustomRanges) + ->Unit(benchmark::kMillisecond) + ->UseManualTime(); diff --git a/cpp/benchmarks/binaryop/compiled_binaryop.cpp b/cpp/benchmarks/binaryop/compiled_binaryop.cpp new file mode 100644 index 0000000..a1131df --- /dev/null +++ b/cpp/benchmarks/binaryop/compiled_binaryop.cpp @@ -0,0 +1,115 @@ +/* + * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +#include + +class COMPILED_BINARYOP : public cudf::benchmark {}; + +template +void BM_compiled_binaryop(benchmark::State& state, cudf::binary_operator binop) +{ + auto const column_size{static_cast(state.range(0))}; + + auto const source_table = create_random_table( + {cudf::type_to_id(), cudf::type_to_id()}, row_count{column_size}); + + auto lhs = cudf::column_view(source_table->get_column(0)); + auto rhs = cudf::column_view(source_table->get_column(1)); + + auto output_dtype = cudf::data_type(cudf::type_to_id()); + + // Call once for hot cache. + cudf::binary_operation(lhs, rhs, binop, output_dtype); + + for (auto _ : state) { + cuda_event_timer timer(state, true); + cudf::binary_operation(lhs, rhs, binop, output_dtype); + } + + // use number of bytes read and written to global memory + state.SetBytesProcessed(static_cast(state.iterations()) * column_size * + (sizeof(TypeLhs) + sizeof(TypeRhs) + sizeof(TypeOut))); +} + +// TODO tparam boolean for null. 
+#define BM_BINARYOP_BENCHMARK_DEFINE(name, lhs, rhs, bop, tout) \ + BENCHMARK_DEFINE_F(COMPILED_BINARYOP, name) \ + (::benchmark::State & st) \ + { \ + BM_compiled_binaryop(st, cudf::binary_operator::bop); \ + } \ + BENCHMARK_REGISTER_F(COMPILED_BINARYOP, name) \ + ->Unit(benchmark::kMicrosecond) \ + ->UseManualTime() \ + ->Arg(10000) /* 10k */ \ + ->Arg(100000) /* 100k */ \ + ->Arg(1000000) /* 1M */ \ + ->Arg(10000000) /* 10M */ \ + ->Arg(100000000); /* 100M */ + +#define build_name(a, b, c, d) a##_##b##_##c##_##d + +#define BINARYOP_BENCHMARK_DEFINE(lhs, rhs, bop, tout) \ + BM_BINARYOP_BENCHMARK_DEFINE(build_name(bop, lhs, rhs, tout), lhs, rhs, bop, tout) + +using cudf::duration_D; +using cudf::duration_ms; +using cudf::duration_ns; +using cudf::duration_s; +using cudf::timestamp_D; +using cudf::timestamp_ms; +using cudf::timestamp_s; +using numeric::decimal32; + +// clang-format off +BINARYOP_BENCHMARK_DEFINE(float, int64_t, ADD, int32_t); +BINARYOP_BENCHMARK_DEFINE(float, float, ADD, float); +BINARYOP_BENCHMARK_DEFINE(timestamp_s, duration_s, ADD, timestamp_s); +BINARYOP_BENCHMARK_DEFINE(duration_s, duration_D, SUB, duration_ms); +BINARYOP_BENCHMARK_DEFINE(int64_t, int64_t, SUB, int64_t); +BINARYOP_BENCHMARK_DEFINE(float, float, MUL, int64_t); +BINARYOP_BENCHMARK_DEFINE(duration_s, int64_t, MUL, duration_s); +BINARYOP_BENCHMARK_DEFINE(int64_t, int64_t, DIV, int64_t); +BINARYOP_BENCHMARK_DEFINE(duration_ms, int32_t, DIV, duration_ms); +BINARYOP_BENCHMARK_DEFINE(int64_t, int64_t, TRUE_DIV, int64_t); +BINARYOP_BENCHMARK_DEFINE(int64_t, int64_t, FLOOR_DIV, int64_t); +BINARYOP_BENCHMARK_DEFINE(double, double, MOD, double); +BINARYOP_BENCHMARK_DEFINE(duration_ms, int64_t, MOD, duration_ms); +BINARYOP_BENCHMARK_DEFINE(int32_t, int64_t, PMOD, double); +BINARYOP_BENCHMARK_DEFINE(int32_t, uint8_t, PYMOD, int64_t); +BINARYOP_BENCHMARK_DEFINE(int64_t, int64_t, POW, double); +BINARYOP_BENCHMARK_DEFINE(float, double, LOG_BASE, double); +BINARYOP_BENCHMARK_DEFINE(float, 
double, ATAN2, double); +BINARYOP_BENCHMARK_DEFINE(int, int, SHIFT_LEFT, int); +BINARYOP_BENCHMARK_DEFINE(int16_t, int64_t, SHIFT_RIGHT, int); +BINARYOP_BENCHMARK_DEFINE(int64_t, int32_t, SHIFT_RIGHT_UNSIGNED, int64_t); +BINARYOP_BENCHMARK_DEFINE(int64_t, int32_t, BITWISE_AND, int16_t); +BINARYOP_BENCHMARK_DEFINE(int16_t, int32_t, BITWISE_OR, int64_t); +BINARYOP_BENCHMARK_DEFINE(int16_t, int64_t, BITWISE_XOR, int32_t); +BINARYOP_BENCHMARK_DEFINE(double, int8_t, LOGICAL_AND, bool); +BINARYOP_BENCHMARK_DEFINE(int16_t, int64_t, LOGICAL_OR, bool); +BINARYOP_BENCHMARK_DEFINE(int32_t, int64_t, EQUAL, bool); +BINARYOP_BENCHMARK_DEFINE(duration_ms, duration_ns, EQUAL, bool); +BINARYOP_BENCHMARK_DEFINE(decimal32, decimal32, NOT_EQUAL, bool); +BINARYOP_BENCHMARK_DEFINE(timestamp_s, timestamp_s, LESS, bool); +BINARYOP_BENCHMARK_DEFINE(timestamp_ms, timestamp_s, GREATER, bool); +BINARYOP_BENCHMARK_DEFINE(duration_ms, duration_ns, NULL_EQUALS, bool); +BINARYOP_BENCHMARK_DEFINE(decimal32, decimal32, NULL_MAX, decimal32); +BINARYOP_BENCHMARK_DEFINE(timestamp_D, timestamp_s, NULL_MIN, timestamp_s); diff --git a/cpp/benchmarks/column/concatenate.cpp b/cpp/benchmarks/column/concatenate.cpp new file mode 100644 index 0000000..51106c7 --- /dev/null +++ b/cpp/benchmarks/column/concatenate.cpp @@ -0,0 +1,169 @@ +/* + * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include +#include +#include +#include + +#include + +#include +#include +#include + +#include +#include +#include + +#include +#include + +class Concatenate : public cudf::benchmark {}; + +template +static void BM_concatenate(benchmark::State& state) +{ + cudf::size_type const num_rows = state.range(0); + cudf::size_type const num_cols = state.range(1); + + auto input = create_sequence_table(cycle_dtypes({cudf::type_to_id()}, num_cols), + row_count{num_rows}, + Nullable ? std::optional{2.0 / 3.0} : std::nullopt); + auto input_columns = input->view(); + std::vector column_views(input_columns.begin(), input_columns.end()); + + CUDF_CHECK_CUDA(0); + + for (auto _ : state) { + cuda_event_timer raii(state, true, cudf::get_default_stream()); + auto result = cudf::concatenate(column_views); + } + + state.SetBytesProcessed(state.iterations() * num_cols * num_rows * sizeof(T)); +} + +#define CONCAT_BENCHMARK_DEFINE(type, nullable) \ + BENCHMARK_DEFINE_F(Concatenate, BM_concatenate##_##nullable_##nullable) \ + (::benchmark::State & st) { BM_concatenate(st); } \ + BENCHMARK_REGISTER_F(Concatenate, BM_concatenate##_##nullable_##nullable) \ + ->RangeMultiplier(8) \ + ->Ranges({{1 << 6, 1 << 18}, {2, 1024}}) \ + ->Unit(benchmark::kMillisecond) \ + ->UseManualTime(); + +CONCAT_BENCHMARK_DEFINE(int64_t, false) +CONCAT_BENCHMARK_DEFINE(int64_t, true) + +template +static void BM_concatenate_tables(benchmark::State& state) +{ + cudf::size_type const num_rows = state.range(0); + cudf::size_type const num_cols = state.range(1); + cudf::size_type const num_tables = state.range(2); + + std::vector> tables(num_tables); + std::generate_n(tables.begin(), num_tables, [&]() { + return create_sequence_table(cycle_dtypes({cudf::type_to_id()}, num_cols), + row_count{num_rows}, + Nullable ? 
std::optional{2.0 / 3.0} : std::nullopt); + }); + + // Generate table views + std::vector table_views(num_tables); + std::transform(tables.begin(), tables.end(), table_views.begin(), [](auto& table) mutable { + return table->view(); + }); + + CUDF_CHECK_CUDA(0); + + for (auto _ : state) { + cuda_event_timer raii(state, true, cudf::get_default_stream()); + auto result = cudf::concatenate(table_views); + } + + state.SetBytesProcessed(state.iterations() * num_cols * num_rows * num_tables * sizeof(T)); +} + +#define CONCAT_TABLES_BENCHMARK_DEFINE(type, nullable) \ + BENCHMARK_DEFINE_F(Concatenate, BM_concatenate_tables##_##nullable_##nullable) \ + (::benchmark::State & st) { BM_concatenate_tables(st); } \ + BENCHMARK_REGISTER_F(Concatenate, BM_concatenate_tables##_##nullable_##nullable) \ + ->RangeMultiplier(8) \ + ->Ranges({{1 << 8, 1 << 12}, {2, 32}, {2, 128}}) \ + ->Unit(benchmark::kMillisecond) \ + ->UseManualTime(); + +CONCAT_TABLES_BENCHMARK_DEFINE(int64_t, false) +CONCAT_TABLES_BENCHMARK_DEFINE(int64_t, true) + +class ConcatenateStrings : public cudf::benchmark {}; + +template +static void BM_concatenate_strings(benchmark::State& state) +{ + using column_wrapper = cudf::test::strings_column_wrapper; + + auto const num_rows = state.range(0); + auto const num_chars = state.range(1); + auto const num_cols = state.range(2); + + std::string str(num_chars, 'a'); + + // Create owning columns + std::vector columns; + columns.reserve(num_cols); + std::generate_n(std::back_inserter(columns), num_cols, [num_rows, c_str = str.c_str()]() { + auto iter = thrust::make_constant_iterator(c_str); + if (Nullable) { + auto count_it = thrust::make_counting_iterator(0); + auto valid_iter = + thrust::make_transform_iterator(count_it, [](auto i) { return i % 3 == 0; }); + return column_wrapper(iter, iter + num_rows, valid_iter); + } else { + return column_wrapper(iter, iter + num_rows); + } + }); + + // Generate column views + std::vector column_views; + 
column_views.reserve(columns.size()); + std::transform( + columns.begin(), columns.end(), std::back_inserter(column_views), [](auto const& col) { + return static_cast(col); + }); + + CUDF_CHECK_CUDA(0); + + for (auto _ : state) { + cuda_event_timer raii(state, true, cudf::get_default_stream()); + auto result = cudf::concatenate(column_views); + } + + state.SetBytesProcessed(state.iterations() * num_cols * num_rows * + (sizeof(int32_t) + num_chars)); // offset + chars +} + +#define CONCAT_STRINGS_BENCHMARK_DEFINE(nullable) \ + BENCHMARK_DEFINE_F(Concatenate, BM_concatenate_strings##_##nullable_##nullable) \ + (::benchmark::State & st) { BM_concatenate_strings(st); } \ + BENCHMARK_REGISTER_F(Concatenate, BM_concatenate_strings##_##nullable_##nullable) \ + ->RangeMultiplier(8) \ + ->Ranges({{1 << 8, 1 << 14}, {8, 128}, {2, 256}}) \ + ->Unit(benchmark::kMillisecond) \ + ->UseManualTime(); + +CONCAT_STRINGS_BENCHMARK_DEFINE(false) +CONCAT_STRINGS_BENCHMARK_DEFINE(true) diff --git a/cpp/benchmarks/common/generate_input.cu b/cpp/benchmarks/common/generate_input.cu new file mode 100644 index 0000000..aef3d92 --- /dev/null +++ b/cpp/benchmarks/common/generate_input.cu @@ -0,0 +1,946 @@ +/* + * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "generate_input.hpp" +#include "random_distribution_factory.cuh" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +/** + * @brief Mersenne Twister pseudo-random engine. + */ +auto deterministic_engine(unsigned seed) { return thrust::minstd_rand{seed}; } + +/** + * Computes the mean value for a distribution of given type and value bounds. + */ +template +T get_distribution_mean(distribution_params const& dist) +{ + switch (dist.id) { + case distribution_id::NORMAL: + case distribution_id::UNIFORM: return (dist.lower_bound / 2.) + (dist.upper_bound / 2.); + case distribution_id::GEOMETRIC: { + auto const range_size = dist.lower_bound < dist.upper_bound + ? dist.upper_bound - dist.lower_bound + : dist.lower_bound - dist.upper_bound; + auto const p = geometric_dist_p(range_size); + if (dist.lower_bound < dist.upper_bound) + return dist.lower_bound + (1. / p); + else + return dist.lower_bound - (1. / p); + } + default: CUDF_FAIL("Unsupported distribution type."); + } +} + +/** + * @brief Computes the average element size in a column, given the data profile. + * + * Random distribution parameters like average string length and maximum list nesting level affect + * the element size of non-fixed-width columns. For lists and structs, `avg_element_size` is called + * recursively to determine the size of nested columns. 
+ */ +size_t avg_element_size(data_profile const& profile, cudf::data_type dtype); + +// Utilities to determine the mean size of an element, given the data profile +template ())> +size_t non_fixed_width_size(data_profile const& profile) +{ + CUDF_FAIL("Should not be called, use `size_of` for this type instead"); +} + +template ())> +size_t non_fixed_width_size(data_profile const& profile) +{ + CUDF_FAIL("not implemented!"); +} + +template <> +size_t non_fixed_width_size(data_profile const& profile) +{ + auto const dist = profile.get_distribution_params().length_params; + return get_distribution_mean(dist); +} + +double geometric_sum(size_t n, double p) +{ + if (p == 1) { return n; } + return (1 - std::pow(p, n)) / (1 - p); +} + +template <> +size_t non_fixed_width_size(data_profile const& profile) +{ + auto const dist_params = profile.get_distribution_params(); + auto const single_level_mean = get_distribution_mean(dist_params.length_params); + + auto const element_size = avg_element_size(profile, cudf::data_type{dist_params.element_type}); + auto const element_count = std::pow(single_level_mean, dist_params.max_depth); + + // Each nesting level includes offsets, this is the sum of all levels + // Also include an additional offset per level for the size of the last element + auto const total_offset_count = + geometric_sum(dist_params.max_depth, single_level_mean) + dist_params.max_depth; + + return sizeof(cudf::size_type) * total_offset_count + element_size * element_count; +} + +template <> +size_t non_fixed_width_size(data_profile const& profile) +{ + auto const dist_params = profile.get_distribution_params(); + return std::accumulate(dist_params.leaf_types.cbegin(), + dist_params.leaf_types.cend(), + 0ul, + [&](auto& sum, auto type_id) { + return sum + avg_element_size(profile, cudf::data_type{type_id}); + }); +} + +struct non_fixed_width_size_fn { + template + size_t operator()(data_profile const& profile) + { + return non_fixed_width_size(profile); + } +}; + 
+size_t avg_element_size(data_profile const& profile, cudf::data_type dtype) +{ + if (cudf::is_fixed_width(dtype)) { return cudf::size_of(dtype); } + return cudf::type_dispatcher(dtype, non_fixed_width_size_fn{}, profile); +} + +/** + * @brief bool generator with given probability [0.0 - 1.0] of returning true. + */ +struct bool_generator { + thrust::minstd_rand engine; + thrust::uniform_real_distribution dist; + double probability_true; + bool_generator(thrust::minstd_rand engine, double probability_true) + : engine(engine), dist{0, 1}, probability_true{probability_true} + { + } + bool_generator(unsigned seed, double probability_true) + : engine(seed), dist{0, 1}, probability_true{probability_true} + { + } + + __device__ bool operator()(size_t n) + { + engine.discard(n); + return dist(engine) < probability_true; + } +}; + +/** + * @brief Functor that computes a random column element with the given data profile. + * + * The implementation is SFINAEd for different type groups. Currently only used for fixed-width + * types. 
+ */ +template +struct random_value_fn; + +/** + * @brief Creates an random timestamp/duration value + */ +template +struct random_value_fn()>> { + distribution_fn seconds_gen; + distribution_fn nanoseconds_gen; + + random_value_fn(distribution_params params) + { + using cuda::std::chrono::duration_cast; + + std::pair const range_s = { + duration_cast(typename T::duration{params.lower_bound}), + duration_cast(typename T::duration{params.upper_bound})}; + if (range_s.first != range_s.second) { + seconds_gen = + make_distribution(params.id, range_s.first.count(), range_s.second.count()); + + nanoseconds_gen = make_distribution(distribution_id::UNIFORM, 0l, 1000000000l); + } else { + // Don't need a random seconds generator for sub-second intervals + seconds_gen = [range_s](thrust::minstd_rand&, size_t size) { + rmm::device_uvector result(size, cudf::get_default_stream()); + thrust::fill(thrust::device, result.begin(), result.end(), range_s.second.count()); + return result; + }; + + std::pair const range_ns = { + duration_cast(typename T::duration{params.lower_bound}), + duration_cast(typename T::duration{params.upper_bound})}; + nanoseconds_gen = make_distribution(distribution_id::UNIFORM, + std::min(range_ns.first.count(), 0l), + std::max(range_ns.second.count(), 0l)); + } + } + + rmm::device_uvector operator()(thrust::minstd_rand& engine, unsigned size) + { + auto const sec = seconds_gen(engine, size); + auto const ns = nanoseconds_gen(engine, size); + rmm::device_uvector result(size, cudf::get_default_stream()); + thrust::transform( + thrust::device, + sec.begin(), + sec.end(), + ns.begin(), + result.begin(), + [] __device__(int64_t sec_value, int64_t nanoseconds_value) { + auto const timestamp_ns = + cudf::duration_s{sec_value} + cudf::duration_ns{nanoseconds_value}; + // Return value in the type's precision + return T(cuda::std::chrono::duration_cast(timestamp_ns)); + }); + return result; + } +}; + +/** + * @brief Creates an random fixed_point value. 
+ */ +template +struct random_value_fn()>> { + using DeviceType = cudf::device_storage_type_t; + DeviceType const lower_bound; + DeviceType const upper_bound; + distribution_fn dist; + std::optional scale; + + random_value_fn(distribution_params const& desc) + : lower_bound{desc.lower_bound}, + upper_bound{desc.upper_bound}, + dist{make_distribution(desc.id, desc.lower_bound, desc.upper_bound)} + { + } + + [[nodiscard]] numeric::scale_type get_scale(thrust::minstd_rand& engine) + { + if (not scale.has_value()) { + constexpr int max_scale = std::numeric_limits::digits10; + std::uniform_int_distribution scale_dist{-max_scale, max_scale}; + std::mt19937 engine_scale(engine()); + scale = numeric::scale_type{scale_dist(engine_scale)}; + } + return scale.value_or(numeric::scale_type{0}); + } + + rmm::device_uvector operator()(thrust::minstd_rand& engine, unsigned size) + { + return dist(engine, size); + } +}; + +/** + * @brief Creates an random numeric value with the given distribution. + */ +template +struct random_value_fn && cudf::is_numeric()>> { + T const lower_bound; + T const upper_bound; + distribution_fn dist; + + random_value_fn(distribution_params const& desc) + : lower_bound{desc.lower_bound}, + upper_bound{desc.upper_bound}, + dist{make_distribution(desc.id, desc.lower_bound, desc.upper_bound)} + { + } + + auto operator()(thrust::minstd_rand& engine, unsigned size) { return dist(engine, size); } +}; + +/** + * @brief Creates an boolean value with given probability of returning `true`. 
+ */ +template +struct random_value_fn>> { + // Bernoulli distribution + distribution_fn dist; + + random_value_fn(distribution_params const& desc) + : dist{[valid_prob = desc.probability_true](thrust::minstd_rand& engine, + size_t size) -> rmm::device_uvector { + rmm::device_uvector result(size, cudf::get_default_stream()); + thrust::tabulate( + thrust::device, result.begin(), result.end(), bool_generator(engine, valid_prob)); + return result; + }} + { + } + auto operator()(thrust::minstd_rand& engine, unsigned size) { return dist(engine, size); } +}; + +auto create_run_length_dist(cudf::size_type avg_run_len) +{ + // Distribution with low probability of generating 0-1 even with a low `avg_run_len` value + static constexpr float alpha = 4.f; + return std::gamma_distribution{alpha, avg_run_len / alpha}; +} + +/** + * @brief Generate indices within range [0 , cardinality) repeating with average run length + * `avg_run_len` + * + * @param avg_run_len Average run length of the generated indices + * @param cardinality Number of unique values in the output vector + * @param num_rows Number of indices to generate + * @param engine Random engine + * @return Generated indices of type `cudf::size_type` + */ +rmm::device_uvector sample_indices_with_run_length(cudf::size_type avg_run_len, + cudf::size_type cardinality, + cudf::size_type num_rows, + thrust::minstd_rand& engine) +{ + auto sample_dist = random_value_fn{ + distribution_params{distribution_id::UNIFORM, 0, cardinality - 1}}; + if (avg_run_len > 1) { + auto avglen_dist = + random_value_fn{distribution_params{distribution_id::UNIFORM, 1, 2 * avg_run_len}}; + auto const approx_run_len = num_rows / avg_run_len + 1; + auto run_lens = avglen_dist(engine, approx_run_len); + thrust::inclusive_scan( + thrust::device, run_lens.begin(), run_lens.end(), run_lens.begin(), std::plus{}); + auto const samples_indices = sample_dist(engine, approx_run_len + 1); + // This is gather. 
+ auto avg_repeated_sample_indices_iterator = thrust::make_transform_iterator( + thrust::make_counting_iterator(0), + [rb = run_lens.begin(), + re = run_lens.end(), + samples_indices = samples_indices.begin()] __device__(cudf::size_type i) { + auto sample_idx = thrust::upper_bound(thrust::seq, rb, re, i) - rb; + return samples_indices[sample_idx]; + }); + rmm::device_uvector repeated_sample_indices(num_rows, + cudf::get_default_stream()); + thrust::copy(thrust::device, + avg_repeated_sample_indices_iterator, + avg_repeated_sample_indices_iterator + num_rows, + repeated_sample_indices.begin()); + return repeated_sample_indices; + } else { + // generate n samples. + return sample_dist(engine, num_rows); + } +} + +/** + * @brief Creates a column with random content of type @ref T. + * + * @param profile Parameters for the random generator + * @param engine Pseudo-random engine + * @param num_rows Size of the output column + * + * @tparam T Data type of the output column + * @return Column filled with random data + */ +template +std::unique_ptr create_random_column(data_profile const& profile, + thrust::minstd_rand& engine, + cudf::size_type num_rows) +{ + // Bernoulli distribution + auto valid_dist = random_value_fn( + distribution_params{1. 
- profile.get_null_probability().value_or(0)}); + auto value_dist = random_value_fn{profile.get_distribution_params()}; + + using DeviceType = cudf::device_storage_type_t; + cudf::data_type const dtype = [&]() { + if constexpr (cudf::is_fixed_point()) + return cudf::data_type{cudf::type_to_id(), value_dist.get_scale(engine)}; + else + return cudf::data_type{cudf::type_to_id()}; + }(); + + // Distribution for picking elements from the array of samples + auto const avg_run_len = profile.get_avg_run_length(); + rmm::device_uvector data(0, cudf::get_default_stream()); + rmm::device_uvector null_mask(0, cudf::get_default_stream()); + + if (profile.get_cardinality() == 0 and avg_run_len == 1) { + data = value_dist(engine, num_rows); + null_mask = valid_dist(engine, num_rows); + } else { + auto const cardinality = [profile_cardinality = profile.get_cardinality(), num_rows] { + return (profile_cardinality == 0 or profile_cardinality > num_rows) ? num_rows + : profile_cardinality; + }(); + rmm::device_uvector samples_null_mask = valid_dist(engine, cardinality); + rmm::device_uvector samples = value_dist(engine, cardinality); + + // generate n samples and gather. + auto const sample_indices = + sample_indices_with_run_length(avg_run_len, cardinality, num_rows, engine); + data = rmm::device_uvector(num_rows, cudf::get_default_stream()); + null_mask = rmm::device_uvector(num_rows, cudf::get_default_stream()); + thrust::gather( + thrust::device, sample_indices.begin(), sample_indices.end(), samples.begin(), data.begin()); + thrust::gather(thrust::device, + sample_indices.begin(), + sample_indices.end(), + samples_null_mask.begin(), + null_mask.begin()); + } + + auto [result_bitmask, null_count] = + cudf::detail::valid_if(null_mask.begin(), + null_mask.end(), + thrust::identity{}, + cudf::get_default_stream(), + rmm::mr::get_current_device_resource()); + + return std::make_unique( + dtype, + num_rows, + data.release(), + profile.get_null_probability().has_value() ? 
std::move(result_bitmask) : rmm::device_buffer{}, + profile.get_null_probability().has_value() ? null_count : 0); +} + +struct valid_or_zero { + template + __device__ T operator()(thrust::tuple len_valid) const + { + return thrust::get<1>(len_valid) ? thrust::get<0>(len_valid) : T{0}; + } +}; + +struct string_generator { + char* chars; + thrust::minstd_rand engine; + thrust::uniform_int_distribution char_dist; + string_generator(char* c, thrust::minstd_rand& engine) + : chars(c), engine(engine), char_dist(32, 137) + // ~90% ASCII, ~10% UTF-8. + // ~80% not-space, ~20% space. + // range 32-127 is ASCII; 127-136 will be multi-byte UTF-8 + { + } + __device__ void operator()(thrust::tuple str_begin_end) + { + auto begin = thrust::get<0>(str_begin_end); + auto end = thrust::get<1>(str_begin_end); + engine.discard(begin); + for (auto i = begin; i < end; ++i) { + auto ch = char_dist(engine); + if (i == end - 1 && ch >= '\x7F') ch = ' '; // last element ASCII only. + if (ch >= '\x7F') // x7F is at the top edge of ASCII + chars[i++] = '\xC4'; // these characters are assigned two bytes + chars[i] = static_cast(ch + (ch >= '\x7F')); + } + } +}; + +/** + * @brief Create a UTF-8 string column with the average length. + * + */ +std::unique_ptr create_random_utf8_string_column(data_profile const& profile, + thrust::minstd_rand& engine, + cudf::size_type num_rows) +{ + auto len_dist = + random_value_fn{profile.get_distribution_params().length_params}; + auto valid_dist = random_value_fn( + distribution_params{1. 
- profile.get_null_probability().value_or(0)}); + auto lengths = len_dist(engine, num_rows + 1); + auto null_mask = valid_dist(engine, num_rows + 1); + thrust::transform_if( + thrust::device, + lengths.begin(), + lengths.end(), + null_mask.begin(), + lengths.begin(), + [] __device__(auto) { return 0; }, + thrust::logical_not{}); + auto valid_lengths = thrust::make_transform_iterator( + thrust::make_zip_iterator(thrust::make_tuple(lengths.begin(), null_mask.begin())), + valid_or_zero{}); + rmm::device_uvector offsets(num_rows + 1, cudf::get_default_stream()); + thrust::exclusive_scan( + thrust::device, valid_lengths, valid_lengths + lengths.size(), offsets.begin()); + // offsets are ready. + auto chars_length = *thrust::device_pointer_cast(offsets.end() - 1); + rmm::device_uvector chars(chars_length, cudf::get_default_stream()); + thrust::for_each_n(thrust::device, + thrust::make_zip_iterator(offsets.begin(), offsets.begin() + 1), + num_rows, + string_generator{chars.data(), engine}); + auto [result_bitmask, null_count] = + cudf::detail::valid_if(null_mask.begin(), + null_mask.end() - 1, + thrust::identity{}, + cudf::get_default_stream(), + rmm::mr::get_current_device_resource()); + return cudf::make_strings_column( + num_rows, + std::move(offsets), + std::move(chars), + profile.get_null_probability().has_value() ? std::move(result_bitmask) : rmm::device_buffer{}, + null_count); +} + +/** + * @brief Creates a string column with random content. 
+ * + * @param profile Parameters for the random generator + * @param engine Pseudo-random engine + * @param num_rows Size of the output column + * + * @return Column filled with random strings + */ +template <> +std::unique_ptr create_random_column(data_profile const& profile, + thrust::minstd_rand& engine, + cudf::size_type num_rows) +{ + auto const cardinality = std::min(profile.get_cardinality(), num_rows); + auto const avg_run_len = profile.get_avg_run_length(); + + auto sample_strings = + create_random_utf8_string_column(profile, engine, cardinality == 0 ? num_rows : cardinality); + if (cardinality == 0) { return sample_strings; } + auto sample_indices = sample_indices_with_run_length(avg_run_len, cardinality, num_rows, engine); + auto str_table = cudf::detail::gather(cudf::table_view{{sample_strings->view()}}, + sample_indices, + cudf::out_of_bounds_policy::DONT_CHECK, + cudf::detail::negative_index_policy::NOT_ALLOWED, + cudf::get_default_stream(), + rmm::mr::get_current_device_resource()); + return std::move(str_table->release()[0]); +} + +template <> +std::unique_ptr create_random_column(data_profile const& profile, + thrust::minstd_rand& engine, + cudf::size_type num_rows) +{ + CUDF_FAIL("not implemented yet"); +} + +/** + * @brief Functor to dispatch create_random_column calls. + */ +struct create_rand_col_fn { + public: + template + std::unique_ptr operator()(data_profile const& profile, + thrust::minstd_rand& engine, + cudf::size_type num_rows) + { + return create_random_column(profile, engine, num_rows); + } +}; + +/** + * @brief Calculates the number of direct parents needed to generate a struct column hierarchy with + * lowest maximum number of children in any nested column. + * + * Used to generate an "evenly distributed" struct column hierarchy with the given number of leaf + * columns and nesting levels. 
The column tree is considered evenly distributed if all columns have + * nearly the same number of child columns (difference not larger than one). + */ +int num_direct_parents(int num_lvls, int num_leaf_columns) +{ + // Estimated average number of children in the hierarchy; + auto const num_children_avg = std::pow(num_leaf_columns, 1. / num_lvls); + // Minimum number of children columns for any column in the hierarchy + int const num_children_min = std::floor(num_children_avg); + // Maximum number of children columns for any column in the hierarchy + int const num_children_max = num_children_min + 1; + + // Minimum number of columns needed so that their number of children does not exceed the maximum + int const min_for_current_nesting = std::ceil((double)num_leaf_columns / num_children_max); + // Minimum number of columns needed so that columns at the higher levels have at least the minimum + // number of children + int const min_for_upper_nesting = std::pow(num_children_min, num_lvls - 1); + // Both conditions need to be satisfied + return std::max(min_for_current_nesting, min_for_upper_nesting); +} + +template <> +std::unique_ptr create_random_column(data_profile const& profile, + thrust::minstd_rand& engine, + cudf::size_type num_rows) +{ + auto const dist_params = profile.get_distribution_params(); + + // Generate leaf columns + std::vector> children; + children.reserve(dist_params.leaf_types.size()); + std::transform(dist_params.leaf_types.cbegin(), + dist_params.leaf_types.cend(), + std::back_inserter(children), + [&](auto& type_id) { + return cudf::type_dispatcher( + cudf::data_type(type_id), create_rand_col_fn{}, profile, engine, num_rows); + }); + + auto valid_dist = random_value_fn( + distribution_params{1. 
- profile.get_null_probability().value_or(0)}); + + // Generate the column bottom-up + for (int lvl = dist_params.max_depth; lvl > 0; --lvl) { + // Generating the next level + std::vector> parents; + parents.resize(num_direct_parents(lvl, children.size())); + + auto current_child = children.begin(); + for (auto current_parent = parents.begin(); current_parent != parents.end(); ++current_parent) { + auto [null_mask, null_count] = [&]() { + if (profile.get_null_probability().has_value()) { + auto valids = valid_dist(engine, num_rows); + return cudf::detail::valid_if(valids.begin(), + valids.end(), + thrust::identity{}, + cudf::get_default_stream(), + rmm::mr::get_current_device_resource()); + } + return std::pair{}; + }(); + + // Adopt remaining children as evenly as possible + auto const num_to_adopt = cudf::util::div_rounding_up_unsafe( + std::distance(current_child, children.end()), std::distance(current_parent, parents.end())); + CUDF_EXPECTS(num_to_adopt > 0, "No children columns left to adopt"); + + std::vector> children_to_adopt; + children_to_adopt.insert(children_to_adopt.end(), + std::make_move_iterator(current_child), + std::make_move_iterator(current_child + num_to_adopt)); + current_child += children_to_adopt.size(); + + *current_parent = cudf::make_structs_column( + num_rows, std::move(children_to_adopt), null_count, std::move(null_mask)); + } + + if (lvl == 1) { + CUDF_EXPECTS(parents.size() == 1, "There should be one top-level column"); + return std::move(parents.front()); + } + children = std::move(parents); + } + CUDF_FAIL("Reached unreachable code in struct column creation"); +} + +template +struct clamp_down : public thrust::unary_function { + T max; + clamp_down(T max) : max(max) {} + __host__ __device__ T operator()(T x) const { return min(x, max); } +}; +/** + * @brief Creates a list column with random content. + * + * The data profile determines the list length distribution, number of nested level, and the data + * type of the bottom level. 
+ * + * @param profile Parameters for the random generator + * @param engine Pseudo-random engine + * @param num_rows Size of the output column + * + * @return Column filled with random lists + */ +template <> +std::unique_ptr create_random_column(data_profile const& profile, + thrust::minstd_rand& engine, + cudf::size_type num_rows) +{ + auto const dist_params = profile.get_distribution_params(); + auto const single_level_mean = get_distribution_mean(dist_params.length_params); + auto const num_elements = num_rows * pow(single_level_mean, dist_params.max_depth); + + auto leaf_column = cudf::type_dispatcher( + cudf::data_type(dist_params.element_type), create_rand_col_fn{}, profile, engine, num_elements); + auto len_dist = + random_value_fn{profile.get_distribution_params().length_params}; + auto valid_dist = random_value_fn( + distribution_params{1. - profile.get_null_probability().value_or(0)}); + + // Generate the list column bottom-up + auto list_column = std::move(leaf_column); + for (int lvl = 0; lvl < dist_params.max_depth; ++lvl) { + // Generating the next level - offsets point into the current list column + auto current_child_column = std::move(list_column); + cudf::size_type const num_rows = current_child_column->size() / single_level_mean; + + auto offsets = len_dist(engine, num_rows + 1); + auto valids = valid_dist(engine, num_rows); + // to ensure these values <= current_child_column->size() + auto output_offsets = thrust::make_transform_output_iterator( + offsets.begin(), clamp_down{current_child_column->size()}); + + thrust::exclusive_scan(thrust::device, offsets.begin(), offsets.end(), output_offsets); + thrust::device_pointer_cast(offsets.end())[-1] = + current_child_column->size(); // Always include all elements + + auto offsets_column = std::make_unique(cudf::data_type{cudf::type_id::INT32}, + num_rows + 1, + offsets.release(), + rmm::device_buffer{}, + 0); + + auto [null_mask, null_count] = cudf::detail::valid_if(valids.begin(), + valids.end(), 
+ thrust::identity{}, + cudf::get_default_stream(), + rmm::mr::get_current_device_resource()); + list_column = cudf::make_lists_column( + num_rows, + std::move(offsets_column), + std::move(current_child_column), + profile.get_null_probability().has_value() ? null_count : 0, + profile.get_null_probability().has_value() ? std::move(null_mask) : rmm::device_buffer{}); + } + return list_column; // return the top-level column +} + +using columns_vector = std::vector>; + +/** + * @brief Creates a vector of columns with random content. + * + * @param profile Parameters for the random generator + * @param dtype_ids vector of data type ids, one for each output column + * @param engine Pseudo-random engine + * @param num_rows Size of the output columns + * + * @return Column filled with random lists + */ +columns_vector create_random_columns(data_profile const& profile, + std::vector dtype_ids, + thrust::minstd_rand engine, + cudf::size_type num_rows) +{ + columns_vector output_columns; + std::transform( + dtype_ids.begin(), dtype_ids.end(), std::back_inserter(output_columns), [&](auto tid) { + engine.discard(num_rows); + return cudf::type_dispatcher( + cudf::data_type(tid), create_rand_col_fn{}, profile, engine, num_rows); + }); + return output_columns; +} + +/** + * @brief Repeats the input data types cyclically order to fill a vector of @ref num_cols + * elements. + */ +std::vector cycle_dtypes(std::vector const& dtype_ids, + cudf::size_type num_cols) +{ + if (dtype_ids.size() == static_cast(num_cols)) { return dtype_ids; } + std::vector out_dtypes; + out_dtypes.reserve(num_cols); + for (cudf::size_type col = 0; col < num_cols; ++col) + out_dtypes.push_back(dtype_ids[col % dtype_ids.size()]); + return out_dtypes; +} + +/** + * @brief Repeat the given two data types with a given ratio of a:b. + * + * The first dtype will have 'first_num' columns and the second will have 'num_cols - first_num' + * columns. 
+ */ +std::vector mix_dtypes(std::pair const& dtype_ids, + cudf::size_type num_cols, + int first_num) +{ + std::vector out_dtypes; + out_dtypes.reserve(num_cols); + for (cudf::size_type col = 0; col < first_num; ++col) + out_dtypes.push_back(dtype_ids.first); + for (cudf::size_type col = first_num; col < num_cols; ++col) + out_dtypes.push_back(dtype_ids.second); + return out_dtypes; +} + +std::unique_ptr create_random_table(std::vector const& dtype_ids, + table_size_bytes table_bytes, + data_profile const& profile, + unsigned seed) +{ + size_t const avg_row_bytes = + std::accumulate(dtype_ids.begin(), dtype_ids.end(), 0ul, [&](size_t sum, auto tid) { + return sum + avg_element_size(profile, cudf::data_type(tid)); + }); + cudf::size_type const num_rows = table_bytes.size / avg_row_bytes; + + return create_random_table(dtype_ids, row_count{num_rows}, profile, seed); +} + +std::unique_ptr create_random_table(std::vector const& dtype_ids, + row_count num_rows, + data_profile const& profile, + unsigned seed) +{ + auto seed_engine = deterministic_engine(seed); + thrust::uniform_int_distribution seed_dist; + + columns_vector output_columns; + std::transform( + dtype_ids.begin(), dtype_ids.end(), std::back_inserter(output_columns), [&](auto tid) mutable { + return create_random_column(tid, num_rows, profile, seed_dist(seed_engine)); + }); + return std::make_unique(std::move(output_columns)); +} + +std::unique_ptr create_random_column(cudf::type_id dtype_id, + row_count num_rows, + data_profile const& profile, + unsigned seed) +{ + auto engine = deterministic_engine(seed); + return cudf::type_dispatcher( + cudf::data_type(dtype_id), create_rand_col_fn{}, profile, engine, num_rows.count); +} + +std::unique_ptr create_sequence_table(std::vector const& dtype_ids, + row_count num_rows, + std::optional null_probability, + unsigned seed) +{ + auto seed_engine = deterministic_engine(seed); + thrust::uniform_int_distribution seed_dist; + + auto columns = 
std::vector>(dtype_ids.size()); + std::transform(dtype_ids.begin(), dtype_ids.end(), columns.begin(), [&](auto dtype) mutable { + auto init = cudf::make_default_constructed_scalar(cudf::data_type{dtype}); + auto col = cudf::sequence(num_rows.count, *init); + auto [mask, count] = + create_random_null_mask(num_rows.count, null_probability, seed_dist(seed_engine)); + col->set_null_mask(std::move(mask), count); + return col; + }); + return std::make_unique(std::move(columns)); +} + +std::pair create_random_null_mask( + cudf::size_type size, std::optional null_probability, unsigned seed) +{ + if (not null_probability.has_value()) { return {rmm::device_buffer{}, 0}; } + CUDF_EXPECTS(*null_probability >= 0.0 and *null_probability <= 1.0, + "Null probability must be within the range [0.0, 1.0]"); + if (*null_probability == 0.0f) { + return {cudf::create_null_mask(size, cudf::mask_state::ALL_VALID), 0}; + } else if (*null_probability == 1.0) { + return {cudf::create_null_mask(size, cudf::mask_state::ALL_NULL), size}; + } else { + return cudf::detail::valid_if(thrust::make_counting_iterator(0), + thrust::make_counting_iterator(size), + bool_generator{seed, 1.0 - *null_probability}, + cudf::get_default_stream(), + rmm::mr::get_current_device_resource()); + } +} + +std::vector get_type_or_group(int32_t id) +{ + // identity transformation when passing a concrete type_id + if (id < static_cast(cudf::type_id::NUM_TYPE_IDS)) + return {static_cast(id)}; + + // if the value is larger that type_id::NUM_TYPE_IDS, it's a group id + type_group_id const group_id = static_cast(id); + + using trait_fn = bool (*)(cudf::data_type); + trait_fn is_integral = [](cudf::data_type type) { + return cudf::is_numeric(type) && !cudf::is_floating_point(type); + }; + trait_fn is_integral_signed = [](cudf::data_type type) { + return cudf::is_numeric(type) && !cudf::is_floating_point(type) && !cudf::is_unsigned(type); + }; + auto fn = [&]() -> trait_fn { + switch (group_id) { + case 
type_group_id::FLOATING_POINT: return cudf::is_floating_point; + case type_group_id::INTEGRAL: return is_integral; + case type_group_id::INTEGRAL_SIGNED: return is_integral_signed; + case type_group_id::NUMERIC: return cudf::is_numeric; + case type_group_id::TIMESTAMP: return cudf::is_timestamp; + case type_group_id::DURATION: return cudf::is_duration; + case type_group_id::FIXED_POINT: return cudf::is_fixed_point; + case type_group_id::COMPOUND: return cudf::is_compound; + case type_group_id::NESTED: return cudf::is_nested; + default: CUDF_FAIL("Invalid data type group"); + } + }(); + std::vector types; + for (int type_int = 0; type_int < static_cast(cudf::type_id::NUM_TYPE_IDS); ++type_int) { + auto const type = static_cast(type_int); + if (type != cudf::type_id::EMPTY && fn(cudf::data_type(type))) types.push_back(type); + } + return types; +} + +std::vector get_type_or_group(std::vector const& ids) +{ + std::vector all_type_ids; + for (auto& id : ids) { + auto const type_ids = get_type_or_group(id); + all_type_ids.insert(std::end(all_type_ids), std::cbegin(type_ids), std::cend(type_ids)); + } + return all_type_ids; +} diff --git a/cpp/benchmarks/common/generate_input.hpp b/cpp/benchmarks/common/generate_input.hpp new file mode 100644 index 0000000..a2efdb8 --- /dev/null +++ b/cpp/benchmarks/common/generate_input.hpp @@ -0,0 +1,694 @@ +/* + * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include +#include + +#include +#include + +/** + * @file generate_input.hpp + * @brief Contains declarations of functions that generate columns filled with random data. + * + * Also includes the data profile descriptor classes. + * + * The create_random_table functions take a data profile, the information about table size and a + * seed to deterministically generate a table with given parameters. + * + * Currently, the data generation is done on the CPU and the data is then copied to the device + * memory. + */ + +/** + * @brief Identifies a group of related column's logical element types. + */ +enum class type_group_id : int32_t { + INTEGRAL = static_cast(cudf::type_id::NUM_TYPE_IDS), + INTEGRAL_SIGNED, + FLOATING_POINT, + NUMERIC, + TIMESTAMP, + DURATION, + FIXED_POINT, + COMPOUND, + NESTED, +}; + +/** + * @brief Identifies a probability distribution type. + */ +enum class distribution_id : int8_t { + UNIFORM, ///< Uniform sampling between the given bounds. Provides the best coverage of the + ///< overall value range. Real data rarely has this distribution. + NORMAL, ///< Gaussian sampling - most samples are close to the middle of the range. Good for + ///< simulating real-world numeric data. + GEOMETRIC, ///< Geometric sampling - highest chance to sample close to the lower bound. Good for + ///< simulating real data with asymmetric distribution (unsigned values, timestamps). +}; + +// Default distribution types for each type +namespace { +template ()>* = nullptr> +distribution_id default_distribution_id() +{ + return distribution_id::GEOMETRIC; +} + +template && cudf::is_numeric()>* = nullptr> +distribution_id default_distribution_id() +{ + return distribution_id::NORMAL; +} + +template && std::is_unsigned_v && + cudf::is_numeric()>* = nullptr> +distribution_id default_distribution_id() +{ + return distribution_id::GEOMETRIC; +} + +/** + * @brief Default range for the timestamp types: 1970 - 2020. 
+ * + * The 2020 timestamp is used as a lower bound to bias the geometric distribution to recent + * timestamps. + */ +template ()>* = nullptr> +std::pair default_range() +{ + using cuda::std::chrono::duration_cast; + auto const year = duration_cast(cudf::duration_D{365l}); + return {50 * year.count(), 0}; +} + +/** + * @brief Default range for the duration types. + * + * If a geometric distribution is used, it will bias towards short duration values. + */ +template ()>* = nullptr> +std::pair default_range() +{ + using cuda::std::chrono::duration_cast; + auto const year = duration_cast(cudf::duration_D{365l}); + return {0, 2 * year.count()}; +} + +template ()>* = nullptr> +std::pair default_range() +{ + // Limits need to be such that `upper - lower` does not overflow + return {std::numeric_limits::lowest() / 2, std::numeric_limits::max() / 2}; +} +} // namespace + +/** + * @brief Enables partial specializations with SFINAE. + */ +template +struct distribution_params; + +/** + * @brief Numeric values are parameterized with a distribution type and bounds of the same type. + */ +template +struct distribution_params && cudf::is_numeric()>> { + distribution_id id; + T lower_bound; + T upper_bound; +}; + +/** + * @brief Booleans are parameterized with the probability of getting `true` value. + */ +template +struct distribution_params>> { + double probability_true; +}; + +/** + * @brief Timestamps and durations are parameterized with a distribution type and int64_t bounds. + */ +template +struct distribution_params()>> { + distribution_id id; + int64_t lower_bound; + int64_t upper_bound; +}; + +/** + * @brief Strings are parameterized by the distribution of their length, as an integral value. + */ +template +struct distribution_params>> { + distribution_params length_params; +}; + +/** + * @brief Lists are parameterized by the distribution of their length, maximal nesting level, and + * the element type. 
+ */ +template +struct distribution_params>> { + cudf::type_id element_type; + distribution_params length_params; + cudf::size_type max_depth; +}; + +/** + * @brief Structs are parameterized by the maximal nesting level, and the leaf column types. + */ +template +struct distribution_params>> { + std::vector leaf_types; + cudf::size_type max_depth; +}; + +// Present for compilation only. To be implemented once reader/writers support the fixed width type. +template +struct distribution_params()>> {}; + +/** + * @brief Returns a vector of types, corresponding to the input type or a type group. + * + * If the input is a `cudf::type_id` enumerator, function simply returns a vector containing this + * type. If the input value corresponds to a `type_group_id` enumerator, function returns a vector + * containing all types in the input group. + * + * @param id Integer equal to either a `cudf::type_id` enumerator or a `type_group_id` enumerator. + */ +std::vector get_type_or_group(int32_t id); + +/** + * @brief Returns a vector of types, corresponding to the input types or type groups. + * + * If an element of the input vector is a `cudf::type_id` enumerator, function return value simply + * includes this type. If an element of the input vector is a `type_group_id` enumerator, function + * return value includes all types corresponding to the group enumerator. + * + * @param ids Vector of integers equal to either a `cudf::type_id` enumerator or a `type_group_id` + * enumerator. + */ +std::vector get_type_or_group(std::vector const& ids); + +/** + * @brief Contains data parameters for all types. + * + * This class exposes APIs to set and get distribution parameters for each supported type. + * Parameters can be set for multiple types with a single call by passing a `type_group_id` instead + * of `cudf::type_id`. + * + * All types have default parameters so it's not necessary to set the parameters before using them. 
+ */ +class data_profile { + std::map> int_params; + std::map> float_params; + distribution_params string_dist_desc{{distribution_id::NORMAL, 0, 32}}; + distribution_params list_dist_desc{ + cudf::type_id::INT32, {distribution_id::GEOMETRIC, 0, 100}, 2}; + distribution_params struct_dist_desc{ + {cudf::type_id::INT32, cudf::type_id::FLOAT32, cudf::type_id::STRING}, 2}; + std::map> decimal_params; + + double bool_probability_true = 0.5; + std::optional null_probability = 0.01; + cudf::size_type cardinality = 2000; + cudf::size_type avg_run_length = 4; + + public: + template && cuda::std::is_integral_v, T>* = nullptr> + distribution_params get_distribution_params() const + { + auto it = int_params.find(cudf::type_to_id()); + if (it == int_params.end()) { + auto const range = default_range(); + return distribution_params{default_distribution_id(), range.first, range.second}; + } else { + auto& desc = it->second; + return {desc.id, static_cast(desc.lower_bound), static_cast(desc.upper_bound)}; + } + } + + template , T>* = nullptr> + distribution_params get_distribution_params() const + { + auto it = float_params.find(cudf::type_to_id()); + if (it == float_params.end()) { + auto const range = default_range(); + return distribution_params{default_distribution_id(), range.first, range.second}; + } else { + auto& desc = it->second; + return {desc.id, static_cast(desc.lower_bound), static_cast(desc.upper_bound)}; + } + } + + template >* = nullptr> + distribution_params get_distribution_params() const + { + return distribution_params{bool_probability_true}; + } + + template ()>* = nullptr> + distribution_params get_distribution_params() const + { + auto it = int_params.find(cudf::type_to_id()); + if (it == int_params.end()) { + auto const range = default_range(); + return distribution_params{default_distribution_id(), range.first, range.second}; + } else { + auto& desc = it->second; + return { + desc.id, static_cast(desc.lower_bound), static_cast(desc.upper_bound)}; + } + } 
+ + template >* = nullptr> + distribution_params get_distribution_params() const + { + return string_dist_desc; + } + + template >* = nullptr> + distribution_params get_distribution_params() const + { + return list_dist_desc; + } + + template >* = nullptr> + distribution_params get_distribution_params() const + { + return struct_dist_desc; + } + + template ()>* = nullptr> + distribution_params get_distribution_params() const + { + using rep = typename T::rep; + auto it = decimal_params.find(cudf::type_to_id()); + if (it == decimal_params.end()) { + auto const range = default_range(); + return distribution_params{default_distribution_id(), range.first, range.second}; + } else { + auto& desc = it->second; + return {desc.id, static_cast(desc.lower_bound), static_cast(desc.upper_bound)}; + } + } + + auto get_bool_probability_true() const { return bool_probability_true; } + auto get_null_probability() const { return null_probability; }; + [[nodiscard]] auto get_cardinality() const { return cardinality; }; + [[nodiscard]] auto get_avg_run_length() const { return avg_run_length; }; + + // Users should pass integral values for bounds when setting the parameters for types that have + // discrete distributions (integers, strings, lists). Otherwise the call with have no effect. 
+ template , T>* = nullptr> + void set_distribution_params(Type_enum type_or_group, + distribution_id dist, + T lower_bound, + T upper_bound) + { + for (auto tid : get_type_or_group(static_cast(type_or_group))) { + if (tid == cudf::type_id::STRING) { + string_dist_desc.length_params = { + dist, static_cast(lower_bound), static_cast(upper_bound)}; + } else if (tid == cudf::type_id::LIST) { + list_dist_desc.length_params = { + dist, static_cast(lower_bound), static_cast(upper_bound)}; + } else { + int_params[tid] = { + dist, static_cast(lower_bound), static_cast(upper_bound)}; + } + } + } + + // Users should pass floating point values for bounds when setting the parameters for types that + // have continuous distributions (floating point types). Otherwise the call with have no effect. + template , T>* = nullptr> + void set_distribution_params(Type_enum type_or_group, + distribution_id dist, + T lower_bound, + T upper_bound) + { + for (auto tid : get_type_or_group(static_cast(type_or_group))) { + float_params[tid] = { + dist, static_cast(lower_bound), static_cast(upper_bound)}; + } + } + + template (), T>* = nullptr> + void set_distribution_params(Type_enum type_or_group, + distribution_id dist, + typename T::rep lower_bound, + typename T::rep upper_bound) + { + for (auto tid : get_type_or_group(static_cast(type_or_group))) { + int_params[tid] = { + dist, static_cast(lower_bound), static_cast(upper_bound)}; + } + } + + void set_bool_probability_true(double p) + { + CUDF_EXPECTS(p >= 0. and p <= 1., "probability must be in range [0...1]"); + bool_probability_true = p; + } + void set_null_probability(std::optional p) + { + CUDF_EXPECTS(p.value_or(0.) >= 0. and p.value_or(0.) 
<= 1., + "probability must be in range [0...1]"); + null_probability = p; + } + void set_cardinality(cudf::size_type c) { cardinality = c; } + void set_avg_run_length(cudf::size_type avg_rl) { avg_run_length = avg_rl; } + + void set_list_depth(cudf::size_type max_depth) + { + CUDF_EXPECTS(max_depth > 0, "List depth must be positive"); + list_dist_desc.max_depth = max_depth; + } + + void set_list_type(cudf::type_id type) { list_dist_desc.element_type = type; } + + void set_struct_depth(cudf::size_type max_depth) + { + CUDF_EXPECTS(max_depth > 0, "Struct depth must be positive"); + struct_dist_desc.max_depth = max_depth; + } + + void set_struct_types(cudf::host_span types) + { + CUDF_EXPECTS( + std::none_of( + types.begin(), types.end(), [](auto& type) { return type == cudf::type_id::STRUCT; }), + "Cannot include STRUCT as its own subtype"); + struct_dist_desc.leaf_types.assign(types.begin(), types.end()); + } +}; + +/** + * @brief Builder to construct data profiles for the random data generator. + * + * Setters can be chained to set multiple properties in a single expression. + * For example, `data_profile` initialization + * @code{.pseudo} + * data_profile profile; + * profile.set_null_probability(0.0); + * profile.set_cardinality(0); + * profile.set_distribution_params(cudf::type_id::INT32, distribution_id::UNIFORM, 0, 100); + * @endcode + * becomes + * @code{.pseudo} + * data_profile const profile = + * data_profile_builder().cardinality(0).null_probability(0.0).distribution( + * cudf::type_id::INT32, distribution_id::UNIFORM, 0, 100); + * @endcode + * The builder makes it easier to have immutable `data_profile` objects even with the complex + * initialization. The `profile` object in the example above is initialized from + * `data_profile_builder` using an implicit conversion operator. + * + * The builder API also includes a few additional convenience setters: + * Overload of `distribution` that only takes the distribution type (not the range). 
+ * `no_validity`, which is a simpler equivalent of `null_probability(std::nullopr)`. + */ +class data_profile_builder { + data_profile profile; + + public: + /** + * @brief Sets random distribution type for a given set of data types. + * + * Only the distribution type is set; the distribution will use the default range. + * + * @param type_or_group Type or group ID, depending on whether the new distribution + * applies to a single type or a subset of types + * @param dist Random distribution type + * @tparam T Data type of the distribution range; does not need to match the data type + * @return this for chaining + */ + template + data_profile_builder& distribution(Type_enum type_or_group, distribution_id dist) + { + auto const range = default_range(); + profile.set_distribution_params(type_or_group, dist, range.first, range.second); + return *this; + } + + /** + * @brief Sets random distribution type and value range for a given set of data types. + * + * @tparam T Parameters that are forwarded to set_distribution_params + * @return this for chaining + */ + template + data_profile_builder& distribution(T&&... t) + { + profile.set_distribution_params(std::forward(t)...); + return *this; + } + + /** + * @brief Sets the probability that a randomly generated boolean element with be `true`. + * + * For example, passing `0.9` means that 90% of values in boolean columns with be `true`. + * + * @param p Probability of `true` values, in range [0..1] + * @return this for chaining + */ + data_profile_builder& bool_probability_true(double p) + { + profile.set_bool_probability_true(p); + return *this; + } + + /** + * @brief Sets the probability that a randomly generated element will be `null`. 
+ * + * @param p Probability of `null` values, in range [0..1] + * @return this for chaining + */ + data_profile_builder& null_probability(std::optional p) + { + profile.set_null_probability(p); + return *this; + } + + /** + * @brief Disables the creation of null mask in the output columns. + * + * @return this for chaining + */ + data_profile_builder& no_validity() + { + profile.set_null_probability(std::nullopt); + return *this; + } + + /** + * @brief Sets the maximum number of unique values in each output column. + * + * @param c Maximum number of unique values + * @return this for chaining + */ + data_profile_builder& cardinality(cudf::size_type c) + { + profile.set_cardinality(c); + return *this; + } + + /** + * @brief Sets the average length of sequences of equal elements in output columns. + * + * @param avg_rl Average sequence length (run-length) + * @return this for chaining + */ + data_profile_builder& avg_run_length(cudf::size_type avg_rl) + { + profile.set_avg_run_length(avg_rl); + return *this; + } + + /** + * @brief Sets the maximum nesting depth of generated list columns. + * + * @param max_depth maximum nesting depth + * @return this for chaining + */ + data_profile_builder& list_depth(cudf::size_type max_depth) + { + profile.set_list_depth(max_depth); + return *this; + } + + /** + * @brief Sets the data type of list elements. + * + * @param type data type ID + * @return this for chaining + */ + data_profile_builder& list_type(cudf::type_id type) + { + profile.set_list_type(type); + return *this; + } + + /** + * @brief Sets the maximum nesting depth of generated struct columns. + * + * @param max_depth maximum nesting depth + * @return this for chaining + */ + data_profile_builder& struct_depth(cudf::size_type max_depth) + { + profile.set_struct_depth(max_depth); + return *this; + } + + /** + * @brief Sets the data types of struct fields. 
+ * + * @param types data type IDs + * @return this for chaining + */ + data_profile_builder& struct_types(cudf::host_span types) + { + profile.set_struct_types(types); + return *this; + } + + /** + * @brief move data_profile member once it's built. + */ + operator data_profile&&() { return std::move(profile); } +}; + +/** + * @brief Strongly typed table size in bytes. Used to disambiguate overloads of + * `create_random_table`. + */ +struct table_size_bytes { + size_t size; +}; + +/** + * @brief Strongly typed row count. Used to disambiguate overloads of `create_random_table`. + */ +struct row_count { + cudf::size_type count; +}; + +/** + * @brief Deterministically generates a table filled with data with the given parameters. + * + * @param dtype_ids Vector of requested column types + * @param table_bytes Target size of the output table, in bytes. Some type may not produce columns + * of exact size + * @param data_params Optional, set of data parameters describing the data profile for each type + * @param seed Optional, seed for the pseudo-random engine + */ +std::unique_ptr create_random_table(std::vector const& dtype_ids, + table_size_bytes table_bytes, + data_profile const& data_params = data_profile{}, + unsigned seed = 1); + +/** + * @brief Deterministically generates a table filled with data with the given parameters. + * + * @param dtype_ids Vector of requested column types + * @param num_rows Number of rows in the output table + * @param data_params Optional, set of data parameters describing the data profile for each type + * @param seed Optional, seed for the pseudo-random engine + */ +std::unique_ptr create_random_table(std::vector const& dtype_ids, + row_count num_rows, + data_profile const& data_params = data_profile{}, + unsigned seed = 1); + +/** + * @brief Deterministically generates a column filled with data with the given parameters. 
+ * + * @param dtype_id Requested column type + * @param num_rows Number of rows in the output column + * @param data_params Optional, set of data parameters describing the data profile + * @param seed Optional, seed for the pseudo-random engine + */ +std::unique_ptr create_random_column(cudf::type_id dtype_id, + row_count num_rows, + data_profile const& data_params = data_profile{}, + unsigned seed = 1); + +/** + * @brief Generate sequence columns starting with value 0 in first row and increasing by 1 in + * subsequent rows. + * + * @param dtype_ids Vector of requested column types + * @param num_rows Number of rows in the output table + * @param null_probability Optional, probability of a null value + * no value implies no null mask, =0 implies all valids, >=1 implies all nulls + * @param seed Optional, seed for the pseudo-random engine + * @return A table with the sequence columns. + */ +std::unique_ptr create_sequence_table( + std::vector const& dtype_ids, + row_count num_rows, + std::optional null_probability = std::nullopt, + unsigned seed = 1); + +/** + * @brief Repeats the input data types cyclically to fill a vector of @ref num_cols + * elements. + * + * @param dtype_ids Vector of requested column types + * @param num_cols Number of types in the output vector + * @return A vector of type_ids + */ +std::vector cycle_dtypes(std::vector const& dtype_ids, + cudf::size_type num_cols); + +/** + * @brief Repeat the given two data types with a given ratio of a:b. + * + * The first dtype will have 'first_num' columns and the second will have 'num_cols - first_num' + * columns. 
+ * + * @param dtype_ids Pair of requested column types + * @param num_cols Total number of columns in the output vector + * @param first_num Total number of columns of type `dtype_ids.first` + * @return A vector of type_ids + */ +std::vector mix_dtypes(std::pair const& dtype_ids, + cudf::size_type num_cols, + int first_num); +/** + * @brief Create a random null mask object + * + * @param size number of rows + * @param null_probability probability of a null value + * no value implies no null mask, =0 implies all valids, >=1 implies all nulls + * @param seed Optional, seed for the pseudo-random engine + * @return null mask device buffer with random null mask data and null count + */ +std::pair create_random_null_mask( + cudf::size_type size, std::optional null_probability = std::nullopt, unsigned seed = 1); diff --git a/cpp/benchmarks/common/random_distribution_factory.cuh b/cpp/benchmarks/common/random_distribution_factory.cuh new file mode 100644 index 0000000..a548e4c --- /dev/null +++ b/cpp/benchmarks/common/random_distribution_factory.cuh @@ -0,0 +1,181 @@ +/* + * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "generate_input.hpp" + +#include + +#include + +#include +#include +#include +#include +#include + +#include +#include +#include + +/** + * @brief Real Type that has at least number of bits of integral type in its mantissa. 
+ * number of bits of integrals < 23 bits of mantissa in float + * to allow full range of integer bits to be generated. + * @tparam T integral type + */ +template +using integral_to_realType = + std::conditional_t, + T, + std::conditional_t>; + +/** + * @brief Generates a normal distribution between zero and upper_bound. + */ +template +auto make_normal_dist(T lower_bound, T upper_bound) +{ + using realT = integral_to_realType; + T const mean = lower_bound + (upper_bound - lower_bound) / 2; + T const stddev = (upper_bound - lower_bound) / 6; + return thrust::random::normal_distribution(mean, stddev); +} + +template , T>* = nullptr> +auto make_uniform_dist(T range_start, T range_end) +{ + return thrust::uniform_int_distribution(range_start, range_end); +} + +template ()>* = nullptr> +auto make_uniform_dist(T range_start, T range_end) +{ + return thrust::uniform_real_distribution(range_start, range_end); +} + +template +double geometric_dist_p(T range_size) +{ + constexpr double percentage_in_range = 0.99; + double const p = 1 - exp(log(1 - percentage_in_range) / range_size); + return p ? p : std::numeric_limits::epsilon(); +} + +/** + * @brief Generates a geometric distribution between lower_bound and upper_bound. + * This distribution is an approximation generated using normal distribution. + * + * @tparam T Result type of the number to produce. + */ +template +class geometric_distribution : public thrust::random::normal_distribution> { + using realType = integral_to_realType; + using super_t = thrust::random::normal_distribution; + T _lower_bound; + T _upper_bound; + + public: + using result_type = T; + __host__ __device__ explicit geometric_distribution(T lower_bound, T upper_bound) + : super_t(0, std::labs(upper_bound - lower_bound) / 4.0), + _lower_bound(lower_bound), + _upper_bound(upper_bound) + { + } + + template + __host__ __device__ result_type operator()(UniformRandomNumberGenerator& urng) + { + return _lower_bound < _upper_bound ? 
std::abs(super_t::operator()(urng)) + _lower_bound + : _lower_bound - std::abs(super_t::operator()(urng)); + } +}; + +template +struct value_generator { + using result_type = T; + + value_generator(T lower_bound, T upper_bound, thrust::minstd_rand& engine, Generator gen) + : lower_bound(std::min(lower_bound, upper_bound)), + upper_bound(std::max(lower_bound, upper_bound)), + engine(engine), + dist(gen) + { + } + + __device__ T operator()(size_t n) + { + engine.discard(n); + if constexpr (cuda::std::is_integral_v && + cuda::std::is_floating_point_v) { + return std::clamp(static_cast(std::round(dist(engine))), lower_bound, upper_bound); + } else { + return std::clamp(dist(engine), lower_bound, upper_bound); + } + // Note: uniform does not need clamp, because already range is guaranteed to be within bounds. + } + + T lower_bound; + T upper_bound; + thrust::minstd_rand engine; + Generator dist; +}; + +template +using distribution_fn = std::function(thrust::minstd_rand&, size_t)>; + +template < + typename T, + std::enable_if_t or cuda::std::is_floating_point_v, T>* = nullptr> +distribution_fn make_distribution(distribution_id dist_id, T lower_bound, T upper_bound) +{ + switch (dist_id) { + case distribution_id::NORMAL: + return [lower_bound, upper_bound, dist = make_normal_dist(lower_bound, upper_bound)]( + thrust::minstd_rand& engine, size_t size) -> rmm::device_uvector { + rmm::device_uvector result(size, cudf::get_default_stream()); + thrust::tabulate(thrust::device, + result.begin(), + result.end(), + value_generator{lower_bound, upper_bound, engine, dist}); + return result; + }; + case distribution_id::UNIFORM: + return [lower_bound, upper_bound, dist = make_uniform_dist(lower_bound, upper_bound)]( + thrust::minstd_rand& engine, size_t size) -> rmm::device_uvector { + rmm::device_uvector result(size, cudf::get_default_stream()); + thrust::tabulate(thrust::device, + result.begin(), + result.end(), + value_generator{lower_bound, upper_bound, engine, dist}); + return 
result; + }; + case distribution_id::GEOMETRIC: + // kind of exponential distribution from lower_bound to upper_bound. + return [lower_bound, upper_bound, dist = geometric_distribution(lower_bound, upper_bound)]( + thrust::minstd_rand& engine, size_t size) -> rmm::device_uvector { + rmm::device_uvector result(size, cudf::get_default_stream()); + thrust::tabulate(thrust::device, + result.begin(), + result.end(), + value_generator{lower_bound, upper_bound, engine, dist}); + return result; + }; + default: CUDF_FAIL("Unsupported probability distribution"); + } +} diff --git a/cpp/benchmarks/copying/contiguous_split.cu b/cpp/benchmarks/copying/contiguous_split.cu new file mode 100644 index 0000000..910fc68 --- /dev/null +++ b/cpp/benchmarks/copying/contiguous_split.cu @@ -0,0 +1,263 @@ +/* + * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include + +#include + +#include +#include + +#include + +void contiguous_split(cudf::table_view const& src_table, std::vector const& splits) +{ + auto result = cudf::contiguous_split(src_table, splits); +} + +void chunked_pack(cudf::table_view const& src_table, std::vector const&) +{ + auto const mr = rmm::mr::get_current_device_resource(); + auto const stream = cudf::get_default_stream(); + auto user_buffer = rmm::device_uvector(100L * 1024 * 1024, stream, mr); + auto chunked_pack = cudf::chunked_pack::create(src_table, user_buffer.size(), mr); + while (chunked_pack->has_next()) { + auto iter_size = chunked_pack->next(user_buffer); + } + stream.synchronize(); +} + +template +void BM_contiguous_split_common(benchmark::State& state, + std::vector& src_cols, + int64_t num_rows, + int64_t num_splits, + int64_t bytes_total, + ContigSplitImpl& impl) +{ + // generate splits + std::vector splits; + if (num_splits > 0) { + cudf::size_type const split_stride = num_rows / num_splits; + // start after the first element. + auto iter = thrust::make_counting_iterator(1); + splits.reserve(num_splits); + std::transform(iter, + iter + num_splits, + std::back_inserter(splits), + [split_stride, num_rows](cudf::size_type i) { + return std::min(i * split_stride, static_cast(num_rows)); + }); + } + + for (auto const& col : src_cols) + // computing the null count is not a part of the benchmark's target code path, and we want the + // property to be pre-computed so that we measure the performance of only the intended code path + [[maybe_unused]] + auto const nulls = col->null_count(); + + auto const src_table = cudf::table(std::move(src_cols)); + + for (auto _ : state) { + cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0 + impl(src_table, splits); + } + + // it's 2x bytes_total because we're both reading and writing. 
+ state.SetBytesProcessed(static_cast(state.iterations()) * bytes_total * 2); +} + +class ContiguousSplit : public cudf::benchmark {}; +class ChunkedPack : public cudf::benchmark {}; + +template +void BM_contiguous_split(benchmark::State& state, ContiguousSplitImpl& impl) +{ + int64_t const total_desired_bytes = state.range(0); + cudf::size_type const num_cols = state.range(1); + cudf::size_type const num_splits = state.range(2); + bool const include_validity = state.range(3) != 0; + + cudf::size_type el_size = 4; // ints and floats + int64_t const num_rows = total_desired_bytes / (num_cols * el_size); + + // generate input table + auto builder = data_profile_builder().cardinality(0).distribution(cudf::type_id::INT32, + distribution_id::UNIFORM); + if (not include_validity) builder.no_validity(); + + auto src_cols = create_random_table(cycle_dtypes({cudf::type_id::INT32}, num_cols), + row_count{static_cast(num_rows)}, + data_profile{builder}) + ->release(); + + int64_t const total_bytes = + total_desired_bytes + + (include_validity ? 
(max(int64_t{1}, (num_rows / 32)) * sizeof(cudf::bitmask_type) * num_cols) + : 0); + + BM_contiguous_split_common(state, src_cols, num_rows, num_splits, total_bytes, impl); +} + +class ContiguousSplitStrings : public cudf::benchmark {}; +class ChunkedPackStrings : public cudf::benchmark {}; + +template +void BM_contiguous_split_strings(benchmark::State& state, ContiguousSplitImpl& impl) +{ + int64_t const total_desired_bytes = state.range(0); + cudf::size_type const num_cols = state.range(1); + cudf::size_type const num_splits = state.range(2); + bool const include_validity = state.range(3) != 0; + + constexpr int64_t string_len = 8; + std::vector h_strings{ + "aaaaaaaa", "bbbbbbbb", "cccccccc", "dddddddd", "eeeeeeee", "ffffffff", "gggggggg", "hhhhhhhh"}; + + int64_t const col_len_bytes = total_desired_bytes / num_cols; + int64_t const num_rows = col_len_bytes / string_len; + + // generate input table + data_profile profile = data_profile_builder().no_validity().cardinality(0).distribution( + cudf::type_id::INT32, + distribution_id::UNIFORM, + 0ul, + include_validity ? h_strings.size() * 2 : h_strings.size() - 1); // out of bounds nullified + cudf::test::strings_column_wrapper one_col(h_strings.begin(), h_strings.end()); + std::vector> src_cols(num_cols); + for (int64_t idx = 0; idx < num_cols; idx++) { + auto random_indices = create_random_column( + cudf::type_id::INT32, row_count{static_cast(num_rows)}, profile); + auto str_table = cudf::gather(cudf::table_view{{one_col}}, + *random_indices, + (include_validity ? cudf::out_of_bounds_policy::NULLIFY + : cudf::out_of_bounds_policy::DONT_CHECK)); + src_cols[idx] = std::move(str_table->release()[0]); + } + + int64_t const total_bytes = + total_desired_bytes + ((num_rows + 1) * sizeof(cudf::size_type)) + + (include_validity ? 
(max(int64_t{1}, (num_rows / 32)) * sizeof(cudf::bitmask_type) * num_cols) + : 0); + + BM_contiguous_split_common(state, src_cols, num_rows, num_splits, total_bytes, impl); +} + +#define CSBM_BENCHMARK_DEFINE(name, size, num_columns, num_splits, validity) \ + BENCHMARK_DEFINE_F(ContiguousSplit, name)(::benchmark::State & state) \ + { \ + BM_contiguous_split(state, contiguous_split); \ + } \ + BENCHMARK_REGISTER_F(ContiguousSplit, name) \ + ->Args({size, num_columns, num_splits, validity}) \ + ->Unit(benchmark::kMillisecond) \ + ->UseManualTime() \ + ->Iterations(8) +CSBM_BENCHMARK_DEFINE(6Gb512ColsNoValidity, (int64_t)6 * 1024 * 1024 * 1024, 512, 256, 0); +CSBM_BENCHMARK_DEFINE(6Gb512ColsValidity, (int64_t)6 * 1024 * 1024 * 1024, 512, 256, 1); +CSBM_BENCHMARK_DEFINE(6Gb10ColsNoValidity, (int64_t)6 * 1024 * 1024 * 1024, 10, 256, 0); +CSBM_BENCHMARK_DEFINE(6Gb10ColsValidity, (int64_t)6 * 1024 * 1024 * 1024, 10, 256, 1); + +CSBM_BENCHMARK_DEFINE(4Gb512ColsNoValidity, (int64_t)4 * 1024 * 1024 * 1024, 512, 256, 0); +CSBM_BENCHMARK_DEFINE(4Gb512ColsValidity, (int64_t)4 * 1024 * 1024 * 1024, 512, 256, 1); +CSBM_BENCHMARK_DEFINE(4Gb10ColsNoValidity, (int64_t)4 * 1024 * 1024 * 1024, 10, 256, 0); +CSBM_BENCHMARK_DEFINE(4Gb10ColsValidity, (int64_t)4 * 1024 * 1024 * 1024, 10, 256, 1); +CSBM_BENCHMARK_DEFINE(4Gb4ColsNoSplits, (int64_t)1 * 1024 * 1024 * 1024, 4, 0, 1); +CSBM_BENCHMARK_DEFINE(4Gb4ColsValidityNoSplits, (int64_t)1 * 1024 * 1024 * 1024, 4, 0, 1); + +CSBM_BENCHMARK_DEFINE(1Gb512ColsNoValidity, (int64_t)1 * 1024 * 1024 * 1024, 512, 256, 0); +CSBM_BENCHMARK_DEFINE(1Gb512ColsValidity, (int64_t)1 * 1024 * 1024 * 1024, 512, 256, 1); +CSBM_BENCHMARK_DEFINE(1Gb10ColsNoValidity, (int64_t)1 * 1024 * 1024 * 1024, 10, 256, 0); +CSBM_BENCHMARK_DEFINE(1Gb10ColsValidity, (int64_t)1 * 1024 * 1024 * 1024, 10, 256, 1); +CSBM_BENCHMARK_DEFINE(1Gb1ColNoSplits, (int64_t)1 * 1024 * 1024 * 1024, 1, 0, 1); +CSBM_BENCHMARK_DEFINE(1Gb1ColValidityNoSplits, (int64_t)1 * 1024 * 1024 * 1024, 1, 
0, 1); + +#define CSBM_STRINGS_BENCHMARK_DEFINE(name, size, num_columns, num_splits, validity) \ + BENCHMARK_DEFINE_F(ContiguousSplitStrings, name)(::benchmark::State & state) \ + { \ + BM_contiguous_split_strings(state, contiguous_split); \ + } \ + BENCHMARK_REGISTER_F(ContiguousSplitStrings, name) \ + ->Args({size, num_columns, num_splits, validity}) \ + ->Unit(benchmark::kMillisecond) \ + ->UseManualTime() \ + ->Iterations(8) + +CSBM_STRINGS_BENCHMARK_DEFINE(4Gb512ColsNoValidity, (int64_t)4 * 1024 * 1024 * 1024, 512, 256, 0); +CSBM_STRINGS_BENCHMARK_DEFINE(4Gb512ColsValidity, (int64_t)4 * 1024 * 1024 * 1024, 512, 256, 1); +CSBM_STRINGS_BENCHMARK_DEFINE(4Gb10ColsNoValidity, (int64_t)4 * 1024 * 1024 * 1024, 10, 256, 0); +CSBM_STRINGS_BENCHMARK_DEFINE(4Gb10ColsValidity, (int64_t)4 * 1024 * 1024 * 1024, 10, 256, 1); +CSBM_STRINGS_BENCHMARK_DEFINE(4Gb4ColsNoSplits, (int64_t)1 * 1024 * 1024 * 1024, 4, 0, 0); +CSBM_STRINGS_BENCHMARK_DEFINE(4Gb4ColsValidityNoSplits, (int64_t)1 * 1024 * 1024 * 1024, 4, 0, 1); + +CSBM_STRINGS_BENCHMARK_DEFINE(1Gb512ColsNoValidity, (int64_t)1 * 1024 * 1024 * 1024, 512, 256, 0); +CSBM_STRINGS_BENCHMARK_DEFINE(1Gb512ColsValidity, (int64_t)1 * 1024 * 1024 * 1024, 512, 256, 1); +CSBM_STRINGS_BENCHMARK_DEFINE(1Gb10ColsNoValidity, (int64_t)1 * 1024 * 1024 * 1024, 10, 256, 0); +CSBM_STRINGS_BENCHMARK_DEFINE(1Gb10ColsValidity, (int64_t)1 * 1024 * 1024 * 1024, 10, 256, 1); +CSBM_STRINGS_BENCHMARK_DEFINE(1Gb1ColNoSplits, (int64_t)1 * 1024 * 1024 * 1024, 1, 0, 0); +CSBM_STRINGS_BENCHMARK_DEFINE(1Gb1ColValidityNoSplits, (int64_t)1 * 1024 * 1024 * 1024, 1, 0, 1); + +#define CCSBM_BENCHMARK_DEFINE(name, size, num_columns, num_splits, validity) \ + BENCHMARK_DEFINE_F(ChunkedPack, name)(::benchmark::State & state) \ + { \ + BM_contiguous_split(state, chunked_pack); \ + } \ + BENCHMARK_REGISTER_F(ChunkedPack, name) \ + ->Args({size, num_columns, num_splits, validity}) \ + ->Unit(benchmark::kMillisecond) \ + ->UseManualTime() \ + ->Iterations(8) 
+CCSBM_BENCHMARK_DEFINE(6Gb512ColsNoValidity, (int64_t)6 * 1024 * 1024 * 1024, 512, 0, 0); +CCSBM_BENCHMARK_DEFINE(6Gb512ColsValidity, (int64_t)6 * 1024 * 1024 * 1024, 512, 0, 1); +CCSBM_BENCHMARK_DEFINE(6Gb10ColsNoValidity, (int64_t)6 * 1024 * 1024 * 1024, 10, 0, 0); +CCSBM_BENCHMARK_DEFINE(6Gb10ColsValidity, (int64_t)6 * 1024 * 1024 * 1024, 10, 0, 1); + +CCSBM_BENCHMARK_DEFINE(4Gb512ColsNoValidity, (int64_t)4 * 1024 * 1024 * 1024, 512, 0, 0); +CCSBM_BENCHMARK_DEFINE(4Gb512ColsValidity, (int64_t)4 * 1024 * 1024 * 1024, 512, 0, 1); +CCSBM_BENCHMARK_DEFINE(4Gb10ColsNoValidity, (int64_t)4 * 1024 * 1024 * 1024, 10, 0, 0); +CCSBM_BENCHMARK_DEFINE(4Gb10ColsValidity, (int64_t)4 * 1024 * 1024 * 1024, 10, 0, 1); +CCSBM_BENCHMARK_DEFINE(4Gb4ColsValidity, (int64_t)1 * 1024 * 1024 * 1024, 4, 0, 1); + +CCSBM_BENCHMARK_DEFINE(1Gb512ColsNoValidity, (int64_t)1 * 1024 * 1024 * 1024, 512, 0, 0); +CCSBM_BENCHMARK_DEFINE(1Gb512ColsValidity, (int64_t)1 * 1024 * 1024 * 1024, 512, 0, 1); +CCSBM_BENCHMARK_DEFINE(1Gb10ColsNoValidity, (int64_t)1 * 1024 * 1024 * 1024, 10, 0, 0); +CCSBM_BENCHMARK_DEFINE(1Gb10ColsValidity, (int64_t)1 * 1024 * 1024 * 1024, 10, 0, 1); +CCSBM_BENCHMARK_DEFINE(1Gb1ColValidity, (int64_t)1 * 1024 * 1024 * 1024, 1, 0, 1); + +#define CCSBM_STRINGS_BENCHMARK_DEFINE(name, size, num_columns, num_splits, validity) \ + BENCHMARK_DEFINE_F(ChunkedPackStrings, name)(::benchmark::State & state) \ + { \ + BM_contiguous_split_strings(state, chunked_pack); \ + } \ + BENCHMARK_REGISTER_F(ChunkedPackStrings, name) \ + ->Args({size, num_columns, num_splits, validity}) \ + ->Unit(benchmark::kMillisecond) \ + ->UseManualTime() \ + ->Iterations(8) + +CCSBM_STRINGS_BENCHMARK_DEFINE(4Gb512ColsNoValidity, (int64_t)4 * 1024 * 1024 * 1024, 512, 0, 0); +CCSBM_STRINGS_BENCHMARK_DEFINE(4Gb512ColsValidity, (int64_t)4 * 1024 * 1024 * 1024, 512, 0, 1); +CCSBM_STRINGS_BENCHMARK_DEFINE(4Gb10ColsNoValidity, (int64_t)4 * 1024 * 1024 * 1024, 10, 0, 0); 
+CCSBM_STRINGS_BENCHMARK_DEFINE(4Gb10ColsValidity, (int64_t)4 * 1024 * 1024 * 1024, 10, 0, 1); +CCSBM_STRINGS_BENCHMARK_DEFINE(4Gb4ColsValidity, (int64_t)1 * 1024 * 1024 * 1024, 4, 0, 1); + +CCSBM_STRINGS_BENCHMARK_DEFINE(1Gb512ColsNoValidity, (int64_t)1 * 1024 * 1024 * 1024, 512, 0, 0); +CCSBM_STRINGS_BENCHMARK_DEFINE(1Gb512ColsValidity, (int64_t)1 * 1024 * 1024 * 1024, 512, 0, 1); +CCSBM_STRINGS_BENCHMARK_DEFINE(1Gb10ColsNoValidity, (int64_t)1 * 1024 * 1024 * 1024, 10, 0, 0); +CCSBM_STRINGS_BENCHMARK_DEFINE(1Gb10ColsValidity, (int64_t)1 * 1024 * 1024 * 1024, 10, 0, 1); +CCSBM_STRINGS_BENCHMARK_DEFINE(1Gb1ColValidity, (int64_t)1 * 1024 * 1024 * 1024, 1, 0, 1); diff --git a/cpp/benchmarks/copying/copy_if_else.cpp b/cpp/benchmarks/copying/copy_if_else.cpp new file mode 100644 index 0000000..50ddfb8 --- /dev/null +++ b/cpp/benchmarks/copying/copy_if_else.cpp @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include + +#include +#include + +#include + +class CopyIfElse : public cudf::benchmark {}; + +template +static void BM_copy_if_else(benchmark::State& state, bool nulls) +{ + cudf::size_type const n_rows{(cudf::size_type)state.range(0)}; + auto input_type = cudf::type_to_id(); + auto bool_type = cudf::type_id::BOOL8; + auto const input = create_random_table({input_type, input_type, bool_type}, row_count{n_rows}); + + if (!nulls) { + input->get_column(2).set_null_mask(rmm::device_buffer{}, 0); + input->get_column(1).set_null_mask(rmm::device_buffer{}, 0); + input->get_column(0).set_null_mask(rmm::device_buffer{}, 0); + } + + cudf::column_view decision(input->view().column(2)); + cudf::column_view rhs(input->view().column(1)); + cudf::column_view lhs(input->view().column(0)); + + for (auto _ : state) { + cuda_event_timer raii(state, true, cudf::get_default_stream()); + cudf::copy_if_else(lhs, rhs, decision); + } + + auto const bytes_read = n_rows * (sizeof(TypeParam) + sizeof(bool)); + auto const bytes_written = n_rows * sizeof(TypeParam); + auto const null_bytes = nulls ? 2 * cudf::bitmask_allocation_size_bytes(n_rows) : 0; + + // Use number of bytes read and written. 
+ state.SetBytesProcessed(static_cast(state.iterations()) * + (bytes_read + bytes_written + null_bytes)); +} + +#define COPY_BENCHMARK_DEFINE(name, type, b) \ + BENCHMARK_DEFINE_F(CopyIfElse, name) \ + (::benchmark::State & st) { BM_copy_if_else(st, b); } \ + BENCHMARK_REGISTER_F(CopyIfElse, name) \ + ->RangeMultiplier(8) \ + ->Ranges({{1 << 12, 1 << 27}}) \ + ->UseManualTime() \ + ->Unit(benchmark::kMillisecond); + +COPY_BENCHMARK_DEFINE(int16, int16_t, true) +COPY_BENCHMARK_DEFINE(uint32, uint32_t, true) +COPY_BENCHMARK_DEFINE(float64, double, true) +COPY_BENCHMARK_DEFINE(int16_no_nulls, int16_t, false) +COPY_BENCHMARK_DEFINE(uint32_no_nulls, uint32_t, false) +COPY_BENCHMARK_DEFINE(float64_no_nulls, double, false) diff --git a/cpp/benchmarks/copying/gather.cu b/cpp/benchmarks/copying/gather.cu new file mode 100644 index 0000000..eeb0149 --- /dev/null +++ b/cpp/benchmarks/copying/gather.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include + +#include +#include + +#include +#include +#include +#include + +class Gather : public cudf::benchmark {}; + +template +void BM_gather(benchmark::State& state) +{ + cudf::size_type const source_size{(cudf::size_type)state.range(0)}; + auto const n_cols = (cudf::size_type)state.range(1); + + // Gather indices + auto gather_map_table = + create_sequence_table({cudf::type_to_id()}, row_count{source_size}); + auto gather_map = gather_map_table->get_column(0).mutable_view(); + + if (coalesce) { + thrust::reverse( + thrust::device, gather_map.begin(), gather_map.end()); + } else { + thrust::shuffle(thrust::device, + gather_map.begin(), + gather_map.end(), + thrust::default_random_engine()); + } + + // Every element is valid + auto source_table = create_sequence_table(cycle_dtypes({cudf::type_to_id()}, n_cols), + row_count{source_size}); + + for (auto _ : state) { + cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0 + cudf::gather(*source_table, gather_map); + } + + state.SetBytesProcessed(state.iterations() * state.range(0) * n_cols * 2 * sizeof(TypeParam)); +} + +#define GBM_BENCHMARK_DEFINE(name, type, coalesce) \ + BENCHMARK_DEFINE_F(Gather, name)(::benchmark::State & state) \ + { \ + BM_gather(state); \ + } \ + BENCHMARK_REGISTER_F(Gather, name) \ + ->RangeMultiplier(2) \ + ->Ranges({{1 << 10, 1 << 26}, {1, 8}}) \ + ->UseManualTime(); + +GBM_BENCHMARK_DEFINE(double_coalesce_x, double, true); +GBM_BENCHMARK_DEFINE(double_coalesce_o, double, false); diff --git a/cpp/benchmarks/copying/scatter.cu b/cpp/benchmarks/copying/scatter.cu new file mode 100644 index 0000000..a521dc8 --- /dev/null +++ b/cpp/benchmarks/copying/scatter.cu @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +#include +#include + +#include +#include +#include +#include + +class Scatter : public cudf::benchmark {}; + +template +void BM_scatter(benchmark::State& state) +{ + auto const source_size{static_cast(state.range(0))}; + auto const n_cols{static_cast(state.range(1))}; + + // Gather indices + auto scatter_map_table = + create_sequence_table({cudf::type_to_id()}, row_count{source_size}); + auto scatter_map = scatter_map_table->get_column(0).mutable_view(); + + if (coalesce) { + thrust::reverse( + thrust::device, scatter_map.begin(), scatter_map.end()); + } else { + thrust::shuffle(thrust::device, + scatter_map.begin(), + scatter_map.end(), + thrust::default_random_engine()); + } + + // Every element is valid + auto source_table = create_sequence_table(cycle_dtypes({cudf::type_to_id()}, n_cols), + row_count{source_size}); + auto target_table = create_sequence_table(cycle_dtypes({cudf::type_to_id()}, n_cols), + row_count{source_size}); + + for (auto _ : state) { + cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0 + cudf::scatter(*source_table, scatter_map, *target_table); + } + + state.SetBytesProcessed(static_cast(state.iterations()) * state.range(0) * n_cols * 2 * + sizeof(TypeParam)); +} + +#define SBM_BENCHMARK_DEFINE(name, type, coalesce) \ + BENCHMARK_DEFINE_F(Scatter, name)(::benchmark::State & state) \ + { \ + BM_scatter(state); \ + } \ + BENCHMARK_REGISTER_F(Scatter, name) \ + ->RangeMultiplier(2) \ + ->Ranges({{1 << 10, 1 << 25}, {1, 8}}) \ + ->UseManualTime(); + 
+SBM_BENCHMARK_DEFINE(double_coalesce_x, double, true); +SBM_BENCHMARK_DEFINE(double_coalesce_o, double, false); diff --git a/cpp/benchmarks/copying/shift.cu b/cpp/benchmarks/copying/shift.cu new file mode 100644 index 0000000..460100a --- /dev/null +++ b/cpp/benchmarks/copying/shift.cu @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include +#include +#include + +#include +#include +#include + +template > +std::unique_ptr make_scalar( + T value = 0, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) +{ + auto s = new ScalarType(value, true, stream, mr); + return std::unique_ptr(s); +} + +template +struct value_func { + T* data; + cudf::size_type offset; + + __device__ T operator()(int idx) { return data[idx - offset]; } +}; + +struct validity_func { + cudf::size_type size; + cudf::size_type offset; + + __device__ bool operator()(int idx) + { + auto source_idx = idx - offset; + return source_idx < 0 || source_idx >= size; + } +}; + +template +static void BM_shift(benchmark::State& state) +{ + cudf::size_type size = state.range(0); + cudf::size_type offset = size * (static_cast(shift_factor) / 100.0); + + auto const input_table = + create_sequence_table({cudf::type_to_id()}, + row_count{size}, + use_validity ? 
std::optional{1.0} : std::nullopt); + cudf::column_view input{input_table->get_column(0)}; + + auto fill = use_validity ? make_scalar() : make_scalar(777); + + for (auto _ : state) { + cuda_event_timer raii(state, true); + auto output = cudf::shift(input, offset, *fill); + } +} + +class Shift : public cudf::benchmark {}; + +#define SHIFT_BM_BENCHMARK_DEFINE(name, use_validity, shift_factor) \ + BENCHMARK_DEFINE_F(Shift, name)(::benchmark::State & state) \ + { \ + BM_shift(state); \ + } \ + BENCHMARK_REGISTER_F(Shift, name) \ + ->RangeMultiplier(32) \ + ->Range(1 << 10, 1 << 30) \ + ->UseManualTime() \ + ->Unit(benchmark::kMillisecond); + +SHIFT_BM_BENCHMARK_DEFINE(shift_zero, false, 0); +SHIFT_BM_BENCHMARK_DEFINE(shift_zero_nullable_out, true, 0); + +SHIFT_BM_BENCHMARK_DEFINE(shift_ten_percent, false, 10); +SHIFT_BM_BENCHMARK_DEFINE(shift_ten_percent_nullable_out, true, 10); + +SHIFT_BM_BENCHMARK_DEFINE(shift_half, false, 50); +SHIFT_BM_BENCHMARK_DEFINE(shift_half_nullable_out, true, 50); + +SHIFT_BM_BENCHMARK_DEFINE(shift_full, false, 100); +SHIFT_BM_BENCHMARK_DEFINE(shift_full_nullable_out, true, 100); diff --git a/cpp/benchmarks/filling/repeat.cpp b/cpp/benchmarks/filling/repeat.cpp new file mode 100644 index 0000000..0abef46 --- /dev/null +++ b/cpp/benchmarks/filling/repeat.cpp @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include + +#include + +class Repeat : public cudf::benchmark {}; + +template +void BM_repeat(benchmark::State& state) +{ + auto const n_rows = static_cast(state.range(0)); + auto const n_cols = static_cast(state.range(1)); + + auto const input_table = + create_sequence_table(cycle_dtypes({cudf::type_to_id()}, n_cols), + row_count{n_rows}, + nulls ? std::optional{1.0} : std::nullopt); + // Create table view + auto input = cudf::table_view(*input_table); + + // repeat counts + using sizeT = cudf::size_type; + data_profile const profile = data_profile_builder().cardinality(0).no_validity().distribution( + cudf::type_to_id(), distribution_id::UNIFORM, 0, 3); + auto repeat_count = create_random_column(cudf::type_to_id(), row_count{n_rows}, profile); + + // warm up + auto output = cudf::repeat(input, *repeat_count); + + for (auto _ : state) { + cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0 + cudf::repeat(input, *repeat_count); + } + + auto data_bytes = + (input.num_columns() * input.num_rows() + output->num_columns() * output->num_rows()) * + sizeof(TypeParam); + auto null_bytes = + nulls ? 
input.num_columns() * cudf::bitmask_allocation_size_bytes(input.num_rows()) + + output->num_columns() * cudf::bitmask_allocation_size_bytes(output->num_rows()) + : 0; + state.SetBytesProcessed(state.iterations() * (data_bytes + null_bytes)); +} + +#define REPEAT_BENCHMARK_DEFINE(name, type, nulls) \ + BENCHMARK_DEFINE_F(Repeat, name)(::benchmark::State & state) { BM_repeat(state); } \ + BENCHMARK_REGISTER_F(Repeat, name) \ + ->RangeMultiplier(8) \ + ->Ranges({{1 << 10, 1 << 26}, {1, 8}}) \ + ->UseManualTime() \ + ->Unit(benchmark::kMillisecond); + +REPEAT_BENCHMARK_DEFINE(double_nulls, double, true); +REPEAT_BENCHMARK_DEFINE(double_no_nulls, double, false); diff --git a/cpp/benchmarks/fixture/benchmark_fixture.hpp b/cpp/benchmarks/fixture/benchmark_fixture.hpp new file mode 100644 index 0000000..bc6c2e5 --- /dev/null +++ b/cpp/benchmarks/fixture/benchmark_fixture.hpp @@ -0,0 +1,124 @@ +/* + * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace cudf { + +namespace { +// memory resource factory helpers +inline auto make_cuda() { return std::make_shared(); } + +inline auto make_pool_instance() +{ + static rmm::mr::cuda_memory_resource cuda_mr; + static auto pool_mr = + std::make_shared>(&cuda_mr); + return pool_mr; +} +} // namespace + +/** + * @brief Google Benchmark fixture for libcudf benchmarks + * + * libcudf benchmarks should use a fixture derived from this fixture class to + * ensure that the RAPIDS Memory Manager pool mode is used in benchmarks, which + * eliminates memory allocation / deallocation performance overhead from the + * benchmark. + * + * The SetUp and TearDown methods of this fixture initialize RMM into pool mode + * and finalize it, respectively. These methods are called automatically by + * Google Benchmark + * + * Example: + * + * template + * class my_benchmark : public cudf::benchmark { + * public: + * using TypeParam = T; + * }; + * + * Then: + * + * BENCHMARK_TEMPLATE_DEFINE_F(my_benchmark, my_test_name, int) + * (::benchmark::State& state) { + * for (auto _ : state) { + * // benchmark stuff + * } + * } + * + * BENCHMARK_REGISTER_F(my_benchmark, my_test_name)->Range(128, 512); + */ +class benchmark : public ::benchmark::Fixture { + public: + benchmark() : ::benchmark::Fixture() + { + char const* env_iterations = std::getenv("CUDF_BENCHMARK_ITERATIONS"); + if (env_iterations != nullptr) { this->Iterations(std::max(0L, atol(env_iterations))); } + } + + void SetUp(::benchmark::State const& state) override + { + mr = make_pool_instance(); + rmm::mr::set_current_device_resource(mr.get()); // set default resource to pool + } + + void TearDown(::benchmark::State const& state) override + { + // reset default resource to the initial resource + rmm::mr::set_current_device_resource(nullptr); + mr.reset(); + } + + // eliminate partial override warnings (see benchmark/benchmark.h) + void 
SetUp(::benchmark::State& st) override { SetUp(const_cast<::benchmark::State const&>(st)); } + void TearDown(::benchmark::State& st) override + { + TearDown(const_cast<::benchmark::State const&>(st)); + } + + std::shared_ptr mr; +}; + +class memory_stats_logger { + public: + memory_stats_logger() + : existing_mr(rmm::mr::get_current_device_resource()), + statistics_mr(rmm::mr::make_statistics_adaptor(existing_mr)) + { + rmm::mr::set_current_device_resource(&statistics_mr); + } + + ~memory_stats_logger() { rmm::mr::set_current_device_resource(existing_mr); } + + [[nodiscard]] size_t peak_memory_usage() const noexcept + { + return statistics_mr.get_bytes_counter().peak; + } + + private: + rmm::mr::device_memory_resource* existing_mr; + rmm::mr::statistics_resource_adaptor statistics_mr; +}; + +} // namespace cudf diff --git a/cpp/benchmarks/fixture/nvbench_fixture.hpp b/cpp/benchmarks/fixture/nvbench_fixture.hpp new file mode 100644 index 0000000..e08f910 --- /dev/null +++ b/cpp/benchmarks/fixture/nvbench_fixture.hpp @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace cudf { +namespace detail { +static std::string rmm_mode_param{"--rmm_mode"}; ///< RMM mode command-line parameter name +} // namespace detail + +/** + * Base fixture for cudf benchmarks using nvbench. 
+ * + * Initializes the default memory resource to use the RMM pool device resource. + */ +struct nvbench_base_fixture { + inline auto make_cuda() { return std::make_shared(); } + + inline auto make_pool() + { + return rmm::mr::make_owning_wrapper(make_cuda()); + } + + inline auto make_async() { return std::make_shared(); } + + inline auto make_managed() { return std::make_shared(); } + + inline auto make_arena() + { + return rmm::mr::make_owning_wrapper(make_cuda()); + } + + inline auto make_managed_pool() + { + return rmm::mr::make_owning_wrapper(make_managed()); + } + + inline std::shared_ptr create_memory_resource( + std::string const& mode) + { + if (mode == "cuda") return make_cuda(); + if (mode == "pool") return make_pool(); + if (mode == "async") return make_async(); + if (mode == "arena") return make_arena(); + if (mode == "managed") return make_managed(); + if (mode == "managed_pool") return make_managed_pool(); + CUDF_FAIL("Unknown rmm_mode parameter: " + mode + + "\nExpecting: cuda, pool, async, arena, managed, or managed_pool"); + } + + nvbench_base_fixture(int argc, char const* const* argv) + { + for (int i = 1; i < argc - 1; ++i) { + std::string arg = argv[i]; + if (arg == detail::rmm_mode_param) { + i++; + rmm_mode = argv[i]; + } + } + + mr = create_memory_resource(rmm_mode); + rmm::mr::set_current_device_resource(mr.get()); + std::cout << "RMM memory resource = " << rmm_mode << "\n"; + } + + std::shared_ptr mr; + std::string rmm_mode{"pool"}; +}; + +} // namespace cudf diff --git a/cpp/benchmarks/fixture/nvbench_main.cpp b/cpp/benchmarks/fixture/nvbench_main.cpp new file mode 100644 index 0000000..64c4d83 --- /dev/null +++ b/cpp/benchmarks/fixture/nvbench_main.cpp @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#define NVBENCH_ENVIRONMENT cudf::nvbench_base_fixture + +#include + +#include + +// strip off the rmm_mode parameter before passing the +// remaining arguments to nvbench::option_parser +#undef NVBENCH_MAIN_PARSE +#define NVBENCH_MAIN_PARSE(argc, argv) \ + nvbench::option_parser parser; \ + std::vector m_args; \ + for (int i = 0; i < argc; ++i) { \ + std::string arg = argv[i]; \ + if (arg == cudf::detail::rmm_mode_param) { \ + i += 2; \ + } else { \ + m_args.push_back(arg); \ + } \ + } \ + parser.parse(m_args) + +// this declares/defines the main() function using the definitions above +NVBENCH_MAIN diff --git a/cpp/benchmarks/fixture/templated_benchmark_fixture.hpp b/cpp/benchmarks/fixture/templated_benchmark_fixture.hpp new file mode 100644 index 0000000..57f5286 --- /dev/null +++ b/cpp/benchmarks/fixture/templated_benchmark_fixture.hpp @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include + +namespace cudf { +/** + * @brief Templated Google Benchmark with fixture + * + * Extends Google benchmarks to support templated Benchmarks with non-templated fixture class. + * + * The SetUp and TearDown methods is called before each templated benchmark function is run. + * These methods are called automatically by Google Benchmark + * + * Example: + * + * @code + * template + * void my_benchmark(::benchmark::State& state) { + * std::vector v1(state.range(0)); + * std::vector v2(state.range(0)); + * for (auto _ : state) { + * // benchmark stuff + * } + * } + * + * TEMPLATED_BENCHMARK_F(cudf::benchmark, my_benchmark, int, double)->Range(128, 512); + * @endcode + */ +template +class FunctionTemplateBenchmark : public Fixture { + public: + FunctionTemplateBenchmark(char const* name, ::benchmark::internal::Function* func) + : Fixture(), func_(func) + { + this->SetName(name); + } + + virtual void Run(::benchmark::State& st) + { + this->SetUp(st); + this->BenchmarkCase(st); + this->TearDown(st); + } + + private: + ::benchmark::internal::Function* func_; + + protected: + virtual void BenchmarkCase(::benchmark::State& st) { func_(st); } +}; + +#define TEMPLATED_BENCHMARK_F(BaseClass, n, ...) \ + BENCHMARK_PRIVATE_DECLARE(n) = (::benchmark::internal::RegisterBenchmarkInternal( \ + new cudf::FunctionTemplateBenchmark(#BaseClass "/" #n "<" #__VA_ARGS__ ">", \ + n<__VA_ARGS__>))) + +} // namespace cudf diff --git a/cpp/benchmarks/groupby/group_common.hpp b/cpp/benchmarks/groupby/group_common.hpp new file mode 100644 index 0000000..fba5bc2 --- /dev/null +++ b/cpp/benchmarks/groupby/group_common.hpp @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +template +T random_int(T min, T max) +{ + static unsigned seed = 13377331; + static std::mt19937 engine{seed}; + static std::uniform_int_distribution uniform{min, max}; + + return uniform(engine); +} diff --git a/cpp/benchmarks/groupby/group_max.cpp b/cpp/benchmarks/groupby/group_max.cpp new file mode 100644 index 0000000..e65c37f --- /dev/null +++ b/cpp/benchmarks/groupby/group_max.cpp @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +#include + +#include + +template +void bench_groupby_max(nvbench::state& state, nvbench::type_list) +{ + auto const size = static_cast(state.get_int64("num_rows")); + + auto const keys = [&] { + data_profile const profile = data_profile_builder().cardinality(0).no_validity().distribution( + cudf::type_to_id(), distribution_id::UNIFORM, 0, 100); + return create_random_column(cudf::type_to_id(), row_count{size}, profile); + }(); + + auto const vals = [&] { + auto builder = data_profile_builder().cardinality(0).distribution( + cudf::type_to_id(), distribution_id::UNIFORM, 0, 1000); + if (const auto null_freq = state.get_float64("null_probability"); null_freq > 0) { + builder.null_probability(null_freq); + } else { + builder.no_validity(); + } + return create_random_column(cudf::type_to_id(), row_count{size}, data_profile{builder}); + }(); + + auto keys_view = keys->view(); + auto gb_obj = cudf::groupby::groupby(cudf::table_view({keys_view, keys_view, keys_view})); + + std::vector requests; + requests.emplace_back(cudf::groupby::aggregation_request()); + requests[0].values = vals->view(); + requests[0].aggregations.push_back(cudf::make_max_aggregation()); + + state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); + state.exec(nvbench::exec_tag::sync, + [&](nvbench::launch& launch) { auto const result = gb_obj.aggregate(requests); }); +} + +NVBENCH_BENCH_TYPES(bench_groupby_max, + NVBENCH_TYPE_AXES(nvbench::type_list)) + .set_name("groupby_max") + .add_int64_power_of_two_axis("num_rows", {12, 18, 24}) + .add_float64_axis("null_probability", {0, 0.1, 0.9}); diff --git a/cpp/benchmarks/groupby/group_no_requests.cpp b/cpp/benchmarks/groupby/group_no_requests.cpp new file mode 100644 index 0000000..34618ac --- /dev/null +++ b/cpp/benchmarks/groupby/group_no_requests.cpp @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2021-2023, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include + +#include +#include +#include +#include + +class Groupby : public cudf::benchmark {}; + +void BM_basic_no_requests(benchmark::State& state) +{ + cudf::size_type const column_size{(cudf::size_type)state.range(0)}; + + data_profile const profile = data_profile_builder().cardinality(0).no_validity().distribution( + cudf::type_to_id(), distribution_id::UNIFORM, 0, 100); + auto keys_table = + create_random_table({cudf::type_to_id()}, row_count{column_size}, profile); + + std::vector requests; + + for (auto _ : state) { + cuda_event_timer timer(state, true); + cudf::groupby::groupby gb_obj(*keys_table); + auto result = gb_obj.aggregate(requests); + } +} + +BENCHMARK_DEFINE_F(Groupby, BasicNoRequest)(::benchmark::State& state) +{ + BM_basic_no_requests(state); +} + +BENCHMARK_REGISTER_F(Groupby, BasicNoRequest) + ->UseManualTime() + ->Unit(benchmark::kMillisecond) + ->Arg(10000) + ->Arg(1000000) + ->Arg(10000000) + ->Arg(100000000); + +void BM_pre_sorted_no_requests(benchmark::State& state) +{ + cudf::size_type const column_size{(cudf::size_type)state.range(0)}; + + data_profile const profile = data_profile_builder().cardinality(0).no_validity().distribution( + cudf::type_to_id(), distribution_id::UNIFORM, 0, 100); + auto keys_table = + create_random_table({cudf::type_to_id()}, row_count{column_size}, profile); + + auto sort_order = cudf::sorted_order(*keys_table); + 
auto sorted_keys = cudf::gather(*keys_table, *sort_order); + // No need to sort values using sort_order because they were generated randomly + + std::vector requests; + + for (auto _ : state) { + cuda_event_timer timer(state, true); + cudf::groupby::groupby gb_obj(*sorted_keys, cudf::null_policy::EXCLUDE, cudf::sorted::YES); + auto result = gb_obj.aggregate(requests); + } +} + +BENCHMARK_DEFINE_F(Groupby, PreSortedNoRequests)(::benchmark::State& state) +{ + BM_pre_sorted_no_requests(state); +} + +BENCHMARK_REGISTER_F(Groupby, PreSortedNoRequests) + ->UseManualTime() + ->Unit(benchmark::kMillisecond) + ->Arg(1000000) + ->Arg(10000000) + ->Arg(100000000); diff --git a/cpp/benchmarks/groupby/group_nth.cpp b/cpp/benchmarks/groupby/group_nth.cpp new file mode 100644 index 0000000..f2c2443 --- /dev/null +++ b/cpp/benchmarks/groupby/group_nth.cpp @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include + +#include +#include +#include +#include + +class Groupby : public cudf::benchmark {}; + +void BM_pre_sorted_nth(benchmark::State& state) +{ + // const cudf::size_type num_columns{(cudf::size_type)state.range(0)}; + cudf::size_type const column_size{(cudf::size_type)state.range(0)}; + + data_profile const profile = data_profile_builder().cardinality(0).no_validity().distribution( + cudf::type_to_id(), distribution_id::UNIFORM, 0, 100); + auto keys_table = + create_random_table({cudf::type_to_id()}, row_count{column_size}, profile); + auto vals = create_random_column(cudf::type_to_id(), row_count{column_size}, profile); + + auto sort_order = cudf::sorted_order(*keys_table); + auto sorted_keys = cudf::gather(*keys_table, *sort_order); + // No need to sort values using sort_order because they were generated randomly + + cudf::groupby::groupby gb_obj(*sorted_keys, cudf::null_policy::EXCLUDE, cudf::sorted::YES); + + std::vector requests; + requests.emplace_back(cudf::groupby::aggregation_request()); + requests[0].values = vals->view(); + requests[0].aggregations.push_back( + cudf::make_nth_element_aggregation(-1)); + + for (auto _ : state) { + cuda_event_timer timer(state, true); + auto result = gb_obj.aggregate(requests); + } +} + +BENCHMARK_DEFINE_F(Groupby, PreSortedNth)(::benchmark::State& state) { BM_pre_sorted_nth(state); } + +BENCHMARK_REGISTER_F(Groupby, PreSortedNth) + ->UseManualTime() + ->Unit(benchmark::kMillisecond) + ->Arg(1000000) /* 1M */ + ->Arg(10000000) /* 10M */ + ->Arg(100000000); /* 100M */ diff --git a/cpp/benchmarks/groupby/group_nunique.cpp b/cpp/benchmarks/groupby/group_nunique.cpp new file mode 100644 index 0000000..63d738b --- /dev/null +++ b/cpp/benchmarks/groupby/group_nunique.cpp @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include + +#include + +namespace { + +template +auto make_aggregation_request_vector(cudf::column_view const& values, Args&&... args) +{ + std::vector> aggregations; + (aggregations.emplace_back(std::forward(args)), ...); + + std::vector requests; + requests.emplace_back(cudf::groupby::aggregation_request{values, std::move(aggregations)}); + + return requests; +} + +} // namespace + +template +void bench_groupby_nunique(nvbench::state& state, nvbench::type_list) +{ + auto const size = static_cast(state.get_int64("num_rows")); + + auto const keys = [&] { + data_profile profile = data_profile_builder().cardinality(0).no_validity().distribution( + cudf::type_to_id(), distribution_id::UNIFORM, 0, 100); + return create_random_column(cudf::type_to_id(), row_count{size}, profile); + }(); + + auto const vals = [&] { + data_profile profile = data_profile_builder().cardinality(0).distribution( + cudf::type_to_id(), distribution_id::UNIFORM, 0, 1000); + if (const auto null_freq = state.get_float64("null_probability"); null_freq > 0) { + profile.set_null_probability(null_freq); + } else { + profile.set_null_probability(std::nullopt); + } + return create_random_column(cudf::type_to_id(), row_count{size}, profile); + }(); + + auto gb_obj = + cudf::groupby::groupby(cudf::table_view({keys->view(), keys->view(), keys->view()})); + auto const requests = make_aggregation_request_vector( + *vals, cudf::make_nunique_aggregation()); + + state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); + 
state.exec(nvbench::exec_tag::sync, + [&](nvbench::launch& launch) { auto const result = gb_obj.aggregate(requests); }); +} + +NVBENCH_BENCH_TYPES(bench_groupby_nunique, NVBENCH_TYPE_AXES(nvbench::type_list)) + .set_name("groupby_nunique") + .add_int64_power_of_two_axis("num_rows", {12, 16, 20, 24}) + .add_float64_axis("null_probability", {0, 0.5}); diff --git a/cpp/benchmarks/groupby/group_rank.cpp b/cpp/benchmarks/groupby/group_rank.cpp new file mode 100644 index 0000000..2122720 --- /dev/null +++ b/cpp/benchmarks/groupby/group_rank.cpp @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include +#include + +#include +#include +#include +#include + +#include + +template +static void nvbench_groupby_rank(nvbench::state& state, + nvbench::type_list>) +{ + constexpr auto dtype = cudf::type_to_id(); + + bool const is_sorted = state.get_int64("is_sorted"); + cudf::size_type const column_size = state.get_int64("data_size"); + constexpr int num_groups = 100; + + data_profile const profile = data_profile_builder().cardinality(0).no_validity().distribution( + dtype, distribution_id::UNIFORM, 0, num_groups); + + auto source_table = create_random_table({dtype, dtype}, row_count{column_size}, profile); + + // values to be pre-sorted too for groupby rank + if (is_sorted) source_table = cudf::sort(*source_table); + + cudf::table_view keys{{source_table->view().column(0)}}; + cudf::column_view order_by{source_table->view().column(1)}; + + auto agg = cudf::make_rank_aggregation(method); + std::vector requests; + requests.emplace_back(cudf::groupby::scan_request()); + requests[0].values = order_by; + requests[0].aggregations.push_back(std::move(agg)); + + cudf::groupby::groupby gb_obj( + keys, cudf::null_policy::EXCLUDE, is_sorted ? 
cudf::sorted::YES : cudf::sorted::NO); + + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + rmm::cuda_stream_view stream_view{launch.get_stream()}; + // groupby scan uses sort implementation + auto result = gb_obj.scan(requests); + }); +} + +enum class rank_method : int32_t {}; + +NVBENCH_DECLARE_ENUM_TYPE_STRINGS( + cudf::rank_method, + [](cudf::rank_method value) { + switch (value) { + case cudf::rank_method::FIRST: return "FIRST"; + case cudf::rank_method::AVERAGE: return "AVERAGE"; + case cudf::rank_method::MIN: return "MIN"; + case cudf::rank_method::MAX: return "MAX"; + case cudf::rank_method::DENSE: return "DENSE"; + default: return "unknown"; + } + }, + [](cudf::rank_method value) { + switch (value) { + case cudf::rank_method::FIRST: return "cudf::rank_method::FIRST"; + case cudf::rank_method::AVERAGE: return "cudf::rank_method::AVERAGE"; + case cudf::rank_method::MIN: return "cudf::rank_method::MIN"; + case cudf::rank_method::MAX: return "cudf::rank_method::MAX"; + case cudf::rank_method::DENSE: return "cudf::rank_method::DENSE"; + default: return "unknown"; + } + }) + +using methods = nvbench::enum_type_list; + +NVBENCH_BENCH_TYPES(nvbench_groupby_rank, NVBENCH_TYPE_AXES(methods)) + .set_type_axes_names({"rank_method"}) + .set_name("groupby_rank") + .add_int64_axis("data_size", + { + 1000000, // 1M + 10000000, // 10M + 100000000, // 100M + }) + + .add_int64_axis("is_sorted", {0, 1}); diff --git a/cpp/benchmarks/groupby/group_scan.cpp b/cpp/benchmarks/groupby/group_scan.cpp new file mode 100644 index 0000000..2ae5b6f --- /dev/null +++ b/cpp/benchmarks/groupby/group_scan.cpp @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +class Groupby : public cudf::benchmark {}; + +void BM_basic_sum_scan(benchmark::State& state) +{ + cudf::size_type const column_size{(cudf::size_type)state.range(0)}; + + data_profile const profile = data_profile_builder().cardinality(0).no_validity().distribution( + cudf::type_to_id(), distribution_id::UNIFORM, 0, 100); + auto keys = create_random_column(cudf::type_to_id(), row_count{column_size}, profile); + auto vals = create_random_column(cudf::type_to_id(), row_count{column_size}, profile); + + cudf::groupby::groupby gb_obj(cudf::table_view({keys->view(), keys->view(), keys->view()})); + + std::vector requests; + requests.emplace_back(cudf::groupby::scan_request()); + requests[0].values = vals->view(); + requests[0].aggregations.push_back(cudf::make_sum_aggregation()); + + for (auto _ : state) { + cuda_event_timer timer(state, true); + + auto result = gb_obj.scan(requests); + } +} + +BENCHMARK_DEFINE_F(Groupby, BasicSumScan)(::benchmark::State& state) { BM_basic_sum_scan(state); } + +BENCHMARK_REGISTER_F(Groupby, BasicSumScan) + ->UseManualTime() + ->Unit(benchmark::kMillisecond) + ->Arg(1000000) + ->Arg(10000000) + ->Arg(100000000); + +void BM_pre_sorted_sum_scan(benchmark::State& state) +{ + cudf::size_type const column_size{(cudf::size_type)state.range(0)}; + + data_profile profile = data_profile_builder().cardinality(0).no_validity().distribution( + cudf::type_to_id(), distribution_id::UNIFORM, 0, 100); + auto keys_table = + 
create_random_table({cudf::type_to_id()}, row_count{column_size}, profile); + profile.set_null_probability(0.1); + auto vals = create_random_column(cudf::type_to_id(), row_count{column_size}, profile); + + auto sort_order = cudf::sorted_order(*keys_table); + auto sorted_keys = cudf::gather(*keys_table, *sort_order); + // No need to sort values using sort_order because they were generated randomly + + cudf::groupby::groupby gb_obj(*sorted_keys, cudf::null_policy::EXCLUDE, cudf::sorted::YES); + + std::vector requests; + requests.emplace_back(cudf::groupby::scan_request()); + requests[0].values = vals->view(); + requests[0].aggregations.push_back(cudf::make_sum_aggregation()); + + for (auto _ : state) { + cuda_event_timer timer(state, true); + + auto result = gb_obj.scan(requests); + } +} + +BENCHMARK_DEFINE_F(Groupby, PreSortedSumScan)(::benchmark::State& state) +{ + BM_pre_sorted_sum_scan(state); +} + +BENCHMARK_REGISTER_F(Groupby, PreSortedSumScan) + ->UseManualTime() + ->Unit(benchmark::kMillisecond) + ->Arg(1000000) + ->Arg(10000000) + ->Arg(100000000); diff --git a/cpp/benchmarks/groupby/group_shift.cpp b/cpp/benchmarks/groupby/group_shift.cpp new file mode 100644 index 0000000..eda2b3d --- /dev/null +++ b/cpp/benchmarks/groupby/group_shift.cpp @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include + +#include +#include +#include +#include + +class Groupby : public cudf::benchmark {}; + +void BM_group_shift(benchmark::State& state) +{ + cudf::size_type const column_size{(cudf::size_type)state.range(0)}; + int const num_groups = 100; + + data_profile const profile = + data_profile_builder().cardinality(0).null_probability(0.01).distribution( + cudf::type_to_id(), distribution_id::UNIFORM, 0, num_groups); + + auto keys_table = + create_random_table({cudf::type_to_id()}, row_count{column_size}, profile); + auto vals_table = + create_random_table({cudf::type_to_id()}, row_count{column_size}, profile); + + cudf::groupby::groupby gb_obj(*keys_table); + + std::vector offsets{ + static_cast(column_size / float(num_groups) * 0.5)}; // forward shift half way + // null fill value + auto fill_value = cudf::make_default_constructed_scalar(cudf::data_type(cudf::type_id::INT64)); + // non null fill value + // auto fill_value = cudf::make_fixed_width_scalar(static_cast(42)); + + for (auto _ : state) { + cuda_event_timer timer(state, true); + auto result = gb_obj.shift(*vals_table, offsets, {*fill_value}); + } +} + +BENCHMARK_DEFINE_F(Groupby, Shift)(::benchmark::State& state) { BM_group_shift(state); } + +BENCHMARK_REGISTER_F(Groupby, Shift) + ->Arg(1000000) + ->Arg(10000000) + ->Arg(100000000) + ->UseManualTime() + ->Unit(benchmark::kMillisecond); diff --git a/cpp/benchmarks/groupby/group_struct_keys.cpp b/cpp/benchmarks/groupby/group_struct_keys.cpp new file mode 100644 index 0000000..44a12c1 --- /dev/null +++ b/cpp/benchmarks/groupby/group_struct_keys.cpp @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include + +#include +#include +#include + +#include + +#include + +void bench_groupby_struct_keys(nvbench::state& state) +{ + using Type = int; + using column_wrapper = cudf::test::fixed_width_column_wrapper; + std::default_random_engine generator; + std::uniform_int_distribution distribution(0, 100); + + cudf::size_type const n_rows{static_cast(state.get_int64("NumRows"))}; + cudf::size_type const n_cols{1}; + cudf::size_type const depth{static_cast(state.get_int64("Depth"))}; + bool const nulls{static_cast(state.get_int64("Nulls"))}; + + // Create columns with values in the range [0,100) + std::vector columns; + columns.reserve(n_cols); + std::generate_n(std::back_inserter(columns), n_cols, [&]() { + auto const elements = cudf::detail::make_counting_transform_iterator( + 0, [&](auto row) { return distribution(generator); }); + if (!nulls) return column_wrapper(elements, elements + n_rows); + auto valids = + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 10 != 0; }); + return column_wrapper(elements, elements + n_rows, valids); + }); + + std::vector> cols; + std::transform(columns.begin(), columns.end(), std::back_inserter(cols), [](column_wrapper& col) { + return col.release(); + }); + + std::vector> child_cols = std::move(cols); + // Add some layers + for (int i = 0; i < depth; i++) { + std::vector struct_validity; + std::uniform_int_distribution bool_distribution(0, 100 * (i + 1)); + std::generate_n( + std::back_inserter(struct_validity), n_rows, [&]() { return bool_distribution(generator); }); + 
cudf::test::structs_column_wrapper struct_col(std::move(child_cols), struct_validity); + child_cols = std::vector>{}; + child_cols.push_back(struct_col.release()); + } + data_profile const profile = data_profile_builder().cardinality(0).no_validity().distribution( + cudf::type_to_id(), distribution_id::UNIFORM, 0, 100); + + auto const keys_table = cudf::table(std::move(child_cols)); + auto const vals = create_random_column(cudf::type_to_id(), row_count{n_rows}, profile); + + cudf::groupby::groupby gb_obj(keys_table.view()); + + std::vector requests; + requests.emplace_back(cudf::groupby::aggregation_request()); + requests[0].values = vals->view(); + requests[0].aggregations.push_back(cudf::make_min_aggregation()); + + // Set up nvbench default stream + auto stream = cudf::get_default_stream(); + state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value())); + + state.exec(nvbench::exec_tag::sync, + [&](nvbench::launch& launch) { auto const result = gb_obj.aggregate(requests); }); +} + +NVBENCH_BENCH(bench_groupby_struct_keys) + .set_name("groupby_struct_keys") + .add_int64_power_of_two_axis("NumRows", {10, 16, 20}) + .add_int64_axis("Depth", {0, 1, 8}) + .add_int64_axis("Nulls", {0, 1}); diff --git a/cpp/benchmarks/groupby/group_struct_values.cpp b/cpp/benchmarks/groupby/group_struct_values.cpp new file mode 100644 index 0000000..024fd37 --- /dev/null +++ b/cpp/benchmarks/groupby/group_struct_values.cpp @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +#include +#include +#include +#include + +static constexpr cudf::size_type num_struct_members = 8; +static constexpr cudf::size_type max_int = 100; +static constexpr cudf::size_type max_str_length = 32; + +static auto create_data_table(cudf::size_type n_rows) +{ + data_profile const table_profile = + data_profile_builder() + .distribution(cudf::type_id::INT32, distribution_id::UNIFORM, 0, max_int) + .distribution(cudf::type_id::STRING, distribution_id::NORMAL, 0, max_str_length); + + // The first two struct members are int32 and string. + // The first column is also used as keys in groupby. + // The subsequent struct members are int32 and string again. + return create_random_table( + cycle_dtypes({cudf::type_id::INT32, cudf::type_id::STRING}, num_struct_members), + row_count{n_rows}, + table_profile); +} + +// Max aggregation/scan technically has the same performance as min. 
+template +void BM_groupby_min_struct(benchmark::State& state) +{ + auto const n_rows = static_cast(state.range(0)); + auto data_cols = create_data_table(n_rows)->release(); + + auto const keys_view = data_cols.front()->view(); + auto const values = + cudf::make_structs_column(keys_view.size(), std::move(data_cols), 0, rmm::device_buffer()); + + using RequestType = std::conditional_t, + cudf::groupby::aggregation_request, + cudf::groupby::scan_request>; + + auto gb_obj = cudf::groupby::groupby(cudf::table_view({keys_view})); + auto requests = std::vector(); + requests.emplace_back(RequestType()); + requests.front().values = values->view(); + requests.front().aggregations.push_back(cudf::make_min_aggregation()); + + for (auto _ : state) { + [[maybe_unused]] auto const timer = cuda_event_timer(state, true); + if constexpr (std::is_same_v) { + [[maybe_unused]] auto const result = gb_obj.aggregate(requests); + } else { + [[maybe_unused]] auto const result = gb_obj.scan(requests); + } + } +} + +class Groupby : public cudf::benchmark {}; + +#define MIN_RANGE 10'000 +#define MAX_RANGE 10'000'000 + +#define REGISTER_BENCHMARK(name, op_type) \ + BENCHMARK_DEFINE_F(Groupby, name)(::benchmark::State & state) \ + { \ + BM_groupby_min_struct(state); \ + } \ + BENCHMARK_REGISTER_F(Groupby, name) \ + ->UseManualTime() \ + ->Unit(benchmark::kMillisecond) \ + ->RangeMultiplier(4) \ + ->Ranges({{MIN_RANGE, MAX_RANGE}}); + +REGISTER_BENCHMARK(Aggregation, cudf::groupby_aggregation) +REGISTER_BENCHMARK(Scan, cudf::groupby_scan_aggregation) diff --git a/cpp/benchmarks/groupby/group_sum.cpp b/cpp/benchmarks/groupby/group_sum.cpp new file mode 100644 index 0000000..b3fd881 --- /dev/null +++ b/cpp/benchmarks/groupby/group_sum.cpp @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include + +#include +#include +#include +#include + +class Groupby : public cudf::benchmark {}; + +void BM_basic_sum(benchmark::State& state) +{ + cudf::size_type const column_size{(cudf::size_type)state.range(0)}; + + data_profile const profile = data_profile_builder().cardinality(0).no_validity().distribution( + cudf::type_to_id(), distribution_id::UNIFORM, 0, 100); + auto keys = create_random_column(cudf::type_to_id(), row_count{column_size}, profile); + auto vals = create_random_column(cudf::type_to_id(), row_count{column_size}, profile); + + cudf::groupby::groupby gb_obj(cudf::table_view({keys->view(), keys->view(), keys->view()})); + + std::vector requests; + requests.emplace_back(cudf::groupby::aggregation_request()); + requests[0].values = vals->view(); + requests[0].aggregations.push_back(cudf::make_sum_aggregation()); + + for (auto _ : state) { + cuda_event_timer timer(state, true); + + auto result = gb_obj.aggregate(requests); + } +} + +BENCHMARK_DEFINE_F(Groupby, Basic)(::benchmark::State& state) { BM_basic_sum(state); } + +BENCHMARK_REGISTER_F(Groupby, Basic) + ->UseManualTime() + ->Unit(benchmark::kMillisecond) + ->Arg(10000) + ->Arg(1000000) + ->Arg(10000000) + ->Arg(100000000); + +void BM_pre_sorted_sum(benchmark::State& state) +{ + cudf::size_type const column_size{(cudf::size_type)state.range(0)}; + + data_profile profile = data_profile_builder().cardinality(0).no_validity().distribution( + cudf::type_to_id(), distribution_id::UNIFORM, 0, 100); + auto keys_table = + 
create_random_table({cudf::type_to_id()}, row_count{column_size}, profile); + profile.set_null_probability(0.1); + auto vals = create_random_column(cudf::type_to_id(), row_count{column_size}, profile); + + auto sort_order = cudf::sorted_order(*keys_table); + auto sorted_keys = cudf::gather(*keys_table, *sort_order); + // No need to sort values using sort_order because they were generated randomly + + cudf::groupby::groupby gb_obj(*sorted_keys, cudf::null_policy::EXCLUDE, cudf::sorted::YES); + + std::vector requests; + requests.emplace_back(cudf::groupby::aggregation_request()); + requests[0].values = vals->view(); + requests[0].aggregations.push_back(cudf::make_sum_aggregation()); + + for (auto _ : state) { + cuda_event_timer timer(state, true); + + auto result = gb_obj.aggregate(requests); + } +} + +BENCHMARK_DEFINE_F(Groupby, PreSorted)(::benchmark::State& state) { BM_pre_sorted_sum(state); } + +BENCHMARK_REGISTER_F(Groupby, PreSorted) + ->UseManualTime() + ->Unit(benchmark::kMillisecond) + ->Arg(1000000) + ->Arg(10000000) + ->Arg(100000000); diff --git a/cpp/benchmarks/hashing/hash.cpp b/cpp/benchmarks/hashing/hash.cpp new file mode 100644 index 0000000..e679b4b --- /dev/null +++ b/cpp/benchmarks/hashing/hash.cpp @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +#include +#include +#include +#include + +#include + +#include + +static void bench_hash(nvbench::state& state) +{ + auto const num_rows = static_cast(state.get_int64("num_rows")); + auto const nulls = state.get_float64("nulls"); + // disable null bitmask if probability is exactly 0.0 + bool const no_nulls = nulls == 0.0; + auto const hash_name = state.get_string("hash_name"); + + data_profile const profile = + data_profile_builder().null_probability(no_nulls ? std::nullopt : std::optional{nulls}); + auto const data = create_random_table( + {cudf::type_id::INT64, cudf::type_id::STRING}, row_count{num_rows}, profile); + + auto stream = cudf::get_default_stream(); + state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value())); + + // collect statistics + cudf::strings_column_view input(data->get_column(1).view()); + auto const chars_size = input.chars_size(); + // add memory read from string column + state.add_global_memory_reads(chars_size); + // add memory read from int64_t column + state.add_global_memory_reads(num_rows); + // add memory read from bitmaks + if (!no_nulls) { + state.add_global_memory_reads(2 * + cudf::bitmask_allocation_size_bytes(num_rows)); + } + // memory written depends on used hash + + if (hash_name == "murmurhash3_x86_32") { + state.add_global_memory_writes(num_rows); + + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + auto result = cudf::hashing::murmurhash3_x86_32(data->view()); + }); + } else if (hash_name == "md5") { + // md5 creates a 32-byte string + state.add_global_memory_writes(32 * num_rows); + + state.exec(nvbench::exec_tag::sync, + [&](nvbench::launch& launch) { auto result = cudf::hashing::md5(data->view()); }); + } else if (hash_name == "spark_murmurhash3_x86_32") { + state.add_global_memory_writes(num_rows); + + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + auto result = cudf::hashing::spark_murmurhash3_x86_32(data->view()); + }); + } else { + 
state.skip(hash_name + ": unknown hash name"); + } +} + +NVBENCH_BENCH(bench_hash) + .set_name("hashing") + .add_int64_axis("num_rows", {65536, 16777216}) + .add_float64_axis("nulls", {0.0, 0.1}) + .add_string_axis("hash_name", {"murmurhash3_x86_32", "md5", "spark_murmurhash3_x86_32"}); diff --git a/cpp/benchmarks/hashing/partition.cpp b/cpp/benchmarks/hashing/partition.cpp new file mode 100644 index 0000000..0bec439 --- /dev/null +++ b/cpp/benchmarks/hashing/partition.cpp @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include + +#include + +#include + +class Hashing : public cudf::benchmark {}; + +template +void BM_hash_partition(benchmark::State& state) +{ + auto const num_rows = state.range(0); + auto const num_cols = state.range(1); + auto const num_partitions = state.range(2); + + // Create owning columns + auto input_table = create_sequence_table(cycle_dtypes({cudf::type_to_id()}, num_cols), + row_count{static_cast(num_rows)}); + auto input = cudf::table_view(*input_table); + + auto columns_to_hash = std::vector(num_cols); + std::iota(columns_to_hash.begin(), columns_to_hash.end(), 0); + + for (auto _ : state) { + cuda_event_timer timer(state, true); + auto output = cudf::hash_partition(input, columns_to_hash, num_partitions); + } + + auto const bytes_read = num_rows * num_cols * sizeof(T); + auto const bytes_written = num_rows * num_cols * sizeof(T); + auto const partition_bytes = num_partitions * sizeof(cudf::size_type); + + state.SetBytesProcessed(static_cast(state.iterations()) * + (bytes_read + bytes_written + partition_bytes)); +} + +BENCHMARK_DEFINE_F(Hashing, hash_partition) +(::benchmark::State& state) { BM_hash_partition(state); } + +static void CustomRanges(benchmark::internal::Benchmark* b) +{ + for (int columns = 1; columns <= 256; columns *= 16) { + for (int partitions = 64; partitions <= 1024; partitions *= 2) { + for (int rows = 1 << 17; rows <= 1 << 21; rows *= 2) { + b->Args({rows, columns, partitions}); + } + } + } +} + +BENCHMARK_REGISTER_F(Hashing, hash_partition) + ->Apply(CustomRanges) + ->Unit(benchmark::kMillisecond) + ->UseManualTime(); diff --git a/cpp/benchmarks/io/csv/csv_reader_input.cpp b/cpp/benchmarks/io/csv/csv_reader_input.cpp new file mode 100644 index 0000000..6216a9e --- /dev/null +++ b/cpp/benchmarks/io/csv/csv_reader_input.cpp @@ -0,0 +1,110 @@ +/* + * Copyright (c) 2022-2023, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include + +#include +#include + +#include + +constexpr size_t data_size = 256 << 20; +constexpr cudf::size_type num_cols = 64; + +template +void csv_read_common(DataType const& data_types, + cudf::io::io_type const& source_type, + nvbench::state& state) +{ + auto const tbl = + create_random_table(cycle_dtypes(data_types, num_cols), table_size_bytes{data_size}); + auto const view = tbl->view(); + + cuio_source_sink_pair source_sink(source_type); + cudf::io::csv_writer_options options = + cudf::io::csv_writer_options::builder(source_sink.make_sink_info(), view).include_header(true); + + cudf::io::write_csv(options); + + cudf::io::csv_reader_options const read_options = + cudf::io::csv_reader_options::builder(source_sink.make_source_info()); + + auto const mem_stats_logger = cudf::memory_stats_logger(); // init stats logger + state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); + state.exec(nvbench::exec_tag::sync | nvbench::exec_tag::timer, + [&](nvbench::launch& launch, auto& timer) { + try_drop_l3_cache(); // Drop L3 cache for accurate measurement + + timer.start(); + cudf::io::read_csv(read_options); + timer.stop(); + }); + + auto const time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value"); + state.add_element_count(static_cast(data_size) / time, "bytes_per_second"); + state.add_buffer_size( + 
mem_stats_logger.peak_memory_usage(), "peak_memory_usage", "peak_memory_usage"); + state.add_buffer_size(source_sink.size(), "encoded_file_size", "encoded_file_size"); +} + +template +void BM_csv_read_input(nvbench::state& state, + nvbench::type_list, nvbench::enum_type>) +{ + auto const d_type = get_type_or_group(static_cast(DataType)); + auto const source_type = IOType; + + csv_read_common(d_type, source_type, state); +} + +template +void BM_csv_read_io(nvbench::state& state, nvbench::type_list>) +{ + auto const d_type = get_type_or_group({static_cast(data_type::INTEGRAL), + static_cast(data_type::FLOAT), + static_cast(data_type::DECIMAL), + static_cast(data_type::TIMESTAMP), + static_cast(data_type::DURATION), + static_cast(data_type::STRING)}); + auto const source_type = IOType; + + csv_read_common(d_type, source_type, state); +} + +using d_type_list = nvbench::enum_type_list; + +using io_list = + nvbench::enum_type_list; + +NVBENCH_BENCH_TYPES(BM_csv_read_input, + NVBENCH_TYPE_AXES(d_type_list, + nvbench::enum_type_list)) + .set_name("csv_read_data_type") + .set_type_axes_names({"data_type", "io"}) + .set_min_samples(4); + +NVBENCH_BENCH_TYPES(BM_csv_read_io, NVBENCH_TYPE_AXES(io_list)) + .set_name("csv_read_io") + .set_type_axes_names({"io"}) + .set_min_samples(4); diff --git a/cpp/benchmarks/io/csv/csv_reader_options.cpp b/cpp/benchmarks/io/csv/csv_reader_options.cpp new file mode 100644 index 0000000..93ef5be --- /dev/null +++ b/cpp/benchmarks/io/csv/csv_reader_options.cpp @@ -0,0 +1,131 @@ +/* + * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include + +#include +#include + +#include + +constexpr size_t data_size = 256 << 20; + +template +void BM_csv_read_varying_options( + nvbench::state& state, + nvbench::type_list, nvbench::enum_type>) +{ + auto const data_types = + dtypes_for_column_selection(get_type_or_group({static_cast(data_type::INTEGRAL), + static_cast(data_type::FLOAT), + static_cast(data_type::DECIMAL), + static_cast(data_type::TIMESTAMP), + static_cast(data_type::DURATION), + static_cast(data_type::STRING)}), + ColSelection); + auto const cols_to_read = select_column_indexes(data_types.size(), ColSelection); + auto const num_chunks = state.get_int64("num_chunks"); + + auto const tbl = create_random_table(data_types, table_size_bytes{data_size}); + auto const view = tbl->view(); + + cuio_source_sink_pair source_sink(io_type::HOST_BUFFER); + cudf::io::csv_writer_options options = + cudf::io::csv_writer_options::builder(source_sink.make_sink_info(), view) + .include_header(true) + .line_terminator("\r\n"); + cudf::io::write_csv(options); + + cudf::io::csv_reader_options read_options = + cudf::io::csv_reader_options::builder(source_sink.make_source_info()) + .use_cols_indexes(cols_to_read) + .thousands('\'') + .windowslinetermination(true) + .comment('#') + .prefix("BM_"); + + size_t const chunk_size = source_sink.size() / num_chunks; + cudf::size_type const chunk_row_cnt = view.num_rows() / num_chunks; + auto const mem_stats_logger = cudf::memory_stats_logger(); + 
state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); + state.exec(nvbench::exec_tag::sync | nvbench::exec_tag::timer, + [&](nvbench::launch& launch, auto& timer) { + try_drop_l3_cache(); // Drop L3 cache for accurate measurement + + timer.start(); + for (int32_t chunk = 0; chunk < num_chunks; ++chunk) { + // only read the header in the first chunk + read_options.set_header(chunk == 0 ? 0 : -1); + + auto const is_last_chunk = chunk == (num_chunks - 1); + switch (RowSelection) { + case row_selection::ALL: break; + case row_selection::BYTE_RANGE: + read_options.set_byte_range_offset(chunk * chunk_size); + read_options.set_byte_range_size(chunk_size); + if (is_last_chunk) read_options.set_byte_range_size(0); + break; + case row_selection::NROWS: + read_options.set_skiprows(chunk * chunk_row_cnt); + read_options.set_nrows(chunk_row_cnt); + if (is_last_chunk) read_options.set_nrows(-1); + break; + case row_selection::SKIPFOOTER: + read_options.set_skiprows(chunk * chunk_row_cnt); + read_options.set_skipfooter(view.num_rows() - (chunk + 1) * chunk_row_cnt); + if (is_last_chunk) read_options.set_skipfooter(0); + break; + default: CUDF_FAIL("Unsupported row selection method"); + } + + cudf::io::read_csv(read_options); + } + timer.stop(); + }); + + auto const elapsed_time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value"); + auto const data_processed = data_size * cols_to_read.size() / view.num_columns(); + state.add_element_count(static_cast(data_processed) / elapsed_time, "bytes_per_second"); + state.add_buffer_size( + mem_stats_logger.peak_memory_usage(), "peak_memory_usage", "peak_memory_usage"); + state.add_buffer_size(source_sink.size(), "encoded_file_size", "encoded_file_size"); +} + +using col_selections = nvbench::enum_type_list; + +using row_selections = nvbench:: + enum_type_list; + +NVBENCH_BENCH_TYPES(BM_csv_read_varying_options, + NVBENCH_TYPE_AXES(col_selections, nvbench::enum_type_list)) + 
.set_name("csv_read_column_selection") + .set_type_axes_names({"column_selection", "row_selection"}) + .set_min_samples(4) + .add_int64_axis("num_chunks", {1}); + +NVBENCH_BENCH_TYPES(BM_csv_read_varying_options, + NVBENCH_TYPE_AXES(nvbench::enum_type_list, + row_selections)) + .set_name("csv_read_row_selection") + .set_type_axes_names({"column_selection", "row_selection"}) + .set_min_samples(4) + .add_int64_axis("num_chunks", {1, 8}); diff --git a/cpp/benchmarks/io/csv/csv_writer.cpp b/cpp/benchmarks/io/csv/csv_writer.cpp new file mode 100644 index 0000000..8ff07be --- /dev/null +++ b/cpp/benchmarks/io/csv/csv_writer.cpp @@ -0,0 +1,128 @@ +/* + * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include + +#include + +#include + +// Size of the data in the benchmark dataframe; chosen to be low enough to allow benchmarks to +// run on most GPUs, but large enough to allow highest throughput +constexpr size_t data_size = 256 << 20; +constexpr cudf::size_type num_cols = 64; + +template +void BM_csv_write_dtype_io(nvbench::state& state, + nvbench::type_list, nvbench::enum_type>) +{ + auto const data_types = get_type_or_group(static_cast(DataType)); + auto const sink_type = IO; + + auto const tbl = + create_random_table(cycle_dtypes(data_types, num_cols), table_size_bytes{data_size}); + auto const view = tbl->view(); + + std::size_t encoded_file_size = 0; + + auto const mem_stats_logger = cudf::memory_stats_logger(); + state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); + state.exec(nvbench::exec_tag::timer | nvbench::exec_tag::sync, + [&](nvbench::launch& launch, auto& timer) { + cuio_source_sink_pair source_sink(sink_type); + + timer.start(); + cudf::io::csv_writer_options options = + cudf::io::csv_writer_options::builder(source_sink.make_sink_info(), view); + cudf::io::write_csv(options); + timer.stop(); + + encoded_file_size = source_sink.size(); + }); + + auto const time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value"); + state.add_element_count(static_cast(data_size) / time, "bytes_per_second"); + state.add_buffer_size( + mem_stats_logger.peak_memory_usage(), "peak_memory_usage", "peak_memory_usage"); + state.add_buffer_size(encoded_file_size, "encoded_file_size", "encoded_file_size"); +} + +void BM_csv_write_varying_options(nvbench::state& state) +{ + auto const na_per_len = state.get_int64("na_per_len"); + auto const rows_per_chunk = state.get_int64("rows_per_chunk"); + + auto const data_types = get_type_or_group({static_cast(data_type::INTEGRAL), + static_cast(data_type::FLOAT), + static_cast(data_type::DECIMAL), + static_cast(data_type::TIMESTAMP), + 
static_cast(data_type::DURATION), + static_cast(data_type::STRING)}); + + auto const tbl = create_random_table(data_types, table_size_bytes{data_size}); + auto const view = tbl->view(); + + std::string const na_per(na_per_len, '#'); + std::size_t encoded_file_size = 0; + + auto const mem_stats_logger = cudf::memory_stats_logger(); + state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); + state.exec(nvbench::exec_tag::timer | nvbench::exec_tag::sync, + [&](nvbench::launch& launch, auto& timer) { + cuio_source_sink_pair source_sink(io_type::HOST_BUFFER); + + timer.start(); + cudf::io::csv_writer_options options = + cudf::io::csv_writer_options::builder(source_sink.make_sink_info(), view) + .na_rep(na_per) + .rows_per_chunk(rows_per_chunk); + cudf::io::write_csv(options); + timer.stop(); + + encoded_file_size = source_sink.size(); + }); + + auto const time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value"); + state.add_element_count(static_cast(data_size) / time, "bytes_per_second"); + state.add_buffer_size( + mem_stats_logger.peak_memory_usage(), "peak_memory_usage", "peak_memory_usage"); + state.add_buffer_size(encoded_file_size, "encoded_file_size", "encoded_file_size"); +} + +using d_type_list = nvbench::enum_type_list; + +using io_list = nvbench::enum_type_list; + +NVBENCH_BENCH_TYPES(BM_csv_write_dtype_io, NVBENCH_TYPE_AXES(d_type_list, io_list)) + .set_name("csv_write_dtype_io") + .set_type_axes_names({"data_type", "io"}) + .set_min_samples(4); + +NVBENCH_BENCH(BM_csv_write_varying_options) + .set_name("csv_write_options") + .set_min_samples(4) + .add_int64_axis("na_per_len", {0, 16}) + .add_int64_power_of_two_axis("rows_per_chunk", nvbench::range(8, 20, 2)); diff --git a/cpp/benchmarks/io/cuio_common.cpp b/cpp/benchmarks/io/cuio_common.cpp new file mode 100644 index 0000000..b1aaef4 --- /dev/null +++ b/cpp/benchmarks/io/cuio_common.cpp @@ -0,0 +1,203 @@ +/* + * Copyright (c) 2020-2023, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +#include +#include +#include +#include + +#include + +temp_directory const cuio_source_sink_pair::tmpdir{"cudf_gbench"}; + +std::string random_file_in_dir(std::string const& dir_path) +{ + // `mkstemp` modifies the template in place + std::string filename = dir_path + "io.XXXXXX"; + + // `mkstemp` opens the file; closing immediately, only need the name + close(mkstemp(const_cast(filename.data()))); + + return filename; +} + +cuio_source_sink_pair::cuio_source_sink_pair(io_type type) + : type{type}, + d_buffer{0, cudf::get_default_stream()}, + file_name{random_file_in_dir(tmpdir.path())} +{ +} + +cudf::io::source_info cuio_source_sink_pair::make_source_info() +{ + switch (type) { + case io_type::FILEPATH: return cudf::io::source_info(file_name); + case io_type::HOST_BUFFER: return cudf::io::source_info(h_buffer.data(), h_buffer.size()); + case io_type::DEVICE_BUFFER: { + // TODO: make cuio_source_sink_pair stream-friendly and avoid implicit use of the default + // stream + auto const stream = cudf::get_default_stream(); + d_buffer.resize(h_buffer.size(), stream); + CUDF_CUDA_TRY(cudaMemcpyAsync( + d_buffer.data(), h_buffer.data(), h_buffer.size(), cudaMemcpyDefault, stream.value())); + + return cudf::io::source_info(d_buffer); + } + default: CUDF_FAIL("invalid input type"); + } +} + +cudf::io::sink_info cuio_source_sink_pair::make_sink_info() +{ + switch (type) { + case 
io_type::VOID: return cudf::io::sink_info(&void_sink); + case io_type::FILEPATH: return cudf::io::sink_info(file_name); + case io_type::HOST_BUFFER: [[fallthrough]]; + case io_type::DEVICE_BUFFER: return cudf::io::sink_info(&h_buffer); + default: CUDF_FAIL("invalid output type"); + } +} + +size_t cuio_source_sink_pair::size() +{ + switch (type) { + case io_type::VOID: return void_sink.bytes_written(); + case io_type::FILEPATH: + return static_cast( + std::ifstream(file_name, std::ifstream::ate | std::ifstream::binary).tellg()); + case io_type::HOST_BUFFER: [[fallthrough]]; + case io_type::DEVICE_BUFFER: return h_buffer.size(); + default: CUDF_FAIL("invalid output type"); + } +} + +std::vector dtypes_for_column_selection(std::vector const& data_types, + column_selection col_sel) +{ + std::vector out_dtypes; + out_dtypes.reserve(2 * data_types.size()); + switch (col_sel) { + case column_selection::ALL: + case column_selection::FIRST_HALF: + case column_selection::SECOND_HALF: + std::copy(data_types.begin(), data_types.end(), std::back_inserter(out_dtypes)); + std::copy(data_types.begin(), data_types.end(), std::back_inserter(out_dtypes)); + break; + case column_selection::ALTERNATE: + for (auto const& type : data_types) { + out_dtypes.push_back(type); + out_dtypes.push_back(type); + } + break; + } + return out_dtypes; +} + +std::vector select_column_indexes(int num_cols, column_selection col_sel) +{ + std::vector col_idxs(num_cols / 2); + switch (col_sel) { + case column_selection::ALL: col_idxs.resize(num_cols); + case column_selection::FIRST_HALF: + case column_selection::SECOND_HALF: + std::iota(std::begin(col_idxs), + std::end(col_idxs), + (col_sel == column_selection::SECOND_HALF) ? 
num_cols / 2 : 0); + break; + case column_selection::ALTERNATE: + for (size_t i = 0; i < col_idxs.size(); ++i) + col_idxs[i] = 2 * i; + break; + } + return col_idxs; +} + +std::vector select_column_names(std::vector const& col_names, + column_selection col_sel) +{ + auto const col_idxs_to_read = select_column_indexes(col_names.size(), col_sel); + + std::vector col_names_to_read; + std::transform(col_idxs_to_read.begin(), + col_idxs_to_read.end(), + std::back_inserter(col_names_to_read), + [&](auto& idx) { return col_names[idx]; }); + return col_names_to_read; +} + +std::vector segments_in_chunk(int num_segments, int num_chunks, int chunk_idx) +{ + CUDF_EXPECTS(num_segments >= num_chunks, + "Number of chunks cannot be greater than the number of segments in the file"); + CUDF_EXPECTS(chunk_idx < num_chunks, + "Chunk index must be smaller than the number of chunks in the file"); + + auto const segments_in_chunk = cudf::util::div_rounding_up_unsafe(num_segments, num_chunks); + auto const begin_segment = std::min(chunk_idx * segments_in_chunk, num_segments); + auto const end_segment = std::min(begin_segment + segments_in_chunk, num_segments); + std::vector selected_segments(end_segment - begin_segment); + std::iota(selected_segments.begin(), selected_segments.end(), begin_segment); + + return selected_segments; +} + +// Executes the command and returns stderr output +std::string exec_cmd(std::string_view cmd) +{ + // Prevent the output from the command from mixing with the original process' output + std::fflush(nullptr); + // Switch stderr and stdout to only capture stderr + auto const redirected_cmd = std::string{"( "}.append(cmd).append(" 3>&2 2>&1 1>&3) 2>/dev/null"); + std::unique_ptr pipe(popen(redirected_cmd.c_str(), "r"), pclose); + CUDF_EXPECTS(pipe != nullptr, "popen() failed"); + + std::array buffer; + std::string error_out; + while (fgets(buffer.data(), buffer.size(), pipe.get()) != nullptr) { + error_out += buffer.data(); + } + return error_out; +} + +void 
log_l3_warning_once() +{ + static bool is_logged = false; + if (not is_logged) { + CUDF_LOG_WARN( + "Running benchmarks without dropping the L3 cache; results may not reflect file IO " + "throughput"); + is_logged = true; + } +} + +void try_drop_l3_cache() +{ + static bool is_drop_cache_enabled = std::getenv("CUDF_BENCHMARK_DROP_CACHE") != nullptr; + if (not is_drop_cache_enabled) { + log_l3_warning_once(); + return; + } + + std::array drop_cache_cmds{"/sbin/sysctl vm.drop_caches=3", "sudo /sbin/sysctl vm.drop_caches=3"}; + CUDF_EXPECTS(std::any_of(drop_cache_cmds.cbegin(), + drop_cache_cmds.cend(), + [](auto& cmd) { return exec_cmd(cmd).empty(); }), + "Failed to execute the drop cache command"); +} diff --git a/cpp/benchmarks/io/cuio_common.hpp b/cpp/benchmarks/io/cuio_common.hpp new file mode 100644 index 0000000..34adae3 --- /dev/null +++ b/cpp/benchmarks/io/cuio_common.hpp @@ -0,0 +1,140 @@ +/* + * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +#include +#include +#include + +#include + +using cudf::io::io_type; + +std::string random_file_in_dir(std::string const& dir_path); + +/** + * @brief Class to create a coupled `source_info` and `sink_info` of given type. 
+ */ +class cuio_source_sink_pair { + class bytes_written_only_sink : public cudf::io::data_sink { + size_t _bytes_written = 0; + + public: + void host_write(void const* data, size_t size) override { _bytes_written += size; } + void flush() override {} + size_t bytes_written() override { return _bytes_written; } + }; + + public: + cuio_source_sink_pair(io_type type); + ~cuio_source_sink_pair() + { + // delete the temporary file + std::remove(file_name.c_str()); + } + /** + * @brief Created a source info of the set type + * + * The `datasource` created using the returned `source_info` will read data from the same location + * that the result of a @ref `make_sink_info` call writes to. + * + * @return The description of the data source + */ + cudf::io::source_info make_source_info(); + + /** + * @brief Created a sink info of the set type + * + * The `data_sink` created using the returned `sink_info` will write data to the same location + * that the result of a @ref `make_source_info` call reads from. + * + * `io_type::DEVICE_BUFFER` source/sink is an exception where a host buffer sink will be created. + * + * @return The description of the data sink + */ + cudf::io::sink_info make_sink_info(); + + [[nodiscard]] size_t size(); + + private: + static temp_directory const tmpdir; + + io_type const type; + std::vector h_buffer; + rmm::device_uvector d_buffer; + std::string const file_name; + bytes_written_only_sink void_sink; +}; + +/** + * @brief Column selection strategy. + */ +enum class column_selection { ALL, ALTERNATE, FIRST_HALF, SECOND_HALF }; + +/** + * @brief Row selection strategy. + * + * Not all strategies are applicable to all readers. + */ +enum class row_selection { ALL, BYTE_RANGE, NROWS, SKIPFOOTER, STRIPES, ROW_GROUPS }; + +/** + * @brief Modify data types such that total selected columns size is a fix fraction of the total + * size. 
+ * + * The data types are multiplied/rearranged such that the columns selected with the given column + * selection enumerator add up to a fixed fraction of the total table size, regardless of the data + * types. + * + * @param ids Array of column type IDs + * @param cs The column selection enumerator + * + * @return The duplicated/rearranged array of type IDs + */ +std::vector dtypes_for_column_selection(std::vector const& ids, + column_selection col_sel); + +/** + * @brief Selects a subset of columns based on the input enumerator. + */ +std::vector select_column_indexes(int num_cols, column_selection col_sel); + +/** + * @brief Selects a subset of columns from the array of names, based on the input enumerator. + */ +std::vector select_column_names(std::vector const& col_names, + column_selection col_sel); + +/** + * @brief Returns file segments that belong to the given chunk if the file is split into a given + * number of chunks. + * + * The segments could be Parquet row groups or ORC stripes. + */ +std::vector segments_in_chunk(int num_segments, int num_chunks, int chunk); + +/** + * @brief Drops L3 cache if `CUDF_BENCHMARK_DROP_CACHE` environment variable is set. + * + * Has no effect if the environment variable is not set. + * May require sudo access ro run successfully. + * + * @throw cudf::logic_error if the environment variable is set and the command fails + */ +void try_drop_l3_cache(); diff --git a/cpp/benchmarks/io/fst.cu b/cpp/benchmarks/io/fst.cu new file mode 100644 index 0000000..c0c8851 --- /dev/null +++ b/cpp/benchmarks/io/fst.cu @@ -0,0 +1,243 @@ +/* + * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include +#include //TODO find better replacement + +#include + +#include +#include +#include +#include + +#include +#include +#include +#include + +#include + +#include + +#include + +namespace { +auto make_test_json_data(nvbench::state& state) +{ + auto const string_size{cudf::size_type(state.get_int64("string_size"))}; + + // Test input + std::string input = R"( {)" + R"("category": "reference",)" + R"("index:" [4,12,42],)" + R"("author": "Nigel Rees",)" + R"("title": "Sayings of the Century",)" + R"("price": 8.95)" + R"(} )" + R"({)" + R"("category": "reference",)" + R"("index:" [4,{},null,{"a":[]}],)" + R"("author": "Nigel Rees",)" + R"("title": "Sayings of the Century",)" + R"("price": 8.95)" + R"(} {} [] [ ])"; + + auto d_input_scalar = cudf::make_string_scalar(input); + auto& d_string_scalar = static_cast(*d_input_scalar); + cudf::size_type const repeat_times = string_size / input.size(); + return cudf::strings::repeat_string(d_string_scalar, repeat_times); +} + +// Type used to represent the atomic symbol type used within the finite-state machine +using SymbolT = char; +// Type sufficiently large to index symbols within the input and output (may be unsigned) +using SymbolOffsetT = uint32_t; +constexpr std::size_t single_item = 1; +constexpr auto max_translation_table_size = TT_NUM_STATES * NUM_SYMBOL_GROUPS; + +} // namespace + +void BM_FST_JSON(nvbench::state& state) +{ + CUDF_EXPECTS(state.get_int64("string_size") <= std::numeric_limits::max(), + "Benchmarks only support up to size_type's maximum number of items"); + 
auto const string_size{cudf::size_type(state.get_int64("string_size"))}; + // Prepare cuda stream for data transfers & kernels + rmm::cuda_stream stream{}; + rmm::cuda_stream_view stream_view(stream); + + auto input_string = make_test_json_data(state); + auto& d_input = static_cast&>(*input_string); + + state.add_element_count(d_input.size()); + + // Prepare input & output buffers + cudf::detail::hostdevice_vector output_gpu(d_input.size(), stream_view); + cudf::detail::hostdevice_vector output_gpu_size(single_item, stream_view); + cudf::detail::hostdevice_vector out_indexes_gpu(d_input.size(), stream_view); + + // Run algorithm + auto parser = cudf::io::fst::detail::make_fst( + cudf::io::fst::detail::make_symbol_group_lut(pda_sgs), + cudf::io::fst::detail::make_transition_table(pda_state_tt), + cudf::io::fst::detail::make_translation_table(pda_out_tt), + stream); + + state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value())); + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + // Allocate device-side temporary storage & run algorithm + parser.Transduce(d_input.data(), + static_cast(d_input.size()), + output_gpu.device_ptr(), + out_indexes_gpu.device_ptr(), + output_gpu_size.device_ptr(), + start_state, + stream.value()); + }); +} + +void BM_FST_JSON_no_outidx(nvbench::state& state) +{ + CUDF_EXPECTS(state.get_int64("string_size") <= std::numeric_limits::max(), + "Benchmarks only support up to size_type's maximum number of items"); + auto const string_size{cudf::size_type(state.get_int64("string_size"))}; + // Prepare cuda stream for data transfers & kernels + rmm::cuda_stream stream{}; + rmm::cuda_stream_view stream_view(stream); + + auto input_string = make_test_json_data(state); + auto& d_input = static_cast&>(*input_string); + + state.add_element_count(d_input.size()); + + // Prepare input & output buffers + cudf::detail::hostdevice_vector output_gpu(d_input.size(), stream_view); + cudf::detail::hostdevice_vector 
output_gpu_size(single_item, stream_view); + cudf::detail::hostdevice_vector out_indexes_gpu(d_input.size(), stream_view); + + // Run algorithm + auto parser = cudf::io::fst::detail::make_fst( + cudf::io::fst::detail::make_symbol_group_lut(pda_sgs), + cudf::io::fst::detail::make_transition_table(pda_state_tt), + cudf::io::fst::detail::make_translation_table(pda_out_tt), + stream); + + state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value())); + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + // Allocate device-side temporary storage & run algorithm + parser.Transduce(d_input.data(), + static_cast(d_input.size()), + output_gpu.device_ptr(), + thrust::make_discard_iterator(), + output_gpu_size.device_ptr(), + start_state, + stream.value()); + }); +} + +void BM_FST_JSON_no_out(nvbench::state& state) +{ + CUDF_EXPECTS(state.get_int64("string_size") <= std::numeric_limits::max(), + "Benchmarks only support up to size_type's maximum number of items"); + auto const string_size{cudf::size_type(state.get_int64("string_size"))}; + // Prepare cuda stream for data transfers & kernels + rmm::cuda_stream stream{}; + rmm::cuda_stream_view stream_view(stream); + + auto input_string = make_test_json_data(state); + auto& d_input = static_cast&>(*input_string); + + state.add_element_count(d_input.size()); + + // Prepare input & output buffers + cudf::detail::hostdevice_vector output_gpu_size(single_item, stream_view); + + // Run algorithm + auto parser = cudf::io::fst::detail::make_fst( + cudf::io::fst::detail::make_symbol_group_lut(pda_sgs), + cudf::io::fst::detail::make_transition_table(pda_state_tt), + cudf::io::fst::detail::make_translation_table(pda_out_tt), + stream); + + state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value())); + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + // Allocate device-side temporary storage & run algorithm + parser.Transduce(d_input.data(), + static_cast(d_input.size()), + 
thrust::make_discard_iterator(), + thrust::make_discard_iterator(), + output_gpu_size.device_ptr(), + start_state, + stream.value()); + }); +} + +void BM_FST_JSON_no_str(nvbench::state& state) +{ + CUDF_EXPECTS(state.get_int64("string_size") <= std::numeric_limits::max(), + "Benchmarks only support up to size_type's maximum number of items"); + auto const string_size{cudf::size_type(state.get_int64("string_size"))}; + // Prepare cuda stream for data transfers & kernels + rmm::cuda_stream stream{}; + rmm::cuda_stream_view stream_view(stream); + + auto input_string = make_test_json_data(state); + auto& d_input = static_cast&>(*input_string); + + state.add_element_count(d_input.size()); + + // Prepare input & output buffers + cudf::detail::hostdevice_vector output_gpu_size(single_item, stream_view); + cudf::detail::hostdevice_vector out_indexes_gpu(d_input.size(), stream_view); + + // Run algorithm + auto parser = cudf::io::fst::detail::make_fst( + cudf::io::fst::detail::make_symbol_group_lut(pda_sgs), + cudf::io::fst::detail::make_transition_table(pda_state_tt), + cudf::io::fst::detail::make_translation_table(pda_out_tt), + stream); + + state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value())); + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + // Allocate device-side temporary storage & run algorithm + parser.Transduce(d_input.data(), + static_cast(d_input.size()), + thrust::make_discard_iterator(), + out_indexes_gpu.device_ptr(), + output_gpu_size.device_ptr(), + start_state, + stream.value()); + }); +} + +NVBENCH_BENCH(BM_FST_JSON) + .set_name("FST_JSON") + .add_int64_power_of_two_axis("string_size", nvbench::range(20, 30, 1)); + +NVBENCH_BENCH(BM_FST_JSON_no_outidx) + .set_name("FST_JSON_no_outidx") + .add_int64_power_of_two_axis("string_size", nvbench::range(20, 30, 1)); + +NVBENCH_BENCH(BM_FST_JSON_no_out) + .set_name("FST_JSON_no_out") + .add_int64_power_of_two_axis("string_size", nvbench::range(20, 30, 1)); + 
+NVBENCH_BENCH(BM_FST_JSON_no_str) + .set_name("FST_JSON_no_str") + .add_int64_power_of_two_axis("string_size", nvbench::range(20, 30, 1)); diff --git a/cpp/benchmarks/io/json/json_reader_input.cpp b/cpp/benchmarks/io/json/json_reader_input.cpp new file mode 100644 index 0000000..31bb5da --- /dev/null +++ b/cpp/benchmarks/io/json/json_reader_input.cpp @@ -0,0 +1,132 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include + +#include +#include + +#include + +// Size of the data in the benchmark dataframe; chosen to be low enough to allow benchmarks to +// run on most GPUs, but large enough to allow highest throughput +constexpr size_t data_size = 512 << 20; +constexpr cudf::size_type num_cols = 64; + +void json_read_common(cuio_source_sink_pair& source_sink, nvbench::state& state) +{ + cudf::io::json_reader_options read_opts = + cudf::io::json_reader_options::builder(source_sink.make_source_info()); + + auto mem_stats_logger = cudf::memory_stats_logger(); + state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); + state.exec(nvbench::exec_tag::sync | nvbench::exec_tag::timer, + [&](nvbench::launch& launch, auto& timer) { + try_drop_l3_cache(); + + timer.start(); + cudf::io::read_json(read_opts); + timer.stop(); + }); + + auto const time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value"); + 
state.add_element_count(static_cast(data_size) / time, "bytes_per_second"); + state.add_buffer_size( + mem_stats_logger.peak_memory_usage(), "peak_memory_usage", "peak_memory_usage"); + state.add_buffer_size(source_sink.size(), "encoded_file_size", "encoded_file_size"); +} + +template +void BM_json_read_io(nvbench::state& state, nvbench::type_list>) +{ + auto const d_type = get_type_or_group({static_cast(data_type::INTEGRAL), + static_cast(data_type::FLOAT), + static_cast(data_type::DECIMAL), + static_cast(data_type::TIMESTAMP), + static_cast(data_type::DURATION), + static_cast(data_type::STRING), + static_cast(data_type::LIST), + static_cast(data_type::STRUCT)}); + + auto const source_type = IO; + cuio_source_sink_pair source_sink(source_type); + + { + auto const tbl = create_random_table( + cycle_dtypes(d_type, num_cols), table_size_bytes{data_size}, data_profile_builder()); + auto const view = tbl->view(); + + cudf::io::json_writer_options const write_opts = + cudf::io::json_writer_options::builder(source_sink.make_sink_info(), view) + .na_rep("null") + .rows_per_chunk(100'000); + cudf::io::write_json(write_opts); + } + + json_read_common(source_sink, state); +} + +template +void BM_json_read_data_type( + nvbench::state& state, nvbench::type_list, nvbench::enum_type>) +{ + auto const d_type = get_type_or_group(static_cast(DataType)); + auto const source_type = IO; + cuio_source_sink_pair source_sink(source_type); + { + auto const tbl = create_random_table( + cycle_dtypes(d_type, num_cols), table_size_bytes{data_size}, data_profile_builder()); + auto const view = tbl->view(); + + cudf::io::json_writer_options const write_opts = + cudf::io::json_writer_options::builder(source_sink.make_sink_info(), view) + .na_rep("null") + .rows_per_chunk(100'000); + cudf::io::write_json(write_opts); + } + json_read_common(source_sink, state); +} + +using d_type_list = nvbench::enum_type_list; + +using io_list = nvbench::enum_type_list; + +using compression_list = + 
nvbench::enum_type_list; + +NVBENCH_BENCH_TYPES(BM_json_read_data_type, + NVBENCH_TYPE_AXES(d_type_list, + nvbench::enum_type_list)) + .set_name("json_read_data_type") + .set_type_axes_names({"data_type", "io"}) + .set_min_samples(4); + +NVBENCH_BENCH_TYPES(BM_json_read_io, NVBENCH_TYPE_AXES(io_list)) + .set_name("json_read_io") + .set_type_axes_names({"io"}) + .set_min_samples(4); diff --git a/cpp/benchmarks/io/json/json_writer.cpp b/cpp/benchmarks/io/json/json_writer.cpp new file mode 100644 index 0000000..ae6bb81 --- /dev/null +++ b/cpp/benchmarks/io/json/json_writer.cpp @@ -0,0 +1,131 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include + +#include +#include + +#include + +// Size of the data in the benchmark dataframe; chosen to be low enough to allow benchmarks to +// run on most GPUs, but large enough to allow highest throughput +constexpr size_t data_size = 512 << 20; +constexpr cudf::size_type num_cols = 64; + +void json_write_common(cudf::io::json_writer_options const& write_opts, + cuio_source_sink_pair& source_sink, + size_t const data_size, + nvbench::state& state) +{ + auto mem_stats_logger = cudf::memory_stats_logger(); + state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); + state.exec(nvbench::exec_tag::sync | nvbench::exec_tag::timer, + [&](nvbench::launch& launch, auto& timer) { + try_drop_l3_cache(); + + timer.start(); + cudf::io::write_json(write_opts); + timer.stop(); + }); + + auto const time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value"); + state.add_element_count(static_cast(data_size) / time, "bytes_per_second"); + state.add_buffer_size( + mem_stats_logger.peak_memory_usage(), "peak_memory_usage", "peak_memory_usage"); + state.add_buffer_size(source_sink.size(), "encoded_file_size", "encoded_file_size"); +} + +template +void BM_json_write_io(nvbench::state& state, nvbench::type_list>) +{ + auto const d_type = get_type_or_group({static_cast(data_type::INTEGRAL), + static_cast(data_type::FLOAT), + static_cast(data_type::DECIMAL), + static_cast(data_type::TIMESTAMP), + static_cast(data_type::DURATION), + static_cast(data_type::STRING), + static_cast(data_type::LIST), + static_cast(data_type::STRUCT)}); + + auto const source_type = IO; + + auto const tbl = create_random_table( + cycle_dtypes(d_type, num_cols), table_size_bytes{data_size}, data_profile_builder()); + auto const view = tbl->view(); + + cuio_source_sink_pair source_sink(source_type); + cudf::io::json_writer_options write_opts = + cudf::io::json_writer_options::builder(source_sink.make_sink_info(), view) + 
.na_rep("null") + .rows_per_chunk(view.num_rows() / 10); + + json_write_common(write_opts, source_sink, data_size, state); +} + +void BM_json_writer_options(nvbench::state& state) +{ + auto const source_type = io_type::HOST_BUFFER; + bool const json_lines = state.get_int64("json_lines"); + bool const include_nulls = state.get_int64("include_nulls"); + auto const rows_per_chunk = state.get_int64("rows_per_chunk"); + + if ((json_lines or include_nulls) and rows_per_chunk != 1 << 20) { + state.skip("Skipping for unrequired rows_per_chunk combinations"); + return; + } + auto const d_type = get_type_or_group({static_cast(data_type::INTEGRAL), + static_cast(data_type::FLOAT), + static_cast(data_type::DECIMAL), + static_cast(data_type::TIMESTAMP), + static_cast(data_type::DURATION), + static_cast(data_type::STRING), + static_cast(data_type::LIST), + static_cast(data_type::STRUCT)}); + + auto const tbl = create_random_table( + cycle_dtypes(d_type, num_cols), table_size_bytes{data_size}, data_profile_builder()); + auto const view = tbl->view(); + + cuio_source_sink_pair source_sink(source_type); + cudf::io::json_writer_options write_opts = + cudf::io::json_writer_options::builder(source_sink.make_sink_info(), view) + .na_rep("null") + .lines(json_lines) + .include_nulls(include_nulls) + .rows_per_chunk(rows_per_chunk); + + json_write_common(write_opts, source_sink, data_size, state); +} + +using io_list = nvbench::enum_type_list; + +NVBENCH_BENCH_TYPES(BM_json_write_io, NVBENCH_TYPE_AXES(io_list)) + .set_name("json_write_io") + .set_type_axes_names({"io"}) + .set_min_samples(4); + +NVBENCH_BENCH(BM_json_writer_options) + .set_name("json_write_options") + .set_min_samples(4) + .add_int64_axis("json_lines", {false, true}) + .add_int64_axis("include_nulls", {false, true}) + .add_int64_power_of_two_axis("rows_per_chunk", {10, 15, 16, 18, 20}); diff --git a/cpp/benchmarks/io/json/nested_json.cpp b/cpp/benchmarks/io/json/nested_json.cpp new file mode 100644 index 0000000..03ccd4e 
--- /dev/null +++ b/cpp/benchmarks/io/json/nested_json.cpp @@ -0,0 +1,217 @@ +/* + * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include + +#include + +#include +#include +#include + +#include + +#include +#include + +namespace { + +// pre-generate all the number strings +std::vector _num_to_string; +std::string num_to_string(int32_t num) { return _num_to_string.at(num); } + +// List of List nested. +std::string generate_list_of_lists(int32_t max_depth, int32_t max_rows, std::string elem) +{ + std::string json = "["; + if (max_depth > 1) json += std::string(max_depth - 1, '['); + for (int32_t row = 0; row < max_rows; ++row) { + json += elem; + if (row < max_rows - 1) { json += ", "; } + } + if (max_depth > 1) json += std::string(max_depth - 1, ']'); + json += "]"; + return json; +} + +// Struct of Struct nested. 
+std::string generate_struct_of_structs(int32_t max_depth, int32_t max_rows, std::string elem) +{ + if (max_depth <= 0) return "{}"; + std::string json; + for (int32_t depth = 0; depth < max_depth / 2; ++depth) { + json += R"({"a)" + num_to_string(depth) + R"(": )"; + } + if (max_rows == 0) json += "{}"; + + for (int32_t row = 0; row < max_rows; ++row) { + json += elem; + if (row < max_rows - 1) { + json += R"(, "a)" + num_to_string(max_depth / 2 - 1) + "_" + num_to_string(row) + R"(": )"; + } + } + if (max_depth > 0) json += std::string(max_depth / 2, '}'); + return json; +} + +// Memoize the generated rows so we don't have to regenerate them. +std::map, std::string> _row_cache; + +std::string generate_row( + int num_columns, int max_depth, int max_list_size, int max_struct_size, size_t max_bytes) +{ + std::string s = "{"; + std::vector const elems{ + R"(1)", R"(-2)", R"(3.4)", R"("5")", R"("abcdefghij")", R"(true)", R"(null)"}; + for (int i = 0; i < num_columns; i++) { + s += R"("col)" + num_to_string(i) + R"(": )"; + if (auto it = _row_cache.find({i % 2, max_depth - 2, max_struct_size, i % elems.size()}); + it != _row_cache.end()) { + s += it->second; + } else { + auto r = + (i % 2 == 0) + ? generate_struct_of_structs(max_depth - 2, max_struct_size, elems[i % elems.size()]) + : generate_list_of_lists(max_depth - 2, max_struct_size, elems[i % elems.size()]); + _row_cache[{i % 2, max_depth - 2, max_struct_size, i % elems.size()}] = r; + s += r; + } + if (s.length() > max_bytes) break; + if (i < num_columns - 1) s += ", "; + } + s += "}"; + return s; +} + +std::string generate_json(int num_rows, + int num_columns, + int max_depth, + int max_list_size, + int max_struct_size, + size_t max_json_bytes) +{ + // std::to_string is slow, so we pre-generate all number strings we need. 
+ _num_to_string.clear(); + auto max_num_str = + std::max(std::max(num_columns, max_depth), std::max(max_list_size, max_struct_size)); + for (int i = 0; i < max_num_str; i++) + _num_to_string.emplace_back(std::to_string(i)); + _row_cache.clear(); + + std::string s = "[\n"; + s.reserve(max_json_bytes + 1024); + for (int i = 0; i < num_rows; i++) { + s += generate_row( + num_columns, max_depth - 2, max_list_size, max_struct_size, max_json_bytes - s.length()); + if (s.length() > max_json_bytes) break; + if (i != num_rows - 1) s += ",\n"; + } + s += "\n]"; + return s; +} + +auto make_test_json_data(cudf::size_type string_size, rmm::cuda_stream_view stream) +{ + // Test input + std::string input = R"( + {"a":1,"b":2,"c":[3], "d": {}}, + {"a":1,"b":4.0,"c":[], "d": {"year":1882,"author": "Bharathi"}}, + {"a":1,"b":6.0,"c":[5, 7], "d": null}, + {"a":1,"b":null,"c":null}, + { + "a" : 1 + }, + {"a":1,"b":Infinity,"c":[null], "d": {"year":-600,"author": "Kaniyan"}}, + {"a": 1, "b": 8.0, "d": { "author": "Jean-Jacques Rousseau"}},)"; + + cudf::size_type const repeat_times = string_size / input.size(); + + auto d_input_scalar = cudf::make_string_scalar(input, stream); + auto& d_string_scalar = static_cast(*d_input_scalar); + auto d_scalar = cudf::strings::repeat_string(d_string_scalar, repeat_times); + + auto data = const_cast(d_scalar->data()); + CUDF_CUDA_TRY(cudaMemsetAsync(data, '[', 1, stream.value())); + CUDF_CUDA_TRY(cudaMemsetAsync(data + d_scalar->size() - 1, ']', 1, stream.value())); + + return d_scalar; +} +} // namespace + +void BM_NESTED_JSON(nvbench::state& state) +{ + auto const string_size{cudf::size_type(state.get_int64("string_size"))}; + auto const default_options = cudf::io::json_reader_options{}; + + auto input = make_test_json_data(string_size, cudf::get_default_stream()); + state.add_element_count(input->size()); + + // Run algorithm + auto const mem_stats_logger = cudf::memory_stats_logger(); + 
state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + // Allocate device-side temporary storage & run algorithm + cudf::io::json::detail::device_parse_nested_json( + cudf::device_span{input->data(), static_cast(input->size())}, + default_options, + cudf::get_default_stream(), + rmm::mr::get_current_device_resource()); + }); + + auto const time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value"); + state.add_element_count(static_cast(string_size) / time, "bytes_per_second"); + state.add_buffer_size( + mem_stats_logger.peak_memory_usage(), "peak_memory_usage", "peak_memory_usage"); +} + +NVBENCH_BENCH(BM_NESTED_JSON) + .set_name("nested_json_gpu_parser") + .add_int64_power_of_two_axis("string_size", nvbench::range(20, 30, 1)); + +void BM_NESTED_JSON_DEPTH(nvbench::state& state) +{ + auto const string_size{cudf::size_type(state.get_int64("string_size"))}; + auto const depth{cudf::size_type(state.get_int64("depth"))}; + + auto d_scalar = cudf::string_scalar( + generate_json(100'000'000, 10, depth, 10, 10, string_size), true, cudf::get_default_stream()); + auto input = cudf::device_span(d_scalar.data(), d_scalar.size()); + + state.add_element_count(input.size()); + auto const default_options = cudf::io::json_reader_options{}; + + // Run algorithm + auto const mem_stats_logger = cudf::memory_stats_logger(); + state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + // Allocate device-side temporary storage & run algorithm + cudf::io::json::detail::device_parse_nested_json( + input, default_options, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + }); + + auto const time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value"); + state.add_element_count(static_cast(string_size) / time, "bytes_per_second"); + 
state.add_buffer_size( + mem_stats_logger.peak_memory_usage(), "peak_memory_usage", "peak_memory_usage"); +} + +NVBENCH_BENCH(BM_NESTED_JSON_DEPTH) + .set_name("nested_json_gpu_parser_depth") + .add_int64_power_of_two_axis("depth", nvbench::range(1, 4, 1)) + .add_int64_power_of_two_axis("string_size", nvbench::range(20, 30, 2)); diff --git a/cpp/benchmarks/io/nvbench_helpers.hpp b/cpp/benchmarks/io/nvbench_helpers.hpp new file mode 100644 index 0000000..dd96f6f --- /dev/null +++ b/cpp/benchmarks/io/nvbench_helpers.hpp @@ -0,0 +1,171 @@ +/* + * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include + +#include +#include + +#include + +enum class data_type : int32_t { + INTEGRAL = static_cast(type_group_id::INTEGRAL), + INTEGRAL_SIGNED = static_cast(type_group_id::INTEGRAL_SIGNED), + FLOAT = static_cast(type_group_id::FLOATING_POINT), + DECIMAL = static_cast(type_group_id::FIXED_POINT), + TIMESTAMP = static_cast(type_group_id::TIMESTAMP), + DURATION = static_cast(type_group_id::DURATION), + STRING = static_cast(cudf::type_id::STRING), + LIST = static_cast(cudf::type_id::LIST), + STRUCT = static_cast(cudf::type_id::STRUCT) +}; + +// NVBENCH_DECLARE_ENUM_TYPE_STRINGS macro must be used from global namespace scope +NVBENCH_DECLARE_ENUM_TYPE_STRINGS( + data_type, + [](data_type value) { + switch (value) { + case data_type::INTEGRAL: return "INTEGRAL"; + case data_type::INTEGRAL_SIGNED: return "INTEGRAL_SIGNED"; + case data_type::FLOAT: return "FLOAT"; + case data_type::DECIMAL: return "DECIMAL"; + case data_type::TIMESTAMP: return "TIMESTAMP"; + case data_type::DURATION: return "DURATION"; + case data_type::STRING: return "STRING"; + case data_type::LIST: return "LIST"; + case data_type::STRUCT: return "STRUCT"; + default: return "Unknown"; + } + }, + [](auto) { return std::string{}; }) + +NVBENCH_DECLARE_ENUM_TYPE_STRINGS( + cudf::io::io_type, + [](auto value) { + switch (value) { + case cudf::io::io_type::FILEPATH: return "FILEPATH"; + case cudf::io::io_type::HOST_BUFFER: return "HOST_BUFFER"; + case cudf::io::io_type::DEVICE_BUFFER: return "DEVICE_BUFFER"; + case cudf::io::io_type::VOID: return "VOID"; + default: return "Unknown"; + } + }, + [](auto) { return std::string{}; }) + +NVBENCH_DECLARE_ENUM_TYPE_STRINGS( + cudf::io::compression_type, + [](auto value) { + switch (value) { + case cudf::io::compression_type::SNAPPY: return "SNAPPY"; + case cudf::io::compression_type::NONE: return "NONE"; + default: return "Unknown"; + } + }, + [](auto) { return std::string{}; }) + +enum class uses_index : bool { YES, NO }; + 
+enum class uses_numpy_dtype : bool { YES, NO }; + +NVBENCH_DECLARE_ENUM_TYPE_STRINGS( + uses_index, + [](auto value) { + switch (value) { + case uses_index::YES: return "YES"; + case uses_index::NO: return "NO"; + default: return "Unknown"; + } + }, + [](auto) { return std::string{}; }) + +NVBENCH_DECLARE_ENUM_TYPE_STRINGS( + uses_numpy_dtype, + [](auto value) { + switch (value) { + case uses_numpy_dtype::YES: return "YES"; + case uses_numpy_dtype::NO: return "NO"; + default: return "Unknown"; + } + }, + [](auto) { return std::string{}; }) + +NVBENCH_DECLARE_ENUM_TYPE_STRINGS( + column_selection, + [](auto value) { + switch (value) { + case column_selection::ALL: return "ALL"; + case column_selection::ALTERNATE: return "ALTERNATE"; + case column_selection::FIRST_HALF: return "FIRST_HALF"; + case column_selection::SECOND_HALF: return "SECOND_HALF"; + default: return "Unknown"; + } + }, + [](auto) { return std::string{}; }) + +NVBENCH_DECLARE_ENUM_TYPE_STRINGS( + row_selection, + [](auto value) { + switch (value) { + case row_selection::ALL: return "ALL"; + case row_selection::BYTE_RANGE: return "BYTE_RANGE"; + case row_selection::NROWS: return "NROWS"; + case row_selection::SKIPFOOTER: return "SKIPFOOTER"; + case row_selection::STRIPES: return "STRIPES"; + case row_selection::ROW_GROUPS: return "ROW_GROUPS"; + default: return "Unknown"; + } + }, + [](auto) { return std::string{}; }) + +NVBENCH_DECLARE_ENUM_TYPE_STRINGS( + cudf::type_id, + [](auto value) { + switch (value) { + case cudf::type_id::EMPTY: return "EMPTY"; + case cudf::type_id::TIMESTAMP_NANOSECONDS: return "TIMESTAMP_NANOSECONDS"; + default: return "Unknown"; + } + }, + [](auto) { return std::string{}; }) + +enum class converts_strings : bool { YES, NO }; + +enum class uses_pandas_metadata : bool { YES, NO }; + +NVBENCH_DECLARE_ENUM_TYPE_STRINGS( + converts_strings, + [](auto value) { + switch (value) { + case converts_strings::YES: return "YES"; + case converts_strings::NO: return "NO"; + default: 
return "Unknown"; + } + }, + [](auto) { return std::string{}; }) + +NVBENCH_DECLARE_ENUM_TYPE_STRINGS( + uses_pandas_metadata, + [](auto value) { + switch (value) { + case uses_pandas_metadata::YES: return "YES"; + case uses_pandas_metadata::NO: return "NO"; + default: return "Unknown"; + } + }, + [](auto) { return std::string{}; }) diff --git a/cpp/benchmarks/io/orc/orc_reader_input.cpp b/cpp/benchmarks/io/orc/orc_reader_input.cpp new file mode 100644 index 0000000..b6e15fb --- /dev/null +++ b/cpp/benchmarks/io/orc/orc_reader_input.cpp @@ -0,0 +1,139 @@ +/* + * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include + +#include +#include + +#include + +// Size of the data in the benchmark dataframe; chosen to be low enough to allow benchmarks to +// run on most GPUs, but large enough to allow highest throughput +constexpr int64_t data_size = 512 << 20; +constexpr cudf::size_type num_cols = 64; + +void orc_read_common(cudf::io::orc_writer_options const& opts, + cuio_source_sink_pair& source_sink, + nvbench::state& state) +{ + cudf::io::write_orc(opts); + + cudf::io::orc_reader_options read_opts = + cudf::io::orc_reader_options::builder(source_sink.make_source_info()); + + auto mem_stats_logger = cudf::memory_stats_logger(); // init stats logger + state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); + state.exec(nvbench::exec_tag::sync | nvbench::exec_tag::timer, + [&](nvbench::launch& launch, auto& timer) { + try_drop_l3_cache(); + + timer.start(); + cudf::io::read_orc(read_opts); + timer.stop(); + }); + + auto const time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value"); + state.add_element_count(static_cast(data_size) / time, "bytes_per_second"); + state.add_buffer_size( + mem_stats_logger.peak_memory_usage(), "peak_memory_usage", "peak_memory_usage"); + state.add_buffer_size(source_sink.size(), "encoded_file_size", "encoded_file_size"); +} + +template +void BM_orc_read_data(nvbench::state& state, + nvbench::type_list, nvbench::enum_type>) +{ + auto const d_type = get_type_or_group(static_cast(DataType)); + cudf::size_type const cardinality = state.get_int64("cardinality"); + cudf::size_type const run_length = state.get_int64("run_length"); + + auto const tbl = + create_random_table(cycle_dtypes(d_type, num_cols), + table_size_bytes{data_size}, + data_profile_builder().cardinality(cardinality).avg_run_length(run_length)); + auto const view = tbl->view(); + + cuio_source_sink_pair source_sink(IOType); + cudf::io::orc_writer_options opts = + 
cudf::io::orc_writer_options::builder(source_sink.make_sink_info(), view); + + orc_read_common(opts, source_sink, state); +} + +template +void BM_orc_read_io_compression( + nvbench::state& state, + nvbench::type_list, nvbench::enum_type>) +{ + auto const d_type = get_type_or_group({static_cast(data_type::INTEGRAL_SIGNED), + static_cast(data_type::FLOAT), + static_cast(data_type::DECIMAL), + static_cast(data_type::TIMESTAMP), + static_cast(data_type::STRING), + static_cast(data_type::LIST), + static_cast(data_type::STRUCT)}); + + cudf::size_type const cardinality = state.get_int64("cardinality"); + cudf::size_type const run_length = state.get_int64("run_length"); + + auto const tbl = + create_random_table(cycle_dtypes(d_type, num_cols), + table_size_bytes{data_size}, + data_profile_builder().cardinality(cardinality).avg_run_length(run_length)); + auto const view = tbl->view(); + + cuio_source_sink_pair source_sink(IOType); + cudf::io::orc_writer_options opts = + cudf::io::orc_writer_options::builder(source_sink.make_sink_info(), view) + .compression(Compression); + + orc_read_common(opts, source_sink, state); +} + +using d_type_list = nvbench::enum_type_list; + +using io_list = nvbench::enum_type_list; + +using compression_list = + nvbench::enum_type_list; + +NVBENCH_BENCH_TYPES(BM_orc_read_data, + NVBENCH_TYPE_AXES(d_type_list, + nvbench::enum_type_list)) + .set_name("orc_read_decode") + .set_type_axes_names({"data_type", "io"}) + .set_min_samples(4) + .add_int64_axis("cardinality", {0, 1000}) + .add_int64_axis("run_length", {1, 32}); + +NVBENCH_BENCH_TYPES(BM_orc_read_io_compression, NVBENCH_TYPE_AXES(io_list, compression_list)) + .set_name("orc_read_io_compression") + .set_type_axes_names({"io", "compression"}) + .set_min_samples(4) + .add_int64_axis("cardinality", {0, 1000}) + .add_int64_axis("run_length", {1, 32}); diff --git a/cpp/benchmarks/io/orc/orc_reader_options.cpp b/cpp/benchmarks/io/orc/orc_reader_options.cpp new file mode 100644 index 0000000..1f656f7 
--- /dev/null +++ b/cpp/benchmarks/io/orc/orc_reader_options.cpp @@ -0,0 +1,169 @@ +/* + * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include + +#include +#include +#include +#include + +#include + +// Size of the data in the benchmark dataframe; chosen to be low enough to allow benchmarks to +// run on most GPUs, but large enough to allow highest throughput +constexpr int64_t data_size = 512 << 20; +// The number of separate read calls to use when reading files in multiple chunks +// Each call reads roughly equal amounts of data +constexpr int32_t chunked_read_num_chunks = 4; + +std::vector get_top_level_col_names(cudf::io::source_info const& source) +{ + auto const top_lvl_cols = cudf::io::read_orc_metadata(source).schema().root().children(); + std::vector col_names; + std::transform(top_lvl_cols.cbegin(), + top_lvl_cols.cend(), + std::back_inserter(col_names), + [](auto const& col_meta) { return col_meta.name(); }); + return col_names; +} + +template +void BM_orc_read_varying_options(nvbench::state& state, + nvbench::type_list, + nvbench::enum_type, + nvbench::enum_type, + nvbench::enum_type, + nvbench::enum_type>) +{ + auto const num_chunks = RowSelection == row_selection::ALL ? 
1 : chunked_read_num_chunks; + + auto const use_index = UsesIndex == uses_index::YES; + auto const use_np_dtypes = UsesNumpyDType == uses_numpy_dtype::YES; + auto const ts_type = cudf::data_type{Timestamp}; + + // skip_rows is not supported on nested types + auto const data_types = + dtypes_for_column_selection(get_type_or_group({static_cast(data_type::INTEGRAL_SIGNED), + static_cast(data_type::FLOAT), + static_cast(data_type::DECIMAL), + static_cast(data_type::TIMESTAMP), + static_cast(data_type::STRING)}), + ColSelection); + auto const tbl = create_random_table(data_types, table_size_bytes{data_size}); + auto const view = tbl->view(); + + cuio_source_sink_pair source_sink(io_type::HOST_BUFFER); + cudf::io::orc_writer_options options = + cudf::io::orc_writer_options::builder(source_sink.make_sink_info(), view); + cudf::io::write_orc(options); + + auto const cols_to_read = + select_column_names(get_top_level_col_names(source_sink.make_source_info()), ColSelection); + cudf::io::orc_reader_options read_options = + cudf::io::orc_reader_options::builder(source_sink.make_source_info()) + .columns(cols_to_read) + .use_index(use_index) + .use_np_dtypes(use_np_dtypes) + .timestamp_type(ts_type); + + auto const num_stripes = + cudf::io::read_orc_metadata(source_sink.make_source_info()).num_stripes(); + auto const chunk_row_cnt = cudf::util::div_rounding_up_unsafe(view.num_rows(), num_chunks); + + auto mem_stats_logger = cudf::memory_stats_logger(); + state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); + state.exec( + nvbench::exec_tag::sync | nvbench::exec_tag::timer, [&](nvbench::launch& launch, auto& timer) { + try_drop_l3_cache(); + + timer.start(); + cudf::size_type rows_read = 0; + for (int32_t chunk = 0; chunk < num_chunks; ++chunk) { + switch (RowSelection) { + case row_selection::ALL: break; + case row_selection::STRIPES: + read_options.set_stripes({segments_in_chunk(num_stripes, num_chunks, chunk)}); + break; + case 
row_selection::NROWS: + read_options.set_skip_rows(chunk * chunk_row_cnt); + read_options.set_num_rows(chunk_row_cnt); + break; + default: CUDF_FAIL("Unsupported row selection method"); + } + + rows_read += cudf::io::read_orc(read_options).tbl->num_rows(); + } + + CUDF_EXPECTS(rows_read == view.num_rows(), "Benchmark did not read the entire table"); + timer.stop(); + }); + + auto const elapsed_time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value"); + auto const data_processed = data_size * cols_to_read.size() / view.num_columns(); + state.add_element_count(static_cast(data_processed) / elapsed_time, "bytes_per_second"); + state.add_buffer_size( + mem_stats_logger.peak_memory_usage(), "peak_memory_usage", "peak_memory_usage"); + state.add_buffer_size(source_sink.size(), "encoded_file_size", "encoded_file_size"); +} + +using col_selections = nvbench::enum_type_list; +NVBENCH_BENCH_TYPES(BM_orc_read_varying_options, + NVBENCH_TYPE_AXES(col_selections, + nvbench::enum_type_list, + nvbench::enum_type_list, + nvbench::enum_type_list, + nvbench::enum_type_list)) + .set_name("orc_read_column_selection") + .set_type_axes_names( + {"column_selection", "row_selection", "uses_index", "uses_numpy_dtype", "timestamp_type"}) + .set_min_samples(4); + +using row_selections = + nvbench::enum_type_list; +NVBENCH_BENCH_TYPES(BM_orc_read_varying_options, + NVBENCH_TYPE_AXES(nvbench::enum_type_list, + row_selections, + nvbench::enum_type_list, + nvbench::enum_type_list, + nvbench::enum_type_list)) + .set_name("orc_read_row_selection") + .set_type_axes_names( + {"column_selection", "row_selection", "uses_index", "uses_numpy_dtype", "timestamp_type"}) + .set_min_samples(4); + +NVBENCH_BENCH_TYPES( + BM_orc_read_varying_options, + NVBENCH_TYPE_AXES( + nvbench::enum_type_list, + nvbench::enum_type_list, + nvbench::enum_type_list, + nvbench::enum_type_list, + nvbench::enum_type_list)) + .set_name("orc_read_misc_options") + .set_type_axes_names( + {"column_selection", 
"row_selection", "uses_index", "uses_numpy_dtype", "timestamp_type"}) + .set_min_samples(4); diff --git a/cpp/benchmarks/io/orc/orc_writer.cpp b/cpp/benchmarks/io/orc/orc_writer.cpp new file mode 100644 index 0000000..bb37329 --- /dev/null +++ b/cpp/benchmarks/io/orc/orc_writer.cpp @@ -0,0 +1,214 @@ +/* + * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include + +#include +#include +#include + +#include + +NVBENCH_DECLARE_ENUM_TYPE_STRINGS( + cudf::io::statistics_freq, + [](auto value) { + switch (value) { + case cudf::io::statistics_freq::STATISTICS_NONE: return "STATISTICS_NONE"; + case cudf::io::statistics_freq::STATISTICS_ROWGROUP: return "ORC_STATISTICS_STRIPE"; + case cudf::io::statistics_freq::STATISTICS_PAGE: return "ORC_STATISTICS_ROW_GROUP"; + default: return "Unknown"; + } + }, + [](auto) { return std::string{}; }) + +// Size of the data in the benchmark dataframe; chosen to be low enough to allow benchmarks to +// run on most GPUs, but large enough to allow highest throughput +constexpr int64_t data_size = 512 << 20; +constexpr cudf::size_type num_cols = 64; + +template +void BM_orc_write_encode(nvbench::state& state, nvbench::type_list>) +{ + auto const d_type = get_type_or_group(static_cast(DataType)); + cudf::size_type const cardinality = state.get_int64("cardinality"); + cudf::size_type const run_length = state.get_int64("run_length"); + auto const 
compression = cudf::io::compression_type::SNAPPY; + auto const sink_type = io_type::VOID; + + auto const tbl = + create_random_table(cycle_dtypes(d_type, num_cols), + table_size_bytes{data_size}, + data_profile_builder().cardinality(cardinality).avg_run_length(run_length)); + auto const view = tbl->view(); + + std::size_t encoded_file_size = 0; + + auto mem_stats_logger = cudf::memory_stats_logger(); + state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); + state.exec(nvbench::exec_tag::timer | nvbench::exec_tag::sync, + [&](nvbench::launch& launch, auto& timer) { + cuio_source_sink_pair source_sink(sink_type); + + timer.start(); + cudf::io::orc_writer_options options = + cudf::io::orc_writer_options::builder(source_sink.make_sink_info(), view) + .compression(compression); + cudf::io::write_orc(options); + timer.stop(); + + encoded_file_size = source_sink.size(); + }); + + auto const time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value"); + state.add_element_count(static_cast(data_size) / time, "bytes_per_second"); + state.add_buffer_size( + mem_stats_logger.peak_memory_usage(), "peak_memory_usage", "peak_memory_usage"); + state.add_buffer_size(encoded_file_size, "encoded_file_size", "encoded_file_size"); +} + +template +void BM_orc_write_io_compression( + nvbench::state& state, + nvbench::type_list, nvbench::enum_type>) +{ + auto const d_type = get_type_or_group({static_cast(data_type::INTEGRAL_SIGNED), + static_cast(data_type::FLOAT), + static_cast(data_type::DECIMAL), + static_cast(data_type::TIMESTAMP), + static_cast(data_type::STRING), + static_cast(data_type::LIST), + static_cast(data_type::STRUCT)}); + + cudf::size_type const cardinality = state.get_int64("cardinality"); + cudf::size_type const run_length = state.get_int64("run_length"); + auto const compression = Compression; + auto const sink_type = IO; + + auto const tbl = + create_random_table(cycle_dtypes(d_type, num_cols), + 
table_size_bytes{data_size}, + data_profile_builder().cardinality(cardinality).avg_run_length(run_length)); + auto const view = tbl->view(); + + std::size_t encoded_file_size = 0; + + auto mem_stats_logger = cudf::memory_stats_logger(); + state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); + state.exec(nvbench::exec_tag::timer | nvbench::exec_tag::sync, + [&](nvbench::launch& launch, auto& timer) { + cuio_source_sink_pair source_sink(sink_type); + + timer.start(); + cudf::io::orc_writer_options options = + cudf::io::orc_writer_options::builder(source_sink.make_sink_info(), view) + .compression(compression); + cudf::io::write_orc(options); + timer.stop(); + + encoded_file_size = source_sink.size(); + }); + + auto const time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value"); + state.add_element_count(static_cast(data_size) / time, "bytes_per_second"); + state.add_buffer_size( + mem_stats_logger.peak_memory_usage(), "peak_memory_usage", "peak_memory_usage"); + state.add_buffer_size(encoded_file_size, "encoded_file_size", "encoded_file_size"); +} + +template +void BM_orc_write_statistics( + nvbench::state& state, + nvbench::type_list, nvbench::enum_type>) +{ + auto const d_type = get_type_or_group({static_cast(data_type::INTEGRAL_SIGNED), + static_cast(data_type::FLOAT), + static_cast(data_type::DECIMAL), + static_cast(data_type::TIMESTAMP), + static_cast(data_type::STRING), + static_cast(data_type::LIST)}); + + auto const compression = Compression; + auto const stats_freq = Statistics; + + auto const tbl = create_random_table(d_type, table_size_bytes{data_size}); + auto const view = tbl->view(); + + std::size_t encoded_file_size = 0; + + auto mem_stats_logger = cudf::memory_stats_logger(); + state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); + state.exec(nvbench::exec_tag::timer | nvbench::exec_tag::sync, + [&](nvbench::launch& launch, auto& timer) { + cuio_source_sink_pair 
source_sink(io_type::FILEPATH); + + timer.start(); + cudf::io::orc_writer_options const options = + cudf::io::orc_writer_options::builder(source_sink.make_sink_info(), view) + .compression(compression) + .enable_statistics(stats_freq); + cudf::io::write_orc(options); + timer.stop(); + + encoded_file_size = source_sink.size(); + }); + + auto const time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value"); + state.add_element_count(static_cast(data_size) / time, "bytes_per_second"); + state.add_buffer_size( + mem_stats_logger.peak_memory_usage(), "peak_memory_usage", "peak_memory_usage"); + state.add_buffer_size(encoded_file_size, "encoded_file_size", "encoded_file_size"); +} + +using d_type_list = nvbench::enum_type_list; + +using io_list = nvbench::enum_type_list; + +using compression_list = + nvbench::enum_type_list; + +using stats_list = nvbench::enum_type_list; + +NVBENCH_BENCH_TYPES(BM_orc_write_encode, NVBENCH_TYPE_AXES(d_type_list)) + .set_name("orc_write_encode") + .set_type_axes_names({"data_type"}) + .set_min_samples(4) + .add_int64_axis("cardinality", {0, 1000}) + .add_int64_axis("run_length", {1, 32}); + +NVBENCH_BENCH_TYPES(BM_orc_write_io_compression, NVBENCH_TYPE_AXES(io_list, compression_list)) + .set_name("orc_write_io_compression") + .set_type_axes_names({"io", "compression"}) + .set_min_samples(4) + .add_int64_axis("cardinality", {0, 1000}) + .add_int64_axis("run_length", {1, 32}); + +NVBENCH_BENCH_TYPES(BM_orc_write_statistics, NVBENCH_TYPE_AXES(stats_list, compression_list)) + .set_name("orc_write_statistics") + .set_type_axes_names({"statistics", "compression"}) + .set_min_samples(4); diff --git a/cpp/benchmarks/io/orc/orc_writer_chunks.cpp b/cpp/benchmarks/io/orc/orc_writer_chunks.cpp new file mode 100644 index 0000000..dff88d7 --- /dev/null +++ b/cpp/benchmarks/io/orc/orc_writer_chunks.cpp @@ -0,0 +1,142 @@ +/* + * Copyright (c) 2022-2023, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include + +#include +#include +#include +#include + +#include + +#include + +// Size of the data in the benchmark dataframe; chosen to be low enough to allow benchmarks to +// run on most GPUs, but large enough to allow highest throughput +constexpr int64_t data_size = 512 << 20; + +void nvbench_orc_write(nvbench::state& state) +{ + cudf::size_type num_cols = state.get_int64("num_columns"); + + auto tbl = create_random_table( + cycle_dtypes(get_type_or_group({static_cast(data_type::INTEGRAL_SIGNED), + static_cast(data_type::FLOAT), + static_cast(data_type::DECIMAL), + static_cast(data_type::TIMESTAMP), + static_cast(data_type::STRING), + static_cast(data_type::STRUCT), + static_cast(data_type::LIST)}), + num_cols), + table_size_bytes{data_size}); + cudf::table_view view = tbl->view(); + + auto mem_stats_logger = cudf::memory_stats_logger(); + + state.add_global_memory_reads(data_size); + state.add_element_count(view.num_columns() * view.num_rows()); + + size_t encoded_file_size = 0; + + state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); + state.exec(nvbench::exec_tag::timer | nvbench::exec_tag::sync, + [&](nvbench::launch& launch, auto& timer) { + cuio_source_sink_pair source_sink(io_type::VOID); + timer.start(); + + cudf::io::orc_writer_options opts = + cudf::io::orc_writer_options::builder(source_sink.make_sink_info(), view); + 
cudf::io::write_orc(opts); + + timer.stop(); + encoded_file_size = source_sink.size(); + }); + + state.add_buffer_size(mem_stats_logger.peak_memory_usage(), "pmu", "Peak Memory Usage"); + state.add_buffer_size(encoded_file_size, "efs", "Encoded File Size"); + state.add_element_count(view.num_rows(), "Total Rows"); +} + +void nvbench_orc_chunked_write(nvbench::state& state) +{ + cudf::size_type num_cols = state.get_int64("num_columns"); + cudf::size_type num_tables = state.get_int64("num_chunks"); + + std::vector> tables; + for (cudf::size_type idx = 0; idx < num_tables; idx++) { + tables.push_back(create_random_table( + cycle_dtypes(get_type_or_group({static_cast(data_type::INTEGRAL_SIGNED), + static_cast(data_type::FLOAT), + static_cast(data_type::DECIMAL), + static_cast(data_type::TIMESTAMP), + static_cast(data_type::STRING), + static_cast(data_type::STRUCT), + static_cast(data_type::LIST)}), + num_cols), + table_size_bytes{size_t(data_size / num_tables)})); + } + + auto mem_stats_logger = cudf::memory_stats_logger(); + + auto size_iter = thrust::make_transform_iterator( + tables.begin(), [](auto const& i) { return i->num_columns() * i->num_rows(); }); + auto row_count_iter = + thrust::make_transform_iterator(tables.begin(), [](auto const& i) { return i->num_rows(); }); + auto total_elements = std::accumulate(size_iter, size_iter + num_tables, 0); + auto total_rows = std::accumulate(row_count_iter, row_count_iter + num_tables, 0); + + state.add_global_memory_reads(data_size); + state.add_element_count(total_elements); + + size_t encoded_file_size = 0; + + state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); + state.exec( + nvbench::exec_tag::timer | nvbench::exec_tag::sync, [&](nvbench::launch& launch, auto& timer) { + cuio_source_sink_pair source_sink(io_type::VOID); + timer.start(); + + cudf::io::chunked_orc_writer_options opts = + cudf::io::chunked_orc_writer_options::builder(source_sink.make_sink_info()); + 
cudf::io::orc_chunked_writer writer(opts); + std::for_each(tables.begin(), + tables.end(), + [&writer](std::unique_ptr const& tbl) { writer.write(*tbl); }); + writer.close(); + + timer.stop(); + encoded_file_size = source_sink.size(); + }); + + state.add_buffer_size(mem_stats_logger.peak_memory_usage(), "pmu", "Peak Memory Usage"); + state.add_buffer_size(encoded_file_size, "efs", "Encoded File Size"); + state.add_element_count(total_rows, "Total Rows"); +} + +NVBENCH_BENCH(nvbench_orc_write) + .set_name("orc_write") + .set_min_samples(4) + .add_int64_axis("num_columns", {8, 64}); + +NVBENCH_BENCH(nvbench_orc_chunked_write) + .set_name("orc_chunked_write") + .set_min_samples(4) + .add_int64_axis("num_columns", {8, 64}) + .add_int64_axis("num_chunks", {8, 64}); diff --git a/cpp/benchmarks/io/parquet/parquet_reader_input.cpp b/cpp/benchmarks/io/parquet/parquet_reader_input.cpp new file mode 100644 index 0000000..80303ea --- /dev/null +++ b/cpp/benchmarks/io/parquet/parquet_reader_input.cpp @@ -0,0 +1,245 @@ +/* + * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include + +#include +#include + +#include + +// Size of the data in the benchmark dataframe; chosen to be low enough to allow benchmarks to +// run on most GPUs, but large enough to allow highest throughput +constexpr size_t data_size = 512 << 20; +constexpr cudf::size_type num_cols = 64; + +void parquet_read_common(cudf::io::parquet_writer_options const& write_opts, + cuio_source_sink_pair& source_sink, + nvbench::state& state) +{ + cudf::io::write_parquet(write_opts); + + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(source_sink.make_source_info()); + + auto mem_stats_logger = cudf::memory_stats_logger(); + state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); + state.exec(nvbench::exec_tag::sync | nvbench::exec_tag::timer, + [&](nvbench::launch& launch, auto& timer) { + try_drop_l3_cache(); + + timer.start(); + cudf::io::read_parquet(read_opts); + timer.stop(); + }); + + auto const time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value"); + state.add_element_count(static_cast(data_size) / time, "bytes_per_second"); + state.add_buffer_size( + mem_stats_logger.peak_memory_usage(), "peak_memory_usage", "peak_memory_usage"); + state.add_buffer_size(source_sink.size(), "encoded_file_size", "encoded_file_size"); +} + +template +void BM_parquet_read_data( + nvbench::state& state, + nvbench::type_list, nvbench::enum_type>) +{ + auto const d_type = get_type_or_group(static_cast(DataType)); + cudf::size_type const cardinality = state.get_int64("cardinality"); + cudf::size_type const run_length = state.get_int64("run_length"); + auto const compression = cudf::io::compression_type::SNAPPY; + + auto const tbl = + create_random_table(cycle_dtypes(d_type, num_cols), + table_size_bytes{data_size}, + data_profile_builder().cardinality(cardinality).avg_run_length(run_length)); + auto const view = tbl->view(); + + cuio_source_sink_pair 
source_sink(IOType); + cudf::io::parquet_writer_options write_opts = + cudf::io::parquet_writer_options::builder(source_sink.make_sink_info(), view) + .compression(compression); + + parquet_read_common(write_opts, source_sink, state); +} + +template +void BM_parquet_read_io_compression( + nvbench::state& state, + nvbench::type_list, nvbench::enum_type>) +{ + auto const d_type = get_type_or_group({static_cast(data_type::INTEGRAL), + static_cast(data_type::FLOAT), + static_cast(data_type::DECIMAL), + static_cast(data_type::TIMESTAMP), + static_cast(data_type::DURATION), + static_cast(data_type::STRING), + static_cast(data_type::LIST), + static_cast(data_type::STRUCT)}); + + cudf::size_type const cardinality = state.get_int64("cardinality"); + cudf::size_type const run_length = state.get_int64("run_length"); + auto const compression = Compression; + auto const source_type = IOType; + + auto const tbl = + create_random_table(cycle_dtypes(d_type, num_cols), + table_size_bytes{data_size}, + data_profile_builder().cardinality(cardinality).avg_run_length(run_length)); + auto const view = tbl->view(); + + cuio_source_sink_pair source_sink(source_type); + cudf::io::parquet_writer_options write_opts = + cudf::io::parquet_writer_options::builder(source_sink.make_sink_info(), view) + .compression(compression); + + parquet_read_common(write_opts, source_sink, state); +} + +template +void BM_parquet_read_io_small_mixed(nvbench::state& state, + nvbench::type_list>) +{ + auto const d_type = + std::pair{cudf::type_id::STRING, cudf::type_id::INT32}; + + cudf::size_type const cardinality = state.get_int64("cardinality"); + cudf::size_type const run_length = state.get_int64("run_length"); + cudf::size_type const num_strings = state.get_int64("num_string_cols"); + auto const source_type = IOType; + + // want 80 pages total, across 4 columns, so 20 pages per column + cudf::size_type constexpr n_col = 4; + cudf::size_type constexpr page_size_rows = 10'000; + cudf::size_type constexpr 
num_rows = page_size_rows * (80 / n_col); + + auto const tbl = + create_random_table(mix_dtypes(d_type, n_col, num_strings), + row_count{num_rows}, + data_profile_builder().cardinality(cardinality).avg_run_length(run_length)); + auto const view = tbl->view(); + + cuio_source_sink_pair source_sink(source_type); + cudf::io::parquet_writer_options write_opts = + cudf::io::parquet_writer_options::builder(source_sink.make_sink_info(), view) + .max_page_size_rows(10'000) + .compression(cudf::io::compression_type::NONE); + + parquet_read_common(write_opts, source_sink, state); +} + +template +void BM_parquet_read_chunks( + nvbench::state& state, + nvbench::type_list, nvbench::enum_type>) +{ + auto const d_type = get_type_or_group(static_cast(DataType)); + cudf::size_type const cardinality = state.get_int64("cardinality"); + cudf::size_type const run_length = state.get_int64("run_length"); + cudf::size_type const byte_limit = state.get_int64("byte_limit"); + auto const compression = cudf::io::compression_type::SNAPPY; + + auto const tbl = + create_random_table(cycle_dtypes(d_type, num_cols), + table_size_bytes{data_size}, + data_profile_builder().cardinality(cardinality).avg_run_length(run_length)); + auto const view = tbl->view(); + + cuio_source_sink_pair source_sink(IOType); + cudf::io::parquet_writer_options write_opts = + cudf::io::parquet_writer_options::builder(source_sink.make_sink_info(), view) + .compression(compression); + + cudf::io::write_parquet(write_opts); + + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(source_sink.make_source_info()); + + auto mem_stats_logger = cudf::memory_stats_logger(); + state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); + state.exec(nvbench::exec_tag::sync | nvbench::exec_tag::timer, + [&](nvbench::launch& launch, auto& timer) { + try_drop_l3_cache(); + + timer.start(); + auto reader = cudf::io::chunked_parquet_reader(byte_limit, read_opts); + do { + 
[[maybe_unused]] auto const chunk = reader.read_chunk(); + } while (reader.has_next()); + timer.stop(); + }); + + auto const time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value"); + state.add_element_count(static_cast(data_size) / time, "bytes_per_second"); + state.add_buffer_size( + mem_stats_logger.peak_memory_usage(), "peak_memory_usage", "peak_memory_usage"); + state.add_buffer_size(source_sink.size(), "encoded_file_size", "encoded_file_size"); +} + +using d_type_list = nvbench::enum_type_list; + +using io_list = nvbench::enum_type_list; + +using compression_list = + nvbench::enum_type_list; + +NVBENCH_BENCH_TYPES(BM_parquet_read_data, + NVBENCH_TYPE_AXES(d_type_list, + nvbench::enum_type_list)) + .set_name("parquet_read_decode") + .set_type_axes_names({"data_type", "io"}) + .set_min_samples(4) + .add_int64_axis("cardinality", {0, 1000}) + .add_int64_axis("run_length", {1, 32}); + +NVBENCH_BENCH_TYPES(BM_parquet_read_io_compression, NVBENCH_TYPE_AXES(io_list, compression_list)) + .set_name("parquet_read_io_compression") + .set_type_axes_names({"io", "compression"}) + .set_min_samples(4) + .add_int64_axis("cardinality", {0, 1000}) + .add_int64_axis("run_length", {1, 32}); + +NVBENCH_BENCH_TYPES(BM_parquet_read_chunks, + NVBENCH_TYPE_AXES(d_type_list, + nvbench::enum_type_list)) + .set_name("parquet_read_chunks") + .set_type_axes_names({"data_type", "io"}) + .set_min_samples(4) + .add_int64_axis("cardinality", {0, 1000}) + .add_int64_axis("run_length", {1, 32}) + .add_int64_axis("byte_limit", {0, 500'000}); + +NVBENCH_BENCH_TYPES(BM_parquet_read_io_small_mixed, + NVBENCH_TYPE_AXES(nvbench::enum_type_list)) + .set_name("parquet_read_io_small_mixed") + .set_type_axes_names({"io"}) + .set_min_samples(4) + .add_int64_axis("cardinality", {0, 1000}) + .add_int64_axis("run_length", {1, 32}) + .add_int64_axis("num_string_cols", {1, 2, 3}); diff --git a/cpp/benchmarks/io/parquet/parquet_reader_options.cpp 
b/cpp/benchmarks/io/parquet/parquet_reader_options.cpp new file mode 100644 index 0000000..9f221de --- /dev/null +++ b/cpp/benchmarks/io/parquet/parquet_reader_options.cpp @@ -0,0 +1,179 @@ +/* + * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include + +#include +#include +#include + +#include + +// Size of the data in the benchmark dataframe; chosen to be low enough to allow benchmarks to +// run on most GPUs, but large enough to allow highest throughput +constexpr std::size_t data_size = 512 << 20; +// The number of separate read calls to use when reading files in multiple chunks +// Each call reads roughly equal amounts of data +constexpr int32_t chunked_read_num_chunks = 4; + +std::vector get_top_level_col_names(cudf::io::source_info const& source) +{ + auto const top_lvl_cols = cudf::io::read_parquet_metadata(source).schema().root().children(); + std::vector col_names; + std::transform(top_lvl_cols.cbegin(), + top_lvl_cols.cend(), + std::back_inserter(col_names), + [](auto const& col_meta) { return col_meta.name(); }); + + return col_names; +} + +template +void BM_parquet_read_options(nvbench::state& state, + nvbench::type_list, + nvbench::enum_type, + nvbench::enum_type, + nvbench::enum_type, + nvbench::enum_type>) +{ + auto const num_chunks = RowSelection == row_selection::ALL ? 
1 : chunked_read_num_chunks; + + auto constexpr str_to_categories = ConvertsStrings == converts_strings::YES; + auto constexpr uses_pd_metadata = UsesPandasMetadata == uses_pandas_metadata::YES; + + auto const ts_type = cudf::data_type{Timestamp}; + + auto const data_types = + dtypes_for_column_selection(get_type_or_group({static_cast(data_type::INTEGRAL), + static_cast(data_type::FLOAT), + static_cast(data_type::DECIMAL), + static_cast(data_type::TIMESTAMP), + static_cast(data_type::DURATION), + static_cast(data_type::STRING), + static_cast(data_type::LIST), + static_cast(data_type::STRUCT)}), + ColSelection); + auto const tbl = create_random_table(data_types, table_size_bytes{data_size}); + auto const view = tbl->view(); + + cuio_source_sink_pair source_sink(io_type::HOST_BUFFER); + cudf::io::parquet_writer_options options = + cudf::io::parquet_writer_options::builder(source_sink.make_sink_info(), view); + cudf::io::write_parquet(options); + + auto const cols_to_read = + select_column_names(get_top_level_col_names(source_sink.make_source_info()), ColSelection); + cudf::io::parquet_reader_options read_options = + cudf::io::parquet_reader_options::builder(source_sink.make_source_info()) + .columns(cols_to_read) + .convert_strings_to_categories(str_to_categories) + .use_pandas_metadata(uses_pd_metadata) + .timestamp_type(ts_type); + + auto const num_row_groups = read_parquet_metadata(source_sink.make_source_info()).num_rowgroups(); + auto const chunk_row_cnt = cudf::util::div_rounding_up_unsafe(view.num_rows(), num_chunks); + + auto mem_stats_logger = cudf::memory_stats_logger(); + state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); + state.exec( + nvbench::exec_tag::sync | nvbench::exec_tag::timer, [&](nvbench::launch& launch, auto& timer) { + try_drop_l3_cache(); + + timer.start(); + cudf::size_type rows_read = 0; + for (int32_t chunk = 0; chunk < num_chunks; ++chunk) { + switch (RowSelection) { + case row_selection::ALL: 
break; + case row_selection::ROW_GROUPS: { + read_options.set_row_groups({segments_in_chunk(num_row_groups, num_chunks, chunk)}); + } break; + case row_selection::NROWS: + read_options.set_skip_rows(chunk * chunk_row_cnt); + read_options.set_num_rows(chunk_row_cnt); + break; + default: CUDF_FAIL("Unsupported row selection method"); + } + + rows_read += cudf::io::read_parquet(read_options).tbl->num_rows(); + } + + CUDF_EXPECTS(rows_read == view.num_rows(), "Benchmark did not read the entire table"); + timer.stop(); + }); + + auto const elapsed_time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value"); + auto const data_processed = data_size * cols_to_read.size() / view.num_columns(); + state.add_element_count(static_cast(data_processed) / elapsed_time, "bytes_per_second"); + state.add_buffer_size( + mem_stats_logger.peak_memory_usage(), "peak_memory_usage", "peak_memory_usage"); + state.add_buffer_size(source_sink.size(), "encoded_file_size", "encoded_file_size"); +} + +using row_selections = + nvbench::enum_type_list; +NVBENCH_BENCH_TYPES(BM_parquet_read_options, + NVBENCH_TYPE_AXES(nvbench::enum_type_list, + row_selections, + nvbench::enum_type_list, + nvbench::enum_type_list, + nvbench::enum_type_list)) + .set_name("parquet_read_row_selection") + .set_type_axes_names({"column_selection", + "row_selection", + "str_to_categories", + "uses_pandas_metadata", + "timestamp_type"}) + .set_min_samples(4); + +using col_selections = nvbench::enum_type_list; +NVBENCH_BENCH_TYPES(BM_parquet_read_options, + NVBENCH_TYPE_AXES(col_selections, + nvbench::enum_type_list, + nvbench::enum_type_list, + nvbench::enum_type_list, + nvbench::enum_type_list)) + .set_name("parquet_read_column_selection") + .set_type_axes_names({"column_selection", + "row_selection", + "str_to_categories", + "uses_pandas_metadata", + "timestamp_type"}) + .set_min_samples(4); + +NVBENCH_BENCH_TYPES( + BM_parquet_read_options, + NVBENCH_TYPE_AXES(nvbench::enum_type_list, + 
nvbench::enum_type_list, + nvbench::enum_type_list, + nvbench::enum_type_list, + nvbench::enum_type_list)) + .set_name("parquet_read_misc_options") + .set_type_axes_names({"column_selection", + "row_selection", + "str_to_categories", + "uses_pandas_metadata", + "timestamp_type"}) + .set_min_samples(4); diff --git a/cpp/benchmarks/io/parquet/parquet_writer.cpp b/cpp/benchmarks/io/parquet/parquet_writer.cpp new file mode 100644 index 0000000..13b396e --- /dev/null +++ b/cpp/benchmarks/io/parquet/parquet_writer.cpp @@ -0,0 +1,221 @@ +/* + * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include + +#include +#include + +#include + +NVBENCH_DECLARE_ENUM_TYPE_STRINGS( + cudf::io::statistics_freq, + [](auto value) { + switch (value) { + case cudf::io::statistics_freq::STATISTICS_NONE: return "STATISTICS_NONE"; + case cudf::io::statistics_freq::STATISTICS_ROWGROUP: return "STATISTICS_ROWGROUP"; + case cudf::io::statistics_freq::STATISTICS_PAGE: return "STATISTICS_PAGE"; + case cudf::io::statistics_freq::STATISTICS_COLUMN: return "STATISTICS_COLUMN"; + default: return "Unknown"; + } + }, + [](auto) { return std::string{}; }) + +// Size of the data in the benchmark dataframe; chosen to be low enough to allow benchmarks to +// run on most GPUs, but large enough to allow highest throughput +constexpr size_t data_size = 512 << 20; +constexpr cudf::size_type num_cols = 64; + +template +void BM_parq_write_encode(nvbench::state& state, nvbench::type_list>) +{ + auto const data_types = get_type_or_group(static_cast(DataType)); + cudf::size_type const cardinality = state.get_int64("cardinality"); + cudf::size_type const run_length = state.get_int64("run_length"); + auto const compression = cudf::io::compression_type::SNAPPY; + auto const sink_type = io_type::VOID; + + auto const tbl = + create_random_table(cycle_dtypes(data_types, num_cols), + table_size_bytes{data_size}, + data_profile_builder().cardinality(cardinality).avg_run_length(run_length)); + auto const view = tbl->view(); + + std::size_t encoded_file_size = 0; + + auto const mem_stats_logger = cudf::memory_stats_logger(); + state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); + state.exec(nvbench::exec_tag::timer | nvbench::exec_tag::sync, + [&](nvbench::launch& launch, auto& timer) { + cuio_source_sink_pair source_sink(sink_type); + + timer.start(); + cudf::io::parquet_writer_options opts = + cudf::io::parquet_writer_options::builder(source_sink.make_sink_info(), view) + .compression(compression); + 
cudf::io::write_parquet(opts); + timer.stop(); + + encoded_file_size = source_sink.size(); + }); + + auto const time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value"); + state.add_element_count(static_cast(data_size) / time, "bytes_per_second"); + state.add_buffer_size( + mem_stats_logger.peak_memory_usage(), "peak_memory_usage", "peak_memory_usage"); + state.add_buffer_size(encoded_file_size, "encoded_file_size", "encoded_file_size"); +} + +template +void BM_parq_write_io_compression( + nvbench::state& state, + nvbench::type_list, nvbench::enum_type>) +{ + auto const data_types = get_type_or_group({static_cast(data_type::INTEGRAL), + static_cast(data_type::FLOAT), + static_cast(data_type::DECIMAL), + static_cast(data_type::TIMESTAMP), + static_cast(data_type::DURATION), + static_cast(data_type::STRING), + static_cast(data_type::LIST), + static_cast(data_type::STRUCT)}); + + cudf::size_type const cardinality = state.get_int64("cardinality"); + cudf::size_type const run_length = state.get_int64("run_length"); + auto const compression = Compression; + auto const sink_type = IO; + + auto const tbl = + create_random_table(cycle_dtypes(data_types, num_cols), + table_size_bytes{data_size}, + data_profile_builder().cardinality(cardinality).avg_run_length(run_length)); + auto const view = tbl->view(); + + std::size_t encoded_file_size = 0; + + auto const mem_stats_logger = cudf::memory_stats_logger(); + state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); + state.exec(nvbench::exec_tag::timer | nvbench::exec_tag::sync, + [&](nvbench::launch& launch, auto& timer) { + cuio_source_sink_pair source_sink(sink_type); + + timer.start(); + cudf::io::parquet_writer_options opts = + cudf::io::parquet_writer_options::builder(source_sink.make_sink_info(), view) + .compression(compression); + cudf::io::write_parquet(opts); + timer.stop(); + + encoded_file_size = source_sink.size(); + }); + + auto const time = 
state.get_summary("nv/cold/time/gpu/mean").get_float64("value"); + state.add_element_count(static_cast(data_size) / time, "bytes_per_second"); + state.add_buffer_size( + mem_stats_logger.peak_memory_usage(), "peak_memory_usage", "peak_memory_usage"); + state.add_buffer_size(encoded_file_size, "encoded_file_size", "encoded_file_size"); +} + +template +void BM_parq_write_varying_options( + nvbench::state& state, + nvbench::type_list, nvbench::enum_type>) +{ + auto const enable_stats = Statistics; + auto const compression = Compression; + auto const file_path = state.get_string("file_path"); + + auto const data_types = get_type_or_group({static_cast(data_type::INTEGRAL_SIGNED), + static_cast(data_type::FLOAT), + static_cast(data_type::DECIMAL), + static_cast(data_type::TIMESTAMP), + static_cast(data_type::DURATION), + static_cast(data_type::STRING), + static_cast(data_type::LIST)}); + + auto const tbl = create_random_table(data_types, table_size_bytes{data_size}); + auto const view = tbl->view(); + + std::size_t encoded_file_size = 0; + + auto mem_stats_logger = cudf::memory_stats_logger(); + state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); + state.exec(nvbench::exec_tag::timer | nvbench::exec_tag::sync, + [&](nvbench::launch& launch, auto& timer) { + cuio_source_sink_pair source_sink(io_type::FILEPATH); + + timer.start(); + cudf::io::parquet_writer_options const options = + cudf::io::parquet_writer_options::builder(source_sink.make_sink_info(), view) + .compression(compression) + .stats_level(enable_stats) + .column_chunks_file_paths({file_path}); + cudf::io::write_parquet(options); + timer.stop(); + + encoded_file_size = source_sink.size(); + }); + + auto const time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value"); + state.add_element_count(static_cast(data_size) / time, "bytes_per_second"); + state.add_buffer_size( + mem_stats_logger.peak_memory_usage(), "peak_memory_usage", "peak_memory_usage"); + 
state.add_buffer_size(encoded_file_size, "encoded_file_size", "encoded_file_size"); +} + +using d_type_list = nvbench::enum_type_list; + +using io_list = nvbench::enum_type_list; + +using compression_list = + nvbench::enum_type_list; + +using stats_list = nvbench::enum_type_list; + +NVBENCH_BENCH_TYPES(BM_parq_write_encode, NVBENCH_TYPE_AXES(d_type_list)) + .set_name("parquet_write_encode") + .set_type_axes_names({"data_type"}) + .set_min_samples(4) + .add_int64_axis("cardinality", {0, 1000}) + .add_int64_axis("run_length", {1, 32}); + +NVBENCH_BENCH_TYPES(BM_parq_write_io_compression, NVBENCH_TYPE_AXES(io_list, compression_list)) + .set_name("parquet_write_io_compression") + .set_type_axes_names({"io", "compression"}) + .set_min_samples(4) + .add_int64_axis("cardinality", {0, 1000}) + .add_int64_axis("run_length", {1, 32}); + +NVBENCH_BENCH_TYPES(BM_parq_write_varying_options, NVBENCH_TYPE_AXES(stats_list, compression_list)) + .set_name("parquet_write_options") + .set_type_axes_names({"statistics", "compression"}) + .set_min_samples(4) + .add_string_axis("file_path", {"unused_path.parquet", ""}); diff --git a/cpp/benchmarks/io/parquet/parquet_writer_chunks.cpp b/cpp/benchmarks/io/parquet/parquet_writer_chunks.cpp new file mode 100644 index 0000000..b85c97f --- /dev/null +++ b/cpp/benchmarks/io/parquet/parquet_writer_chunks.cpp @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include + +#include +#include +#include +#include + +#include + +// Size of the data in the benchmark dataframe; chosen to be low enough to allow benchmarks to +// run on most GPUs, but large enough to allow highest throughput +constexpr int64_t data_size = 512 << 20; + +void PQ_write(nvbench::state& state) +{ + cudf::size_type const num_cols = state.get_int64("num_cols"); + + auto const tbl = create_random_table(cycle_dtypes({cudf::type_id::INT32}, num_cols), + table_size_bytes{data_size}); + auto const view = tbl->view(); + + std::size_t encoded_file_size = 0; + auto const mem_stats_logger = cudf::memory_stats_logger(); + + state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); + state.exec(nvbench::exec_tag::timer | nvbench::exec_tag::sync, + [&](nvbench::launch& launch, auto& timer) { + cuio_source_sink_pair source_sink(io_type::VOID); + + timer.start(); + cudf::io::parquet_writer_options opts = + cudf::io::parquet_writer_options::builder(source_sink.make_sink_info(), view); + cudf::io::write_parquet(opts); + timer.stop(); + + encoded_file_size = source_sink.size(); + }); + + auto const time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value"); + state.add_element_count(static_cast(data_size) / time, "bytes_per_second"); + state.add_buffer_size( + mem_stats_logger.peak_memory_usage(), "peak_memory_usage", "peak_memory_usage"); + state.add_buffer_size(encoded_file_size, "encoded_file_size", "encoded_file_size"); +} + +void PQ_write_chunked(nvbench::state& state) +{ + cudf::size_type const num_cols = state.get_int64("num_cols"); + cudf::size_type const num_tables = state.get_int64("num_chunks"); + + std::vector> tables; + for (cudf::size_type idx = 0; idx < num_tables; idx++) { + tables.push_back(create_random_table(cycle_dtypes({cudf::type_id::INT32}, num_cols), + table_size_bytes{size_t(data_size / num_tables)})); + } + + auto const mem_stats_logger = 
cudf::memory_stats_logger(); + std::size_t encoded_file_size = 0; + + state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); + state.exec( + nvbench::exec_tag::timer | nvbench::exec_tag::sync, [&](nvbench::launch& launch, auto& timer) { + cuio_source_sink_pair source_sink(io_type::VOID); + + timer.start(); + cudf::io::chunked_parquet_writer_options opts = + cudf::io::chunked_parquet_writer_options::builder(source_sink.make_sink_info()); + cudf::io::parquet_chunked_writer writer(opts); + std::for_each(tables.begin(), + tables.end(), + [&writer](std::unique_ptr const& tbl) { writer.write(*tbl); }); + writer.close(); + timer.stop(); + + encoded_file_size = source_sink.size(); + }); + + auto const time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value"); + state.add_element_count(static_cast(data_size) / time, "bytes_per_second"); + state.add_buffer_size( + mem_stats_logger.peak_memory_usage(), "peak_memory_usage", "peak_memory_usage"); + state.add_buffer_size(encoded_file_size, "encoded_file_size", "encoded_file_size"); +} + +NVBENCH_BENCH(PQ_write) + .set_name("parquet_write_num_cols") + .set_min_samples(4) + .add_int64_axis("num_cols", {8, 1024}); + +NVBENCH_BENCH(PQ_write_chunked) + .set_name("parquet_chunked_write") + .set_min_samples(4) + .add_int64_axis("num_cols", {8, 1024}) + .add_int64_axis("num_chunks", {8, 64}); diff --git a/cpp/benchmarks/io/text/multibyte_split.cpp b/cpp/benchmarks/io/text/multibyte_split.cpp new file mode 100644 index 0000000..b5d855d --- /dev/null +++ b/cpp/benchmarks/io/text/multibyte_split.cpp @@ -0,0 +1,225 @@ +/* + * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +#include +#include +#include +#include + +temp_directory const temp_dir("cudf_nvbench"); + +enum class data_chunk_source_type { device, file, file_datasource, host, host_pinned, file_bgzip }; + +NVBENCH_DECLARE_ENUM_TYPE_STRINGS( + data_chunk_source_type, + [](auto value) { + switch (value) { + case data_chunk_source_type::device: return "device"; + case data_chunk_source_type::file: return "file"; + case data_chunk_source_type::file_datasource: return "file_datasource"; + case data_chunk_source_type::host: return "host"; + case data_chunk_source_type::host_pinned: return "host_pinned"; + case data_chunk_source_type::file_bgzip: return "file_bgzip"; + default: return "Unknown"; + } + }, + [](auto) { return std::string{}; }) + +static cudf::string_scalar create_random_input(int32_t num_chars, + double delim_factor, + double deviation, + std::string delim) +{ + auto const num_delims = static_cast((num_chars * delim_factor) / delim.size()); + auto const num_delim_chars = num_delims * delim.size(); + auto const num_value_chars = num_chars - num_delim_chars; + auto const num_rows = num_delims; + auto const value_size_avg = static_cast(num_value_chars / num_rows); + auto const value_size_min = static_cast(value_size_avg * (1 - deviation)); + auto const value_size_max = static_cast(value_size_avg * (1 + deviation)); + + data_profile const table_profile = 
data_profile_builder().distribution( + cudf::type_id::STRING, distribution_id::NORMAL, value_size_min, value_size_max); + + auto const values = + create_random_column(cudf::type_id::STRING, row_count{num_rows}, table_profile); + + auto delim_scalar = cudf::make_string_scalar(delim); + auto delims_column = cudf::make_column_from_scalar(*delim_scalar, num_rows); + auto input_table = cudf::table_view({values->view(), delims_column->view()}); + auto input_column = cudf::strings::concatenate(input_table); + + // extract the chars from the returned strings column. + auto input_column_contents = input_column->release(); + auto chars_column_contents = input_column_contents.children[1]->release(); + auto chars_buffer = chars_column_contents.data.release(); + + // turn the chars in to a string scalar. + return cudf::string_scalar(std::move(*chars_buffer)); +} + +static void write_bgzip_file(cudf::host_span host_data, std::ostream& output_stream) +{ + // a bit of variability with a decent amount of padding so we don't overflow 16 bit block sizes + std::uniform_int_distribution chunk_size_dist{64000, 65000}; + std::default_random_engine rng{}; + std::size_t pos = 0; + while (pos < host_data.size()) { + auto const remainder = host_data.size() - pos; + auto const chunk_size = std::min(remainder, chunk_size_dist(rng)); + cudf::io::text::detail::bgzip::write_compressed_block(output_stream, + {host_data.data() + pos, chunk_size}); + pos += chunk_size; + } + // empty block denotes EOF + cudf::io::text::detail::bgzip::write_uncompressed_block(output_stream, {}); +} + +template +static void bench_multibyte_split(nvbench::state& state, + nvbench::type_list>) +{ + auto const delim_size = state.get_int64("delim_size"); + auto const delim_percent = state.get_int64("delim_percent"); + auto const file_size_approx = state.get_int64("size_approx"); + auto const byte_range_percent = state.get_int64("byte_range_percent"); + auto const strip_delimiters = 
bool(state.get_int64("strip_delimiters")); + + auto const byte_range_factor = static_cast(byte_range_percent) / 100; + CUDF_EXPECTS(delim_percent >= 1, "delimiter percent must be at least 1"); + CUDF_EXPECTS(delim_percent <= 50, "delimiter percent must be at most 50"); + CUDF_EXPECTS(byte_range_percent >= 1, "byte range percent must be at least 1"); + CUDF_EXPECTS(byte_range_percent <= 100, "byte range percent must be at most 100"); + + auto delim = std::string(delim_size, '0'); + // the algorithm can only support 7 equal characters, so use different chars in the delimiter + std::iota(delim.begin(), delim.end(), '1'); + + auto const delim_factor = static_cast(delim_percent) / 100; + std::unique_ptr datasource; + auto device_input = create_random_input(file_size_approx, delim_factor, 0.05, delim); + auto host_input = std::vector{}; + auto host_pinned_input = cudf::detail::pinned_host_vector{}; + + if (source_type != data_chunk_source_type::device && + source_type != data_chunk_source_type::host_pinned) { + host_input = cudf::detail::make_std_vector_sync( + {device_input.data(), static_cast(device_input.size())}, + cudf::get_default_stream()); + } + if (source_type == data_chunk_source_type::host_pinned) { + host_pinned_input.resize(static_cast(device_input.size())); + CUDF_CUDA_TRY(cudaMemcpy( + host_pinned_input.data(), device_input.data(), host_pinned_input.size(), cudaMemcpyDefault)); + } + + auto source = [&] { + switch (source_type) { + case data_chunk_source_type::file: + case data_chunk_source_type::file_datasource: { + auto const temp_file_name = random_file_in_dir(temp_dir.path()); + std::ofstream(temp_file_name, std::ofstream::out) + .write(host_input.data(), host_input.size()); + if (source_type == data_chunk_source_type::file) { + return cudf::io::text::make_source_from_file(temp_file_name); + } else { + datasource = cudf::io::datasource::create(temp_file_name); + return cudf::io::text::make_source(*datasource); + } + } + case 
data_chunk_source_type::host: // + return cudf::io::text::make_source(host_input); + case data_chunk_source_type::host_pinned: + return cudf::io::text::make_source(host_pinned_input); + case data_chunk_source_type::device: // + return cudf::io::text::make_source(device_input); + case data_chunk_source_type::file_bgzip: { + auto const temp_file_name = random_file_in_dir(temp_dir.path()); + { + std::ofstream output_stream(temp_file_name, std::ofstream::out); + write_bgzip_file(host_input, output_stream); + } + return cudf::io::text::make_source_from_bgzip_file(temp_file_name); + } + default: CUDF_FAIL(); + } + }(); + + auto mem_stats_logger = cudf::memory_stats_logger(); + auto const range_size = static_cast(device_input.size() * byte_range_factor); + auto const range_offset = (device_input.size() - range_size) / 2; + cudf::io::text::byte_range_info range{range_offset, range_size}; + cudf::io::text::parse_options options{range, strip_delimiters}; + std::unique_ptr output; + + state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + try_drop_l3_cache(); + output = cudf::io::text::multibyte_split(*source, delim, options); + }); + + state.add_buffer_size(mem_stats_logger.peak_memory_usage(), "pmu", "Peak Memory Usage"); + // TODO adapt to consistent naming scheme once established + state.add_buffer_size(range_size, "efs", "Encoded file size"); +} + +using source_type_list = nvbench::enum_type_list; + +NVBENCH_BENCH_TYPES(bench_multibyte_split, + NVBENCH_TYPE_AXES(nvbench::enum_type_list)) + .set_name("multibyte_split_delimiters") + .set_min_samples(4) + .add_int64_axis("strip_delimiters", {0, 1}) + .add_int64_axis("delim_size", {1, 4, 7}) + .add_int64_axis("delim_percent", {1, 25}) + .add_int64_power_of_two_axis("size_approx", {15}) + .add_int64_axis("byte_range_percent", {50}); + +NVBENCH_BENCH_TYPES(bench_multibyte_split, NVBENCH_TYPE_AXES(source_type_list)) + 
.set_name("multibyte_split_source") + .set_min_samples(4) + .add_int64_axis("strip_delimiters", {1}) + .add_int64_axis("delim_size", {1}) + .add_int64_axis("delim_percent", {1}) + .add_int64_power_of_two_axis("size_approx", {15, 30}) + .add_int64_axis("byte_range_percent", {10, 100}); diff --git a/cpp/benchmarks/iterator/iterator.cu b/cpp/benchmarks/iterator/iterator.cu new file mode 100644 index 0000000..dcd13cf --- /dev/null +++ b/cpp/benchmarks/iterator/iterator.cu @@ -0,0 +1,255 @@ +/* + * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include + +#include + +#include +#include +#include +#include + +#include + +#include + +#include +#include +#include +#include +#include + +#include + +template +T random_int(T min, T max) +{ + static unsigned seed = 13377331; + static std::mt19937 engine{seed}; + static std::uniform_int_distribution uniform{min, max}; + + return uniform(engine); +} + +// ----------------------------------------------------------------------------- +template +inline auto reduce_by_cub(OutputIterator result, InputIterator d_in, int num_items, T init) +{ + size_t temp_storage_bytes = 0; + + cub::DeviceReduce::Reduce( + nullptr, temp_storage_bytes, d_in, result, num_items, cudf::DeviceSum{}, init); + + // Allocate temporary storage + rmm::device_buffer d_temp_storage(temp_storage_bytes, cudf::get_default_stream()); + + // Run reduction + cub::DeviceReduce::Reduce( + d_temp_storage.data(), temp_storage_bytes, d_in, result, num_items, cudf::DeviceSum{}, init); + + return temp_storage_bytes; +} + +// ----------------------------------------------------------------------------- +template +void raw_stream_bench_cub(cudf::column_view& col, rmm::device_uvector& result) +{ + // std::cout << "raw stream cub: " << "\t"; + + T init{0}; + auto begin = col.data(); + int num_items = col.size(); + + reduce_by_cub(result.begin(), begin, num_items, init); +}; + +template +void iterator_bench_cub(cudf::column_view& col, rmm::device_uvector& result) +{ + // std::cout << "iterator cub " << ( (has_null) ? 
": " : ": " ) << "\t"; + + T init{0}; + auto d_col = cudf::column_device_view::create(col); + int num_items = col.size(); + if (has_null) { + auto begin = cudf::detail::make_null_replacement_iterator(*d_col, init); + reduce_by_cub(result.begin(), begin, num_items, init); + } else { + auto begin = d_col->begin(); + reduce_by_cub(result.begin(), begin, num_items, init); + } +} + +// ----------------------------------------------------------------------------- +template +void raw_stream_bench_thrust(cudf::column_view& col, rmm::device_uvector& result) +{ + // std::cout << "raw stream thust: " << "\t\t"; + + T init{0}; + auto d_in = col.data(); + auto d_end = d_in + col.size(); + thrust::reduce(thrust::device, d_in, d_end, init, cudf::DeviceSum{}); +} + +template +void iterator_bench_thrust(cudf::column_view& col, rmm::device_uvector& result) +{ + // std::cout << "iterator thust " << ( (has_null) ? ": " : ": " ) << "\t"; + + T init{0}; + auto d_col = cudf::column_device_view::create(col); + if (has_null) { + auto d_in = cudf::detail::make_null_replacement_iterator(*d_col, init); + auto d_end = d_in + col.size(); + thrust::reduce(thrust::device, d_in, d_end, init, cudf::DeviceSum{}); + } else { + auto d_in = d_col->begin(); + auto d_end = d_in + col.size(); + thrust::reduce(thrust::device, d_in, d_end, init, cudf::DeviceSum{}); + } +} + +// ----------------------------------------------------------------------------- +class Iterator : public cudf::benchmark {}; + +template +void BM_iterator(benchmark::State& state) +{ + cudf::size_type const column_size{(cudf::size_type)state.range(0)}; + using T = TypeParam; + auto num_gen = thrust::counting_iterator(0); + + cudf::test::fixed_width_column_wrapper wrap_hasnull_F(num_gen, num_gen + column_size); + cudf::column_view hasnull_F = wrap_hasnull_F; + + // Initialize dev_result to false + auto dev_result = cudf::detail::make_zeroed_device_uvector_sync( + 1, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + 
for (auto _ : state) { + cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0 + if (cub_or_thrust) { + if (raw_or_iterator) { + raw_stream_bench_cub(hasnull_F, dev_result); // driven by raw pointer + } else { + iterator_bench_cub(hasnull_F, dev_result); // driven by riterator without nulls + } + } else { + if (raw_or_iterator) { + raw_stream_bench_thrust(hasnull_F, dev_result); // driven by raw pointer + } else { + iterator_bench_thrust(hasnull_F, + dev_result); // driven by riterator without nulls + } + } + } + state.SetBytesProcessed(static_cast(state.iterations()) * column_size * + sizeof(TypeParam)); +} + +// operator+ defined for pair iterator reduction +template +__device__ thrust::pair operator+(thrust::pair lhs, thrust::pair rhs) +{ + return thrust::pair{lhs.first * lhs.second + rhs.first * rhs.second, + lhs.second + rhs.second}; +} +// ----------------------------------------------------------------------------- +template +void pair_iterator_bench_cub(cudf::column_view& col, + rmm::device_uvector>& result) +{ + thrust::pair init{0, false}; + auto d_col = cudf::column_device_view::create(col); + int num_items = col.size(); + auto begin = d_col->pair_begin(); + reduce_by_cub(result.begin(), begin, num_items, init); +} + +template +void pair_iterator_bench_thrust(cudf::column_view& col, + rmm::device_uvector>& result) +{ + thrust::pair init{0, false}; + auto d_col = cudf::column_device_view::create(col); + auto d_in = d_col->pair_begin(); + auto d_end = d_in + col.size(); + thrust::reduce(thrust::device, d_in, d_end, init, cudf::DeviceSum{}); +} + +template +void BM_pair_iterator(benchmark::State& state) +{ + cudf::size_type const column_size{(cudf::size_type)state.range(0)}; + using T = TypeParam; + auto num_gen = thrust::counting_iterator(0); + auto null_gen = + thrust::make_transform_iterator(num_gen, [](cudf::size_type row) { return row % 2 == 0; }); + + cudf::test::fixed_width_column_wrapper wrap_hasnull_F(num_gen, num_gen + 
column_size); + cudf::test::fixed_width_column_wrapper wrap_hasnull_T( + num_gen, num_gen + column_size, null_gen); + cudf::column_view hasnull_F = wrap_hasnull_F; + cudf::column_view hasnull_T = wrap_hasnull_T; + + // Initialize dev_result to false + auto dev_result = cudf::detail::make_zeroed_device_uvector_sync>( + 1, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + for (auto _ : state) { + cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0 + if (cub_or_thrust) { + pair_iterator_bench_cub(hasnull_T, + dev_result); // driven by pair iterator with nulls + } else { + pair_iterator_bench_thrust(hasnull_T, + dev_result); // driven by pair iterator with nulls + } + } + state.SetBytesProcessed(static_cast(state.iterations()) * column_size * + sizeof(TypeParam)); +} + +#define ITER_BM_BENCHMARK_DEFINE(name, type, cub_or_thrust, raw_or_iterator) \ + BENCHMARK_DEFINE_F(Iterator, name)(::benchmark::State & state) \ + { \ + BM_iterator(state); \ + } \ + BENCHMARK_REGISTER_F(Iterator, name) \ + ->RangeMultiplier(10) \ + ->Range(1000, 10000000) \ + ->UseManualTime() \ + ->Unit(benchmark::kMillisecond); + +ITER_BM_BENCHMARK_DEFINE(double_cub_raw, double, true, true); +ITER_BM_BENCHMARK_DEFINE(double_cub_iter, double, true, false); +ITER_BM_BENCHMARK_DEFINE(double_thrust_raw, double, false, true); +ITER_BM_BENCHMARK_DEFINE(double_thrust_iter, double, false, false); + +#define PAIRITER_BM_BENCHMARK_DEFINE(name, type, cub_or_thrust) \ + BENCHMARK_DEFINE_F(Iterator, name)(::benchmark::State & state) \ + { \ + BM_pair_iterator(state); \ + } \ + BENCHMARK_REGISTER_F(Iterator, name) \ + ->RangeMultiplier(10) \ + ->Range(1000, 10000000) \ + ->UseManualTime() \ + ->Unit(benchmark::kMillisecond); + +PAIRITER_BM_BENCHMARK_DEFINE(double_cub_pair, double, true); +PAIRITER_BM_BENCHMARK_DEFINE(double_thrust_pair, double, false); diff --git a/cpp/benchmarks/join/conditional_join.cu b/cpp/benchmarks/join/conditional_join.cu new file mode 100644 index 
0000000..d721de0 --- /dev/null +++ b/cpp/benchmarks/join/conditional_join.cu @@ -0,0 +1,287 @@ +/* + * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +template +class ConditionalJoin : public cudf::benchmark {}; + +// For compatibility with the shared logic for equality (hash) joins, all of +// the join lambdas defined by these macros accept a null_equality parameter +// but ignore it (don't forward it to the underlying join implementation) +// because conditional joins do not use this parameter. 
+#define CONDITIONAL_INNER_JOIN_BENCHMARK_DEFINE(name, key_type, payload_type, nullable) \ + BENCHMARK_TEMPLATE_DEFINE_F(ConditionalJoin, name, key_type, payload_type) \ + (::benchmark::State & st) \ + { \ + auto join = [](cudf::table_view const& left, \ + cudf::table_view const& right, \ + cudf::ast::operation binary_pred, \ + cudf::null_equality compare_nulls) { \ + return cudf::conditional_inner_join(left, right, binary_pred); \ + }; \ + BM_join(st, join); \ + } + +CONDITIONAL_INNER_JOIN_BENCHMARK_DEFINE(conditional_inner_join_32bit, int32_t, int32_t, false); +CONDITIONAL_INNER_JOIN_BENCHMARK_DEFINE(conditional_inner_join_64bit, int64_t, int64_t, false); +CONDITIONAL_INNER_JOIN_BENCHMARK_DEFINE(conditional_inner_join_32bit_nulls, int32_t, int32_t, true); +CONDITIONAL_INNER_JOIN_BENCHMARK_DEFINE(conditional_inner_join_64bit_nulls, int64_t, int64_t, true); + +#define CONDITIONAL_LEFT_JOIN_BENCHMARK_DEFINE(name, key_type, payload_type, nullable) \ + BENCHMARK_TEMPLATE_DEFINE_F(ConditionalJoin, name, key_type, payload_type) \ + (::benchmark::State & st) \ + { \ + auto join = [](cudf::table_view const& left, \ + cudf::table_view const& right, \ + cudf::ast::operation binary_pred, \ + cudf::null_equality compare_nulls) { \ + return cudf::conditional_left_join(left, right, binary_pred); \ + }; \ + BM_join(st, join); \ + } + +CONDITIONAL_LEFT_JOIN_BENCHMARK_DEFINE(conditional_left_join_32bit, int32_t, int32_t, false); +CONDITIONAL_LEFT_JOIN_BENCHMARK_DEFINE(conditional_left_join_64bit, int64_t, int64_t, false); +CONDITIONAL_LEFT_JOIN_BENCHMARK_DEFINE(conditional_left_join_32bit_nulls, int32_t, int32_t, true); +CONDITIONAL_LEFT_JOIN_BENCHMARK_DEFINE(conditional_left_join_64bit_nulls, int64_t, int64_t, true); + +#define CONDITIONAL_FULL_JOIN_BENCHMARK_DEFINE(name, key_type, payload_type, nullable) \ + BENCHMARK_TEMPLATE_DEFINE_F(ConditionalJoin, name, key_type, payload_type) \ + (::benchmark::State & st) \ + { \ + auto join = [](cudf::table_view const& left, \ + 
cudf::table_view const& right, \ + cudf::ast::operation binary_pred, \ + cudf::null_equality compare_nulls) { \ + return cudf::conditional_full_join(left, right, binary_pred); \ + }; \ + BM_join(st, join); \ + } + +CONDITIONAL_FULL_JOIN_BENCHMARK_DEFINE(conditional_full_join_32bit, int32_t, int32_t, false); +CONDITIONAL_FULL_JOIN_BENCHMARK_DEFINE(conditional_full_join_64bit, int64_t, int64_t, false); +CONDITIONAL_FULL_JOIN_BENCHMARK_DEFINE(conditional_full_join_32bit_nulls, int32_t, int32_t, true); +CONDITIONAL_FULL_JOIN_BENCHMARK_DEFINE(conditional_full_join_64bit_nulls, int64_t, int64_t, true); + +#define CONDITIONAL_LEFT_ANTI_JOIN_BENCHMARK_DEFINE(name, key_type, payload_type, nullable) \ + BENCHMARK_TEMPLATE_DEFINE_F(ConditionalJoin, name, key_type, payload_type) \ + (::benchmark::State & st) \ + { \ + auto join = [](cudf::table_view const& left, \ + cudf::table_view const& right, \ + cudf::ast::operation binary_pred, \ + cudf::null_equality compare_nulls) { \ + return cudf::conditional_left_anti_join(left, right, binary_pred); \ + }; \ + BM_join(st, join); \ + } + +CONDITIONAL_LEFT_ANTI_JOIN_BENCHMARK_DEFINE(conditional_left_anti_join_32bit, + int32_t, + int32_t, + false); +CONDITIONAL_LEFT_ANTI_JOIN_BENCHMARK_DEFINE(conditional_left_anti_join_64bit, + int64_t, + int64_t, + false); +CONDITIONAL_LEFT_ANTI_JOIN_BENCHMARK_DEFINE(conditional_left_anti_join_32bit_nulls, + int32_t, + int32_t, + true); +CONDITIONAL_LEFT_ANTI_JOIN_BENCHMARK_DEFINE(conditional_left_anti_join_64bit_nulls, + int64_t, + int64_t, + true); + +#define CONDITIONAL_LEFT_SEMI_JOIN_BENCHMARK_DEFINE(name, key_type, payload_type, nullable) \ + BENCHMARK_TEMPLATE_DEFINE_F(ConditionalJoin, name, key_type, payload_type) \ + (::benchmark::State & st) \ + { \ + auto join = [](cudf::table_view const& left, \ + cudf::table_view const& right, \ + cudf::ast::operation binary_pred, \ + cudf::null_equality compare_nulls) { \ + return cudf::conditional_left_semi_join(left, right, binary_pred); \ + }; \ + 
BM_join(st, join); \ + } + +CONDITIONAL_LEFT_SEMI_JOIN_BENCHMARK_DEFINE(conditional_left_semi_join_32bit, + int32_t, + int32_t, + false); +CONDITIONAL_LEFT_SEMI_JOIN_BENCHMARK_DEFINE(conditional_left_semi_join_64bit, + int64_t, + int64_t, + false); +CONDITIONAL_LEFT_SEMI_JOIN_BENCHMARK_DEFINE(conditional_left_semi_join_32bit_nulls, + int32_t, + int32_t, + true); +CONDITIONAL_LEFT_SEMI_JOIN_BENCHMARK_DEFINE(conditional_left_semi_join_64bit_nulls, + int64_t, + int64_t, + true); + +// inner join ----------------------------------------------------------------------- +BENCHMARK_REGISTER_F(ConditionalJoin, conditional_inner_join_32bit) + ->Unit(benchmark::kMillisecond) + ->Args({100'000, 100'000}) + ->Args({100'000, 400'000}) + ->Args({400'000, 100'000}) + ->Args({100'000, 1'000'000}) + ->UseManualTime(); + +BENCHMARK_REGISTER_F(ConditionalJoin, conditional_inner_join_64bit) + ->Unit(benchmark::kMillisecond) + ->Args({100'000, 100'000}) + ->Args({100'000, 400'000}) + ->Args({400'000, 100'000}) + ->Args({100'000, 1'000'000}) + ->UseManualTime(); + +BENCHMARK_REGISTER_F(ConditionalJoin, conditional_inner_join_32bit_nulls) + ->Unit(benchmark::kMillisecond) + ->Args({100'000, 100'000}) + ->Args({100'000, 400'000}) + ->Args({400'000, 100'000}) + ->Args({100'000, 1'000'000}) + ->UseManualTime(); + +BENCHMARK_REGISTER_F(ConditionalJoin, conditional_inner_join_64bit_nulls) + ->Unit(benchmark::kMillisecond) + ->Args({100'000, 100'000}) + ->Args({100'000, 400'000}) + ->Args({400'000, 100'000}) + ->Args({100'000, 1'000'000}) + ->UseManualTime(); + +// left join ----------------------------------------------------------------------- +BENCHMARK_REGISTER_F(ConditionalJoin, conditional_left_join_32bit) + ->Unit(benchmark::kMillisecond) + ->Args({100'000, 100'000}) + ->Args({100'000, 400'000}) + ->Args({100'000, 1'000'000}) + ->UseManualTime(); + +BENCHMARK_REGISTER_F(ConditionalJoin, conditional_left_join_64bit) + ->Unit(benchmark::kMillisecond) + ->Args({100'000, 100'000}) + 
->Args({100'000, 400'000}) + ->Args({100'000, 1'000'000}) + ->UseManualTime(); + +BENCHMARK_REGISTER_F(ConditionalJoin, conditional_left_join_32bit_nulls) + ->Unit(benchmark::kMillisecond) + ->Args({100'000, 100'000}) + ->Args({100'000, 400'000}) + ->Args({100'000, 1'000'000}) + ->UseManualTime(); + +BENCHMARK_REGISTER_F(ConditionalJoin, conditional_left_join_64bit_nulls) + ->Unit(benchmark::kMillisecond) + ->Args({100'000, 100'000}) + ->Args({100'000, 400'000}) + ->Args({100'000, 1'000'000}) + ->UseManualTime(); + +// full join ----------------------------------------------------------------------- +BENCHMARK_REGISTER_F(ConditionalJoin, conditional_full_join_32bit) + ->Unit(benchmark::kMillisecond) + ->Args({100'000, 100'000}) + ->Args({100'000, 400'000}) + ->Args({100'000, 1'000'000}) + ->UseManualTime(); + +BENCHMARK_REGISTER_F(ConditionalJoin, conditional_full_join_64bit) + ->Unit(benchmark::kMillisecond) + ->Args({100'000, 100'000}) + ->Args({100'000, 400'000}) + ->Args({100'000, 1'000'000}) + ->UseManualTime(); + +BENCHMARK_REGISTER_F(ConditionalJoin, conditional_full_join_32bit_nulls) + ->Unit(benchmark::kMillisecond) + ->Args({100'000, 100'000}) + ->Args({100'000, 400'000}) + ->Args({100'000, 1'000'000}) + ->UseManualTime(); + +BENCHMARK_REGISTER_F(ConditionalJoin, conditional_full_join_64bit_nulls) + ->Unit(benchmark::kMillisecond) + ->Args({100'000, 100'000}) + ->Args({100'000, 400'000}) + ->Args({100'000, 1'000'000}) + ->UseManualTime(); + +// left anti-join ------------------------------------------------------------- +BENCHMARK_REGISTER_F(ConditionalJoin, conditional_left_anti_join_32bit) + ->Unit(benchmark::kMillisecond) + ->Args({100'000, 100'000}) + ->Args({100'000, 400'000}) + ->Args({100'000, 1'000'000}) + ->UseManualTime(); + +BENCHMARK_REGISTER_F(ConditionalJoin, conditional_left_anti_join_64bit) + ->Unit(benchmark::kMillisecond) + ->Args({100'000, 100'000}) + ->Args({100'000, 400'000}) + ->Args({100'000, 1'000'000}) + ->UseManualTime(); + 
+BENCHMARK_REGISTER_F(ConditionalJoin, conditional_left_anti_join_32bit_nulls) + ->Unit(benchmark::kMillisecond) + ->Args({100'000, 100'000}) + ->Args({100'000, 400'000}) + ->Args({100'000, 1'000'000}) + ->UseManualTime(); + +BENCHMARK_REGISTER_F(ConditionalJoin, conditional_left_anti_join_64bit_nulls) + ->Unit(benchmark::kMillisecond) + ->Args({100'000, 100'000}) + ->Args({100'000, 400'000}) + ->Args({100'000, 1'000'000}) + ->UseManualTime(); + +// left semi-join ------------------------------------------------------------- +BENCHMARK_REGISTER_F(ConditionalJoin, conditional_left_semi_join_32bit) + ->Unit(benchmark::kMillisecond) + ->Args({100'000, 100'000}) + ->Args({100'000, 400'000}) + ->Args({100'000, 1'000'000}) + ->UseManualTime(); + +BENCHMARK_REGISTER_F(ConditionalJoin, conditional_left_semi_join_64bit) + ->Unit(benchmark::kMillisecond) + ->Args({100'000, 100'000}) + ->Args({100'000, 400'000}) + ->Args({100'000, 1'000'000}) + ->UseManualTime(); + +BENCHMARK_REGISTER_F(ConditionalJoin, conditional_left_semi_join_32bit_nulls) + ->Unit(benchmark::kMillisecond) + ->Args({100'000, 100'000}) + ->Args({100'000, 400'000}) + ->Args({100'000, 1'000'000}) + ->UseManualTime(); + +BENCHMARK_REGISTER_F(ConditionalJoin, conditional_left_semi_join_64bit_nulls) + ->Unit(benchmark::kMillisecond) + ->Args({100'000, 100'000}) + ->Args({100'000, 400'000}) + ->Args({100'000, 1'000'000}) + ->UseManualTime(); diff --git a/cpp/benchmarks/join/generate_input_tables.cuh b/cpp/benchmarks/join/generate_input_tables.cuh new file mode 100644 index 0000000..84e607a --- /dev/null +++ b/cpp/benchmarks/join/generate_input_tables.cuh @@ -0,0 +1,181 @@ +/* + * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include + +#include + +#include +#include +#include + +#include +#include + +#include + +__global__ static void init_curand(curandState* state, int const nstates) +{ + int ithread = threadIdx.x + blockIdx.x * blockDim.x; + + if (ithread < nstates) { curand_init(1234ULL, ithread, 0, state + ithread); } +} + +template +__global__ static void init_build_tbl(key_type* const build_tbl, + size_type const build_tbl_size, + int const multiplicity, + curandState* state, + int const num_states) +{ + auto const start_idx = blockIdx.x * blockDim.x + threadIdx.x; + auto const stride = blockDim.x * gridDim.x; + assert(start_idx < num_states); + + curandState localState = state[start_idx]; + + for (size_type idx = start_idx; idx < build_tbl_size; idx += stride) { + double const x = curand_uniform_double(&localState); + + build_tbl[idx] = static_cast(x * (build_tbl_size / multiplicity)); + } + + state[start_idx] = localState; +} + +template +__global__ void init_probe_tbl(key_type* const probe_tbl, + size_type const probe_tbl_size, + size_type const build_tbl_size, + key_type const rand_max, + double const selectivity, + int const multiplicity, + curandState* state, + int const num_states) +{ + auto const start_idx = blockIdx.x * blockDim.x + threadIdx.x; + auto const stride = blockDim.x * gridDim.x; + assert(start_idx < num_states); + + curandState localState = state[start_idx]; + + for (size_type idx = start_idx; idx < probe_tbl_size; idx += stride) { + key_type val; + double x = curand_uniform_double(&localState); + 
+ if (x <= selectivity) { + // x <= selectivity means this key in the probe table should be present in the build table, so + // we pick a key from [0, build_tbl_size / multiplicity] + x = curand_uniform_double(&localState); + val = static_cast(x * (build_tbl_size / multiplicity)); + } else { + // This key in the probe table should not be present in the build table, so we pick a key from + // [build_tbl_size, rand_max]. + x = curand_uniform_double(&localState); + val = static_cast(x * (rand_max - build_tbl_size) + build_tbl_size); + } + probe_tbl[idx] = val; + } + + state[start_idx] = localState; +} + +/** + * generate_input_tables generates random integer input tables for database benchmarks. + * + * generate_input_tables generates two random integer input tables for database benchmark + * mainly designed to benchmark join operations. The templates key_type and size_type needed + * to be builtin integer types (e.g. short, int, longlong) and key_type needs to be signed + * as the lottery used internally relies on being able to use negative values to mark drawn + * numbers. The tables need to be preallocated in a memory region accessible by the GPU + * (e.g. device memory, zero copy memory or unified memory). Each value in the build table + * will be from [0,rand_max] and if uniq_build_tbl_keys is true it is ensured that each value + * will be uniq in the build table. Each value in the probe table will be also in the build + * table with a probability of selectivity and a random number from + * [0,rand_max] \setminus \{build_tbl\} otherwise. + * + * @param[out] build_tbl The build table to generate. Usually the smaller table used to + * "build" the hash table in a hash based join implementation. + * @param[in] build_tbl_size number of keys in the build table + * @param[out] probe_tbl The probe table to generate. Usually the larger table used to + * probe into the hash table created from the build table. 
+ * @param[in] build_tbl_size number of keys in the build table + * @param[in] selectivity probability with which an element of the probe table is + * present in the build table. + * @param[in] multiplicity number of matches for each key. + */ +template +void generate_input_tables(key_type* const build_tbl, + size_type const build_tbl_size, + key_type* const probe_tbl, + size_type const probe_tbl_size, + double const selectivity, + int const multiplicity) +{ + // With large values of rand_max the a lot of temporary storage is needed for the lottery. At the + // expense of not being that accurate with applying the selectivity an especially more memory + // efficient implementations would be to partition the random numbers into two intervals and then + // let one table choose random numbers from only one interval and the other only select with + // selective probability from the same interval and from the other in the other cases. + + constexpr int block_size = 128; + + // Maximize exposed parallelism while minimizing storage for curand state + int num_blocks_init_build_tbl{-1}; + CUDF_CUDA_TRY(cudaOccupancyMaxActiveBlocksPerMultiprocessor( + &num_blocks_init_build_tbl, init_build_tbl, block_size, 0)); + + int num_blocks_init_probe_tbl{-1}; + CUDF_CUDA_TRY(cudaOccupancyMaxActiveBlocksPerMultiprocessor( + &num_blocks_init_probe_tbl, init_probe_tbl, block_size, 0)); + + int dev_id{-1}; + CUDF_CUDA_TRY(cudaGetDevice(&dev_id)); + + int num_sms{-1}; + CUDF_CUDA_TRY(cudaDeviceGetAttribute(&num_sms, cudaDevAttrMultiProcessorCount, dev_id)); + + int const num_states = + num_sms * std::max(num_blocks_init_build_tbl, num_blocks_init_probe_tbl) * block_size; + rmm::device_uvector devStates(num_states, cudf::get_default_stream()); + + init_curand<<<(num_states - 1) / block_size + 1, block_size>>>(devStates.data(), num_states); + + CUDF_CHECK_CUDA(0); + + init_build_tbl<<>>( + build_tbl, build_tbl_size, multiplicity, devStates.data(), num_states); + + CUDF_CHECK_CUDA(0); + + auto 
const rand_max = std::numeric_limits::max(); + + init_probe_tbl + <<>>(probe_tbl, + probe_tbl_size, + build_tbl_size, + rand_max, + selectivity, + multiplicity, + devStates.data(), + num_states); + + CUDF_CHECK_CUDA(0); +} diff --git a/cpp/benchmarks/join/join.cu b/cpp/benchmarks/join/join.cu new file mode 100644 index 0000000..1c02a44 --- /dev/null +++ b/cpp/benchmarks/join/join.cu @@ -0,0 +1,194 @@ +/* + * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +template +void nvbench_inner_join(nvbench::state& state, + nvbench::type_list>) +{ + skip_helper(state); + + auto join = [](cudf::table_view const& left_input, + cudf::table_view const& right_input, + cudf::null_equality compare_nulls, + rmm::cuda_stream_view stream) { + auto const has_nulls = cudf::has_nested_nulls(left_input) || cudf::has_nested_nulls(right_input) + ? 
cudf::nullable_join::YES + : cudf::nullable_join::NO; + cudf::hash_join hj_obj(left_input, has_nulls, compare_nulls, stream); + return hj_obj.inner_join(right_input, std::nullopt, stream); + }; + + BM_join(state, join); +} + +template +void nvbench_left_join(nvbench::state& state, + nvbench::type_list>) +{ + skip_helper(state); + + auto join = [](cudf::table_view const& left_input, + cudf::table_view const& right_input, + cudf::null_equality compare_nulls, + rmm::cuda_stream_view stream) { + auto const has_nulls = cudf::has_nested_nulls(left_input) || cudf::has_nested_nulls(right_input) + ? cudf::nullable_join::YES + : cudf::nullable_join::NO; + cudf::hash_join hj_obj(left_input, has_nulls, compare_nulls, stream); + return hj_obj.left_join(right_input, std::nullopt, stream); + }; + + BM_join(state, join); +} + +template +void nvbench_full_join(nvbench::state& state, + nvbench::type_list>) +{ + skip_helper(state); + + auto join = [](cudf::table_view const& left_input, + cudf::table_view const& right_input, + cudf::null_equality compare_nulls, + rmm::cuda_stream_view stream) { + auto const has_nulls = cudf::has_nested_nulls(left_input) || cudf::has_nested_nulls(right_input) + ? 
cudf::nullable_join::YES + : cudf::nullable_join::NO; + cudf::hash_join hj_obj(left_input, has_nulls, compare_nulls, stream); + return hj_obj.full_join(right_input, std::nullopt, stream); + }; + + BM_join(state, join); +} + +// inner join ----------------------------------------------------------------------- +NVBENCH_BENCH_TYPES(nvbench_inner_join, + NVBENCH_TYPE_AXES(nvbench::type_list, + nvbench::type_list, + nvbench::enum_type_list)) + .set_name("inner_join_32bit") + .set_type_axes_names({"Key Type", "Payload Type", "Nullable"}) + .add_int64_axis("Build Table Size", {100'000, 10'000'000, 80'000'000, 100'000'000}) + .add_int64_axis("Probe Table Size", + {100'000, 400'000, 10'000'000, 40'000'000, 100'000'000, 240'000'000}); + +NVBENCH_BENCH_TYPES(nvbench_inner_join, + NVBENCH_TYPE_AXES(nvbench::type_list, + nvbench::type_list, + nvbench::enum_type_list)) + .set_name("inner_join_64bit") + .set_type_axes_names({"Key Type", "Payload Type", "Nullable"}) + .add_int64_axis("Build Table Size", {40'000'000, 50'000'000}) + .add_int64_axis("Probe Table Size", {50'000'000, 120'000'000}); + +NVBENCH_BENCH_TYPES(nvbench_inner_join, + NVBENCH_TYPE_AXES(nvbench::type_list, + nvbench::type_list, + nvbench::enum_type_list)) + .set_name("inner_join_32bit_nulls") + .set_type_axes_names({"Key Type", "Payload Type", "Nullable"}) + .add_int64_axis("Build Table Size", {100'000, 10'000'000, 80'000'000, 100'000'000}) + .add_int64_axis("Probe Table Size", + {100'000, 400'000, 10'000'000, 40'000'000, 100'000'000, 240'000'000}); + +NVBENCH_BENCH_TYPES(nvbench_inner_join, + NVBENCH_TYPE_AXES(nvbench::type_list, + nvbench::type_list, + nvbench::enum_type_list)) + .set_name("inner_join_64bit_nulls") + .set_type_axes_names({"Key Type", "Payload Type", "Nullable"}) + .add_int64_axis("Build Table Size", {40'000'000, 50'000'000}) + .add_int64_axis("Probe Table Size", {50'000'000, 120'000'000}); + +// left join ------------------------------------------------------------------------ 
+NVBENCH_BENCH_TYPES(nvbench_left_join, + NVBENCH_TYPE_AXES(nvbench::type_list, + nvbench::type_list, + nvbench::enum_type_list)) + .set_name("left_join_32bit") + .set_type_axes_names({"Key Type", "Payload Type", "Nullable"}) + .add_int64_axis("Build Table Size", {100'000, 10'000'000, 80'000'000, 100'000'000}) + .add_int64_axis("Probe Table Size", + {100'000, 400'000, 10'000'000, 40'000'000, 100'000'000, 240'000'000}); + +NVBENCH_BENCH_TYPES(nvbench_left_join, + NVBENCH_TYPE_AXES(nvbench::type_list, + nvbench::type_list, + nvbench::enum_type_list)) + .set_name("left_join_64bit") + .set_type_axes_names({"Key Type", "Payload Type", "Nullable"}) + .add_int64_axis("Build Table Size", {40'000'000, 50'000'000}) + .add_int64_axis("Probe Table Size", {50'000'000, 120'000'000}); + +NVBENCH_BENCH_TYPES(nvbench_left_join, + NVBENCH_TYPE_AXES(nvbench::type_list, + nvbench::type_list, + nvbench::enum_type_list)) + .set_name("left_join_32bit_nulls") + .set_type_axes_names({"Key Type", "Payload Type", "Nullable"}) + .add_int64_axis("Build Table Size", {100'000, 10'000'000, 80'000'000, 100'000'000}) + .add_int64_axis("Probe Table Size", + {100'000, 400'000, 10'000'000, 40'000'000, 100'000'000, 240'000'000}); + +NVBENCH_BENCH_TYPES(nvbench_left_join, + NVBENCH_TYPE_AXES(nvbench::type_list, + nvbench::type_list, + nvbench::enum_type_list)) + .set_name("left_join_64bit_nulls") + .set_type_axes_names({"Key Type", "Payload Type", "Nullable"}) + .add_int64_axis("Build Table Size", {40'000'000, 50'000'000}) + .add_int64_axis("Probe Table Size", {50'000'000, 120'000'000}); + +// full join ------------------------------------------------------------------------ +NVBENCH_BENCH_TYPES(nvbench_full_join, + NVBENCH_TYPE_AXES(nvbench::type_list, + nvbench::type_list, + nvbench::enum_type_list)) + .set_name("full_join_32bit") + .set_type_axes_names({"Key Type", "Payload Type", "Nullable"}) + .add_int64_axis("Build Table Size", {100'000, 10'000'000, 80'000'000, 100'000'000}) + 
.add_int64_axis("Probe Table Size", + {100'000, 400'000, 10'000'000, 40'000'000, 100'000'000, 240'000'000}); + +NVBENCH_BENCH_TYPES(nvbench_full_join, + NVBENCH_TYPE_AXES(nvbench::type_list, + nvbench::type_list, + nvbench::enum_type_list)) + .set_name("full_join_64bit") + .set_type_axes_names({"Key Type", "Payload Type", "Nullable"}) + .add_int64_axis("Build Table Size", {40'000'000, 50'000'000}) + .add_int64_axis("Probe Table Size", {50'000'000, 120'000'000}); + +NVBENCH_BENCH_TYPES(nvbench_full_join, + NVBENCH_TYPE_AXES(nvbench::type_list, + nvbench::type_list, + nvbench::enum_type_list)) + .set_name("full_join_32bit_nulls") + .set_type_axes_names({"Key Type", "Payload Type", "Nullable"}) + .add_int64_axis("Build Table Size", {100'000, 10'000'000, 80'000'000, 100'000'000}) + .add_int64_axis("Probe Table Size", + {100'000, 400'000, 10'000'000, 40'000'000, 100'000'000, 240'000'000}); + +NVBENCH_BENCH_TYPES(nvbench_full_join, + NVBENCH_TYPE_AXES(nvbench::type_list, + nvbench::type_list, + nvbench::enum_type_list)) + .set_name("full_join_64bit_nulls") + .set_type_axes_names({"Key Type", "Payload Type", "Nullable"}) + .add_int64_axis("Build Table Size", {40'000'000, 50'000'000}) + .add_int64_axis("Probe Table Size", {50'000'000, 120'000'000}); diff --git a/cpp/benchmarks/join/join_common.hpp b/cpp/benchmarks/join/join_common.hpp new file mode 100644 index 0000000..7d1b1c7 --- /dev/null +++ b/cpp/benchmarks/join/join_common.hpp @@ -0,0 +1,229 @@ +/* + * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "generate_input_tables.cuh" + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include + +#include + +struct null75_generator { + thrust::minstd_rand engine; + thrust::uniform_int_distribution rand_gen; + null75_generator() : engine(), rand_gen() {} + __device__ bool operator()(size_t i) + { + engine.discard(i); + // roughly 75% nulls + return (rand_gen(engine) & 3) == 0; + } +}; + +enum class join_t { CONDITIONAL, MIXED, HASH }; + +inline void skip_helper(nvbench::state& state) +{ + auto const build_table_size = state.get_int64("Build Table Size"); + auto const probe_table_size = state.get_int64("Probe Table Size"); + + if (build_table_size > probe_table_size) { + state.skip("Large build tables are skipped."); + return; + } + + if (build_table_size * 100 <= probe_table_size) { + state.skip("Large probe tables are skipped."); + return; + } +} + +template +void BM_join(state_type& state, Join JoinFunc) +{ + auto const build_table_size = [&]() { + if constexpr (std::is_same_v) { + return static_cast(state.range(0)); + } + if constexpr (std::is_same_v) { + return static_cast(state.get_int64("Build Table Size")); + } + }(); + auto const probe_table_size = [&]() { + if constexpr (std::is_same_v) { + return static_cast(state.range(1)); + } + if constexpr (std::is_same_v) { + return static_cast(state.get_int64("Probe Table Size")); + } + }(); + + double const selectivity = 0.3; + int const multiplicity = 1; + + // Generate build and probe tables + auto build_random_null_mask = [](int size) { + // roughly 75% nulls + auto validity = + thrust::make_transform_iterator(thrust::make_counting_iterator(0), null75_generator{}); + return cudf::detail::valid_if(validity, + validity + size, + thrust::identity{}, + 
cudf::get_default_stream(), + rmm::mr::get_current_device_resource()); + }; + + std::unique_ptr build_key_column0 = [&]() { + auto [null_mask, null_count] = build_random_null_mask(build_table_size); + return Nullable ? cudf::make_numeric_column(cudf::data_type(cudf::type_to_id()), + build_table_size, + std::move(null_mask), + null_count) + : cudf::make_numeric_column(cudf::data_type(cudf::type_to_id()), + build_table_size); + }(); + std::unique_ptr probe_key_column0 = [&]() { + auto [null_mask, null_count] = build_random_null_mask(probe_table_size); + return Nullable ? cudf::make_numeric_column(cudf::data_type(cudf::type_to_id()), + probe_table_size, + std::move(null_mask), + null_count) + : cudf::make_numeric_column(cudf::data_type(cudf::type_to_id()), + probe_table_size); + }(); + + generate_input_tables( + build_key_column0->mutable_view().data(), + build_table_size, + probe_key_column0->mutable_view().data(), + probe_table_size, + selectivity, + multiplicity); + + // Copy build_key_column0 and probe_key_column0 into new columns. + // If Nullable, the new columns will be assigned new nullmasks. 
+ auto const build_key_column1 = [&]() { + auto col = std::make_unique(build_key_column0->view()); + if (Nullable) { + auto [null_mask, null_count] = build_random_null_mask(build_table_size); + col->set_null_mask(std::move(null_mask), null_count); + } + return col; + }(); + auto const probe_key_column1 = [&]() { + auto col = std::make_unique(probe_key_column0->view()); + if (Nullable) { + auto [null_mask, null_count] = build_random_null_mask(probe_table_size); + col->set_null_mask(std::move(null_mask), null_count); + } + return col; + }(); + + auto init = cudf::make_fixed_width_scalar(static_cast(0)); + auto build_payload_column = cudf::sequence(build_table_size, *init); + auto probe_payload_column = cudf::sequence(probe_table_size, *init); + + CUDF_CHECK_CUDA(0); + + cudf::table_view build_table( + {build_key_column0->view(), build_key_column1->view(), *build_payload_column}); + cudf::table_view probe_table( + {probe_key_column0->view(), probe_key_column1->view(), *probe_payload_column}); + + // Setup join parameters and result table + [[maybe_unused]] std::vector columns_to_join = {0}; + + // Benchmark the inner join operation + if constexpr (std::is_same_v and + (join_type != join_t::CONDITIONAL)) { + for (auto _ : state) { + cuda_event_timer raii(state, true, cudf::get_default_stream()); + + auto result = JoinFunc(probe_table.select(columns_to_join), + build_table.select(columns_to_join), + cudf::null_equality::UNEQUAL); + } + } + if constexpr (std::is_same_v and (join_type != join_t::CONDITIONAL)) { + if constexpr (join_type == join_t::MIXED) { + auto const col_ref_left_0 = cudf::ast::column_reference(0); + auto const col_ref_right_0 = + cudf::ast::column_reference(0, cudf::ast::table_reference::RIGHT); + auto left_zero_eq_right_zero = + cudf::ast::operation(cudf::ast::ast_operator::EQUAL, col_ref_left_0, col_ref_right_0); + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + rmm::cuda_stream_view stream_view{launch.get_stream()}; + auto 
result = JoinFunc(probe_table.select(columns_to_join), + build_table.select(columns_to_join), + probe_table.select({1}), + build_table.select({1}), + left_zero_eq_right_zero, + cudf::null_equality::UNEQUAL, + stream_view); + }); + } + if constexpr (join_type == join_t::HASH) { + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + rmm::cuda_stream_view stream_view{launch.get_stream()}; + auto result = JoinFunc(probe_table.select(columns_to_join), + build_table.select(columns_to_join), + cudf::null_equality::UNEQUAL, + stream_view); + }); + } + } + + // Benchmark conditional join + if constexpr (std::is_same_v and join_type == join_t::CONDITIONAL) { + // Common column references. + auto const col_ref_left_0 = cudf::ast::column_reference(0); + auto const col_ref_right_0 = cudf::ast::column_reference(0, cudf::ast::table_reference::RIGHT); + auto left_zero_eq_right_zero = + cudf::ast::operation(cudf::ast::ast_operator::EQUAL, col_ref_left_0, col_ref_right_0); + + for (auto _ : state) { + cuda_event_timer raii(state, true, cudf::get_default_stream()); + + auto result = + JoinFunc(probe_table, build_table, left_zero_eq_right_zero, cudf::null_equality::UNEQUAL); + } + } +} diff --git a/cpp/benchmarks/join/left_join.cu b/cpp/benchmarks/join/left_join.cu new file mode 100644 index 0000000..96bbd1b --- /dev/null +++ b/cpp/benchmarks/join/left_join.cu @@ -0,0 +1,128 @@ +/* + * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +template +class Join : public cudf::benchmark {}; + +#define LEFT_ANTI_JOIN_BENCHMARK_DEFINE(name, key_type, payload_type, nullable) \ + BENCHMARK_TEMPLATE_DEFINE_F(Join, name, key_type, payload_type) \ + (::benchmark::State & st) \ + { \ + auto join = [](cudf::table_view const& left, \ + cudf::table_view const& right, \ + cudf::null_equality compare_nulls) { \ + return cudf::left_anti_join(left, right, compare_nulls); \ + }; \ + BM_join(st, join); \ + } + +LEFT_ANTI_JOIN_BENCHMARK_DEFINE(left_anti_join_32bit, int32_t, int32_t, false); +LEFT_ANTI_JOIN_BENCHMARK_DEFINE(left_anti_join_64bit, int64_t, int64_t, false); +LEFT_ANTI_JOIN_BENCHMARK_DEFINE(left_anti_join_32bit_nulls, int32_t, int32_t, true); +LEFT_ANTI_JOIN_BENCHMARK_DEFINE(left_anti_join_64bit_nulls, int64_t, int64_t, true); + +#define LEFT_SEMI_JOIN_BENCHMARK_DEFINE(name, key_type, payload_type, nullable) \ + BENCHMARK_TEMPLATE_DEFINE_F(Join, name, key_type, payload_type) \ + (::benchmark::State & st) \ + { \ + auto join = [](cudf::table_view const& left, \ + cudf::table_view const& right, \ + cudf::null_equality compare_nulls) { \ + return cudf::left_semi_join(left, right, compare_nulls); \ + }; \ + BM_join(st, join); \ + } + +LEFT_SEMI_JOIN_BENCHMARK_DEFINE(left_semi_join_32bit, int32_t, int32_t, false); +LEFT_SEMI_JOIN_BENCHMARK_DEFINE(left_semi_join_64bit, int64_t, int64_t, false); +LEFT_SEMI_JOIN_BENCHMARK_DEFINE(left_semi_join_32bit_nulls, int32_t, int32_t, true); +LEFT_SEMI_JOIN_BENCHMARK_DEFINE(left_semi_join_64bit_nulls, int64_t, int64_t, true); + +// left anti-join ------------------------------------------------------------- +BENCHMARK_REGISTER_F(Join, left_anti_join_32bit) + ->Unit(benchmark::kMillisecond) + ->Args({100'000, 100'000}) + ->Args({100'000, 400'000}) + ->Args({100'000, 1'000'000}) + ->Args({10'000'000, 10'000'000}) + ->Args({10'000'000, 40'000'000}) + 
->Args({10'000'000, 100'000'000}) + ->Args({100'000'000, 100'000'000}) + ->Args({80'000'000, 240'000'000}) + ->UseManualTime(); + +BENCHMARK_REGISTER_F(Join, left_anti_join_64bit) + ->Unit(benchmark::kMillisecond) + ->Args({50'000'000, 50'000'000}) + ->Args({40'000'000, 120'000'000}) + ->UseManualTime(); + +BENCHMARK_REGISTER_F(Join, left_anti_join_32bit_nulls) + ->Unit(benchmark::kMillisecond) + ->Args({100'000, 100'000}) + ->Args({100'000, 400'000}) + ->Args({100'000, 1'000'000}) + ->Args({10'000'000, 10'000'000}) + ->Args({10'000'000, 40'000'000}) + ->Args({10'000'000, 100'000'000}) + ->Args({100'000'000, 100'000'000}) + ->Args({80'000'000, 240'000'000}) + ->UseManualTime(); + +BENCHMARK_REGISTER_F(Join, left_anti_join_64bit_nulls) + ->Unit(benchmark::kMillisecond) + ->Args({50'000'000, 50'000'000}) + ->Args({40'000'000, 120'000'000}) + ->UseManualTime(); + +// left semi-join ------------------------------------------------------------- +BENCHMARK_REGISTER_F(Join, left_semi_join_32bit) + ->Unit(benchmark::kMillisecond) + ->Args({100'000, 100'000}) + ->Args({100'000, 400'000}) + ->Args({100'000, 1'000'000}) + ->Args({10'000'000, 10'000'000}) + ->Args({10'000'000, 40'000'000}) + ->Args({10'000'000, 100'000'000}) + ->Args({100'000'000, 100'000'000}) + ->Args({80'000'000, 240'000'000}) + ->UseManualTime(); + +BENCHMARK_REGISTER_F(Join, left_semi_join_64bit) + ->Unit(benchmark::kMillisecond) + ->Args({50'000'000, 50'000'000}) + ->Args({40'000'000, 120'000'000}) + ->UseManualTime(); + +BENCHMARK_REGISTER_F(Join, left_semi_join_32bit_nulls) + ->Unit(benchmark::kMillisecond) + ->Args({100'000, 100'000}) + ->Args({100'000, 400'000}) + ->Args({100'000, 1'000'000}) + ->Args({10'000'000, 10'000'000}) + ->Args({10'000'000, 40'000'000}) + ->Args({10'000'000, 100'000'000}) + ->Args({100'000'000, 100'000'000}) + ->Args({80'000'000, 240'000'000}) + ->UseManualTime(); + +BENCHMARK_REGISTER_F(Join, left_semi_join_64bit_nulls) + ->Unit(benchmark::kMillisecond) + ->Args({50'000'000, 
50'000'000}) + ->Args({40'000'000, 120'000'000}) + ->UseManualTime(); diff --git a/cpp/benchmarks/join/mixed_join.cu b/cpp/benchmarks/join/mixed_join.cu new file mode 100644 index 0000000..67be464 --- /dev/null +++ b/cpp/benchmarks/join/mixed_join.cu @@ -0,0 +1,332 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +template +void nvbench_mixed_inner_join( + nvbench::state& state, nvbench::type_list>) +{ + skip_helper(state); + + auto join = [](cudf::table_view const& left_equality_input, + cudf::table_view const& right_equality_input, + cudf::table_view const& left_conditional_input, + cudf::table_view const& right_conditional_input, + cudf::ast::operation binary_pred, + cudf::null_equality compare_nulls, + rmm::cuda_stream_view stream) { + return cudf::mixed_inner_join(left_equality_input, + right_equality_input, + left_conditional_input, + right_conditional_input, + binary_pred, + compare_nulls); + }; + + BM_join(state, join); +} + +template +void nvbench_mixed_left_join( + nvbench::state& state, nvbench::type_list>) +{ + skip_helper(state); + + auto join = [](cudf::table_view const& left_equality_input, + cudf::table_view const& right_equality_input, + cudf::table_view const& left_conditional_input, + cudf::table_view const& right_conditional_input, + cudf::ast::operation binary_pred, + cudf::null_equality compare_nulls, + rmm::cuda_stream_view stream) { + return 
cudf::mixed_left_join(left_equality_input, + right_equality_input, + left_conditional_input, + right_conditional_input, + binary_pred, + compare_nulls); + }; + + BM_join(state, join); +} + +template +void nvbench_mixed_full_join( + nvbench::state& state, nvbench::type_list>) +{ + skip_helper(state); + + auto join = [](cudf::table_view const& left_equality_input, + cudf::table_view const& right_equality_input, + cudf::table_view const& left_conditional_input, + cudf::table_view const& right_conditional_input, + cudf::ast::operation binary_pred, + cudf::null_equality compare_nulls, + rmm::cuda_stream_view stream) { + return cudf::mixed_full_join(left_equality_input, + right_equality_input, + left_conditional_input, + right_conditional_input, + binary_pred, + compare_nulls); + }; + + BM_join(state, join); +} + +template +void nvbench_mixed_left_semi_join( + nvbench::state& state, nvbench::type_list>) +{ + skip_helper(state); + + auto join = [](cudf::table_view const& left_equality_input, + cudf::table_view const& right_equality_input, + cudf::table_view const& left_conditional_input, + cudf::table_view const& right_conditional_input, + cudf::ast::operation binary_pred, + cudf::null_equality compare_nulls, + rmm::cuda_stream_view stream) { + return cudf::mixed_left_semi_join(left_equality_input, + right_equality_input, + left_conditional_input, + right_conditional_input, + binary_pred, + compare_nulls); + }; + + BM_join(state, join); +} + +template +void nvbench_mixed_left_anti_join( + nvbench::state& state, nvbench::type_list>) +{ + skip_helper(state); + + auto join = [](cudf::table_view const& left_equality_input, + cudf::table_view const& right_equality_input, + cudf::table_view const& left_conditional_input, + cudf::table_view const& right_conditional_input, + cudf::ast::operation binary_pred, + cudf::null_equality compare_nulls, + rmm::cuda_stream_view stream) { + return cudf::mixed_left_anti_join(left_equality_input, + right_equality_input, + 
left_conditional_input, + right_conditional_input, + binary_pred, + compare_nulls); + }; + + BM_join(state, join); +} + +// inner join ----------------------------------------------------------------------- +NVBENCH_BENCH_TYPES(nvbench_mixed_inner_join, + NVBENCH_TYPE_AXES(nvbench::type_list, + nvbench::type_list, + nvbench::enum_type_list)) + .set_name("mixed_inner_join_32bit") + .set_type_axes_names({"Key Type", "Payload Type", "Nullable"}) + .add_int64_axis("Build Table Size", {100'000, 10'000'000, 80'000'000, 100'000'000}) + .add_int64_axis("Probe Table Size", + {100'000, 400'000, 10'000'000, 40'000'000, 100'000'000, 240'000'000}); + +NVBENCH_BENCH_TYPES(nvbench_mixed_inner_join, + NVBENCH_TYPE_AXES(nvbench::type_list, + nvbench::type_list, + nvbench::enum_type_list)) + .set_name("mixed_inner_join_64bit") + .set_type_axes_names({"Key Type", "Payload Type", "Nullable"}) + .add_int64_axis("Build Table Size", {40'000'000, 50'000'000}) + .add_int64_axis("Probe Table Size", {50'000'000, 120'000'000}); + +NVBENCH_BENCH_TYPES(nvbench_mixed_inner_join, + NVBENCH_TYPE_AXES(nvbench::type_list, + nvbench::type_list, + nvbench::enum_type_list)) + .set_name("mixed_inner_join_32bit_nulls") + .set_type_axes_names({"Key Type", "Payload Type", "Nullable"}) + .add_int64_axis("Build Table Size", {100'000, 10'000'000, 80'000'000, 100'000'000}) + .add_int64_axis("Probe Table Size", + {100'000, 400'000, 10'000'000, 40'000'000, 100'000'000, 240'000'000}); + +NVBENCH_BENCH_TYPES(nvbench_mixed_inner_join, + NVBENCH_TYPE_AXES(nvbench::type_list, + nvbench::type_list, + nvbench::enum_type_list)) + .set_name("mixed_inner_join_64bit_nulls") + .set_type_axes_names({"Key Type", "Payload Type", "Nullable"}) + .add_int64_axis("Build Table Size", {40'000'000, 50'000'000}) + .add_int64_axis("Probe Table Size", {50'000'000, 120'000'000}); + +// left join ------------------------------------------------------------------------ +NVBENCH_BENCH_TYPES(nvbench_mixed_left_join, + 
NVBENCH_TYPE_AXES(nvbench::type_list, + nvbench::type_list, + nvbench::enum_type_list)) + .set_name("mixed_left_join_32bit") + .set_type_axes_names({"Key Type", "Payload Type", "Nullable"}) + .add_int64_axis("Build Table Size", {100'000, 10'000'000, 80'000'000, 100'000'000}) + .add_int64_axis("Probe Table Size", + {100'000, 400'000, 10'000'000, 40'000'000, 100'000'000, 240'000'000}); + +NVBENCH_BENCH_TYPES(nvbench_mixed_left_join, + NVBENCH_TYPE_AXES(nvbench::type_list, + nvbench::type_list, + nvbench::enum_type_list)) + .set_name("mixed_left_join_64bit") + .set_type_axes_names({"Key Type", "Payload Type", "Nullable"}) + .add_int64_axis("Build Table Size", {40'000'000, 50'000'000}) + .add_int64_axis("Probe Table Size", {50'000'000, 120'000'000}); + +NVBENCH_BENCH_TYPES(nvbench_mixed_left_join, + NVBENCH_TYPE_AXES(nvbench::type_list, + nvbench::type_list, + nvbench::enum_type_list)) + .set_name("mixed_left_join_32bit_nulls") + .set_type_axes_names({"Key Type", "Payload Type", "Nullable"}) + .add_int64_axis("Build Table Size", {100'000, 10'000'000, 80'000'000, 100'000'000}) + .add_int64_axis("Probe Table Size", + {100'000, 400'000, 10'000'000, 40'000'000, 100'000'000, 240'000'000}); + +NVBENCH_BENCH_TYPES(nvbench_mixed_left_join, + NVBENCH_TYPE_AXES(nvbench::type_list, + nvbench::type_list, + nvbench::enum_type_list)) + .set_name("mixed_left_join_64bit_nulls") + .set_type_axes_names({"Key Type", "Payload Type", "Nullable"}) + .add_int64_axis("Build Table Size", {40'000'000, 50'000'000}) + .add_int64_axis("Probe Table Size", {50'000'000, 120'000'000}); + +// full join ------------------------------------------------------------------------ +NVBENCH_BENCH_TYPES(nvbench_mixed_full_join, + NVBENCH_TYPE_AXES(nvbench::type_list, + nvbench::type_list, + nvbench::enum_type_list)) + .set_name("mixed_full_join_32bit") + .set_type_axes_names({"Key Type", "Payload Type", "Nullable"}) + .add_int64_axis("Build Table Size", {100'000, 10'000'000, 80'000'000, 100'000'000}) + 
.add_int64_axis("Probe Table Size", + {100'000, 400'000, 10'000'000, 40'000'000, 100'000'000, 240'000'000}); + +NVBENCH_BENCH_TYPES(nvbench_mixed_full_join, + NVBENCH_TYPE_AXES(nvbench::type_list, + nvbench::type_list, + nvbench::enum_type_list)) + .set_name("mixed_full_join_64bit") + .set_type_axes_names({"Key Type", "Payload Type", "Nullable"}) + .add_int64_axis("Build Table Size", {40'000'000, 50'000'000}) + .add_int64_axis("Probe Table Size", {50'000'000, 120'000'000}); + +NVBENCH_BENCH_TYPES(nvbench_mixed_full_join, + NVBENCH_TYPE_AXES(nvbench::type_list, + nvbench::type_list, + nvbench::enum_type_list)) + .set_name("mixed_full_join_32bit_nulls") + .set_type_axes_names({"Key Type", "Payload Type", "Nullable"}) + .add_int64_axis("Build Table Size", {100'000, 10'000'000, 80'000'000, 100'000'000}) + .add_int64_axis("Probe Table Size", + {100'000, 400'000, 10'000'000, 40'000'000, 100'000'000, 240'000'000}); + +NVBENCH_BENCH_TYPES(nvbench_mixed_full_join, + NVBENCH_TYPE_AXES(nvbench::type_list, + nvbench::type_list, + nvbench::enum_type_list)) + .set_name("mixed_full_join_64bit_nulls") + .set_type_axes_names({"Key Type", "Payload Type", "Nullable"}) + .add_int64_axis("Build Table Size", {40'000'000, 50'000'000}) + .add_int64_axis("Probe Table Size", {50'000'000, 120'000'000}); + +// left semi join ------------------------------------------------------------------------ +NVBENCH_BENCH_TYPES(nvbench_mixed_left_semi_join, + NVBENCH_TYPE_AXES(nvbench::type_list, + nvbench::type_list, + nvbench::enum_type_list)) + .set_name("mixed_left_semi_join_32bit") + .set_type_axes_names({"Key Type", "Payload Type", "Nullable"}) + .add_int64_axis("Build Table Size", {100'000, 10'000'000, 80'000'000, 100'000'000}) + .add_int64_axis("Probe Table Size", + {100'000, 400'000, 10'000'000, 40'000'000, 100'000'000, 240'000'000}); + +NVBENCH_BENCH_TYPES(nvbench_mixed_left_semi_join, + NVBENCH_TYPE_AXES(nvbench::type_list, + nvbench::type_list, + nvbench::enum_type_list)) + 
.set_name("mixed_left_semi_join_64bit") + .set_type_axes_names({"Key Type", "Payload Type", "Nullable"}) + .add_int64_axis("Build Table Size", {40'000'000, 50'000'000}) + .add_int64_axis("Probe Table Size", {50'000'000, 120'000'000}); + +NVBENCH_BENCH_TYPES(nvbench_mixed_left_semi_join, + NVBENCH_TYPE_AXES(nvbench::type_list, + nvbench::type_list, + nvbench::enum_type_list)) + .set_name("mixed_left_semi_join_32bit_nulls") + .set_type_axes_names({"Key Type", "Payload Type", "Nullable"}) + .add_int64_axis("Build Table Size", {100'000, 10'000'000, 80'000'000, 100'000'000}) + .add_int64_axis("Probe Table Size", + {100'000, 400'000, 10'000'000, 40'000'000, 100'000'000, 240'000'000}); + +NVBENCH_BENCH_TYPES(nvbench_mixed_left_semi_join, + NVBENCH_TYPE_AXES(nvbench::type_list, + nvbench::type_list, + nvbench::enum_type_list)) + .set_name("mixed_left_semi_join_64bit_nulls") + .set_type_axes_names({"Key Type", "Payload Type", "Nullable"}) + .add_int64_axis("Build Table Size", {40'000'000, 50'000'000}) + .add_int64_axis("Probe Table Size", {50'000'000, 120'000'000}); + +// left anti join ------------------------------------------------------------------------ +NVBENCH_BENCH_TYPES(nvbench_mixed_left_anti_join, + NVBENCH_TYPE_AXES(nvbench::type_list, + nvbench::type_list, + nvbench::enum_type_list)) + .set_name("mixed_left_anti_join_32bit") + .set_type_axes_names({"Key Type", "Payload Type", "Nullable"}) + .add_int64_axis("Build Table Size", {100'000, 10'000'000, 80'000'000, 100'000'000}) + .add_int64_axis("Probe Table Size", + {100'000, 400'000, 10'000'000, 40'000'000, 100'000'000, 240'000'000}); + +NVBENCH_BENCH_TYPES(nvbench_mixed_left_anti_join, + NVBENCH_TYPE_AXES(nvbench::type_list, + nvbench::type_list, + nvbench::enum_type_list)) + .set_name("mixed_left_anti_join_64bit") + .set_type_axes_names({"Key Type", "Payload Type", "Nullable"}) + .add_int64_axis("Build Table Size", {40'000'000, 50'000'000}) + .add_int64_axis("Probe Table Size", {50'000'000, 120'000'000}); + 
+NVBENCH_BENCH_TYPES(nvbench_mixed_left_anti_join, + NVBENCH_TYPE_AXES(nvbench::type_list, + nvbench::type_list, + nvbench::enum_type_list)) + .set_name("mixed_left_anti_join_32bit_nulls") + .set_type_axes_names({"Key Type", "Payload Type", "Nullable"}) + .add_int64_axis("Build Table Size", {100'000, 10'000'000, 80'000'000, 100'000'000}) + .add_int64_axis("Probe Table Size", + {100'000, 400'000, 10'000'000, 40'000'000, 100'000'000, 240'000'000}); + +NVBENCH_BENCH_TYPES(nvbench_mixed_left_anti_join, + NVBENCH_TYPE_AXES(nvbench::type_list, + nvbench::type_list, + nvbench::enum_type_list)) + .set_name("mixed_left_anti_join_64bit_nulls") + .set_type_axes_names({"Key Type", "Payload Type", "Nullable"}) + .add_int64_axis("Build Table Size", {40'000'000, 50'000'000}) + .add_int64_axis("Probe Table Size", {50'000'000, 120'000'000}); diff --git a/cpp/benchmarks/lists/copying/scatter_lists.cu b/cpp/benchmarks/lists/copying/scatter_lists.cu new file mode 100644 index 0000000..dbc3234 --- /dev/null +++ b/cpp/benchmarks/lists/copying/scatter_lists.cu @@ -0,0 +1,147 @@ +/* + * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include + +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include + +#include + +class ScatterLists : public cudf::benchmark {}; + +template +void BM_lists_scatter(::benchmark::State& state) +{ + auto stream = cudf::get_default_stream(); + auto mr = rmm::mr::get_current_device_resource(); + + cudf::size_type const base_size{(cudf::size_type)state.range(0)}; + cudf::size_type const num_elements_per_row{(cudf::size_type)state.range(1)}; + auto const num_rows = (cudf::size_type)ceil(double(base_size) / num_elements_per_row); + + auto source_base_col = make_fixed_width_column(cudf::data_type{cudf::type_to_id()}, + base_size, + cudf::mask_state::UNALLOCATED, + stream, + mr); + auto target_base_col = make_fixed_width_column(cudf::data_type{cudf::type_to_id()}, + base_size, + cudf::mask_state::UNALLOCATED, + stream, + mr); + thrust::sequence(rmm::exec_policy(stream), + source_base_col->mutable_view().begin(), + source_base_col->mutable_view().end()); + thrust::sequence(rmm::exec_policy(stream), + target_base_col->mutable_view().begin(), + target_base_col->mutable_view().end()); + + auto source_offsets = + make_fixed_width_column(cudf::data_type{cudf::type_to_id()}, + num_rows + 1, + cudf::mask_state::UNALLOCATED, + stream, + mr); + auto target_offsets = + make_fixed_width_column(cudf::data_type{cudf::type_to_id()}, + num_rows + 1, + cudf::mask_state::UNALLOCATED, + stream, + mr); + + thrust::sequence(rmm::exec_policy(stream), + source_offsets->mutable_view().begin(), + source_offsets->mutable_view().end(), + 0, + num_elements_per_row); + thrust::sequence(rmm::exec_policy(stream), + target_offsets->mutable_view().begin(), + target_offsets->mutable_view().end(), + 0, + num_elements_per_row); + + auto source = make_lists_column(num_rows, + std::move(source_offsets), + std::move(source_base_col), + 0, + cudf::create_null_mask(num_rows, cudf::mask_state::UNALLOCATED), + stream, + mr); + auto target = 
make_lists_column(num_rows, + std::move(target_offsets), + std::move(target_base_col), + 0, + cudf::create_null_mask(num_rows, cudf::mask_state::UNALLOCATED), + stream, + mr); + + auto scatter_map = make_fixed_width_column(cudf::data_type{cudf::type_to_id()}, + num_rows, + cudf::mask_state::UNALLOCATED, + stream, + mr); + auto m_scatter_map = scatter_map->mutable_view(); + thrust::sequence(rmm::exec_policy(stream), + m_scatter_map.begin(), + m_scatter_map.end(), + num_rows - 1, + -1); + + if (not coalesce) { + thrust::default_random_engine g; + thrust::shuffle(rmm::exec_policy(stream), + m_scatter_map.begin(), + m_scatter_map.begin(), + g); + } + + for (auto _ : state) { + cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0 + scatter(cudf::table_view{{*source}}, + *scatter_map, + cudf::table_view{{*target}}, + cudf::get_default_stream(), + mr); + } + + state.SetBytesProcessed(static_cast(state.iterations()) * state.range(0) * 2 * + sizeof(TypeParam)); +} + +#define SBM_BENCHMARK_DEFINE(name, type, coalesce) \ + BENCHMARK_DEFINE_F(ScatterLists, name)(::benchmark::State & state) \ + { \ + BM_lists_scatter(state); \ + } \ + BENCHMARK_REGISTER_F(ScatterLists, name) \ + ->RangeMultiplier(8) \ + ->Ranges({{1 << 10, 1 << 25}, {64, 2048}}) /* 1K-1B rows, 64-2048 elements */ \ + ->UseManualTime(); + +SBM_BENCHMARK_DEFINE(double_type_colesce_o, double, true); +SBM_BENCHMARK_DEFINE(double_type_colesce_x, double, false); diff --git a/cpp/benchmarks/lists/set_operations.cpp b/cpp/benchmarks/lists/set_operations.cpp new file mode 100644 index 0000000..5b24092 --- /dev/null +++ b/cpp/benchmarks/lists/set_operations.cpp @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include + +#include + +namespace { + +constexpr auto max_list_size = 20; + +auto generate_random_lists(cudf::size_type num_rows, cudf::size_type depth, double null_freq) +{ + auto builder = + data_profile_builder() + .cardinality(0) + .distribution(cudf::type_id::LIST, distribution_id::UNIFORM, 0, max_list_size) + .list_depth(depth) + .null_probability(null_freq > 0 ? std::optional{null_freq} : std::nullopt); + + auto data_table = + create_random_table({cudf::type_id::LIST}, row_count{num_rows}, data_profile{builder}); + return std::move(data_table->release().front()); +} + +template +void nvbench_set_op(nvbench::state& state, BenchFuncPtr bfunc) +{ + auto const num_rows = static_cast(state.get_int64("num_rows")); + auto const depth = static_cast(state.get_int64("depth")); + auto const null_freq = state.get_float64("null_frequency"); + + auto const lhs = generate_random_lists(num_rows, depth, null_freq); + auto const rhs = generate_random_lists(num_rows, depth, null_freq); + + state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + bfunc(cudf::lists_column_view{*lhs}, + cudf::lists_column_view{*rhs}, + cudf::null_equality::EQUAL, + cudf::nan_equality::ALL_EQUAL, + rmm::mr::get_current_device_resource()); + }); +} + +} // namespace + +void nvbench_have_overlap(nvbench::state& state) +{ + nvbench_set_op(state, &cudf::lists::have_overlap); +} + +void nvbench_intersect_distinct(nvbench::state& state) +{ + nvbench_set_op(state, 
&cudf::lists::intersect_distinct); +} + +NVBENCH_BENCH(nvbench_have_overlap) + .set_name("have_overlap") + .add_int64_power_of_two_axis("num_rows", {10, 13, 16}) + .add_int64_axis("depth", {1, 4}) + .add_float64_axis("null_frequency", {0, 0.2, 0.8}); + +NVBENCH_BENCH(nvbench_intersect_distinct) + .set_name("intersect_distinct") + .add_int64_power_of_two_axis("num_rows", {10, 13, 16}) + .add_int64_axis("depth", {1, 4}) + .add_float64_axis("null_frequency", {0, 0.2, 0.8}); diff --git a/cpp/benchmarks/merge/merge.cpp b/cpp/benchmarks/merge/merge.cpp new file mode 100644 index 0000000..2d2f4fd --- /dev/null +++ b/cpp/benchmarks/merge/merge.cpp @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include + +#include +#include +#include +#include + +#include + +#include + +// to enable, run cmake with -DBUILD_BENCHMARKS=ON + +// Fixture that enables RMM pool mode +class Merge : public cudf::benchmark {}; + +using IntColWrap = cudf::test::fixed_width_column_wrapper; + +void BM_merge(benchmark::State& state) +{ + cudf::size_type const avg_rows = 1 << 19; // 512K rows + int const num_tables = state.range(0); + + // Content is irrelevant for the benchmark + auto data_sequence = thrust::make_constant_iterator(0); + + // Using 0 seed to ensure consistent pseudo-numbers on each run + std::mt19937 rand_gen(0); + // Gaussian distribution with 98% of elements are in range [0, avg_rows*2] + std::normal_distribution<> table_size_dist(avg_rows, avg_rows / 2); + // Used to generate a random monotonic sequence for each table key column + std::uniform_int_distribution<> key_dist(0, 10); + + std::vector> columns; + size_t total_rows = 0; + std::vector tables; + for (int i = 0; i < num_tables; ++i) { + cudf::size_type const rows = std::round(table_size_dist(rand_gen)); + // Ensure size in range [0, avg_rows*2] + auto const clamped_rows = std::clamp(rows, 0, avg_rows * 2); + + int32_t prev_key = 0; + auto key_sequence = cudf::detail::make_counting_transform_iterator(0, [&](auto row) { + prev_key += key_dist(rand_gen); + return prev_key; + }); + + columns.emplace_back( + std::pair{IntColWrap(key_sequence, key_sequence + clamped_rows), + IntColWrap(data_sequence, data_sequence + clamped_rows)}); + tables.push_back(cudf::table_view{{columns.back().first, columns.back().second}}); + total_rows += clamped_rows; + } + std::vector const key_cols{0}; + std::vector const column_order{cudf::order::ASCENDING}; + std::vector const null_precedence{}; + + for (auto _ : state) { + cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0 + auto result = cudf::merge(tables, key_cols, column_order, null_precedence); + } + + 
state.SetBytesProcessed(state.iterations() * 2 * sizeof(int32_t) * total_rows); +} + +#define MBM_BENCHMARK_DEFINE(name) \ + BENCHMARK_DEFINE_F(Merge, name)(::benchmark::State & state) { BM_merge(state); } \ + BENCHMARK_REGISTER_F(Merge, name) \ + ->Unit(benchmark::kMillisecond) \ + ->UseManualTime() \ + ->RangeMultiplier(2) \ + ->Ranges({{2, 128}}); + +MBM_BENCHMARK_DEFINE(pow2tables); diff --git a/cpp/benchmarks/null_mask/set_null_mask.cpp b/cpp/benchmarks/null_mask/set_null_mask.cpp new file mode 100644 index 0000000..4ac4c96 --- /dev/null +++ b/cpp/benchmarks/null_mask/set_null_mask.cpp @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include + +#include + +class SetNullmask : public cudf::benchmark {}; + +void BM_setnullmask(benchmark::State& state) +{ + cudf::size_type const size{(cudf::size_type)state.range(0)}; + rmm::device_buffer mask = cudf::create_null_mask(size, cudf::mask_state::UNINITIALIZED); + auto begin = 0, end = size; + + for (auto _ : state) { + cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0 + cudf::set_null_mask(static_cast(mask.data()), begin, end, true); + } + + state.SetBytesProcessed(static_cast(state.iterations()) * size / 8); +} + +#define NBM_BENCHMARK_DEFINE(name) \ + BENCHMARK_DEFINE_F(SetNullmask, name)(::benchmark::State & state) { BM_setnullmask(state); } \ + BENCHMARK_REGISTER_F(SetNullmask, name) \ + ->RangeMultiplier(1 << 10) \ + ->Range(1 << 10, 1 << 30) \ + ->UseManualTime(); + +NBM_BENCHMARK_DEFINE(SetNullMaskKernel); diff --git a/cpp/benchmarks/quantiles/quantiles.cpp b/cpp/benchmarks/quantiles/quantiles.cpp new file mode 100644 index 0000000..24f9cc9 --- /dev/null +++ b/cpp/benchmarks/quantiles/quantiles.cpp @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include + +#include +#include + +#include +#include + +class Quantiles : public cudf::benchmark {}; + +static void BM_quantiles(benchmark::State& state, bool nulls) +{ + using Type = int; + + cudf::size_type const n_rows{(cudf::size_type)state.range(0)}; + cudf::size_type const n_cols{(cudf::size_type)state.range(1)}; + cudf::size_type const n_quantiles{(cudf::size_type)state.range(2)}; + + // Create columns with values in the range [0,100) + data_profile profile = data_profile_builder().cardinality(0).distribution( + cudf::type_to_id(), distribution_id::UNIFORM, 0, 100); + profile.set_null_probability(nulls ? std::optional{0.01} + : std::nullopt); // 1% nulls or no null mask (<0) + + auto input_table = create_random_table( + cycle_dtypes({cudf::type_to_id()}, n_cols), row_count{n_rows}, profile); + auto input = cudf::table_view(*input_table); + + std::vector q(n_quantiles); + thrust::tabulate( + thrust::seq, q.begin(), q.end(), [n_quantiles](auto i) { return i * (1.0f / n_quantiles); }); + + for (auto _ : state) { + cuda_event_timer raii(state, true, cudf::get_default_stream()); + + auto result = cudf::quantiles(input, q); + // auto result = (stable) ? cudf::stable_sorted_order(input) : cudf::sorted_order(input); + } +} + +#define QUANTILES_BENCHMARK_DEFINE(name, nulls) \ + BENCHMARK_DEFINE_F(Quantiles, name) \ + (::benchmark::State & st) { BM_quantiles(st, nulls); } \ + BENCHMARK_REGISTER_F(Quantiles, name) \ + ->RangeMultiplier(4) \ + ->Ranges({{1 << 16, 1 << 26}, {1, 8}, {1, 12}}) \ + ->UseManualTime() \ + ->Unit(benchmark::kMillisecond); + +QUANTILES_BENCHMARK_DEFINE(no_nulls, false) +QUANTILES_BENCHMARK_DEFINE(nulls, true) diff --git a/cpp/benchmarks/reduction/anyall.cpp b/cpp/benchmarks/reduction/anyall.cpp new file mode 100644 index 0000000..8b1e71c --- /dev/null +++ b/cpp/benchmarks/reduction/anyall.cpp @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2021-2023, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +#include +#include +#include + +#include + +class Reduction : public cudf::benchmark {}; + +template +void BM_reduction_anyall(benchmark::State& state, + std::unique_ptr const& agg) +{ + cudf::size_type const column_size{static_cast(state.range(0))}; + auto const dtype = cudf::type_to_id(); + data_profile const profile = data_profile_builder().no_validity().distribution( + dtype, distribution_id::UNIFORM, 0, agg->kind == cudf::aggregation::ANY ? 
0 : 100); + auto const values = create_random_column(dtype, row_count{column_size}, profile); + + cudf::data_type output_dtype{cudf::type_id::BOOL8}; + + for (auto _ : state) { + cuda_event_timer timer(state, true); + auto result = cudf::reduce(*values, *agg, output_dtype); + } +} + +#define concat(a, b, c) a##b##c +#define get_agg(op) concat(cudf::make_, op, _aggregation()) + +// TYPE, OP +#define RBM_BENCHMARK_DEFINE(name, type, aggregation) \ + BENCHMARK_DEFINE_F(Reduction, name)(::benchmark::State & state) \ + { \ + BM_reduction_anyall(state, get_agg(aggregation)); \ + } \ + BENCHMARK_REGISTER_F(Reduction, name) \ + ->UseManualTime() \ + ->Arg(10000) /* 10k */ \ + ->Arg(100000) /* 100k */ \ + ->Arg(1000000) /* 1M */ \ + ->Arg(10000000) /* 10M */ \ + ->Arg(100000000); /* 100M */ + +#define REDUCE_BENCHMARK_DEFINE(type, aggregation) \ + RBM_BENCHMARK_DEFINE(concat(type, _, aggregation), type, aggregation) + +REDUCE_BENCHMARK_DEFINE(bool, all); +REDUCE_BENCHMARK_DEFINE(int8_t, all); +REDUCE_BENCHMARK_DEFINE(int32_t, all); +REDUCE_BENCHMARK_DEFINE(float, all); +REDUCE_BENCHMARK_DEFINE(bool, any); +REDUCE_BENCHMARK_DEFINE(int8_t, any); +REDUCE_BENCHMARK_DEFINE(int32_t, any); +REDUCE_BENCHMARK_DEFINE(float, any); diff --git a/cpp/benchmarks/reduction/dictionary.cpp b/cpp/benchmarks/reduction/dictionary.cpp new file mode 100644 index 0000000..c1c44c9 --- /dev/null +++ b/cpp/benchmarks/reduction/dictionary.cpp @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +#include +#include +#include +#include + +class ReductionDictionary : public cudf::benchmark {}; + +template +void BM_reduction_dictionary(benchmark::State& state, + std::unique_ptr const& agg) +{ + cudf::size_type const column_size{static_cast(state.range(0))}; + + // int column and encoded dictionary column + data_profile const profile = data_profile_builder().cardinality(0).no_validity().distribution( + cudf::type_to_id(), + distribution_id::UNIFORM, + (agg->kind == cudf::aggregation::ALL ? 1 : 0), + (agg->kind == cudf::aggregation::ANY ? 0 : 100)); + auto int_column = create_random_column(cudf::type_to_id(), row_count{column_size}, profile); + auto number_col = cudf::cast(*int_column, cudf::data_type{cudf::type_to_id()}); + auto values = cudf::dictionary::encode(*number_col); + + cudf::data_type output_dtype = [&] { + if (agg->kind == cudf::aggregation::ANY || agg->kind == cudf::aggregation::ALL) + return cudf::data_type{cudf::type_id::BOOL8}; + if (agg->kind == cudf::aggregation::MEAN) return cudf::data_type{cudf::type_id::FLOAT64}; + return cudf::data_type{cudf::type_to_id()}; + }(); + + for (auto _ : state) { + cuda_event_timer timer(state, true); + auto result = cudf::reduce(*values, *agg, output_dtype); + } +} + +#define concat(a, b, c) a##b##c +#define get_agg(op) concat(cudf::make_, op, _aggregation()) + +// TYPE, OP +#define RBM_BENCHMARK_DEFINE(name, type, aggregation) \ + BENCHMARK_DEFINE_F(ReductionDictionary, name)(::benchmark::State & state) \ + { \ + BM_reduction_dictionary(state, get_agg(aggregation)); \ + } \ + BENCHMARK_REGISTER_F(ReductionDictionary, name) \ + ->UseManualTime() \ + ->Arg(10000) /* 10k */ \ + ->Arg(100000) /* 100k */ \ + ->Arg(1000000) /* 1M */ \ + ->Arg(10000000) /* 10M */ \ + ->Arg(100000000); /* 100M */ + +#define REDUCE_BENCHMARK_DEFINE(type, aggregation) \ + 
RBM_BENCHMARK_DEFINE(concat(type, _, aggregation), type, aggregation) + +REDUCE_BENCHMARK_DEFINE(int32_t, all); +REDUCE_BENCHMARK_DEFINE(float, all); +REDUCE_BENCHMARK_DEFINE(int32_t, any); +REDUCE_BENCHMARK_DEFINE(float, any); +REDUCE_BENCHMARK_DEFINE(int32_t, min); +REDUCE_BENCHMARK_DEFINE(float, min); +REDUCE_BENCHMARK_DEFINE(int32_t, max); +REDUCE_BENCHMARK_DEFINE(float, max); +REDUCE_BENCHMARK_DEFINE(int32_t, mean); +REDUCE_BENCHMARK_DEFINE(float, mean); diff --git a/cpp/benchmarks/reduction/minmax.cpp b/cpp/benchmarks/reduction/minmax.cpp new file mode 100644 index 0000000..963c266 --- /dev/null +++ b/cpp/benchmarks/reduction/minmax.cpp @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include + +#include +#include +#include + +class Reduction : public cudf::benchmark {}; + +template +void BM_reduction(benchmark::State& state) +{ + cudf::size_type const column_size{(cudf::size_type)state.range(0)}; + auto const dtype = cudf::type_to_id(); + auto const input_column = + create_random_column(dtype, row_count{column_size}, data_profile_builder().no_validity()); + + for (auto _ : state) { + cuda_event_timer timer(state, true); + auto result = cudf::minmax(*input_column); + } +} + +#define concat(a, b, c) a##b##c +#define get_agg(op) concat(cudf::make_, op, _aggregation()) + +// TYPE, OP +#define RBM_BENCHMARK_DEFINE(name, type, aggregation) \ + BENCHMARK_DEFINE_F(Reduction, name)(::benchmark::State & state) { BM_reduction(state); } \ + BENCHMARK_REGISTER_F(Reduction, name) \ + ->UseManualTime() \ + ->Arg(10000) /* 10k */ \ + ->Arg(100000) /* 100k */ \ + ->Arg(1000000) /* 1M */ \ + ->Arg(10000000) /* 10M */ \ + ->Arg(100000000); /* 100M */ + +#define REDUCE_BENCHMARK_DEFINE(type, aggregation) \ + RBM_BENCHMARK_DEFINE(concat(type, _, aggregation), type, aggregation) + +REDUCE_BENCHMARK_DEFINE(bool, minmax); +REDUCE_BENCHMARK_DEFINE(int8_t, minmax); +REDUCE_BENCHMARK_DEFINE(int32_t, minmax); +using cudf::timestamp_ms; +REDUCE_BENCHMARK_DEFINE(timestamp_ms, minmax); +REDUCE_BENCHMARK_DEFINE(float, minmax); diff --git a/cpp/benchmarks/reduction/rank.cpp b/cpp/benchmarks/reduction/rank.cpp new file mode 100644 index 0000000..e55f3b9 --- /dev/null +++ b/cpp/benchmarks/reduction/rank.cpp @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include +#include +#include + +#include + +template +static void nvbench_reduction_scan(nvbench::state& state, nvbench::type_list) +{ + auto const dtype = cudf::type_to_id(); + + double const null_probability = state.get_float64("null_probability"); + size_t const size = state.get_int64("data_size"); + + data_profile const profile = data_profile_builder() + .null_probability(null_probability) + .distribution(dtype, distribution_id::UNIFORM, 0, 5); + + auto const table = create_random_table({dtype}, table_size_bytes{size / 2}, profile); + + auto const new_tbl = cudf::repeat(table->view(), 2); + cudf::column_view input(new_tbl->view().column(0)); + + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + rmm::cuda_stream_view stream_view{launch.get_stream()}; + auto result = cudf::detail::inclusive_dense_rank_scan( + input, stream_view, rmm::mr::get_current_device_resource()); + }); +} + +using data_type = nvbench::type_list; + +NVBENCH_BENCH_TYPES(nvbench_reduction_scan, NVBENCH_TYPE_AXES(data_type)) + .set_name("rank_scan") + .add_float64_axis("null_probability", {0, 0.1, 0.5, 0.9}) + .add_int64_axis("data_size", + { + 10000, // 10k + 100000, // 100k + 1000000, // 1M + 10000000, // 10M + 100000000, // 100M + }); diff --git a/cpp/benchmarks/reduction/reduce.cpp b/cpp/benchmarks/reduction/reduce.cpp new file mode 100644 index 0000000..5bd3e2e --- /dev/null +++ b/cpp/benchmarks/reduction/reduce.cpp @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2020-2023, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +#include +#include +#include +#include + +#include + +class Reduction : public cudf::benchmark {}; + +template +void BM_reduction(benchmark::State& state, std::unique_ptr const& agg) +{ + cudf::size_type const column_size{(cudf::size_type)state.range(0)}; + auto const dtype = cudf::type_to_id(); + data_profile const profile = + data_profile_builder().no_validity().distribution(dtype, distribution_id::UNIFORM, 0, 100); + auto const input_column = create_random_column(dtype, row_count{column_size}, profile); + + cudf::data_type output_dtype = + (agg->kind == cudf::aggregation::MEAN || agg->kind == cudf::aggregation::VARIANCE || + agg->kind == cudf::aggregation::STD) + ? 
cudf::data_type{cudf::type_id::FLOAT64} + : input_column->type(); + + for (auto _ : state) { + cuda_event_timer timer(state, true); + auto result = cudf::reduce(*input_column, *agg, output_dtype); + } +} + +#define concat(a, b, c) a##b##c +#define get_agg(op) concat(cudf::make_, op, _aggregation()) + +// TYPE, OP +#define RBM_BENCHMARK_DEFINE(name, type, aggregation) \ + BENCHMARK_DEFINE_F(Reduction, name)(::benchmark::State & state) \ + { \ + BM_reduction(state, get_agg(aggregation)); \ + } \ + BENCHMARK_REGISTER_F(Reduction, name) \ + ->UseManualTime() \ + ->Arg(10000) /* 10k */ \ + ->Arg(100000) /* 100k */ \ + ->Arg(1000000) /* 1M */ \ + ->Arg(10000000) /* 10M */ \ + ->Arg(100000000); /* 100M */ + +#define REDUCE_BENCHMARK_DEFINE(type, aggregation) \ + RBM_BENCHMARK_DEFINE(concat(type, _, aggregation), type, aggregation) + +#define REDUCE_BENCHMARK_NUMERIC(aggregation) \ + REDUCE_BENCHMARK_DEFINE(bool, aggregation); \ + REDUCE_BENCHMARK_DEFINE(int8_t, aggregation); \ + REDUCE_BENCHMARK_DEFINE(int32_t, aggregation); \ + REDUCE_BENCHMARK_DEFINE(int64_t, aggregation); \ + REDUCE_BENCHMARK_DEFINE(float, aggregation); \ + REDUCE_BENCHMARK_DEFINE(double, aggregation); + +REDUCE_BENCHMARK_NUMERIC(sum); +REDUCE_BENCHMARK_DEFINE(int32_t, product); +REDUCE_BENCHMARK_DEFINE(float, product); +REDUCE_BENCHMARK_DEFINE(int64_t, min); +REDUCE_BENCHMARK_DEFINE(double, min); +using cudf::timestamp_ms; +REDUCE_BENCHMARK_DEFINE(timestamp_ms, min); +REDUCE_BENCHMARK_DEFINE(int8_t, mean); +REDUCE_BENCHMARK_DEFINE(float, mean); +REDUCE_BENCHMARK_DEFINE(int32_t, variance); +REDUCE_BENCHMARK_DEFINE(double, variance); +REDUCE_BENCHMARK_DEFINE(int64_t, std); +REDUCE_BENCHMARK_DEFINE(float, std); diff --git a/cpp/benchmarks/reduction/scan.cpp b/cpp/benchmarks/reduction/scan.cpp new file mode 100644 index 0000000..8c9883e --- /dev/null +++ b/cpp/benchmarks/reduction/scan.cpp @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2021-2023, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +#include +#include +#include +#include +#include + +class ReductionScan : public cudf::benchmark {}; + +template +static void BM_reduction_scan(benchmark::State& state, bool include_nulls) +{ + cudf::size_type const n_rows{(cudf::size_type)state.range(0)}; + auto const dtype = cudf::type_to_id(); + auto const column = create_random_column(dtype, row_count{n_rows}); + if (!include_nulls) column->set_null_mask(rmm::device_buffer{}, 0); + + for (auto _ : state) { + cuda_event_timer timer(state, true); + auto result = cudf::scan( + *column, *cudf::make_min_aggregation(), cudf::scan_type::INCLUSIVE); + } +} + +#define SCAN_BENCHMARK_DEFINE(name, type, nulls) \ + BENCHMARK_DEFINE_F(ReductionScan, name) \ + (::benchmark::State & state) { BM_reduction_scan(state, nulls); } \ + BENCHMARK_REGISTER_F(ReductionScan, name) \ + ->UseManualTime() \ + ->Arg(10000) /* 10k */ \ + ->Arg(100000) /* 100k */ \ + ->Arg(1000000) /* 1M */ \ + ->Arg(10000000) /* 10M */ \ + ->Arg(100000000); /* 100M */ + +SCAN_BENCHMARK_DEFINE(int8_no_nulls, int8_t, false); +SCAN_BENCHMARK_DEFINE(int32_no_nulls, int32_t, false); +SCAN_BENCHMARK_DEFINE(uint64_no_nulls, uint64_t, false); +SCAN_BENCHMARK_DEFINE(float_no_nulls, float, false); +SCAN_BENCHMARK_DEFINE(int16_nulls, int16_t, true); +SCAN_BENCHMARK_DEFINE(uint32_nulls, uint32_t, true); +SCAN_BENCHMARK_DEFINE(double_nulls, double, true); diff --git 
a/cpp/benchmarks/reduction/scan_structs.cpp b/cpp/benchmarks/reduction/scan_structs.cpp new file mode 100644 index 0000000..ee97b54 --- /dev/null +++ b/cpp/benchmarks/reduction/scan_structs.cpp @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include +#include + +#include + +static constexpr cudf::size_type num_struct_members = 8; +static constexpr cudf::size_type max_int = 100; +static constexpr cudf::size_type max_str_length = 32; + +static void nvbench_structs_scan(nvbench::state& state) +{ + auto const null_probability = [&] { + auto const null_prob_val = state.get_float64("null_probability"); + return null_prob_val > 0 ? 
std::optional{null_prob_val} : std::nullopt; + }(); + auto const size = static_cast(state.get_int64("data_size")); + auto const profile = static_cast( + data_profile_builder() + .null_probability(null_probability) + .distribution(cudf::type_id::INT32, distribution_id::UNIFORM, 0, max_int) + .distribution(cudf::type_id::STRING, distribution_id::NORMAL, 0, max_str_length)); + + auto data_table = create_random_table( + cycle_dtypes({cudf::type_id::INT32, cudf::type_id::STRING}, num_struct_members), + row_count{size}, + profile); + auto [null_mask, null_count] = create_random_null_mask(size, null_probability); + auto const input = cudf::make_structs_column( + size, std::move(data_table->release()), null_count, std::move(null_mask)); + + auto const agg = cudf::make_min_aggregation(); + auto const null_policy = static_cast(state.get_int64("null_policy")); + auto const stream = cudf::get_default_stream(); + + state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value())); + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + auto const result = cudf::detail::scan_inclusive( + *input, *agg, null_policy, stream, rmm::mr::get_current_device_resource()); + }); +} + +NVBENCH_BENCH(nvbench_structs_scan) + .set_name("structs_scan") + .add_float64_axis("null_probability", {0, 0.1, 0.5, 0.9}) + .add_int64_axis("null_policy", {0, 1}) + .add_int64_axis("data_size", + { + 10000, // 10k + 100000, // 100k + 1000000, // 1M + 10000000, // 10M + }); diff --git a/cpp/benchmarks/reduction/segmented_reduce.cpp b/cpp/benchmarks/reduction/segmented_reduce.cpp new file mode 100644 index 0000000..7accb82 --- /dev/null +++ b/cpp/benchmarks/reduction/segmented_reduce.cpp @@ -0,0 +1,150 @@ +/* + * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +#include + +bool constexpr is_boolean_output_agg(cudf::segmented_reduce_aggregation::Kind kind) +{ + return kind == cudf::segmented_reduce_aggregation::ALL || + kind == cudf::segmented_reduce_aggregation::ANY; +} + +bool constexpr is_float_output_agg(cudf::segmented_reduce_aggregation::Kind kind) +{ + return kind == cudf::segmented_reduce_aggregation::MEAN || + kind == cudf::segmented_reduce_aggregation::VARIANCE || + kind == cudf::segmented_reduce_aggregation::STD; +} + +template +std::unique_ptr make_reduce_aggregation() +{ + switch (kind) { + case cudf::segmented_reduce_aggregation::SUM: + return cudf::make_sum_aggregation(); + case cudf::segmented_reduce_aggregation::PRODUCT: + return cudf::make_product_aggregation(); + case cudf::segmented_reduce_aggregation::MIN: + return cudf::make_min_aggregation(); + case cudf::segmented_reduce_aggregation::MAX: + return cudf::make_max_aggregation(); + case cudf::segmented_reduce_aggregation::ALL: + return cudf::make_all_aggregation(); + case cudf::segmented_reduce_aggregation::ANY: + return cudf::make_any_aggregation(); + case cudf::segmented_reduce_aggregation::SUM_OF_SQUARES: + return cudf::make_sum_of_squares_aggregation(); + case cudf::segmented_reduce_aggregation::MEAN: + return cudf::make_mean_aggregation(); + case cudf::segmented_reduce_aggregation::VARIANCE: + return cudf::make_variance_aggregation(); + case cudf::segmented_reduce_aggregation::STD: + return 
cudf::make_std_aggregation(); + case cudf::segmented_reduce_aggregation::NUNIQUE: + return cudf::make_nunique_aggregation(); + default: CUDF_FAIL("Unsupported segmented reduce aggregation in this benchmark"); + } +} + +template +std::pair, std::unique_ptr> make_test_data( + nvbench::state& state) +{ + auto const column_size{cudf::size_type(state.get_int64("column_size"))}; + auto const num_segments{cudf::size_type(state.get_int64("num_segments"))}; + + auto segment_length = column_size / num_segments; + + auto const dtype = cudf::type_to_id(); + data_profile profile = data_profile_builder().cardinality(0).no_validity().distribution( + dtype, distribution_id::UNIFORM, 0, 100); + auto input = create_random_column(dtype, row_count{column_size}, profile); + + auto offsets = cudf::sequence(num_segments + 1, + cudf::numeric_scalar(0), + cudf::numeric_scalar(segment_length)); + return std::pair(std::move(input), std::move(offsets)); +} + +template +void BM_Segmented_Reduction(nvbench::state& state, + nvbench::type_list>) +{ + auto const column_size{cudf::size_type(state.get_int64("column_size"))}; + auto const num_segments{cudf::size_type(state.get_int64("num_segments"))}; + + auto [input, offsets] = make_test_data(state); + auto agg = make_reduce_aggregation(); + + auto const output_type = [] { + if (is_boolean_output_agg(kind)) { return cudf::data_type{cudf::type_id::BOOL8}; } + if (is_float_output_agg(kind)) { return cudf::data_type{cudf::type_id::FLOAT64}; } + if (kind == cudf::segmented_reduce_aggregation::NUNIQUE) { + return cudf::data_type{cudf::type_to_id()}; + } + return cudf::data_type{cudf::type_to_id()}; + }(); + + state.add_element_count(column_size); + state.add_global_memory_reads(column_size); + if (is_boolean_output_agg(kind)) { + state.add_global_memory_writes(num_segments); // BOOL8 + } else { + state.add_global_memory_writes(num_segments); + } + + auto const input_view = input->view(); + auto const offsets_view = offsets->view(); + auto const 
offset_span = cudf::device_span{ + offsets_view.template data(), static_cast(offsets_view.size())}; + + state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); + state.exec( + nvbench::exec_tag::sync, [input_view, output_type, offset_span, &agg](nvbench::launch& launch) { + segmented_reduce(input_view, offset_span, *agg, output_type, cudf::null_policy::INCLUDE); + }); +} + +using Types = nvbench::type_list; +// Skip benchmarking MAX/ANY since they are covered by MIN/ALL respectively. +// Also VARIANCE includes STD calculation. +using AggKinds = nvbench::enum_type_list; + +NVBENCH_BENCH_TYPES(BM_Segmented_Reduction, NVBENCH_TYPE_AXES(Types, AggKinds)) + .set_name("segmented_reduction") + .set_type_axes_names({"DataType", "AggregationKinds"}) + .add_int64_axis("column_size", {100'000, 1'000'000, 10'000'000, 100'000'000}) + .add_int64_axis("num_segments", {1'000, 10'000, 100'000}); diff --git a/cpp/benchmarks/replace/clamp.cpp b/cpp/benchmarks/replace/clamp.cpp new file mode 100644 index 0000000..601c65a --- /dev/null +++ b/cpp/benchmarks/replace/clamp.cpp @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +class ReplaceClamp : public cudf::benchmark {}; + +template +static void BM_clamp(benchmark::State& state, bool include_nulls) +{ + cudf::size_type const n_rows{(cudf::size_type)state.range(0)}; + auto const dtype = cudf::type_to_id(); + auto const input = create_random_column(dtype, row_count{n_rows}); + if (!include_nulls) input->set_null_mask(rmm::device_buffer{}, 0); + + auto [low_scalar, high_scalar] = cudf::minmax(*input); + + // set the clamps 2 in from the min and max + { + using ScalarType = cudf::scalar_type_t; + auto lvalue = static_cast(low_scalar.get()); + auto hvalue = static_cast(high_scalar.get()); + + // super heavy clamp + auto mid = lvalue->value() + (hvalue->value() - lvalue->value()) / 2; + lvalue->set_value(mid - 10); + hvalue->set_value(mid + 10); + } + + for (auto _ : state) { + cuda_event_timer timer(state, true); + auto result = cudf::clamp(*input, *low_scalar, *high_scalar); + } +} + +#define CLAMP_BENCHMARK_DEFINE(name, type, nulls) \ + BENCHMARK_DEFINE_F(ReplaceClamp, name) \ + (::benchmark::State & state) { BM_clamp(state, nulls); } \ + BENCHMARK_REGISTER_F(ReplaceClamp, name) \ + ->UseManualTime() \ + ->Arg(10000) /* 10k */ \ + ->Arg(100000) /* 100k */ \ + ->Arg(1000000) /* 1M */ \ + ->Arg(10000000) /* 10M */ \ + ->Arg(100000000); /* 100M */ + +CLAMP_BENCHMARK_DEFINE(int8_no_nulls, int8_t, false); +CLAMP_BENCHMARK_DEFINE(int32_no_nulls, int32_t, false); +CLAMP_BENCHMARK_DEFINE(uint64_no_nulls, uint64_t, false); +CLAMP_BENCHMARK_DEFINE(float_no_nulls, float, false); +CLAMP_BENCHMARK_DEFINE(int16_nulls, int16_t, true); +CLAMP_BENCHMARK_DEFINE(uint32_nulls, uint32_t, true); +CLAMP_BENCHMARK_DEFINE(double_nulls, double, true); diff --git a/cpp/benchmarks/replace/nans.cpp b/cpp/benchmarks/replace/nans.cpp new file mode 100644 index 0000000..d33de36 --- /dev/null +++ b/cpp/benchmarks/replace/nans.cpp @@ -0,0 +1,60 @@ +/* + * Copyright (c) 
2021-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +class ReplaceNans : public cudf::benchmark {}; + +template +static void BM_replace_nans(benchmark::State& state, bool include_nulls) +{ + cudf::size_type const n_rows{(cudf::size_type)state.range(0)}; + auto const dtype = cudf::type_to_id(); + auto const input = create_random_column(dtype, row_count{n_rows}); + if (!include_nulls) input->set_null_mask(rmm::device_buffer{}, 0); + + auto zero = cudf::make_fixed_width_scalar(0); + + for (auto _ : state) { + cuda_event_timer timer(state, true); + auto result = cudf::replace_nans(*input, *zero); + } +} + +#define NANS_BENCHMARK_DEFINE(name, type, nulls) \ + BENCHMARK_DEFINE_F(ReplaceNans, name) \ + (::benchmark::State & state) { BM_replace_nans(state, nulls); } \ + BENCHMARK_REGISTER_F(ReplaceNans, name) \ + ->UseManualTime() \ + ->Arg(10000) /* 10k */ \ + ->Arg(100000) /* 100k */ \ + ->Arg(1000000) /* 1M */ \ + ->Arg(10000000) /* 10M */ \ + ->Arg(100000000); /* 100M */ + +NANS_BENCHMARK_DEFINE(float32_nulls, float, true); +NANS_BENCHMARK_DEFINE(float64_nulls, double, true); +NANS_BENCHMARK_DEFINE(float32_no_nulls, float, false); +NANS_BENCHMARK_DEFINE(float64_no_nulls, double, false); diff --git a/cpp/benchmarks/search/contains_scalar.cpp b/cpp/benchmarks/search/contains_scalar.cpp new file mode 100644 index 0000000..8d3c3f5 --- 
/dev/null +++ b/cpp/benchmarks/search/contains_scalar.cpp @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include +#include +#include + +#include + +namespace { +template +std::unique_ptr create_column_data(cudf::size_type n_rows, bool has_nulls = false) +{ + data_profile profile = data_profile_builder().cardinality(0).distribution( + cudf::type_to_id(), distribution_id::UNIFORM, 0, 1000); + profile.set_null_probability(has_nulls ? 
std::optional{0.1} : std::nullopt); + + return create_random_column(cudf::type_to_id(), row_count{n_rows}, profile); +} + +} // namespace + +static void nvbench_contains_scalar(nvbench::state& state) +{ + using Type = int; + + auto const has_nulls = static_cast(state.get_int64("has_nulls")); + auto const size = state.get_int64("data_size"); + + auto const haystack = create_column_data(size, has_nulls); + auto const needle = cudf::make_fixed_width_scalar(size / 2); + + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + auto const stream_view = rmm::cuda_stream_view{launch.get_stream()}; + [[maybe_unused]] auto const result = cudf::detail::contains(*haystack, *needle, stream_view); + }); +} + +NVBENCH_BENCH(nvbench_contains_scalar) + .set_name("contains_scalar") + .add_int64_power_of_two_axis("data_size", {10, 12, 14, 16, 18, 20, 22, 24, 26}) + .add_int64_axis("has_nulls", {0, 1}); diff --git a/cpp/benchmarks/search/contains_table.cpp b/cpp/benchmarks/search/contains_table.cpp new file mode 100644 index 0000000..17702d0 --- /dev/null +++ b/cpp/benchmarks/search/contains_table.cpp @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include + +#include +#include +#include + +#include + +#include + +auto constexpr num_unique_elements = 1000; + +template +static void nvbench_contains_table(nvbench::state& state, nvbench::type_list) +{ + auto const size = state.get_int64("table_size"); + auto const dtype = cudf::type_to_id(); + double const null_probability = state.get_float64("null_probability"); + + auto builder = data_profile_builder().null_probability(null_probability); + if (dtype == cudf::type_id::LIST) { + builder.distribution(dtype, distribution_id::UNIFORM, 0, num_unique_elements) + .distribution(cudf::type_id::INT32, distribution_id::UNIFORM, 0, num_unique_elements) + .list_depth(1); + } else { + builder.distribution(dtype, distribution_id::UNIFORM, 0, num_unique_elements); + } + + auto const haystack = create_random_table( + {dtype}, table_size_bytes{static_cast(size)}, data_profile{builder}, 0); + auto const needles = create_random_table( + {dtype}, table_size_bytes{static_cast(size)}, data_profile{builder}, 1); + + auto mem_stats_logger = cudf::memory_stats_logger(); + + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + auto const stream_view = rmm::cuda_stream_view{launch.get_stream()}; + [[maybe_unused]] auto const result = + cudf::detail::contains(haystack->view(), + needles->view(), + cudf::null_equality::EQUAL, + cudf::nan_equality::ALL_EQUAL, + stream_view, + rmm::mr::get_current_device_resource()); + }); + + state.add_buffer_size( + mem_stats_logger.peak_memory_usage(), "peak_memory_usage", "peak_memory_usage"); +} + +NVBENCH_BENCH_TYPES(nvbench_contains_table, + NVBENCH_TYPE_AXES(nvbench::type_list)) + .set_name("contains_table") + .set_type_axes_names({"type"}) + .add_float64_axis("null_probability", {0.0, 0.1}) + .add_int64_axis("table_size", {10'000, 100'000, 1'000'000, 10'000'000}); diff --git a/cpp/benchmarks/search/search.cpp b/cpp/benchmarks/search/search.cpp new file mode 100644 index 0000000..68bfa26 --- /dev/null +++ 
b/cpp/benchmarks/search/search.cpp @@ -0,0 +1,151 @@ +/* + * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +#include +#include +#include +#include +#include + +class Search : public cudf::benchmark {}; + +void BM_column(benchmark::State& state, bool nulls) +{ + auto const column_size{static_cast(state.range(0))}; + auto const values_size = column_size; + + auto init_data = cudf::make_fixed_width_scalar(static_cast(0)); + auto init_value = cudf::make_fixed_width_scalar(static_cast(values_size)); + auto step = cudf::make_fixed_width_scalar(static_cast(-1)); + auto column = cudf::sequence(column_size, *init_data); + auto values = cudf::sequence(values_size, *init_value, *step); + if (nulls) { + auto [column_null_mask, column_null_count] = create_random_null_mask(column->size(), 0.1, 1); + column->set_null_mask(std::move(column_null_mask), column_null_count); + auto [values_null_mask, values_null_count] = create_random_null_mask(values->size(), 0.1, 2); + values->set_null_mask(std::move(values_null_mask), values_null_count); + } + + auto data_table = cudf::sort(cudf::table_view({*column})); + + for (auto _ : state) { + cuda_event_timer timer(state, true); + auto col = cudf::upper_bound(data_table->view(), + cudf::table_view({*values}), + {cudf::order::ASCENDING}, + {cudf::null_order::BEFORE}); + } +} + +BENCHMARK_DEFINE_F(Search, Column_AllValid)(::benchmark::State& state) { 
BM_column(state, false); } +BENCHMARK_DEFINE_F(Search, Column_Nulls)(::benchmark::State& state) { BM_column(state, true); } + +BENCHMARK_REGISTER_F(Search, Column_AllValid) + ->UseManualTime() + ->Unit(benchmark::kMillisecond) + ->Arg(100000000); + +BENCHMARK_REGISTER_F(Search, Column_Nulls) + ->UseManualTime() + ->Unit(benchmark::kMillisecond) + ->Arg(100000000); + +void BM_table(benchmark::State& state) +{ + using Type = float; + + auto const num_columns{static_cast(state.range(0))}; + auto const column_size{static_cast(state.range(1))}; + auto const values_size = column_size; + + data_profile profile = data_profile_builder().cardinality(0).null_probability(0.1).distribution( + cudf::type_to_id(), distribution_id::UNIFORM, 0, 100); + auto data_table = create_random_table( + cycle_dtypes({cudf::type_to_id()}, num_columns), row_count{column_size}, profile); + auto values_table = create_random_table( + cycle_dtypes({cudf::type_to_id()}, num_columns), row_count{values_size}, profile); + + std::vector orders(num_columns, cudf::order::ASCENDING); + std::vector null_orders(num_columns, cudf::null_order::BEFORE); + auto sorted = cudf::sort(*data_table); + + for (auto _ : state) { + cuda_event_timer timer(state, true); + auto col = cudf::lower_bound(sorted->view(), *values_table, orders, null_orders); + } +} + +BENCHMARK_DEFINE_F(Search, Table)(::benchmark::State& state) { BM_table(state); } + +static void CustomArguments(benchmark::internal::Benchmark* b) +{ + for (int num_cols = 1; num_cols <= 10; num_cols *= 2) + for (int col_size = 1000; col_size <= 100000000; col_size *= 10) + b->Args({num_cols, col_size}); +} + +BENCHMARK_REGISTER_F(Search, Table) + ->UseManualTime() + ->Unit(benchmark::kMillisecond) + ->Apply(CustomArguments); + +void BM_contains(benchmark::State& state, bool nulls) +{ + auto const column_size{static_cast(state.range(0))}; + auto const values_size = column_size; + + auto init_data = cudf::make_fixed_width_scalar(static_cast(0)); + auto init_value = 
cudf::make_fixed_width_scalar(static_cast(values_size)); + auto step = cudf::make_fixed_width_scalar(static_cast(-1)); + auto column = cudf::sequence(column_size, *init_data); + auto values = cudf::sequence(values_size, *init_value, *step); + if (nulls) { + auto [column_null_mask, column_null_count] = create_random_null_mask(column->size(), 0.1, 1); + column->set_null_mask(std::move(column_null_mask), column_null_count); + auto [values_null_mask, values_null_count] = create_random_null_mask(values->size(), 0.1, 2); + values->set_null_mask(std::move(values_null_mask), values_null_count); + } + + for (auto _ : state) { + cuda_event_timer timer(state, true); + auto col = cudf::contains(*column, *values); + } +} + +BENCHMARK_DEFINE_F(Search, ColumnContains_AllValid)(::benchmark::State& state) +{ + BM_contains(state, false); +} +BENCHMARK_DEFINE_F(Search, ColumnContains_Nulls)(::benchmark::State& state) +{ + BM_contains(state, true); +} + +BENCHMARK_REGISTER_F(Search, ColumnContains_AllValid) + ->RangeMultiplier(8) + ->Ranges({{1 << 10, 1 << 26}}) + ->UseManualTime() + ->Unit(benchmark::kMillisecond); + +BENCHMARK_REGISTER_F(Search, ColumnContains_Nulls) + ->RangeMultiplier(8) + ->Ranges({{1 << 10, 1 << 26}}) + ->UseManualTime() + ->Unit(benchmark::kMillisecond); diff --git a/cpp/benchmarks/sort/nested_types_common.hpp b/cpp/benchmarks/sort/nested_types_common.hpp new file mode 100644 index 0000000..93853ba --- /dev/null +++ b/cpp/benchmarks/sort/nested_types_common.hpp @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +#include + +// This error appears in GCC 11.3 and may be a compiler bug or nvbench bug. +#pragma GCC diagnostic ignored "-Wmaybe-uninitialized" +#include +#pragma GCC diagnostic pop + +#include + +inline std::unique_ptr create_lists_data(nvbench::state& state, + cudf::size_type const num_columns = 1, + cudf::size_type const min_val = 0, + cudf::size_type const max_val = 5) +{ + size_t const size_bytes(state.get_int64("size_bytes")); + cudf::size_type const depth{static_cast(state.get_int64("depth"))}; + auto const null_frequency{state.get_float64("null_frequency")}; + + data_profile table_profile; + table_profile.set_distribution_params( + cudf::type_id::LIST, distribution_id::UNIFORM, min_val, max_val); + table_profile.set_list_depth(depth); + table_profile.set_null_probability(null_frequency); + return create_random_table(std::vector(num_columns, cudf::type_id::LIST), + table_size_bytes{size_bytes}, + table_profile); +} + +inline std::unique_ptr create_structs_data(nvbench::state& state, + cudf::size_type const n_cols = 1) +{ + using Type = int; + using column_wrapper = cudf::test::fixed_width_column_wrapper; + std::default_random_engine generator; + std::uniform_int_distribution distribution(0, 100); + + cudf::size_type const n_rows{static_cast(state.get_int64("NumRows"))}; + cudf::size_type const depth{static_cast(state.get_int64("Depth"))}; + bool const nulls{static_cast(state.get_int64("Nulls"))}; + + // Create columns with values in the range [0,100) + std::vector columns; + columns.reserve(n_cols); + 
std::generate_n(std::back_inserter(columns), n_cols, [&]() { + auto const elements = cudf::detail::make_counting_transform_iterator( + 0, [&](auto row) { return distribution(generator); }); + if (!nulls) return column_wrapper(elements, elements + n_rows); + auto valids = + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 10 != 0; }); + return column_wrapper(elements, elements + n_rows, valids); + }); + + std::vector> cols; + std::transform(columns.begin(), columns.end(), std::back_inserter(cols), [](column_wrapper& col) { + return col.release(); + }); + + std::vector> child_cols = std::move(cols); + // Nest the child columns in a struct, then nest that struct column inside another + // struct column up to the desired depth + for (int i = 0; i < depth; i++) { + std::vector struct_validity; + std::uniform_int_distribution bool_distribution(0, 100 * (i + 1)); + std::generate_n( + std::back_inserter(struct_validity), n_rows, [&]() { return bool_distribution(generator); }); + cudf::test::structs_column_wrapper struct_col(std::move(child_cols), struct_validity); + child_cols = std::vector>{}; + child_cols.push_back(struct_col.release()); + } + + // Create table view + return std::make_unique(std::move(child_cols)); +} diff --git a/cpp/benchmarks/sort/rank.cpp b/cpp/benchmarks/sort/rank.cpp new file mode 100644 index 0000000..6231c70 --- /dev/null +++ b/cpp/benchmarks/sort/rank.cpp @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +#include +#include +#include + +class Rank : public cudf::benchmark {}; + +static void BM_rank(benchmark::State& state, bool nulls) +{ + using Type = int; + cudf::size_type const n_rows{(cudf::size_type)state.range(0)}; + + // Create columns with values in the range [0,100) + data_profile profile = data_profile_builder().cardinality(0).distribution( + cudf::type_to_id(), distribution_id::UNIFORM, 0, 100); + profile.set_null_probability(nulls ? std::optional{0.2} : std::nullopt); + auto keys = create_random_column(cudf::type_to_id(), row_count{n_rows}, profile); + + for (auto _ : state) { + cuda_event_timer raii(state, true, cudf::get_default_stream()); + + auto result = cudf::rank(keys->view(), + cudf::rank_method::FIRST, + cudf::order::ASCENDING, + nulls ? cudf::null_policy::INCLUDE : cudf::null_policy::EXCLUDE, + cudf::null_order::AFTER, + false); + } +} + +#define RANK_BENCHMARK_DEFINE(name, nulls) \ + BENCHMARK_DEFINE_F(Rank, name) \ + (::benchmark::State & st) { BM_rank(st, nulls); } \ + BENCHMARK_REGISTER_F(Rank, name) \ + ->RangeMultiplier(8) \ + ->Ranges({{1 << 10, 1 << 26}}) \ + ->UseManualTime() \ + ->Unit(benchmark::kMillisecond); + +RANK_BENCHMARK_DEFINE(no_nulls, false) +RANK_BENCHMARK_DEFINE(nulls, true) diff --git a/cpp/benchmarks/sort/rank_lists.cpp b/cpp/benchmarks/sort/rank_lists.cpp new file mode 100644 index 0000000..49dc409 --- /dev/null +++ b/cpp/benchmarks/sort/rank_lists.cpp @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "nested_types_common.hpp" +#include "rank_types_common.hpp" + +#include + +#include + +#include + +template +void nvbench_rank_lists(nvbench::state& state, nvbench::type_list>) +{ + auto const table = create_lists_data(state); + + auto const null_frequency{state.get_float64("null_frequency")}; + + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + cudf::rank(table->view().column(0), + method, + cudf::order::ASCENDING, + null_frequency ? cudf::null_policy::INCLUDE : cudf::null_policy::EXCLUDE, + cudf::null_order::AFTER, + rmm::mr::get_current_device_resource()); + }); +} + +NVBENCH_BENCH_TYPES(nvbench_rank_lists, NVBENCH_TYPE_AXES(methods)) + .set_name("rank_lists") + .add_int64_power_of_two_axis("size_bytes", {10, 18, 24, 28}) + .add_int64_axis("depth", {1, 4}) + .add_float64_axis("null_frequency", {0, 0.2}); diff --git a/cpp/benchmarks/sort/rank_structs.cpp b/cpp/benchmarks/sort/rank_structs.cpp new file mode 100644 index 0000000..85427e2 --- /dev/null +++ b/cpp/benchmarks/sort/rank_structs.cpp @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "nested_types_common.hpp" +#include "rank_types_common.hpp" + +#include + +#include + +template +void nvbench_rank_structs(nvbench::state& state, nvbench::type_list>) +{ + auto const table = create_structs_data(state); + + bool const nulls{static_cast(state.get_int64("Nulls"))}; + + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + cudf::rank(table->view().column(0), + method, + cudf::order::ASCENDING, + nulls ? cudf::null_policy::INCLUDE : cudf::null_policy::EXCLUDE, + cudf::null_order::AFTER, + rmm::mr::get_current_device_resource()); + }); +} + +NVBENCH_BENCH_TYPES(nvbench_rank_structs, NVBENCH_TYPE_AXES(methods)) + .set_name("rank_structs") + .add_int64_power_of_two_axis("NumRows", {10, 18, 26}) + .add_int64_axis("Depth", {0, 1, 8}) + .add_int64_axis("Nulls", {0, 1}); diff --git a/cpp/benchmarks/sort/rank_types_common.hpp b/cpp/benchmarks/sort/rank_types_common.hpp new file mode 100644 index 0000000..adb5860 --- /dev/null +++ b/cpp/benchmarks/sort/rank_types_common.hpp @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#pragma once
+
+#include <cudf/sorting.hpp>
+
+#include <nvbench/nvbench.cuh>
+
+enum class rank_method : int32_t {};
+
+NVBENCH_DECLARE_ENUM_TYPE_STRINGS(
+  cudf::rank_method,
+  [](cudf::rank_method value) {
+    switch (value) {
+      case cudf::rank_method::FIRST: return "FIRST";
+      case cudf::rank_method::AVERAGE: return "AVERAGE";
+      case cudf::rank_method::MIN: return "MIN";
+      case cudf::rank_method::MAX: return "MAX";
+      case cudf::rank_method::DENSE: return "DENSE";
+      default: return "unknown";
+    }
+  },
+  [](cudf::rank_method value) {
+    switch (value) {
+      case cudf::rank_method::FIRST: return "cudf::rank_method::FIRST";
+      case cudf::rank_method::AVERAGE: return "cudf::rank_method::AVERAGE";
+      case cudf::rank_method::MIN: return "cudf::rank_method::MIN";
+      case cudf::rank_method::MAX: return "cudf::rank_method::MAX";
+      case cudf::rank_method::DENSE: return "cudf::rank_method::DENSE";
+      default: return "unknown";
+    }
+  })
+
+using methods = nvbench::enum_type_list<cudf::rank_method::FIRST,
+                                        cudf::rank_method::AVERAGE,
+                                        cudf::rank_method::MIN,
+                                        cudf::rank_method::MAX,
+                                        cudf::rank_method::DENSE>;
diff --git a/cpp/benchmarks/sort/segmented_sort.cpp b/cpp/benchmarks/sort/segmented_sort.cpp
new file mode 100644
index 0000000..2e83525
--- /dev/null
+++ b/cpp/benchmarks/sort/segmented_sort.cpp
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +#include + +#include +#include +#include +#include + +#include + +void nvbench_segmented_sort(nvbench::state& state) +{ + auto const stable = static_cast(state.get_int64("stable")); + auto const dtype = cudf::type_to_id(); + auto const size_bytes = static_cast(state.get_int64("size_bytes")); + auto const null_freq = state.get_float64("null_frequency"); + auto const row_width = static_cast(state.get_int64("row_width")); + + data_profile const table_profile = + data_profile_builder().null_probability(null_freq).distribution( + dtype, distribution_id::UNIFORM, 0, 10); + auto const input = + create_random_table({cudf::type_id::INT32}, table_size_bytes{size_bytes}, table_profile); + auto const rows = input->num_rows(); + + auto const segments = cudf::sequence((rows / row_width) + 1, + cudf::numeric_scalar(0), + cudf::numeric_scalar(row_width)); + + state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); + state.add_element_count(size_bytes, "bytes"); + state.add_global_memory_reads(rows * row_width); + state.add_global_memory_writes(rows); + + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + if (stable) + cudf::stable_segmented_sorted_order(*input, *segments); + else + cudf::segmented_sorted_order(*input, *segments); + }); +} + +NVBENCH_BENCH(nvbench_segmented_sort) + .set_name("segmented_sort") + .add_int64_axis("stable", {0, 1}) + .add_int64_power_of_two_axis("size_bytes", {16, 18, 20, 22, 24, 28}) + .add_float64_axis("null_frequency", {0, 0.1}) + .add_int64_axis("row_width", {16, 128, 1024}); diff --git a/cpp/benchmarks/sort/sort.cpp b/cpp/benchmarks/sort/sort.cpp new file mode 100644 index 0000000..267a740 --- /dev/null +++ b/cpp/benchmarks/sort/sort.cpp @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +#include +#include + +template +class Sort : public cudf::benchmark {}; + +template +static void BM_sort(benchmark::State& state, bool nulls) +{ + using Type = int; + auto const dtype = cudf::type_to_id(); + cudf::size_type const n_rows{(cudf::size_type)state.range(0)}; + cudf::size_type const n_cols{(cudf::size_type)state.range(1)}; + + // Create table with values in the range [0,100) + data_profile const profile = data_profile_builder() + .cardinality(0) + .null_probability(nulls ? std::optional{0.01} : std::nullopt) + .distribution(dtype, distribution_id::UNIFORM, 0, 100); + auto input_table = create_random_table(cycle_dtypes({dtype}, n_cols), row_count{n_rows}, profile); + cudf::table_view input{*input_table}; + + for (auto _ : state) { + cuda_event_timer raii(state, true, cudf::get_default_stream()); + + auto result = (stable) ? 
cudf::stable_sorted_order(input) : cudf::sorted_order(input); + } +} + +#define SORT_BENCHMARK_DEFINE(name, stable, nulls) \ + BENCHMARK_TEMPLATE_DEFINE_F(Sort, name, stable) \ + (::benchmark::State & st) { BM_sort(st, nulls); } \ + BENCHMARK_REGISTER_F(Sort, name) \ + ->RangeMultiplier(8) \ + ->Ranges({{1 << 10, 1 << 26}, {1, 8}}) \ + ->UseManualTime() \ + ->Unit(benchmark::kMillisecond); + +SORT_BENCHMARK_DEFINE(unstable_no_nulls, false, false) +SORT_BENCHMARK_DEFINE(stable_no_nulls, true, false) +SORT_BENCHMARK_DEFINE(unstable, false, true) +SORT_BENCHMARK_DEFINE(stable, true, true) diff --git a/cpp/benchmarks/sort/sort_lists.cpp b/cpp/benchmarks/sort/sort_lists.cpp new file mode 100644 index 0000000..4b04323 --- /dev/null +++ b/cpp/benchmarks/sort/sort_lists.cpp @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "nested_types_common.hpp" + +#include + +#include + +namespace { +constexpr cudf::size_type min_val = 0; +constexpr cudf::size_type max_val = 100; + +void sort_multiple_lists(nvbench::state& state) +{ + auto const num_columns = static_cast(state.get_int64("num_columns")); + auto const input_table = create_lists_data(state, num_columns, min_val, max_val); + auto const stream = cudf::get_default_stream(); + + state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value())); + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + cudf::detail::sorted_order( + *input_table, {}, {}, stream, rmm::mr::get_current_device_resource()); + }); +} + +void sort_lists_of_structs(nvbench::state& state) +{ + auto const num_columns = static_cast(state.get_int64("num_columns")); + auto const lists_table = create_lists_data(state, num_columns, min_val, max_val); + + // After having a table of (multiple) lists columns, convert those lists columns into lists of + // structs columns. The children of these structs columns are also children of the original lists + // columns. + // Such resulted lists-of-structs columns are very similar to the original lists-of-integers + // columns so their benchmarks can be somewhat comparable. + std::vector lists_of_structs; + for (auto const& col : lists_table->view()) { + auto const child = col.child(cudf::lists_column_view::child_column_index); + + // Put the child column under a struct column having the same null mask/null count. 
+ auto const new_child = cudf::column_view{cudf::data_type{cudf::type_id::STRUCT}, + child.size(), + nullptr, + child.null_mask(), + child.null_count(), + child.offset(), + {child}}; + auto const converted_col = + cudf::column_view{cudf::data_type{cudf::type_id::LIST}, + col.size(), + nullptr, + col.null_mask(), + col.null_count(), + col.offset(), + {col.child(cudf::lists_column_view::offsets_column_index), new_child}}; + lists_of_structs.push_back(converted_col); + } + + auto const input_table = cudf::table_view{lists_of_structs}; + auto const stream = cudf::get_default_stream(); + + state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value())); + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + rmm::cuda_stream_view stream_view{launch.get_stream()}; + cudf::detail::sorted_order(input_table, {}, {}, stream, rmm::mr::get_current_device_resource()); + }); +} + +} // namespace + +void nvbench_sort_lists(nvbench::state& state) +{ + auto const has_lists_of_structs = state.get_int64("lists_of_structs") > 0; + if (has_lists_of_structs) { + sort_lists_of_structs(state); + } else { + sort_multiple_lists(state); + } +} + +NVBENCH_BENCH(nvbench_sort_lists) + .set_name("sort_list") + .add_int64_power_of_two_axis("size_bytes", {10, 18, 24, 28}) + .add_int64_axis("depth", {1, 4}) + .add_int64_axis("num_columns", {1}) + .add_int64_axis("lists_of_structs", {0, 1}) + .add_float64_axis("null_frequency", {0, 0.2}); diff --git a/cpp/benchmarks/sort/sort_strings.cpp b/cpp/benchmarks/sort/sort_strings.cpp new file mode 100644 index 0000000..dde1068 --- /dev/null +++ b/cpp/benchmarks/sort/sort_strings.cpp @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +#include +#include +#include + +class Sort : public cudf::benchmark {}; + +static void BM_sort(benchmark::State& state) +{ + cudf::size_type const n_rows{(cudf::size_type)state.range(0)}; + + auto const table = create_random_table({cudf::type_id::STRING}, row_count{n_rows}); + + for (auto _ : state) { + cuda_event_timer raii(state, true, cudf::get_default_stream()); + cudf::sort(table->view()); + } +} + +#define SORT_BENCHMARK_DEFINE(name) \ + BENCHMARK_DEFINE_F(Sort, name) \ + (::benchmark::State & st) { BM_sort(st); } \ + BENCHMARK_REGISTER_F(Sort, name) \ + ->RangeMultiplier(8) \ + ->Ranges({{1 << 10, 1 << 24}}) \ + ->UseManualTime() \ + ->Unit(benchmark::kMillisecond); + +SORT_BENCHMARK_DEFINE(strings) diff --git a/cpp/benchmarks/sort/sort_structs.cpp b/cpp/benchmarks/sort/sort_structs.cpp new file mode 100644 index 0000000..1d54fa4 --- /dev/null +++ b/cpp/benchmarks/sort/sort_structs.cpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nested_types_common.hpp"
+
+#include <cudf/detail/sorting.hpp>
+
+#include <nvbench/nvbench.cuh>
+
+void nvbench_sort_struct(nvbench::state& state)
+{
+  auto const input = create_structs_data(state);
+
+  state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
+    rmm::cuda_stream_view stream_view{launch.get_stream()};
+    cudf::detail::sorted_order(*input, {}, {}, stream_view, rmm::mr::get_current_device_resource());
+  });
+}
+
+NVBENCH_BENCH(nvbench_sort_struct)
+  .set_name("sort_struct")
+  .add_int64_power_of_two_axis("NumRows", {10, 18, 26})
+  .add_int64_axis("Depth", {0, 1, 8})
+  .add_int64_axis("Nulls", {0, 1});
diff --git a/cpp/benchmarks/stream_compaction/apply_boolean_mask.cpp b/cpp/benchmarks/stream_compaction/apply_boolean_mask.cpp
new file mode 100644
index 0000000..f78aa9f
--- /dev/null
+++ b/cpp/benchmarks/stream_compaction/apply_boolean_mask.cpp
@@ -0,0 +1,133 @@
+/*
+ * Copyright (c) 2019-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +#include +#include +#include + +#include + +namespace { + +constexpr cudf::size_type hundredM = 1e8; +constexpr cudf::size_type tenM = 1e7; +constexpr cudf::size_type tenK = 1e4; +constexpr cudf::size_type fifty_percent = 50; + +void percent_range(benchmark::internal::Benchmark* b) +{ + b->Unit(benchmark::kMillisecond); + for (int percent = 0; percent <= 100; percent += 10) + b->Args({hundredM, percent}); +} + +void size_range(benchmark::internal::Benchmark* b) +{ + b->Unit(benchmark::kMillisecond); + for (int size = tenK; size <= hundredM; size *= 10) + b->Args({size, fifty_percent}); +} + +template +void calculate_bandwidth(benchmark::State& state, cudf::size_type num_columns) +{ + cudf::size_type const column_size{static_cast(state.range(0))}; + cudf::size_type const percent_true{static_cast(state.range(1))}; + + float const fraction = percent_true / 100.f; + cudf::size_type const column_size_out = fraction * column_size; + int64_t const mask_size = + sizeof(bool) * column_size + cudf::bitmask_allocation_size_bytes(column_size); + int64_t const validity_bytes_in = (fraction >= 1.0f / 32) + ? 
cudf::bitmask_allocation_size_bytes(column_size) + : 4 * column_size_out; + int64_t const validity_bytes_out = cudf::bitmask_allocation_size_bytes(column_size_out); + int64_t const column_bytes_out = sizeof(T) * column_size_out; + int64_t const column_bytes_in = column_bytes_out; // we only read unmasked inputs + + int64_t const bytes_read = + (column_bytes_in + validity_bytes_in) * num_columns + // reading columns + mask_size; // reading boolean mask + int64_t const bytes_written = + (column_bytes_out + validity_bytes_out) * num_columns; // writing columns + + state.SetItemsProcessed(state.iterations() * column_size * num_columns); + state.SetBytesProcessed(static_cast(state.iterations()) * (bytes_read + bytes_written)); +} + +} // namespace + +template +void BM_apply_boolean_mask(benchmark::State& state, cudf::size_type num_columns) +{ + cudf::size_type const column_size{static_cast(state.range(0))}; + cudf::size_type const percent_true{static_cast(state.range(1))}; + + data_profile profile = data_profile_builder().cardinality(0).null_probability(0.0).distribution( + cudf::type_to_id(), distribution_id::UNIFORM, 0, 100); + + auto source_table = create_random_table( + cycle_dtypes({cudf::type_to_id()}, num_columns), row_count{column_size}, profile); + + profile.set_bool_probability_true(percent_true / 100.0); + profile.set_null_probability(std::nullopt); // no null mask + auto mask = create_random_column(cudf::type_id::BOOL8, row_count{column_size}, profile); + + for (auto _ : state) { + cuda_event_timer raii(state, true); + auto result = cudf::apply_boolean_mask(*source_table, mask->view()); + } + + calculate_bandwidth(state, num_columns); +} + +template +class ApplyBooleanMask : public cudf::benchmark { + public: + using TypeParam = T; +}; + +#define ABM_BENCHMARK_DEFINE(name, type, n_columns) \ + BENCHMARK_TEMPLATE_DEFINE_F(ApplyBooleanMask, name, type)(::benchmark::State & st) \ + { \ + BM_apply_boolean_mask(st, n_columns); \ + } + 
+ABM_BENCHMARK_DEFINE(float_1_col, float, 1); +ABM_BENCHMARK_DEFINE(float_2_col, float, 2); +ABM_BENCHMARK_DEFINE(float_4_col, float, 4); + +// shmoo 1, 2, 4 column float across percentage true +BENCHMARK_REGISTER_F(ApplyBooleanMask, float_1_col)->Apply(percent_range); +BENCHMARK_REGISTER_F(ApplyBooleanMask, float_2_col)->Apply(percent_range); +BENCHMARK_REGISTER_F(ApplyBooleanMask, float_4_col)->Apply(percent_range); + +// shmoo 1, 2, 4 column float across column sizes with 50% true +BENCHMARK_REGISTER_F(ApplyBooleanMask, float_1_col)->Apply(size_range); +BENCHMARK_REGISTER_F(ApplyBooleanMask, float_2_col)->Apply(size_range); +BENCHMARK_REGISTER_F(ApplyBooleanMask, float_4_col)->Apply(size_range); + +// spot benchmark other types +ABM_BENCHMARK_DEFINE(int8_1_col, int8_t, 1); +ABM_BENCHMARK_DEFINE(int16_1_col, int16_t, 1); +ABM_BENCHMARK_DEFINE(int32_1_col, int32_t, 1); +ABM_BENCHMARK_DEFINE(int64_1_col, int64_t, 1); +ABM_BENCHMARK_DEFINE(double_1_col, double, 1); +BENCHMARK_REGISTER_F(ApplyBooleanMask, int8_1_col)->Args({tenM, fifty_percent}); +BENCHMARK_REGISTER_F(ApplyBooleanMask, int16_1_col)->Args({tenM, fifty_percent}); +BENCHMARK_REGISTER_F(ApplyBooleanMask, int32_1_col)->Args({tenM, fifty_percent}); +BENCHMARK_REGISTER_F(ApplyBooleanMask, int64_1_col)->Args({tenM, fifty_percent}); +BENCHMARK_REGISTER_F(ApplyBooleanMask, double_1_col)->Args({tenM, fifty_percent}); diff --git a/cpp/benchmarks/stream_compaction/distinct.cpp b/cpp/benchmarks/stream_compaction/distinct.cpp new file mode 100644 index 0000000..c04b651 --- /dev/null +++ b/cpp/benchmarks/stream_compaction/distinct.cpp @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include +#include +#include +#include + +#include + +NVBENCH_DECLARE_TYPE_STRINGS(cudf::timestamp_ms, "cudf::timestamp_ms", "cudf::timestamp_ms"); + +template +void nvbench_distinct(nvbench::state& state, nvbench::type_list) +{ + cudf::size_type const num_rows = state.get_int64("NumRows"); + + data_profile profile = data_profile_builder().cardinality(0).null_probability(0.01).distribution( + cudf::type_to_id(), distribution_id::UNIFORM, 0, 100); + + auto source_column = create_random_column(cudf::type_to_id(), row_count{num_rows}, profile); + + auto input_column = source_column->view(); + auto input_table = cudf::table_view({input_column, input_column, input_column, input_column}); + + state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + auto result = cudf::distinct(input_table, + {0}, + cudf::duplicate_keep_option::KEEP_ANY, + cudf::null_equality::EQUAL, + cudf::nan_equality::ALL_EQUAL); + }); +} + +using data_type = nvbench::type_list; + +NVBENCH_BENCH_TYPES(nvbench_distinct, NVBENCH_TYPE_AXES(data_type)) + .set_name("distinct") + .set_type_axes_names({"Type"}) + .add_int64_axis("NumRows", {10'000, 100'000, 1'000'000, 10'000'000}); + +template +void nvbench_distinct_list(nvbench::state& state, nvbench::type_list) +{ + auto const size = state.get_int64("ColumnSize"); + auto const dtype = cudf::type_to_id(); + double const null_probability = state.get_float64("null_probability"); + + auto builder = 
data_profile_builder().null_probability(null_probability); + if (dtype == cudf::type_id::LIST) { + builder.distribution(dtype, distribution_id::UNIFORM, 0, 4) + .distribution(cudf::type_id::INT32, distribution_id::UNIFORM, 0, 4) + .list_depth(1); + } else { + // We're comparing distinct() on a non-nested column to that on a list column with the same + // number of distinct rows. The max list size is 4 and the number of distinct values in the + // list's child is 5. So the number of distinct rows in the list = 1 + 5 + 5^2 + 5^3 + 5^4 = 781 + // We want this column to also have 781 distinct values. + builder.distribution(dtype, distribution_id::UNIFORM, 0, 781); + } + + auto const table = create_random_table( + {dtype}, table_size_bytes{static_cast(size)}, data_profile{builder}, 0); + + state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + auto result = cudf::distinct(*table, + {0}, + cudf::duplicate_keep_option::KEEP_ANY, + cudf::null_equality::EQUAL, + cudf::nan_equality::ALL_EQUAL); + }); +} + +NVBENCH_BENCH_TYPES(nvbench_distinct_list, + NVBENCH_TYPE_AXES(nvbench::type_list)) + .set_name("distinct_list") + .set_type_axes_names({"Type"}) + .add_float64_axis("null_probability", {0.0, 0.1}) + .add_int64_axis("ColumnSize", {100'000'000}); diff --git a/cpp/benchmarks/stream_compaction/distinct_count.cpp b/cpp/benchmarks/stream_compaction/distinct_count.cpp new file mode 100644 index 0000000..2b2c901 --- /dev/null +++ b/cpp/benchmarks/stream_compaction/distinct_count.cpp @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include + +#include + +template +static void bench_distinct_count(nvbench::state& state, nvbench::type_list) +{ + auto const dtype = cudf::type_to_id(); + auto const size = static_cast(state.get_int64("num_rows")); + auto const null_probability = state.get_float64("null_probability"); + + data_profile profile = + data_profile_builder().distribution(dtype, distribution_id::UNIFORM, 0, size / 100); + if (null_probability > 0) { + profile.set_null_probability({null_probability}); + } else { + profile.set_null_probability(std::nullopt); + } + + auto const data_table = create_random_table({dtype}, row_count{size}, profile); + auto const& data_column = data_table->get_column(0); + auto const input_table = cudf::table_view{{data_column, data_column, data_column}}; + + auto mem_stats_logger = cudf::memory_stats_logger(); // init stats logger + state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + cudf::distinct_count(input_table, cudf::null_equality::EQUAL); + }); + state.add_buffer_size( + mem_stats_logger.peak_memory_usage(), "peak_memory_usage", "peak_memory_usage"); +} + +using data_type = nvbench::type_list; + +NVBENCH_BENCH_TYPES(bench_distinct_count, NVBENCH_TYPE_AXES(data_type)) + .set_name("distinct_count") + .add_int64_axis("num_rows", + { + 10000, // 10k + 100000, // 100k + 1000000, // 1M + 10000000, // 10M + 100000000, // 100M + }) + .add_float64_axis("null_probability", {0, 0.5}); diff --git 
a/cpp/benchmarks/stream_compaction/stable_distinct.cpp b/cpp/benchmarks/stream_compaction/stable_distinct.cpp new file mode 100644 index 0000000..bcee304 --- /dev/null +++ b/cpp/benchmarks/stream_compaction/stable_distinct.cpp @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include +#include +#include +#include + +#include + +NVBENCH_DECLARE_TYPE_STRINGS(cudf::timestamp_ms, "cudf::timestamp_ms", "cudf::timestamp_ms"); + +template +void nvbench_stable_distinct(nvbench::state& state, nvbench::type_list) +{ + cudf::size_type const num_rows = state.get_int64("NumRows"); + + data_profile profile = data_profile_builder().cardinality(0).null_probability(0.01).distribution( + cudf::type_to_id(), distribution_id::UNIFORM, 0, 100); + + auto source_column = create_random_column(cudf::type_to_id(), row_count{num_rows}, profile); + + auto input_column = source_column->view(); + auto input_table = cudf::table_view({input_column, input_column, input_column, input_column}); + + state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + auto result = cudf::stable_distinct(input_table, + {0}, + cudf::duplicate_keep_option::KEEP_ANY, + cudf::null_equality::EQUAL, + cudf::nan_equality::ALL_EQUAL); + }); +} + +using data_type = nvbench::type_list; + 
+NVBENCH_BENCH_TYPES(nvbench_stable_distinct, NVBENCH_TYPE_AXES(data_type)) + .set_name("stable_distinct") + .set_type_axes_names({"Type"}) + .add_int64_axis("NumRows", {10'000, 100'000, 1'000'000, 10'000'000}); + +template +void nvbench_stable_distinct_list(nvbench::state& state, nvbench::type_list) +{ + auto const size = state.get_int64("ColumnSize"); + auto const dtype = cudf::type_to_id(); + double const null_probability = state.get_float64("null_probability"); + + auto builder = data_profile_builder().null_probability(null_probability); + if (dtype == cudf::type_id::LIST) { + builder.distribution(dtype, distribution_id::UNIFORM, 0, 4) + .distribution(cudf::type_id::INT32, distribution_id::UNIFORM, 0, 4) + .list_depth(1); + } else { + // We're comparing stable_distinct() on a non-nested column to that on a list column with the + // same number of stable_distinct rows. The max list size is 4 and the number of distinct values + // in the list's child is 5. So the number of distinct rows in the list = 1 + 5 + 5^2 + 5^3 + + // 5^4 = 781 We want this column to also have 781 distinct values. 
+ builder.distribution(dtype, distribution_id::UNIFORM, 0, 781); + } + + auto const table = create_random_table( + {dtype}, table_size_bytes{static_cast(size)}, data_profile{builder}, 0); + + state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + auto result = cudf::stable_distinct(*table, + {0}, + cudf::duplicate_keep_option::KEEP_ANY, + cudf::null_equality::EQUAL, + cudf::nan_equality::ALL_EQUAL); + }); +} + +NVBENCH_BENCH_TYPES(nvbench_stable_distinct_list, + NVBENCH_TYPE_AXES(nvbench::type_list)) + .set_name("stable_distinct_list") + .set_type_axes_names({"Type"}) + .add_float64_axis("null_probability", {0.0, 0.1}) + .add_int64_axis("ColumnSize", {100'000'000}); diff --git a/cpp/benchmarks/stream_compaction/unique.cpp b/cpp/benchmarks/stream_compaction/unique.cpp new file mode 100644 index 0000000..854bc17 --- /dev/null +++ b/cpp/benchmarks/stream_compaction/unique.cpp @@ -0,0 +1,145 @@ +/* + * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +#include +#include +#include +#include +#include +#include + +#include + +// necessary for custom enum types +// see: https://github.com/NVIDIA/nvbench/blob/main/examples/enums.cu +NVBENCH_DECLARE_ENUM_TYPE_STRINGS( + // Enum type: + cudf::duplicate_keep_option, + // Callable to generate input strings: + [](cudf::duplicate_keep_option option) { + switch (option) { + case cudf::duplicate_keep_option::KEEP_FIRST: return "KEEP_FIRST"; + case cudf::duplicate_keep_option::KEEP_LAST: return "KEEP_LAST"; + case cudf::duplicate_keep_option::KEEP_NONE: return "KEEP_NONE"; + default: return "ERROR"; + } + }, + // Callable to generate descriptions: + [](auto) { return std::string{}; }) + +NVBENCH_DECLARE_TYPE_STRINGS(cudf::timestamp_ms, "cudf::timestamp_ms", "cudf::timestamp_ms"); + +template +void nvbench_unique(nvbench::state& state, nvbench::type_list>) +{ + // KEEP_FIRST and KEEP_ANY are equivalent for unique + if constexpr (not std::is_same_v and + Keep == cudf::duplicate_keep_option::KEEP_ANY) { + state.skip("Skip unwanted benchmarks."); + } + + cudf::size_type const num_rows = state.get_int64("NumRows"); + auto const sorting = state.get_int64("Sort"); + + data_profile profile = data_profile_builder().cardinality(0).null_probability(0.01).distribution( + cudf::type_to_id(), distribution_id::UNIFORM, 0, num_rows / 100); + + auto source_column = create_random_column(cudf::type_to_id(), row_count{num_rows}, profile); + + auto input_column = source_column->view(); + auto input_table = cudf::table_view({input_column, input_column, input_column, input_column}); + + auto const run_bench = [&](auto const input) { + state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + auto result = cudf::unique(input, {0}, Keep, cudf::null_equality::EQUAL); + }); + }; + + if (sorting) { + auto const sort_order = cudf::sorted_order(input_table); + auto const 
sort_table = cudf::gather(input_table, *sort_order); + run_bench(*sort_table); + } else { + run_bench(input_table); + } +} + +using data_type = nvbench::type_list; +using keep_option = nvbench::enum_type_list; + +NVBENCH_BENCH_TYPES(nvbench_unique, NVBENCH_TYPE_AXES(data_type, keep_option)) + .set_name("unique") + .set_type_axes_names({"Type", "KeepOption"}) + .add_int64_axis("NumRows", {10'000, 100'000, 1'000'000, 10'000'000}) + .add_int64_axis("Sort", {0, 1}); + +template +void nvbench_unique_list(nvbench::state& state, nvbench::type_list>) +{ + // KEEP_FIRST and KEEP_ANY are equivalent for unique + if constexpr (Keep == cudf::duplicate_keep_option::KEEP_ANY) { + state.skip("Skip unwanted benchmarks."); + } + + auto const size = state.get_int64("ColumnSize"); + auto const dtype = cudf::type_to_id(); + double const null_probability = state.get_float64("null_probability"); + auto const sorting = state.get_int64("Sort"); + + auto builder = data_profile_builder().null_probability(null_probability); + if (dtype == cudf::type_id::LIST) { + builder.distribution(dtype, distribution_id::UNIFORM, 0, 4) + .distribution(cudf::type_id::INT32, distribution_id::UNIFORM, 0, 4) + .list_depth(1); + } else { + // We're comparing unique() on a non-nested column to that on a list column with the same + // number of unique rows. The max list size is 4 and the number of unique values in the + // list's child is 5. So the number of unique rows in the list = 1 + 5 + 5^2 + 5^3 + 5^4 = 781 + // We want this column to also have 781 unique values. 
+ builder.distribution(dtype, distribution_id::UNIFORM, 0, 781); + } + + auto const input_table = create_random_table( + {dtype}, table_size_bytes{static_cast(size)}, data_profile{builder}, 0); + + auto const run_bench = [&](auto const input) { + state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + auto result = cudf::unique(input, {0}, Keep, cudf::null_equality::EQUAL); + }); + }; + + if (sorting) { + auto const sort_order = cudf::sorted_order(*input_table); + auto const sort_table = cudf::gather(*input_table, *sort_order); + run_bench(*sort_table); + } else { + run_bench(*input_table); + } +} + +NVBENCH_BENCH_TYPES(nvbench_unique_list, + NVBENCH_TYPE_AXES(nvbench::type_list, keep_option)) + .set_name("unique_list") + .set_type_axes_names({"Type", "KeepOption"}) + .add_float64_axis("null_probability", {0.0, 0.1}) + .add_int64_axis("ColumnSize", {10'000, 100'000, 1'000'000, 10'000'000, 100'000'000}) + .add_int64_axis("Sort", {0, 1}); diff --git a/cpp/benchmarks/stream_compaction/unique_count.cpp b/cpp/benchmarks/stream_compaction/unique_count.cpp new file mode 100644 index 0000000..e003c47 --- /dev/null +++ b/cpp/benchmarks/stream_compaction/unique_count.cpp @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +#include +#include +#include +#include + +#include + +template +void nvbench_unique_count(nvbench::state& state, nvbench::type_list) +{ + auto const num_rows = static_cast(state.get_int64("NumRows")); + auto const nulls = state.get_float64("NullProbability"); + + data_profile profile = data_profile_builder().cardinality(0).null_probability(nulls).distribution( + cudf::type_to_id(), distribution_id::UNIFORM, 0, num_rows / 100); + + auto source_column = create_random_column(cudf::type_to_id(), row_count{num_rows}, profile); + auto sorted_table = cudf::sort(cudf::table_view({source_column->view()})); + + auto input = sorted_table->view(); + + state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + cudf::unique_count(input, cudf::null_equality::EQUAL); + }); +} + +using data_type = nvbench::type_list; + +NVBENCH_BENCH_TYPES(nvbench_unique_count, NVBENCH_TYPE_AXES(data_type)) + .set_name("unique_count") + .set_type_axes_names({"Type"}) + .add_int64_axis("NumRows", {10'000, 100'000, 1'000'000, 10'000'000}) + .add_float64_axis("NullProbability", {0.0, 0.1}); diff --git a/cpp/benchmarks/string/case.cpp b/cpp/benchmarks/string/case.cpp new file mode 100644 index 0000000..385bb76 --- /dev/null +++ b/cpp/benchmarks/string/case.cpp @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include +#include +#include + +#include + +void bench_case(nvbench::state& state) +{ + auto const n_rows = static_cast(state.get_int64("num_rows")); + auto const max_width = static_cast(state.get_int64("row_width")); + auto const encoding = state.get_string("encoding"); + + if (static_cast(n_rows) * static_cast(max_width) >= + static_cast(std::numeric_limits::max())) { + state.skip("Skip benchmarks greater than size_type limit"); + } + + data_profile const profile = data_profile_builder().distribution( + cudf::type_id::STRING, distribution_id::NORMAL, 0, max_width); + auto const column = create_random_column(cudf::type_id::STRING, row_count{n_rows}, profile); + + auto col_view = column->view(); + + cudf::column::contents ascii_contents; + if (encoding == "ascii") { + data_profile ascii_profile = data_profile_builder().no_validity().distribution( + cudf::type_id::INT8, distribution_id::UNIFORM, 32, 126); // nice ASCII range + auto input = cudf::strings_column_view(col_view); + auto ascii_column = + create_random_column(cudf::type_id::INT8, row_count{input.chars_size()}, ascii_profile); + auto ascii_data = ascii_column->view(); + + col_view = cudf::column_view(col_view.type(), + col_view.size(), + nullptr, + col_view.null_mask(), + col_view.null_count(), + 0, + {input.offsets(), ascii_data}); + + ascii_contents = ascii_column->release(); + } + auto input = cudf::strings_column_view(col_view); + + state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); + + state.add_element_count(input.chars_size(), "chars_size"); + state.add_global_memory_reads(input.chars_size()); + state.add_global_memory_writes(input.chars_size()); + + state.exec(nvbench::exec_tag::sync, + [&](nvbench::launch& launch) { auto result = cudf::strings::to_lower(input); }); +} + +NVBENCH_BENCH(bench_case) + .set_name("case") + 
.add_int64_axis("row_width", {32, 64, 128, 256, 512, 1024, 2048}) + .add_int64_axis("num_rows", {4096, 32768, 262144, 2097152, 16777216}) + .add_string_axis("encoding", {"ascii", "utf8"}); diff --git a/cpp/benchmarks/string/char_types.cpp b/cpp/benchmarks/string/char_types.cpp new file mode 100644 index 0000000..59e6245 --- /dev/null +++ b/cpp/benchmarks/string/char_types.cpp @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +#include +#include +#include + +#include + +static void bench_char_types(nvbench::state& state) +{ + auto const num_rows = static_cast(state.get_int64("num_rows")); + auto const row_width = static_cast(state.get_int64("row_width")); + auto const api_type = state.get_string("api"); + + if (static_cast(num_rows) * static_cast(row_width) >= + static_cast(std::numeric_limits::max())) { + state.skip("Skip benchmarks greater than size_type limit"); + } + + data_profile const table_profile = data_profile_builder().distribution( + cudf::type_id::STRING, distribution_id::NORMAL, 0, row_width); + auto const table = + create_random_table({cudf::type_id::STRING}, row_count{num_rows}, table_profile); + cudf::strings_column_view input(table->view().column(0)); + auto input_types = cudf::strings::string_character_types::SPACE; + + state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); + // gather some throughput statistics as well + auto chars_size = input.chars_size(); + state.add_global_memory_reads(chars_size); // all bytes are read; + if (api_type == "all") { + state.add_global_memory_writes(num_rows); // output is a bool8 per row + } else { + state.add_global_memory_writes(chars_size); + } + + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + if (api_type == "all") { + auto result = cudf::strings::all_characters_of_type(input, input_types); + } else { + auto result = cudf::strings::filter_characters_of_type(input, input_types); + } + }); +} + +NVBENCH_BENCH(bench_char_types) + .set_name("char_types") + .add_int64_axis("row_width", {32, 64, 128, 256, 512, 1024, 2048, 4096}) + .add_int64_axis("num_rows", {4096, 32768, 262144, 2097152, 16777216}) + .add_string_axis("api", {"all", "filter"}); diff --git a/cpp/benchmarks/string/combine.cpp b/cpp/benchmarks/string/combine.cpp new file mode 100644 index 0000000..4ed54a3 --- /dev/null +++ b/cpp/benchmarks/string/combine.cpp @@ -0,0 +1,69 @@ +/* + * Copyright 
(c) 2021-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "string_bench_args.hpp" + +#include +#include +#include + +#include +#include +#include +#include + +class StringCombine : public cudf::benchmark {}; + +static void BM_combine(benchmark::State& state) +{ + cudf::size_type const n_rows{static_cast(state.range(0))}; + cudf::size_type const max_str_length{static_cast(state.range(1))}; + data_profile const table_profile = data_profile_builder().distribution( + cudf::type_id::STRING, distribution_id::NORMAL, 0, max_str_length); + auto const table = create_random_table( + {cudf::type_id::STRING, cudf::type_id::STRING}, row_count{n_rows}, table_profile); + cudf::strings_column_view input1(table->view().column(0)); + cudf::strings_column_view input2(table->view().column(1)); + cudf::string_scalar separator("+"); + + for (auto _ : state) { + cuda_event_timer raii(state, true, cudf::get_default_stream()); + cudf::strings::concatenate(table->view(), separator); + } + + state.SetBytesProcessed(state.iterations() * (input1.chars_size() + input2.chars_size())); +} + +static void generate_bench_args(benchmark::internal::Benchmark* b) +{ + int const min_rows = 1 << 12; + int const max_rows = 1 << 24; + int const row_mult = 8; + int const min_rowlen = 1 << 4; + int const max_rowlen = 1 << 11; + int const len_mult = 4; + generate_string_bench_args(b, min_rows, max_rows, row_mult, min_rowlen, max_rowlen, len_mult); +} + 
+#define STRINGS_BENCHMARK_DEFINE(name) \ + BENCHMARK_DEFINE_F(StringCombine, name) \ + (::benchmark::State & st) { BM_combine(st); } \ + BENCHMARK_REGISTER_F(StringCombine, name) \ + ->Apply(generate_bench_args) \ + ->UseManualTime() \ + ->Unit(benchmark::kMillisecond); + +STRINGS_BENCHMARK_DEFINE(concat) diff --git a/cpp/benchmarks/string/contains.cpp b/cpp/benchmarks/string/contains.cpp new file mode 100644 index 0000000..af45d5d --- /dev/null +++ b/cpp/benchmarks/string/contains.cpp @@ -0,0 +1,117 @@ +/* + * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include + +#include + +#include +#include +#include +#include +#include +#include + +#include + +std::unique_ptr build_input_column(cudf::size_type n_rows, + cudf::size_type row_width, + int32_t hit_rate) +{ + // build input table using the following data + auto raw_data = cudf::test::strings_column_wrapper( + { + "123 abc 4567890 DEFGHI 0987 5W43", // matches both patterns; + "012345 6789 01234 56789 0123 456", // the rest do not match + "abc 4567890 DEFGHI 0987 Wxyz 123", + "abcdefghijklmnopqrstuvwxyz 01234", + "", + "AbcéDEFGHIJKLMNOPQRSTUVWXYZ 01", + "9876543210,abcdefghijklmnopqrstU", + "9876543210,abcdefghijklmnopqrstU", + "123 édf 4567890 DéFG 0987 X5", + "1", + }) + .release(); + + if (row_width / 32 > 1) { + std::vector columns; + for (int i = 0; i < row_width / 32; ++i) { + columns.push_back(raw_data->view()); + } + raw_data = cudf::strings::concatenate(cudf::table_view(columns)); + } + auto data_view = raw_data->view(); + + // compute number of rows in n_rows that should match + auto matches = static_cast(n_rows * hit_rate) / 100; + + // Create a randomized gather-map to build a column out of the strings in data. 
+ data_profile gather_profile = + data_profile_builder().cardinality(0).null_probability(0.0).distribution( + cudf::type_id::INT32, distribution_id::UNIFORM, 1, data_view.size() - 1); + auto gather_table = + create_random_table({cudf::type_id::INT32}, row_count{n_rows}, gather_profile); + gather_table->get_column(0).set_null_mask(rmm::device_buffer{}, 0); + + // Create scatter map by placing 0-index values throughout the gather-map + auto scatter_data = cudf::sequence( + matches, cudf::numeric_scalar(0), cudf::numeric_scalar(n_rows / matches)); + auto zero_scalar = cudf::numeric_scalar(0); + auto table = cudf::scatter({zero_scalar}, scatter_data->view(), gather_table->view()); + auto gather_map = table->view().column(0); + table = cudf::gather(cudf::table_view({data_view}), gather_map); + + return std::move(table->release().front()); +} + +// longer pattern lengths demand more working memory per string +std::string patterns[] = {"^\\d+ [a-z]+", "[A-Z ]+\\d+ +\\d+[A-Z]+\\d+$"}; + +static void bench_contains(nvbench::state& state) +{ + auto const n_rows = static_cast(state.get_int64("num_rows")); + auto const row_width = static_cast(state.get_int64("row_width")); + auto const pattern_index = static_cast(state.get_int64("pattern")); + auto const hit_rate = static_cast(state.get_int64("hit_rate")); + + if (static_cast(n_rows) * static_cast(row_width) >= + static_cast(std::numeric_limits::max())) { + state.skip("Skip benchmarks greater than size_type limit"); + } + + auto col = build_input_column(n_rows, row_width, hit_rate); + auto input = cudf::strings_column_view(col->view()); + + auto pattern = patterns[pattern_index]; + auto program = cudf::strings::regex_program::create(pattern); + + auto chars_size = input.chars_size(); + state.add_element_count(chars_size, "chars_size"); + state.add_global_memory_reads(chars_size); + state.add_global_memory_writes(input.size()); + + state.exec(nvbench::exec_tag::sync, + [&](nvbench::launch& launch) { 
cudf::strings::contains_re(input, *program); }); +} + +NVBENCH_BENCH(bench_contains) + .set_name("contains") + .add_int64_axis("row_width", {32, 64, 128, 256, 512}) + .add_int64_axis("num_rows", {32768, 262144, 2097152, 16777216}) + .add_int64_axis("hit_rate", {50, 100}) // percentage + .add_int64_axis("pattern", {0, 1}); diff --git a/cpp/benchmarks/string/convert_datetime.cpp b/cpp/benchmarks/string/convert_datetime.cpp new file mode 100644 index 0000000..5f332a3 --- /dev/null +++ b/cpp/benchmarks/string/convert_datetime.cpp @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +#include +#include +#include +#include + +class StringDateTime : public cudf::benchmark {}; + +enum class direction { to, from }; + +template +void BM_convert_datetime(benchmark::State& state, direction dir) +{ + auto const n_rows = static_cast(state.range(0)); + auto const data_type = cudf::data_type(cudf::type_to_id()); + + auto const column = create_random_column(data_type.id(), row_count{n_rows}); + cudf::column_view input(column->view()); + + auto source = dir == direction::to ? 
cudf::strings::from_timestamps(input, "%Y-%m-%d %H:%M:%S") + : make_empty_column(cudf::data_type{cudf::type_id::STRING}); + cudf::strings_column_view source_string(source->view()); + + for (auto _ : state) { + cuda_event_timer raii(state, true); + if (dir == direction::to) + cudf::strings::to_timestamps(source_string, data_type, "%Y-%m-%d %H:%M:%S"); + else + cudf::strings::from_timestamps(input, "%Y-%m-%d %H:%M:%S"); + } + + auto const bytes = dir == direction::to ? source_string.chars_size() : n_rows * sizeof(TypeParam); + state.SetBytesProcessed(state.iterations() * bytes); +} + +#define STR_BENCHMARK_DEFINE(name, type, dir) \ + BENCHMARK_DEFINE_F(StringDateTime, name)(::benchmark::State & state) \ + { \ + BM_convert_datetime(state, dir); \ + } \ + BENCHMARK_REGISTER_F(StringDateTime, name) \ + ->RangeMultiplier(1 << 5) \ + ->Range(1 << 10, 1 << 25) \ + ->UseManualTime() \ + ->Unit(benchmark::kMicrosecond); + +STR_BENCHMARK_DEFINE(from_days, cudf::timestamp_D, direction::from); +STR_BENCHMARK_DEFINE(from_seconds, cudf::timestamp_s, direction::from); +STR_BENCHMARK_DEFINE(from_mseconds, cudf::timestamp_ms, direction::from); +STR_BENCHMARK_DEFINE(from_useconds, cudf::timestamp_us, direction::from); +STR_BENCHMARK_DEFINE(from_nseconds, cudf::timestamp_ns, direction::from); + +STR_BENCHMARK_DEFINE(to_days, cudf::timestamp_D, direction::to); +STR_BENCHMARK_DEFINE(to_seconds, cudf::timestamp_s, direction::to); +STR_BENCHMARK_DEFINE(to_mseconds, cudf::timestamp_ms, direction::to); +STR_BENCHMARK_DEFINE(to_useconds, cudf::timestamp_us, direction::to); +STR_BENCHMARK_DEFINE(to_nseconds, cudf::timestamp_ns, direction::to); diff --git a/cpp/benchmarks/string/convert_durations.cpp b/cpp/benchmarks/string/convert_durations.cpp new file mode 100644 index 0000000..f12d292 --- /dev/null +++ b/cpp/benchmarks/string/convert_durations.cpp @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2020-2023, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include + +#include +#include +#include +#include + +#include +#include + +class DurationsToString : public cudf::benchmark {}; +template +void BM_convert_from_durations(benchmark::State& state) +{ + cudf::size_type const source_size = state.range(0); + + // Every element is valid + auto data = cudf::detail::make_counting_transform_iterator( + 0, [source_size](auto i) { return TypeParam{i - source_size / 2}; }); + + cudf::test::fixed_width_column_wrapper source_durations(data, data + source_size); + + for (auto _ : state) { + cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0 + cudf::strings::from_durations(source_durations, "%D days %H:%M:%S"); + } + + state.SetBytesProcessed(state.iterations() * source_size * sizeof(TypeParam)); +} + +class StringToDurations : public cudf::benchmark {}; +template +void BM_convert_to_durations(benchmark::State& state) +{ + cudf::size_type const source_size = state.range(0); + + // Every element is valid + auto data = cudf::detail::make_counting_transform_iterator( + 0, [source_size](auto i) { return TypeParam{i - source_size / 2}; }); + + cudf::test::fixed_width_column_wrapper source_durations(data, data + source_size); + auto results = cudf::strings::from_durations(source_durations, "%D days %H:%M:%S"); + cudf::strings_column_view source_string(*results); + auto output_type = cudf::data_type(cudf::type_to_id()); + + for 
(auto _ : state) { + cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0 + cudf::strings::to_durations(source_string, output_type, "%D days %H:%M:%S"); + } + + state.SetBytesProcessed(state.iterations() * source_size * sizeof(TypeParam)); +} + +#define DSBM_BENCHMARK_DEFINE(name, type) \ + BENCHMARK_DEFINE_F(DurationsToString, name)(::benchmark::State & state) \ + { \ + BM_convert_from_durations(state); \ + } \ + BENCHMARK_REGISTER_F(DurationsToString, name) \ + ->RangeMultiplier(1 << 5) \ + ->Range(1 << 10, 1 << 25) \ + ->UseManualTime() \ + ->Unit(benchmark::kMicrosecond); + +#define SDBM_BENCHMARK_DEFINE(name, type) \ + BENCHMARK_DEFINE_F(StringToDurations, name)(::benchmark::State & state) \ + { \ + BM_convert_to_durations(state); \ + } \ + BENCHMARK_REGISTER_F(StringToDurations, name) \ + ->RangeMultiplier(1 << 5) \ + ->Range(1 << 10, 1 << 25) \ + ->UseManualTime() \ + ->Unit(benchmark::kMicrosecond); + +DSBM_BENCHMARK_DEFINE(from_durations_D, cudf::duration_D); +DSBM_BENCHMARK_DEFINE(from_durations_s, cudf::duration_s); +DSBM_BENCHMARK_DEFINE(from_durations_ms, cudf::duration_ms); +DSBM_BENCHMARK_DEFINE(from_durations_us, cudf::duration_us); +DSBM_BENCHMARK_DEFINE(from_durations_ns, cudf::duration_ns); + +SDBM_BENCHMARK_DEFINE(to_durations_D, cudf::duration_D); +SDBM_BENCHMARK_DEFINE(to_durations_s, cudf::duration_s); +SDBM_BENCHMARK_DEFINE(to_durations_ms, cudf::duration_ms); +SDBM_BENCHMARK_DEFINE(to_durations_us, cudf::duration_us); +SDBM_BENCHMARK_DEFINE(to_durations_ns, cudf::duration_ns); diff --git a/cpp/benchmarks/string/convert_fixed_point.cpp b/cpp/benchmarks/string/convert_fixed_point.cpp new file mode 100644 index 0000000..0cc98ee --- /dev/null +++ b/cpp/benchmarks/string/convert_fixed_point.cpp @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +#include +#include +#include + +namespace { + +std::unique_ptr get_strings_column(cudf::size_type rows) +{ + auto result = + create_random_column(cudf::type_id::FLOAT32, row_count{static_cast(rows)}); + return cudf::strings::from_floats(result->view()); +} + +} // anonymous namespace + +class StringsToFixedPoint : public cudf::benchmark {}; + +template +void convert_to_fixed_point(benchmark::State& state) +{ + auto const rows = static_cast(state.range(0)); + auto const strings_col = get_strings_column(rows); + auto const strings_view = cudf::strings_column_view(strings_col->view()); + auto const dtype = cudf::data_type{cudf::type_to_id(), numeric::scale_type{-2}}; + + for (auto _ : state) { + cuda_event_timer raii(state, true); + auto volatile results = cudf::strings::to_fixed_point(strings_view, dtype); + } + + // bytes_processed = bytes_input + bytes_output + state.SetBytesProcessed(state.iterations() * + (strings_view.chars_size() + rows * cudf::size_of(dtype))); +} + +class StringsFromFixedPoint : public cudf::benchmark {}; + +template +void convert_from_fixed_point(benchmark::State& state) +{ + auto const rows = static_cast(state.range(0)); + auto const strings_col = get_strings_column(rows); + auto const dtype = cudf::data_type{cudf::type_to_id(), numeric::scale_type{-2}}; + auto const fp_col = + cudf::strings::to_fixed_point(cudf::strings_column_view(strings_col->view()), dtype); + + std::unique_ptr results = nullptr; + + for (auto _ : state) { + cuda_event_timer raii(state, true); + results = 
cudf::strings::from_fixed_point(fp_col->view()); + } + + // bytes_processed = bytes_input + bytes_output + state.SetBytesProcessed( + state.iterations() * + (cudf::strings_column_view(results->view()).chars_size() + rows * cudf::size_of(dtype))); +} + +#define CONVERT_TO_FIXED_POINT_BMD(name, fixed_point_type) \ + BENCHMARK_DEFINE_F(StringsToFixedPoint, name)(::benchmark::State & state) \ + { \ + convert_to_fixed_point(state); \ + } \ + BENCHMARK_REGISTER_F(StringsToFixedPoint, name) \ + ->RangeMultiplier(4) \ + ->Range(1 << 12, 1 << 24) \ + ->UseManualTime() \ + ->Unit(benchmark::kMicrosecond); + +#define CONVERT_FROM_FIXED_POINT_BMD(name, fixed_point_type) \ + BENCHMARK_DEFINE_F(StringsFromFixedPoint, name)(::benchmark::State & state) \ + { \ + convert_from_fixed_point(state); \ + } \ + BENCHMARK_REGISTER_F(StringsFromFixedPoint, name) \ + ->RangeMultiplier(4) \ + ->Range(1 << 12, 1 << 24) \ + ->UseManualTime() \ + ->Unit(benchmark::kMicrosecond); + +CONVERT_TO_FIXED_POINT_BMD(strings_to_decimal32, numeric::decimal32); +CONVERT_TO_FIXED_POINT_BMD(strings_to_decimal64, numeric::decimal64); + +CONVERT_FROM_FIXED_POINT_BMD(strings_from_decimal32, numeric::decimal32); +CONVERT_FROM_FIXED_POINT_BMD(strings_from_decimal64, numeric::decimal64); diff --git a/cpp/benchmarks/string/convert_numerics.cpp b/cpp/benchmarks/string/convert_numerics.cpp new file mode 100644 index 0000000..cce5d0f --- /dev/null +++ b/cpp/benchmarks/string/convert_numerics.cpp @@ -0,0 +1,130 @@ +/* + * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <benchmarks/common/generate_input.hpp>
+#include <benchmarks/fixture/benchmark_fixture.hpp>
+#include <benchmarks/synchronization/synchronization.hpp>
+
+#include <cudf/strings/convert/convert_floats.hpp>
+#include <cudf/strings/convert/convert_integers.hpp>
+#include <cudf/strings/strings_column_view.hpp>
+
+namespace {
+
+template <typename NumericType>
+std::unique_ptr<cudf::column> get_numerics_column(cudf::size_type rows)
+{
+  return create_random_column(cudf::type_to_id<NumericType>(), row_count{rows});
+}
+
+template <typename NumericType>
+std::unique_ptr<cudf::column> get_strings_column(cudf::size_type rows)
+{
+  auto const numerics_col = get_numerics_column<NumericType>(rows);
+  if constexpr (std::is_floating_point_v<NumericType>) {
+    return cudf::strings::from_floats(numerics_col->view());
+  } else {
+    return cudf::strings::from_integers(numerics_col->view());
+  }
+}
+}  // anonymous namespace
+
+class StringsToNumeric : public cudf::benchmark {};
+
+template <typename NumericType>
+void convert_to_number(benchmark::State& state)
+{
+  auto const rows = static_cast<cudf::size_type>(state.range(0));
+
+  auto const strings_col  = get_strings_column<NumericType>(rows);
+  auto const strings_view = cudf::strings_column_view(strings_col->view());
+  auto const col_type     = cudf::type_to_id<NumericType>();
+
+  for (auto _ : state) {
+    cuda_event_timer raii(state, true);
+    if constexpr (std::is_floating_point_v<NumericType>) {
+      cudf::strings::to_floats(strings_view, cudf::data_type{col_type});
+    } else {
+      cudf::strings::to_integers(strings_view, cudf::data_type{col_type});
+    }
+  }
+
+  // bytes_processed = bytes_input + bytes_output
+  state.SetBytesProcessed(state.iterations() *
+                          (strings_view.chars_size() + rows * sizeof(NumericType)));
+}
+
+class StringsFromNumeric : public cudf::benchmark {};
+
+template <typename NumericType>
+void convert_from_number(benchmark::State& state)
+{
+  auto const rows = static_cast<cudf::size_type>(state.range(0));
+
+  auto const numerics_col  = get_numerics_column<NumericType>(rows);
+  auto const numerics_view = numerics_col->view();
+
+  std::unique_ptr<cudf::column> results = nullptr;
+
+  for (auto _ : state) {
+    cuda_event_timer raii(state, true);
+    if constexpr (std::is_floating_point_v<NumericType>)
+      results = cudf::strings::from_floats(numerics_view);
+    else
+      results = cudf::strings::from_integers(numerics_view);
+  }
+
+  // bytes_processed = bytes_input + bytes_output
+  state.SetBytesProcessed(
+    state.iterations() *
+    (cudf::strings_column_view(results->view()).chars_size() + rows * sizeof(NumericType)));
+}
+
+#define CONVERT_TO_NUMERICS_BD(name, type)                                 \
+  BENCHMARK_DEFINE_F(StringsToNumeric, name)(::benchmark::State & state)   \
+  {                                                                        \
+    convert_to_number<type>(state);                                        \
+  }                                                                        \
+  BENCHMARK_REGISTER_F(StringsToNumeric, name)                             \
+    ->RangeMultiplier(4)                                                   \
+    ->Range(1 << 10, 1 << 17)                                              \
+    ->UseManualTime()                                                      \
+    ->Unit(benchmark::kMicrosecond);
+
+#define CONVERT_FROM_NUMERICS_BD(name, type)                               \
+  BENCHMARK_DEFINE_F(StringsFromNumeric, name)(::benchmark::State & state) \
+  {                                                                        \
+    convert_from_number<type>(state);                                      \
+  }                                                                        \
+  BENCHMARK_REGISTER_F(StringsFromNumeric, name)                           \
+    ->RangeMultiplier(4)                                                   \
+    ->Range(1 << 10, 1 << 17)                                              \
+    ->UseManualTime()                                                      \
+    ->Unit(benchmark::kMicrosecond);
+
+CONVERT_TO_NUMERICS_BD(strings_to_float32, float);
+CONVERT_TO_NUMERICS_BD(strings_to_float64, double);
+CONVERT_TO_NUMERICS_BD(strings_to_int32, int32_t);
+CONVERT_TO_NUMERICS_BD(strings_to_int64, int64_t);
+CONVERT_TO_NUMERICS_BD(strings_to_uint8, uint8_t);
+CONVERT_TO_NUMERICS_BD(strings_to_uint16, uint16_t);
+
+CONVERT_FROM_NUMERICS_BD(strings_from_float32, float);
+CONVERT_FROM_NUMERICS_BD(strings_from_float64, double);
+CONVERT_FROM_NUMERICS_BD(strings_from_int32, int32_t);
+CONVERT_FROM_NUMERICS_BD(strings_from_int64, int64_t);
+CONVERT_FROM_NUMERICS_BD(strings_from_uint8, uint8_t);
+CONVERT_FROM_NUMERICS_BD(strings_from_uint16, uint16_t);
diff --git a/cpp/benchmarks/string/copy.cu b/cpp/benchmarks/string/copy.cu
new file mode 100644
index 0000000..27438f8
--- /dev/null
+++ b/cpp/benchmarks/string/copy.cu
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "string_bench_args.hpp"
+
+#include <benchmarks/common/generate_input.hpp>
+#include <benchmarks/fixture/benchmark_fixture.hpp>
+#include <benchmarks/synchronization/synchronization.hpp>
+
+#include <cudf/column/column_factories.hpp>
+#include <cudf/copying.hpp>
+#include <cudf/strings/strings_column_view.hpp>
+#include <cudf/utilities/default_stream.hpp>
+
+#include <thrust/execution_policy.h>
+#include <thrust/iterator/counting_iterator.h>
+#include <thrust/random.h>
+#include <thrust/shuffle.h>
+
+class StringCopy : public cudf::benchmark {};
+
+enum copy_type { gather, scatter };
+
+static void BM_copy(benchmark::State& state, copy_type ct)
+{
+  cudf::size_type const n_rows{static_cast<cudf::size_type>(state.range(0))};
+  cudf::size_type const max_str_length{static_cast<cudf::size_type>(state.range(1))};
+  data_profile const table_profile = data_profile_builder().distribution(
+    cudf::type_id::STRING, distribution_id::NORMAL, 0, max_str_length);
+
+  auto const source =
+    create_random_table({cudf::type_id::STRING}, row_count{n_rows}, table_profile);
+  auto const target =
+    create_random_table({cudf::type_id::STRING}, row_count{n_rows}, table_profile);
+
+  // scatter indices
+  auto index_map_col = make_numeric_column(
+    cudf::data_type{cudf::type_id::INT32}, n_rows, cudf::mask_state::UNALLOCATED);
+  auto index_map = index_map_col->mutable_view();
+  thrust::shuffle_copy(thrust::device,
+                       thrust::counting_iterator<cudf::size_type>(0),
+                       thrust::counting_iterator<cudf::size_type>(n_rows),
+                       index_map.begin<cudf::size_type>(),
+                       thrust::default_random_engine());
+
+  for (auto _ : state) {
+    cuda_event_timer raii(state, true, cudf::get_default_stream());
+    switch (ct) {
+      case gather: cudf::gather(source->view(), index_map); break;
+      case scatter: cudf::scatter(source->view(), index_map, target->view()); break;
+    }
+  }
+
+  state.SetBytesProcessed(state.iterations() *
+                          cudf::strings_column_view(source->view().column(0)).chars_size());
+}
+
+static void generate_bench_args(benchmark::internal::Benchmark* b)
+{
+  int const min_rows   = 1 << 12;
+  int const max_rows   = 1 << 24;
+  int const row_mult   = 8;
+  int const min_rowlen = 1 << 5;
+  int const max_rowlen = 1 << 13;
+  int const len_mult   = 4;
+  generate_string_bench_args(b, min_rows, max_rows, row_mult, min_rowlen, max_rowlen, len_mult);
+
+  // Benchmark for very small strings
+  b->Args({67108864, 2});
+}
+
+#define COPY_BENCHMARK_DEFINE(name)                           \
+  BENCHMARK_DEFINE_F(StringCopy, name)                        \
+  (::benchmark::State & st) { BM_copy(st, copy_type::name); } \
+  BENCHMARK_REGISTER_F(StringCopy, name)                      \
+    ->Apply(generate_bench_args)                              \
+    ->UseManualTime()                                         \
+    ->Unit(benchmark::kMillisecond);
+
+COPY_BENCHMARK_DEFINE(gather)
+COPY_BENCHMARK_DEFINE(scatter)
diff --git a/cpp/benchmarks/string/count.cpp b/cpp/benchmarks/string/count.cpp
new file mode 100644
index 0000000..0840646
--- /dev/null
+++ b/cpp/benchmarks/string/count.cpp
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <benchmarks/common/generate_input.hpp>
+
+#include <cudf_test/column_wrapper.hpp>
+
+#include <cudf/strings/contains.hpp>
+#include <cudf/strings/regex/regex_program.hpp>
+#include <cudf/strings/strings_column_view.hpp>
+#include <cudf/utilities/default_stream.hpp>
+
+#include <nvbench/nvbench.cuh>
+
+static void bench_count(nvbench::state& state)
+{
+  auto const num_rows  = static_cast<cudf::size_type>(state.get_int64("num_rows"));
+  auto const row_width = static_cast<cudf::size_type>(state.get_int64("row_width"));
+
+  if (static_cast<std::size_t>(num_rows) * static_cast<std::size_t>(row_width) >=
+      static_cast<std::size_t>(std::numeric_limits<cudf::size_type>::max())) {
+    state.skip("Skip benchmarks greater than size_type limit");
+  }
+
+  data_profile const table_profile = data_profile_builder().distribution(
+    cudf::type_id::STRING, distribution_id::NORMAL, 0, row_width);
+  auto const table =
+    create_random_table({cudf::type_id::STRING}, row_count{num_rows}, table_profile);
+  cudf::strings_column_view input(table->view().column(0));
+
+  std::string pattern = "\\d+";
+
+  auto prog = cudf::strings::regex_program::create(pattern);
+
+  state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
+  // gather some throughput statistics as well
+  auto chars_size = input.chars_size();
+  state.add_element_count(chars_size, "chars_size");
+  state.add_global_memory_reads<nvbench::int8_t>(chars_size);
+  state.add_global_memory_writes<nvbench::int32_t>(input.size());
+
+  state.exec(nvbench::exec_tag::sync,
+             [&](nvbench::launch& launch) { auto result = cudf::strings::count_re(input, *prog); });
+}
+
+NVBENCH_BENCH(bench_count)
+  .set_name("count")
+  .add_int64_axis("row_width", {32, 64, 128, 256, 512, 1024, 2048})
+  .add_int64_axis("num_rows", {4096, 32768, 262144, 2097152, 16777216});
diff --git a/cpp/benchmarks/string/extract.cpp b/cpp/benchmarks/string/extract.cpp
new file mode 100644
index 0000000..135dada
--- /dev/null
+++ b/cpp/benchmarks/string/extract.cpp
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include + +#include +#include +#include +#include + +#include + +#include + +static void bench_extract(nvbench::state& state) +{ + auto const num_rows = static_cast(state.get_int64("num_rows")); + auto const row_width = static_cast(state.get_int64("row_width")); + + if (static_cast(num_rows) * static_cast(row_width) >= + static_cast(std::numeric_limits::max())) { + state.skip("Skip benchmarks greater than size_type limit"); + } + + auto groups = static_cast(state.get_int64("groups")); + + std::default_random_engine generator; + std::uniform_int_distribution words_dist(0, 999); + std::vector samples(100); // 100 unique rows of data to reuse + std::generate(samples.begin(), samples.end(), [&]() { + std::string row; // build a row of random tokens + while (static_cast(row.size()) < row_width) { + row += std::to_string(words_dist(generator)) + " "; + } + return row; + }); + + std::string pattern{""}; + while (groups--) { + pattern += "(\\d+) "; + } + + cudf::test::strings_column_wrapper samples_column(samples.begin(), samples.end()); + data_profile const profile = data_profile_builder().no_validity().distribution( + cudf::type_to_id(), distribution_id::UNIFORM, 0ul, samples.size() - 1); + auto map = + create_random_column(cudf::type_to_id(), row_count{num_rows}, profile); + auto input = cudf::gather( + cudf::table_view{{samples_column}}, map->view(), cudf::out_of_bounds_policy::DONT_CHECK); + cudf::strings_column_view strings_view(input->get_column(0).view()); + auto prog = cudf::strings::regex_program::create(pattern); + + 
state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); + // gather some throughput statistics as well + auto chars_size = strings_view.chars_size(); + state.add_element_count(chars_size, "chars_size"); // number of bytes; + state.add_global_memory_reads(chars_size); // all bytes are read; + state.add_global_memory_writes(chars_size); // all bytes are written + + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + auto result = cudf::strings::extract(strings_view, *prog); + }); +} + +NVBENCH_BENCH(bench_extract) + .set_name("extract") + .add_int64_axis("row_width", {32, 64, 128, 256, 512, 1024, 2048}) + .add_int64_axis("num_rows", {4096, 32768, 262144, 2097152, 16777216}) + .add_int64_axis("groups", {1, 2, 4}); diff --git a/cpp/benchmarks/string/factory.cu b/cpp/benchmarks/string/factory.cu new file mode 100644 index 0000000..c73bcb0 --- /dev/null +++ b/cpp/benchmarks/string/factory.cu @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "string_bench_args.hpp" + +#include +#include +#include + +#include + +#include +#include +#include + +#include + +#include +#include +#include + +#include + +namespace { +using string_pair = thrust::pair; +struct string_view_to_pair { + __device__ string_pair operator()(thrust::pair const& p) + { + return (p.second) ? 
string_pair{p.first.data(), p.first.size_bytes()} : string_pair{nullptr, 0}; + } +}; +} // namespace + +class StringsFactory : public cudf::benchmark {}; + +static void BM_factory(benchmark::State& state) +{ + cudf::size_type const n_rows{static_cast(state.range(0))}; + cudf::size_type const max_str_length{static_cast(state.range(1))}; + data_profile const profile = data_profile_builder().distribution( + cudf::type_id::STRING, distribution_id::NORMAL, 0, max_str_length); + auto const column = create_random_column(cudf::type_id::STRING, row_count{n_rows}, profile); + auto d_column = cudf::column_device_view::create(column->view()); + rmm::device_uvector pairs(d_column->size(), cudf::get_default_stream()); + thrust::transform(thrust::device, + d_column->pair_begin(), + d_column->pair_end(), + pairs.data(), + string_view_to_pair{}); + + for (auto _ : state) { + cuda_event_timer raii(state, true, cudf::get_default_stream()); + cudf::make_strings_column(pairs, cudf::get_default_stream()); + } + + cudf::strings_column_view input(column->view()); + state.SetBytesProcessed(state.iterations() * input.chars_size()); +} + +static void generate_bench_args(benchmark::internal::Benchmark* b) +{ + int const min_rows = 1 << 12; + int const max_rows = 1 << 24; + int const row_mult = 8; + int const min_rowlen = 1 << 5; + int const max_rowlen = 1 << 13; + int const len_mult = 4; + generate_string_bench_args(b, min_rows, max_rows, row_mult, min_rowlen, max_rowlen, len_mult); +} + +#define STRINGS_BENCHMARK_DEFINE(name) \ + BENCHMARK_DEFINE_F(StringsFactory, name) \ + (::benchmark::State & st) { BM_factory(st); } \ + BENCHMARK_REGISTER_F(StringsFactory, name) \ + ->Apply(generate_bench_args) \ + ->UseManualTime() \ + ->Unit(benchmark::kMillisecond); + +STRINGS_BENCHMARK_DEFINE(factory) diff --git a/cpp/benchmarks/string/filter.cpp b/cpp/benchmarks/string/filter.cpp new file mode 100644 index 0000000..b935fc4 --- /dev/null +++ b/cpp/benchmarks/string/filter.cpp @@ -0,0 +1,85 @@ +/* + * 
Copyright (c) 2021-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "string_bench_args.hpp" + +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include + +#include + +enum FilterAPI { filter, filter_chars, strip }; + +class StringFilterChars : public cudf::benchmark {}; + +static void BM_filter_chars(benchmark::State& state, FilterAPI api) +{ + cudf::size_type const n_rows{static_cast(state.range(0))}; + cudf::size_type const max_str_length{static_cast(state.range(1))}; + data_profile const profile = data_profile_builder().distribution( + cudf::type_id::STRING, distribution_id::NORMAL, 0, max_str_length); + auto const column = create_random_column(cudf::type_id::STRING, row_count{n_rows}, profile); + cudf::strings_column_view input(column->view()); + + auto const types = cudf::strings::string_character_types::SPACE; + std::vector> filter_table{ + {cudf::char_utf8{'a'}, cudf::char_utf8{'c'}}}; + + for (auto _ : state) { + cuda_event_timer raii(state, true, cudf::get_default_stream()); + switch (api) { + case filter: cudf::strings::filter_characters_of_type(input, types); break; + case filter_chars: cudf::strings::filter_characters(input, filter_table); break; + case strip: cudf::strings::strip(input); break; + } + } + + state.SetBytesProcessed(state.iterations() * input.chars_size()); +} + +static void generate_bench_args(benchmark::internal::Benchmark* b) +{ + int const 
min_rows = 1 << 12; + int const max_rows = 1 << 24; + int const row_multiplier = 8; + int const min_length = 1 << 5; + int const max_length = 1 << 13; + int const length_multiplier = 2; + generate_string_bench_args( + b, min_rows, max_rows, row_multiplier, min_length, max_length, length_multiplier); +} + +#define STRINGS_BENCHMARK_DEFINE(name) \ + BENCHMARK_DEFINE_F(StringFilterChars, name) \ + (::benchmark::State & st) { BM_filter_chars(st, FilterAPI::name); } \ + BENCHMARK_REGISTER_F(StringFilterChars, name) \ + ->Apply(generate_bench_args) \ + ->UseManualTime() \ + ->Unit(benchmark::kMillisecond); + +STRINGS_BENCHMARK_DEFINE(filter) +STRINGS_BENCHMARK_DEFINE(filter_chars) +STRINGS_BENCHMARK_DEFINE(strip) diff --git a/cpp/benchmarks/string/find.cpp b/cpp/benchmarks/string/find.cpp new file mode 100644 index 0000000..5f2e694 --- /dev/null +++ b/cpp/benchmarks/string/find.cpp @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#include <benchmarks/common/generate_input.hpp>
+#include <benchmarks/fixture/benchmark_fixture.hpp>
+#include <benchmarks/synchronization/synchronization.hpp>
+
+#include <cudf_test/column_wrapper.hpp>
+
+#include <cudf/scalar/scalar.hpp>
+#include <cudf/strings/find.hpp>
+#include <cudf/strings/find_multiple.hpp>
+#include <cudf/strings/strings_column_view.hpp>
+#include <cudf/utilities/default_stream.hpp>
+
+#include <limits>
+
+enum FindAPI { find, find_multi, contains, starts_with, ends_with };
+
+class StringFindScalar : public cudf::benchmark {};
+
+static void BM_find_scalar(benchmark::State& state, FindAPI find_api)
+{
+  cudf::size_type const n_rows{static_cast<cudf::size_type>(state.range(0))};
+  cudf::size_type const max_str_length{static_cast<cudf::size_type>(state.range(1))};
+  data_profile const profile = data_profile_builder().distribution(
+    cudf::type_id::STRING, distribution_id::NORMAL, 0, max_str_length);
+  auto const column = create_random_column(cudf::type_id::STRING, row_count{n_rows}, profile);
+  cudf::strings_column_view input(column->view());
+  cudf::string_scalar target("+");
+  cudf::test::strings_column_wrapper targets({"+", "-"});
+
+  for (auto _ : state) {
+    cuda_event_timer raii(state, true, cudf::get_default_stream());
+    switch (find_api) {
+      case find: cudf::strings::find(input, target); break;
+      case find_multi:
+        cudf::strings::find_multiple(input, cudf::strings_column_view(targets));
+        break;
+      case contains: cudf::strings::contains(input, target); break;
+      case starts_with: cudf::strings::starts_with(input, target); break;
+      case ends_with: cudf::strings::ends_with(input, target); break;
+    }
+  }
+
+  state.SetBytesProcessed(state.iterations() * input.chars_size());
+}
+
+static void generate_bench_args(benchmark::internal::Benchmark* b)
+{
+  int const min_rows   = 1 << 12;
+  int const max_rows   = 1 << 24;
+  int const row_mult   = 8;
+  int const min_rowlen = 1 << 5;
+  int const max_rowlen = 1 << 13;
+  int const len_mult   = 2;
+  for (int row_count = min_rows; row_count <= max_rows; row_count *= row_mult) {
+    for (int rowlen = min_rowlen; rowlen <= max_rowlen; rowlen *= len_mult) {
+      // avoid generating combinations that exceed the cudf column limit
+      size_t total_chars = static_cast<size_t>(row_count) * rowlen;
+      if (total_chars < static_cast<size_t>(std::numeric_limits<cudf::size_type>::max())) {
+        b->Args({row_count, rowlen});
+      }
+    }
+  }
+}
+
+#define STRINGS_BENCHMARK_DEFINE(name)                    \
+  BENCHMARK_DEFINE_F(StringFindScalar, name)              \
+  (::benchmark::State & st) { BM_find_scalar(st, name); } \
+  BENCHMARK_REGISTER_F(StringFindScalar, name)            \
+    ->Apply(generate_bench_args)                          \
+    ->UseManualTime()                                     \
+    ->Unit(benchmark::kMillisecond);
+
+STRINGS_BENCHMARK_DEFINE(find)
+STRINGS_BENCHMARK_DEFINE(find_multi)
+STRINGS_BENCHMARK_DEFINE(contains)
+STRINGS_BENCHMARK_DEFINE(starts_with)
+STRINGS_BENCHMARK_DEFINE(ends_with)
diff --git a/cpp/benchmarks/string/gather.cpp b/cpp/benchmarks/string/gather.cpp
new file mode 100644
index 0000000..530b09b
--- /dev/null
+++ b/cpp/benchmarks/string/gather.cpp
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <benchmarks/common/generate_input.hpp>
+
+#include <cudf/copying.hpp>
+#include <cudf/strings/strings_column_view.hpp>
+#include <cudf/utilities/default_stream.hpp>
+
+#include <nvbench/nvbench.cuh>
+
+static void bench_gather(nvbench::state& state)
+{
+  auto const num_rows  = static_cast<cudf::size_type>(state.get_int64("num_rows"));
+  auto const row_width = static_cast<cudf::size_type>(state.get_int64("row_width"));
+
+  if (static_cast<std::size_t>(num_rows) * static_cast<std::size_t>(row_width) >=
+      static_cast<std::size_t>(std::numeric_limits<cudf::size_type>::max())) {
+    state.skip("Skip benchmarks greater than size_type limit");
+  }
+
+  data_profile const table_profile = data_profile_builder().distribution(
+    cudf::type_id::STRING, distribution_id::NORMAL, 0, row_width);
+  auto const input_table =
+    create_random_table({cudf::type_id::STRING}, row_count{num_rows}, table_profile);
+
+  data_profile const map_profile = data_profile_builder().no_validity().distribution(
+    cudf::type_id::INT32, distribution_id::UNIFORM, 0, num_rows);
+  auto const map_table =
+    create_random_table({cudf::type_id::INT32}, row_count{num_rows}, map_profile);
+
+  state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
+  auto chars_size = cudf::strings_column_view(input_table->view().column(0)).chars_size();
+  state.add_global_memory_reads<nvbench::int8_t>(chars_size);  // all bytes are read;
+  state.add_global_memory_writes<nvbench::int8_t>(chars_size);
+
+  state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
+    auto result = cudf::gather(
+      input_table->view(), map_table->view().column(0), cudf::out_of_bounds_policy::NULLIFY);
+  });
+}
+
+NVBENCH_BENCH(bench_gather)
+  .set_name("gather")
+  .add_int64_axis("row_width", {32, 64, 128, 256, 512, 1024, 2048, 4096})
+  .add_int64_axis("num_rows", {4096, 32768, 262144, 2097152, 16777216});
diff --git a/cpp/benchmarks/string/join_strings.cpp b/cpp/benchmarks/string/join_strings.cpp
new file mode 100644
index 0000000..a122c00
--- /dev/null
+++ b/cpp/benchmarks/string/join_strings.cpp
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <benchmarks/common/generate_input.hpp>
+
+#include <cudf/strings/combine.hpp>
+#include <cudf/strings/strings_column_view.hpp>
+#include <cudf/utilities/default_stream.hpp>
+
+#include <nvbench/nvbench.cuh>
+
+static void bench_join(nvbench::state& state)
+{
+  auto const num_rows  = static_cast<cudf::size_type>(state.get_int64("num_rows"));
+  auto const row_width = static_cast<cudf::size_type>(state.get_int64("row_width"));
+
+  if (static_cast<std::size_t>(num_rows) * static_cast<std::size_t>(row_width) >=
+      static_cast<std::size_t>(std::numeric_limits<cudf::size_type>::max())) {
+    state.skip("Skip benchmarks greater than size_type limit");
+  }
+
+  data_profile const table_profile = data_profile_builder().distribution(
+    cudf::type_id::STRING, distribution_id::NORMAL, 0, row_width);
+  auto const table =
+    create_random_table({cudf::type_id::STRING}, row_count{num_rows}, table_profile);
+  cudf::strings_column_view input(table->view().column(0));
+
+  state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
+  // gather some throughput statistics as well
+  auto const chars_size = input.chars_size();
+  state.add_element_count(chars_size, "chars_size");            // number of bytes;
+  state.add_global_memory_reads<nvbench::int8_t>(chars_size);   // all bytes are read;
+  state.add_global_memory_writes<nvbench::int8_t>(chars_size);  // all bytes are written
+
+  std::string separator(":");
+  std::string narep("null");
+  state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
+    auto result = cudf::strings::join_strings(input, separator, narep);
+  });
+}
+
+NVBENCH_BENCH(bench_join)
+  .set_name("strings_join")
+  .add_int64_axis("row_width", {32, 64, 128,
256, 512, 1024}) + .add_int64_axis("num_rows", {4096, 32768, 262144, 2097152, 16777216}); diff --git a/cpp/benchmarks/string/json.cu b/cpp/benchmarks/string/json.cu new file mode 100644 index 0000000..7e89edf --- /dev/null +++ b/cpp/benchmarks/string/json.cu @@ -0,0 +1,223 @@ +/* + * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include + +class JsonPath : public cudf::benchmark {}; + +std::vector const Books{ + R"json({ +"category": "reference", +"author": "Nigel Rees", +"title": "Sayings of the Century", +"price": 8.95 +})json", + R"json({ +"category": "fiction", +"author": "Evelyn Waugh", +"title": "Sword of Honour", +"price": 12.99 +})json", + R"json({ +"category": "fiction", +"author": "Herman Melville", +"title": "Moby Dick", +"isbn": "0-553-21311-3", +"price": 8.99 +})json", + R"json({ +"category": "fiction", +"author": "J. R. R. 
Tolkien", +"title": "The Lord of the Rings", +"isbn": "0-395-19395-8", +"price": 22.99 +})json"}; +constexpr int Approx_book_size = 110; +std::vector const Bicycles{ + R"json({"color": "red", "price": 9.95})json", + R"json({"color": "green", "price": 29.95})json", + R"json({"color": "blue", "price": 399.95})json", + R"json({"color": "yellow", "price": 99.95})json", + R"json({"color": "mauve", "price": 199.95})json", +}; +constexpr int Approx_bicycle_size = 33; +std::string Misc{"\n\"expensive\": 10\n"}; + +struct json_benchmark_row_builder { + int const desired_bytes; + cudf::size_type const num_rows; + cudf::column_device_view const d_books_bicycles[2]; // Books, Bicycles strings + cudf::column_device_view const d_book_pct; // Book percentage + cudf::column_device_view const d_misc_order; // Misc-Store order + cudf::column_device_view const d_store_order; // Books-Bicycles order + int32_t* d_offsets{}; + char* d_chars{}; + thrust::minstd_rand rng{5236}; + thrust::uniform_int_distribution dist{}; + + // internal data structure for {bytes, out_ptr} with operator+= + struct bytes_and_ptr { + cudf::size_type bytes; + char* ptr; + __device__ bytes_and_ptr& operator+=(cudf::string_view const& str_append) + { + bytes += str_append.size_bytes(); + if (ptr) { ptr = cudf::strings::detail::copy_string(ptr, str_append); } + return *this; + } + }; + + __device__ inline void copy_items(int this_idx, + cudf::size_type num_items, + bytes_and_ptr& output_str) + { + using param_type = thrust::uniform_int_distribution::param_type; + dist.param(param_type{0, d_books_bicycles[this_idx].size() - 1}); + cudf::string_view comma(",\n", 2); + for (int i = 0; i < num_items; i++) { + if (i > 0) { output_str += comma; } + int idx = dist(rng); + auto item = d_books_bicycles[this_idx].element(idx); + output_str += item; + } + } + + __device__ void operator()(cudf::size_type idx) + { + int num_books = 2; + int num_bicycles = 2; + int remaining_bytes = max( + 0, desired_bytes - ((num_books * 
Approx_book_size) + (num_bicycles * Approx_bicycle_size))); + + // divide up the remainder between books and bikes + auto book_pct = d_book_pct.element(idx); + // {Misc, store} OR {store, Misc} + // store: {books, bicycles} OR store: {bicycles, books} + float bicycle_pct = 1.0f - book_pct; + num_books += (remaining_bytes * book_pct) / Approx_book_size; + num_bicycles += (remaining_bytes * bicycle_pct) / Approx_bicycle_size; + + char* out_ptr = d_chars ? d_chars + d_offsets[idx] : nullptr; + bytes_and_ptr output_str{0, out_ptr}; + // + cudf::string_view comma(",\n", 2); + cudf::string_view brace1("{\n", 2); + cudf::string_view store_member_start[2]{{"\"book\": [\n", 10}, {"\"bicycle\": [\n", 13}}; + cudf::string_view store("\"store\": {\n", 11); + cudf::string_view Misc{"\"expensive\": 10", 15}; + cudf::string_view brace2("\n}", 2); + cudf::string_view square2{"\n]", 2}; + + output_str += brace1; + if (d_misc_order.element(idx)) { // Misc. first. + output_str += Misc; + output_str += comma; + } + output_str += store; + for (int store_order = 0; store_order < 2; store_order++) { + if (store_order > 0) { output_str += comma; } + int this_idx = (d_store_order.element(idx) == store_order); + auto& mem_start = store_member_start[this_idx]; + output_str += mem_start; + copy_items(this_idx, this_idx == 0 ? num_books : num_bicycles, output_str); + output_str += square2; + } + output_str += brace2; + if (!d_misc_order.element(idx)) { // Misc, if not first. 
+ output_str += comma; + output_str += Misc; + } + output_str += brace2; + if (!output_str.ptr) d_offsets[idx] = output_str.bytes; + } +}; + +auto build_json_string_column(int desired_bytes, int num_rows) +{ + data_profile const profile = data_profile_builder().cardinality(0).no_validity().distribution( + cudf::type_id::FLOAT32, distribution_id::UNIFORM, 0.0, 1.0); + auto float_2bool_columns = + create_random_table({cudf::type_id::FLOAT32, cudf::type_id::BOOL8, cudf::type_id::BOOL8}, + row_count{num_rows}, + profile); + + cudf::test::strings_column_wrapper books(Books.begin(), Books.end()); + cudf::test::strings_column_wrapper bicycles(Bicycles.begin(), Bicycles.end()); + auto d_books = cudf::column_device_view::create(books); + auto d_bicycles = cudf::column_device_view::create(bicycles); + auto d_book_pct = cudf::column_device_view::create(float_2bool_columns->get_column(0)); + auto d_misc_order = cudf::column_device_view::create(float_2bool_columns->get_column(1)); + auto d_store_order = cudf::column_device_view::create(float_2bool_columns->get_column(2)); + json_benchmark_row_builder jb{ + desired_bytes, num_rows, {*d_books, *d_bicycles}, *d_book_pct, *d_misc_order, *d_store_order}; + auto children = cudf::strings::detail::make_strings_children( + jb, num_rows, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + return cudf::make_strings_column( + num_rows, std::move(children.first), std::move(children.second), 0, {}); +} + +void BM_case(benchmark::State& state, std::string query_arg) +{ + srand(5236); + int num_rows = state.range(0); + int desired_bytes = state.range(1); + auto input = build_json_string_column(desired_bytes, num_rows); + cudf::strings_column_view scv(input->view()); + size_t num_chars = scv.chars().size(); + + std::string json_path(query_arg); + + for (auto _ : state) { + cuda_event_timer raii(state, true); + auto result = cudf::strings::get_json_object(scv, json_path); + CUDF_CUDA_TRY(cudaStreamSynchronize(0)); + } + + // 
this isn't strictly 100% accurate. a given query isn't necessarily
+  // going to visit every single incoming character. but in spirit it does.
+  state.SetBytesProcessed(state.iterations() * num_chars);
+}
+
+#define JSON_BENCHMARK_DEFINE(name, query)                                                  \
+  BENCHMARK_DEFINE_F(JsonPath, name)(::benchmark::State & state) { BM_case(state, query); } \
+  BENCHMARK_REGISTER_F(JsonPath, name)                                                      \
+    ->ArgsProduct({{100, 1000, 100000, 400000}, {300, 600, 4096}})                          \
+    ->UseManualTime()                                                                       \
+    ->Unit(benchmark::kMillisecond);
+
+JSON_BENCHMARK_DEFINE(query0, "$");
+JSON_BENCHMARK_DEFINE(query1, "$.store");
+JSON_BENCHMARK_DEFINE(query2, "$.store.book");
+JSON_BENCHMARK_DEFINE(query3, "$.store.*");
+JSON_BENCHMARK_DEFINE(query4, "$.store.book[*]");
+JSON_BENCHMARK_DEFINE(query5, "$.store.book[*].category");
+JSON_BENCHMARK_DEFINE(query6, "$.store['bicycle']");
+JSON_BENCHMARK_DEFINE(query7, "$.store.book[*]['isbn']");
+JSON_BENCHMARK_DEFINE(query8, "$.store.bicycle[1]");
diff --git a/cpp/benchmarks/string/lengths.cpp b/cpp/benchmarks/string/lengths.cpp
new file mode 100644
index 0000000..36c4bf6
--- /dev/null
+++ b/cpp/benchmarks/string/lengths.cpp
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <benchmarks/common/generate_input.hpp>
+
+#include <cudf/strings/attributes.hpp>
+#include <cudf/strings/strings_column_view.hpp>
+#include <cudf/utilities/default_stream.hpp>
+
+#include <nvbench/nvbench.cuh>
+
+static void bench_lengths(nvbench::state& state)
+{
+  auto const num_rows  = static_cast<cudf::size_type>(state.get_int64("num_rows"));
+  auto const row_width = static_cast<cudf::size_type>(state.get_int64("row_width"));
+
+  if (static_cast<std::size_t>(num_rows) * static_cast<std::size_t>(row_width) >=
+      static_cast<std::size_t>(std::numeric_limits<cudf::size_type>::max())) {
+    state.skip("Skip benchmarks greater than size_type limit");
+  }
+
+  data_profile const table_profile = data_profile_builder().distribution(
+    cudf::type_id::STRING, distribution_id::NORMAL, 0, row_width);
+  auto const table =
+    create_random_table({cudf::type_id::STRING}, row_count{num_rows}, table_profile);
+  cudf::strings_column_view input(table->view().column(0));
+
+  state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
+  // gather some throughput statistics as well
+  auto chars_size = input.chars_size();
+  state.add_global_memory_reads<nvbench::int8_t>(chars_size);  // all bytes are read;
+  state.add_global_memory_writes<nvbench::int32_t>(num_rows);  // output is an integer per row
+
+  state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
+    auto result = cudf::strings::count_characters(input);
+  });
+}
+
+NVBENCH_BENCH(bench_lengths)
+  .set_name("lengths")
+  .add_int64_axis("row_width", {32, 64, 128, 256, 512, 1024, 2048, 4096})
+  .add_int64_axis("num_rows", {4096, 32768, 262144, 2097152, 16777216});
diff --git a/cpp/benchmarks/string/like.cpp b/cpp/benchmarks/string/like.cpp
new file mode 100644
index 0000000..6ac8324
--- /dev/null
+++ b/cpp/benchmarks/string/like.cpp
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include + +#include +#include +#include +#include +#include +#include + +#include + +namespace { +std::unique_ptr build_input_column(cudf::size_type n_rows, + cudf::size_type row_width, + int32_t hit_rate) +{ + // build input table using the following data + auto raw_data = cudf::test::strings_column_wrapper( + { + "123 abc 4567890 DEFGHI 0987 5W43", // matches always; + "012345 6789 01234 56789 0123 456", // the rest do not match + "abc 4567890 DEFGHI 0987 Wxyz 123", + "abcdefghijklmnopqrstuvwxyz 01234", + "", + "AbcéDEFGHIJKLMNOPQRSTUVWXYZ 01", + "9876543210,abcdefghijklmnopqrstU", + "9876543210,abcdefghijklmnopqrstU", + "123 édf 4567890 DéFG 0987 X5", + "1", + }) + .release(); + if (row_width / 32 > 1) { + std::vector columns; + for (int i = 0; i < row_width / 32; ++i) { + columns.push_back(raw_data->view()); + } + raw_data = cudf::strings::concatenate(cudf::table_view(columns)); + } + auto data_view = raw_data->view(); + + // compute number of rows in n_rows that should match + auto matches = static_cast(n_rows * hit_rate) / 100; + + // Create a randomized gather-map to build a column out of the strings in data. 
+ data_profile gather_profile = + data_profile_builder().cardinality(0).null_probability(0.0).distribution( + cudf::type_id::INT32, distribution_id::UNIFORM, 1, data_view.size() - 1); + auto gather_table = + create_random_table({cudf::type_id::INT32}, row_count{n_rows}, gather_profile); + gather_table->get_column(0).set_null_mask(rmm::device_buffer{}, 0); + + // Create scatter map by placing 0-index values throughout the gather-map + auto scatter_data = cudf::sequence( + matches, cudf::numeric_scalar(0), cudf::numeric_scalar(n_rows / matches)); + auto zero_scalar = cudf::numeric_scalar(0); + auto table = cudf::scatter({zero_scalar}, scatter_data->view(), gather_table->view()); + auto gather_map = table->view().column(0); + table = cudf::gather(cudf::table_view({data_view}), gather_map); + + return std::move(table->release().front()); +} + +} // namespace + +static void bench_like(nvbench::state& state) +{ + auto const n_rows = static_cast(state.get_int64("num_rows")); + auto const row_width = static_cast(state.get_int64("row_width")); + auto const hit_rate = static_cast(state.get_int64("hit_rate")); + + if (static_cast(n_rows) * static_cast(row_width) >= + static_cast(std::numeric_limits::max())) { + state.skip("Skip benchmarks greater than size_type limit"); + } + + auto col = build_input_column(n_rows, row_width, hit_rate); + auto input = cudf::strings_column_view(col->view()); + + // This pattern forces reading the entire target string (when matched expected) + auto pattern = std::string("% 5W4_"); // regex equivalent: ".* 5W4.$" + + state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); + // gather some throughput statistics as well + auto chars_size = input.chars_size(); + state.add_element_count(chars_size, "chars_size"); // number of bytes; + state.add_global_memory_reads(chars_size); // all bytes are read; + state.add_global_memory_writes(n_rows); // writes are BOOL8 + + state.exec(nvbench::exec_tag::sync, + 
[&](nvbench::launch& launch) { auto result = cudf::strings::like(input, pattern); }); +} + +NVBENCH_BENCH(bench_like) + .set_name("strings_like") + .add_int64_axis("row_width", {32, 64, 128, 256, 512}) + .add_int64_axis("num_rows", {32768, 262144, 2097152, 16777216}) + .add_int64_axis("hit_rate", {10, 25, 70, 100}); diff --git a/cpp/benchmarks/string/repeat_strings.cpp b/cpp/benchmarks/string/repeat_strings.cpp new file mode 100644 index 0000000..9264552 --- /dev/null +++ b/cpp/benchmarks/string/repeat_strings.cpp @@ -0,0 +1,112 @@ +/* + * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "string_bench_args.hpp" + +#include +#include +#include + +#include +#include +#include + +static constexpr cudf::size_type default_repeat_times = 16; +static constexpr cudf::size_type min_repeat_times = -16; +static constexpr cudf::size_type max_repeat_times = 16; + +static std::unique_ptr create_data_table(cudf::size_type n_cols, + cudf::size_type n_rows, + cudf::size_type max_str_length) +{ + CUDF_EXPECTS(n_cols == 1 || n_cols == 2, "Invalid number of columns."); + + std::vector dtype_ids{cudf::type_id::STRING}; + auto builder = data_profile_builder().distribution( + cudf::type_id::STRING, distribution_id::NORMAL, 0, max_str_length); + + if (n_cols == 2) { + dtype_ids.push_back(cudf::type_id::INT32); + builder.distribution( + cudf::type_id::INT32, distribution_id::NORMAL, min_repeat_times, max_repeat_times); + } + + return create_random_table(dtype_ids, row_count{n_rows}, data_profile{builder}); +} + +static void BM_repeat_strings_scalar_times(benchmark::State& state) +{ + auto const n_rows = static_cast(state.range(0)); + auto const max_str_length = static_cast(state.range(1)); + auto const table = create_data_table(1, n_rows, max_str_length); + auto const strings_col = cudf::strings_column_view(table->view().column(0)); + + for ([[maybe_unused]] auto _ : state) { + [[maybe_unused]] cuda_event_timer raii(state, true, cudf::get_default_stream()); + cudf::strings::repeat_strings(strings_col, default_repeat_times); + } + + state.SetBytesProcessed(state.iterations() * strings_col.chars_size()); +} + +static void BM_repeat_strings_column_times(benchmark::State& state) +{ + auto const n_rows = static_cast(state.range(0)); + auto const max_str_length = static_cast(state.range(1)); + auto const table = create_data_table(2, n_rows, max_str_length); + auto const strings_col = cudf::strings_column_view(table->view().column(0)); + auto const repeat_times_col = table->view().column(1); + + for ([[maybe_unused]] auto _ : state) { + [[maybe_unused]] 
cuda_event_timer raii(state, true, cudf::get_default_stream()); + cudf::strings::repeat_strings(strings_col, repeat_times_col); + } + + state.SetBytesProcessed(state.iterations() * + (strings_col.chars_size() + repeat_times_col.size() * sizeof(int32_t))); +} + +static void generate_bench_args(benchmark::internal::Benchmark* b) +{ + int const min_rows = 1 << 8; + int const max_rows = 1 << 18; + int const row_mult = 4; + int const min_strlen = 1 << 4; + int const max_strlen = 1 << 8; + int const len_mult = 4; + generate_string_bench_args(b, min_rows, max_rows, row_mult, min_strlen, max_strlen, len_mult); +} + +class RepeatStrings : public cudf::benchmark {}; + +#define REPEAT_STRINGS_SCALAR_TIMES_BENCHMARK_DEFINE(name) \ + BENCHMARK_DEFINE_F(RepeatStrings, name) \ + (::benchmark::State & st) { BM_repeat_strings_scalar_times(st); } \ + BENCHMARK_REGISTER_F(RepeatStrings, name) \ + ->Apply(generate_bench_args) \ + ->UseManualTime() \ + ->Unit(benchmark::kMillisecond); + +#define REPEAT_STRINGS_COLUMN_TIMES_BENCHMARK_DEFINE(name) \ + BENCHMARK_DEFINE_F(RepeatStrings, name) \ + (::benchmark::State & st) { BM_repeat_strings_column_times(st); } \ + BENCHMARK_REGISTER_F(RepeatStrings, name) \ + ->Apply(generate_bench_args) \ + ->UseManualTime() \ + ->Unit(benchmark::kMillisecond); + +REPEAT_STRINGS_SCALAR_TIMES_BENCHMARK_DEFINE(scalar_times) +REPEAT_STRINGS_COLUMN_TIMES_BENCHMARK_DEFINE(column_times) diff --git a/cpp/benchmarks/string/replace.cpp b/cpp/benchmarks/string/replace.cpp new file mode 100644 index 0000000..5ddf09f --- /dev/null +++ b/cpp/benchmarks/string/replace.cpp @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "string_bench_args.hpp" + +#include +#include +#include + +#include + +#include +#include +#include +#include + +#include + +class StringReplace : public cudf::benchmark {}; + +enum replace_type { scalar, slice, multi }; + +static void BM_replace(benchmark::State& state, replace_type rt) +{ + cudf::size_type const n_rows{static_cast(state.range(0))}; + cudf::size_type const max_str_length{static_cast(state.range(1))}; + data_profile const profile = data_profile_builder().distribution( + cudf::type_id::STRING, distribution_id::NORMAL, 0, max_str_length); + auto const column = create_random_column(cudf::type_id::STRING, row_count{n_rows}, profile); + cudf::strings_column_view input(column->view()); + cudf::string_scalar target("+"); + cudf::string_scalar repl(""); + cudf::test::strings_column_wrapper targets({"+", "-"}); + cudf::test::strings_column_wrapper repls({"", ""}); + + for (auto _ : state) { + cuda_event_timer raii(state, true, cudf::get_default_stream()); + switch (rt) { + case scalar: cudf::strings::replace(input, target, repl); break; + case slice: cudf::strings::replace_slice(input, repl, 1, 10); break; + case multi: + cudf::strings::replace( + input, cudf::strings_column_view(targets), cudf::strings_column_view(repls)); + break; + } + } + + state.SetBytesProcessed(state.iterations() * input.chars_size()); +} + +static void generate_bench_args(benchmark::internal::Benchmark* b) +{ + int const min_rows = 1 << 12; + int const max_rows = 1 << 24; + int const row_mult = 8; + int const min_rowlen = 1 << 5; + int const 
max_rowlen = 1 << 13; + int const len_mult = 2; + generate_string_bench_args(b, min_rows, max_rows, row_mult, min_rowlen, max_rowlen, len_mult); +} + +#define STRINGS_BENCHMARK_DEFINE(name) \ + BENCHMARK_DEFINE_F(StringReplace, name) \ + (::benchmark::State & st) { BM_replace(st, replace_type::name); } \ + BENCHMARK_REGISTER_F(StringReplace, name) \ + ->Apply(generate_bench_args) \ + ->UseManualTime() \ + ->Unit(benchmark::kMillisecond); + +STRINGS_BENCHMARK_DEFINE(scalar) +STRINGS_BENCHMARK_DEFINE(slice) +STRINGS_BENCHMARK_DEFINE(multi) diff --git a/cpp/benchmarks/string/replace_re.cpp b/cpp/benchmarks/string/replace_re.cpp new file mode 100644 index 0000000..b8efd76 --- /dev/null +++ b/cpp/benchmarks/string/replace_re.cpp @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include + +#include +#include +#include +#include + +#include + +static void bench_replace(nvbench::state& state) +{ + auto const n_rows = static_cast(state.get_int64("num_rows")); + auto const row_width = static_cast(state.get_int64("row_width")); + auto const rtype = state.get_string("type"); + + if (static_cast(n_rows) * static_cast(row_width) >= + static_cast(std::numeric_limits::max())) { + state.skip("Skip benchmarks greater than size_type limit"); + } + + data_profile const profile = data_profile_builder().distribution( + cudf::type_id::STRING, distribution_id::NORMAL, 0, row_width); + auto const column = create_random_column(cudf::type_id::STRING, row_count{n_rows}, profile); + cudf::strings_column_view input(column->view()); + + auto program = cudf::strings::regex_program::create("(\\d+)"); + + auto chars_size = input.chars_size(); + state.add_element_count(chars_size, "chars_size"); + state.add_global_memory_reads(chars_size); + state.add_global_memory_writes(chars_size); + + if (rtype == "backref") { + auto replacement = std::string("#\\1X"); + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + cudf::strings::replace_with_backrefs(input, *program, replacement); + }); + } else { + auto replacement = std::string("77"); + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + cudf::strings::replace_re(input, *program, replacement); + }); + } +} + +NVBENCH_BENCH(bench_replace) + .set_name("replace_re") + .add_int64_axis("row_width", {32, 64, 128, 256, 512}) + .add_int64_axis("num_rows", {32768, 262144, 2097152, 16777216}) + .add_string_axis("type", {"replace", "backref"}); diff --git a/cpp/benchmarks/string/reverse.cpp b/cpp/benchmarks/string/reverse.cpp new file mode 100644 index 0000000..31cd463 --- /dev/null +++ b/cpp/benchmarks/string/reverse.cpp @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2022-2023, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include +#include +#include + +#include + +static void bench_reverse(nvbench::state& state) +{ + auto const num_rows = static_cast(state.get_int64("num_rows")); + auto const row_width = static_cast(state.get_int64("row_width")); + + if (static_cast(num_rows) * static_cast(row_width) >= + static_cast(std::numeric_limits::max())) { + state.skip("Skip benchmarks greater than size_type limit"); + } + + data_profile const table_profile = data_profile_builder().distribution( + cudf::type_id::STRING, distribution_id::NORMAL, 0, row_width); + auto const table = + create_random_table({cudf::type_id::STRING}, row_count{num_rows}, table_profile); + cudf::strings_column_view input(table->view().column(0)); + + state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); + // gather some throughput statistics as well + auto chars_size = input.chars_size(); + state.add_element_count(chars_size, "chars_size"); // number of bytes; + state.add_global_memory_reads(chars_size); // all bytes are read; + state.add_global_memory_writes(chars_size); // all bytes are written + + state.exec(nvbench::exec_tag::sync, + [&](nvbench::launch& launch) { auto result = cudf::strings::reverse(input); }); +} + +NVBENCH_BENCH(bench_reverse) + .set_name("reverse") + .add_int64_axis("row_width", {8, 16, 32, 64, 128}) + .add_int64_axis("num_rows", {4096, 32768, 262144, 2097152, 16777216}); diff 
--git a/cpp/benchmarks/string/slice.cpp b/cpp/benchmarks/string/slice.cpp new file mode 100644 index 0000000..6c1d7d9 --- /dev/null +++ b/cpp/benchmarks/string/slice.cpp @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "string_bench_args.hpp" + +#include +#include +#include + +#include + +#include +#include +#include +#include + +#include + +#include + +class StringSlice : public cudf::benchmark {}; + +enum slice_type { position, multi_position }; + +static void BM_slice(benchmark::State& state, slice_type rt) +{ + cudf::size_type const n_rows{static_cast(state.range(0))}; + cudf::size_type const max_str_length{static_cast(state.range(1))}; + data_profile const profile = data_profile_builder().distribution( + cudf::type_id::STRING, distribution_id::NORMAL, 0, max_str_length); + auto const column = create_random_column(cudf::type_id::STRING, row_count{n_rows}, profile); + cudf::strings_column_view input(column->view()); + auto starts_itr = thrust::constant_iterator(max_str_length / 3); + auto stops_itr = thrust::constant_iterator(max_str_length / 2); + cudf::test::fixed_width_column_wrapper starts(starts_itr, starts_itr + n_rows); + cudf::test::fixed_width_column_wrapper stops(stops_itr, stops_itr + n_rows); + + for (auto _ : state) { + cuda_event_timer raii(state, true, cudf::get_default_stream()); + switch (rt) { + case position: + cudf::strings::slice_strings(input, 
max_str_length / 3, max_str_length / 2); + break; + case multi_position: cudf::strings::slice_strings(input, starts, stops); break; + } + } + + state.SetBytesProcessed(state.iterations() * input.chars_size()); +} + +static void generate_bench_args(benchmark::internal::Benchmark* b) +{ + int const min_rows = 1 << 12; + int const max_rows = 1 << 24; + int const row_mult = 8; + int const min_rowlen = 1 << 5; + int const max_rowlen = 1 << 13; + int const len_mult = 2; + generate_string_bench_args(b, min_rows, max_rows, row_mult, min_rowlen, max_rowlen, len_mult); +} + +#define STRINGS_BENCHMARK_DEFINE(name) \ + BENCHMARK_DEFINE_F(StringSlice, name) \ + (::benchmark::State & st) { BM_slice(st, slice_type::name); } \ + BENCHMARK_REGISTER_F(StringSlice, name) \ + ->Apply(generate_bench_args) \ + ->UseManualTime() \ + ->Unit(benchmark::kMillisecond); + +STRINGS_BENCHMARK_DEFINE(position) +STRINGS_BENCHMARK_DEFINE(multi_position) diff --git a/cpp/benchmarks/string/split.cpp b/cpp/benchmarks/string/split.cpp new file mode 100644 index 0000000..eb724fa --- /dev/null +++ b/cpp/benchmarks/string/split.cpp @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +#include + +#include +#include +#include +#include + +#include + +static void bench_split(nvbench::state& state) +{ + auto const num_rows = static_cast(state.get_int64("num_rows")); + auto const row_width = static_cast(state.get_int64("row_width")); + auto const stype = state.get_string("type"); + + if (static_cast(num_rows) * static_cast(row_width) >= + static_cast(std::numeric_limits::max())) { + state.skip("Skip benchmarks greater than size_type limit"); + } + + data_profile const profile = data_profile_builder().distribution( + cudf::type_id::STRING, distribution_id::NORMAL, 0, row_width); + auto const column = create_random_column(cudf::type_id::STRING, row_count{num_rows}, profile); + cudf::strings_column_view input(column->view()); + cudf::string_scalar target("+"); + + state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); + // gather some throughput statistics as well + auto chars_size = input.chars_size(); + state.add_element_count(chars_size, "chars_size"); // number of bytes; + state.add_global_memory_reads(chars_size); // all bytes are read; + state.add_global_memory_writes(chars_size); // all bytes are written + + if (stype == "split") { + state.exec(nvbench::exec_tag::sync, + [&](nvbench::launch& launch) { cudf::strings::split(input, target); }); + } else if (stype == "split_ws") { + state.exec(nvbench::exec_tag::sync, + [&](nvbench::launch& launch) { cudf::strings::split(input); }); + } else if (stype == "record") { + state.exec(nvbench::exec_tag::sync, + [&](nvbench::launch& launch) { cudf::strings::split_record(input, target); }); + } else { + state.exec(nvbench::exec_tag::sync, + [&](nvbench::launch& launch) { cudf::strings::split_record(input); }); + } +} + +NVBENCH_BENCH(bench_split) + .set_name("split") + .add_int64_axis("row_width", {32, 64, 128, 256, 512, 1024, 2048}) + .add_int64_axis("num_rows", {4096, 32768, 262144, 2097152, 16777216}) + .add_string_axis("type", {"split", "split_ws", 
"record", "record_ws"}); diff --git a/cpp/benchmarks/string/split_re.cpp b/cpp/benchmarks/string/split_re.cpp new file mode 100644 index 0000000..67aa6f0 --- /dev/null +++ b/cpp/benchmarks/string/split_re.cpp @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include + +#include +#include +#include +#include + +#include + +static void bench_split(nvbench::state& state) +{ + auto const num_rows = static_cast(state.get_int64("num_rows")); + auto const row_width = static_cast(state.get_int64("row_width")); + + if (static_cast(num_rows) * static_cast(row_width) >= + static_cast(std::numeric_limits::max())) { + state.skip("Skip benchmarks greater than size_type limit"); + } + + auto prog = cudf::strings::regex_program::create("\\d+"); + + data_profile const profile = data_profile_builder().distribution( + cudf::type_id::STRING, distribution_id::NORMAL, 0, row_width); + auto const column = create_random_column(cudf::type_id::STRING, row_count{num_rows}, profile); + cudf::strings_column_view input(column->view()); + + state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); + // gather some throughput statistics as well + auto chars_size = input.chars_size(); + state.add_element_count(chars_size, "chars_size"); // number of bytes; + state.add_global_memory_reads(chars_size); // all bytes are read; + state.add_global_memory_writes(chars_size); // all 
bytes are written + + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + auto result = cudf::strings::split_record_re(input, *prog); + }); +} + +NVBENCH_BENCH(bench_split) + .set_name("split_re") + .add_int64_axis("row_width", {32, 64, 128, 256, 512, 1024, 2048}) + .add_int64_axis("num_rows", {4096, 32768, 262144, 2097152, 16777216}); diff --git a/cpp/benchmarks/string/string_bench_args.hpp b/cpp/benchmarks/string/string_bench_args.hpp new file mode 100644 index 0000000..92a4637 --- /dev/null +++ b/cpp/benchmarks/string/string_bench_args.hpp @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include + +#include + +#include + +/** + * @brief Generate row count and row length argument ranges for a string benchmark. + * + * Generates a series of row count and row length arguments for string benchmarks. + * Combinations of row count and row length that would exceed the maximum string character + * column data length are not generated. + * + * @param b Benchmark to update with row count and row length arguments. + * @param min_rows Minimum row count argument to generate. + * @param max_rows Maximum row count argument to generate. + * @param rows_mult Row count multiplier to generate intermediate row count arguments. + * @param min_rowlen Minimum row length argument to generate. + * @param max_rowlen Maximum row length argument to generate. 
+ * @param rowlen_mult Row length multiplier to generate intermediate row length arguments. + */ +inline void generate_string_bench_args(benchmark::internal::Benchmark* b, + int min_rows, + int max_rows, + int rows_mult, + int min_rowlen, + int max_rowlen, + int rowlen_mult) +{ + for (int row_count = min_rows; row_count <= max_rows; row_count *= rows_mult) { + for (int rowlen = min_rowlen; rowlen <= max_rowlen; rowlen *= rowlen_mult) { + // avoid generating combinations that exceed the cudf column limit + size_t total_chars = static_cast(row_count) * rowlen; + if (total_chars < static_cast(std::numeric_limits::max())) { + b->Args({row_count, rowlen}); + } + } + } +} diff --git a/cpp/benchmarks/string/translate.cpp b/cpp/benchmarks/string/translate.cpp new file mode 100644 index 0000000..00ca745 --- /dev/null +++ b/cpp/benchmarks/string/translate.cpp @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "string_bench_args.hpp" + +#include +#include +#include + +#include + +#include +#include +#include + +#include + +#include + +class StringTranslate : public cudf::benchmark {}; + +using entry_type = std::pair; + +static void BM_translate(benchmark::State& state, int entry_count) +{ + cudf::size_type const n_rows{static_cast(state.range(0))}; + cudf::size_type const max_str_length{static_cast(state.range(1))}; + data_profile const profile = data_profile_builder().distribution( + cudf::type_id::STRING, distribution_id::NORMAL, 0, max_str_length); + auto const column = create_random_column(cudf::type_id::STRING, row_count{n_rows}, profile); + cudf::strings_column_view input(column->view()); + + std::vector entries(entry_count); + std::transform(thrust::counting_iterator(0), + thrust::counting_iterator(entry_count), + entries.begin(), + [](auto idx) -> entry_type { + return entry_type{'!' + idx, '~' - idx}; + }); + + for (auto _ : state) { + cuda_event_timer raii(state, true, cudf::get_default_stream()); + cudf::strings::translate(input, entries); + } + + state.SetBytesProcessed(state.iterations() * input.chars_size()); +} + +static void generate_bench_args(benchmark::internal::Benchmark* b) +{ + int const min_rows = 1 << 12; + int const max_rows = 1 << 24; + int const row_mult = 8; + int const min_rowlen = 1 << 5; + int const max_rowlen = 1 << 13; + int const len_mult = 4; + generate_string_bench_args(b, min_rows, max_rows, row_mult, min_rowlen, max_rowlen, len_mult); +} + +#define STRINGS_BENCHMARK_DEFINE(name, entries) \ + BENCHMARK_DEFINE_F(StringTranslate, name) \ + (::benchmark::State & st) { BM_translate(st, entries); } \ + BENCHMARK_REGISTER_F(StringTranslate, name) \ + ->Apply(generate_bench_args) \ + ->UseManualTime() \ + ->Unit(benchmark::kMillisecond); + +STRINGS_BENCHMARK_DEFINE(translate_small, 5) +STRINGS_BENCHMARK_DEFINE(translate_medium, 25) +STRINGS_BENCHMARK_DEFINE(translate_large, 50) diff --git 
a/cpp/benchmarks/string/url_decode.cu b/cpp/benchmarks/string/url_decode.cu new file mode 100644 index 0000000..9ede89b --- /dev/null +++ b/cpp/benchmarks/string/url_decode.cu @@ -0,0 +1,112 @@ +/* + * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +struct url_string_generator { + char* chars; + double esc_seq_chance; + thrust::minstd_rand engine; + thrust::uniform_real_distribution esc_seq_dist; + url_string_generator(char* c, double esc_seq_chance, thrust::minstd_rand& engine) + : chars(c), esc_seq_chance(esc_seq_chance), engine(engine), esc_seq_dist(0, 1) + { + } + + __device__ void operator()(thrust::tuple str_begin_end) + { + auto begin = thrust::get<0>(str_begin_end); + auto end = thrust::get<1>(str_begin_end); + engine.discard(begin); + for (auto i = begin; i < end; ++i) { + if (esc_seq_dist(engine) < esc_seq_chance and i < end - 3) { + chars[i] = '%'; + chars[i + 1] = '2'; + chars[i + 2] = '0'; + i += 2; + } else { + chars[i] = 'a'; + } + } + } +}; + +auto generate_column(cudf::size_type num_rows, cudf::size_type chars_per_row, double esc_seq_chance) +{ + std::vector strings{std::string(chars_per_row, 'a')}; + auto col_1a = cudf::test::strings_column_wrapper(strings.begin(), strings.end()); + auto table_a = 
cudf::repeat(cudf::table_view{{col_1a}}, num_rows); + auto result_col = std::move(table_a->release()[0]); // string column with num_rows aaa... + auto chars_col = result_col->child(cudf::strings_column_view::chars_column_index).mutable_view(); + auto offset_col = result_col->child(cudf::strings_column_view::offsets_column_index).view(); + + auto engine = thrust::default_random_engine{}; + thrust::for_each_n(thrust::device, + thrust::make_zip_iterator(offset_col.begin(), + offset_col.begin() + 1), + num_rows, + url_string_generator{chars_col.begin(), esc_seq_chance, engine}); + return result_col; +} + +class UrlDecode : public cudf::benchmark {}; + +void BM_url_decode(benchmark::State& state, int esc_seq_pct) +{ + cudf::size_type const num_rows = state.range(0); + cudf::size_type const chars_per_row = state.range(1); + + auto column = generate_column(num_rows, chars_per_row, esc_seq_pct / 100.0); + auto strings_view = cudf::strings_column_view(column->view()); + + for (auto _ : state) { + cuda_event_timer raii(state, true, cudf::get_default_stream()); + auto result = cudf::strings::url_decode(strings_view); + } + + state.SetBytesProcessed(state.iterations() * num_rows * + (chars_per_row + sizeof(cudf::size_type))); +} + +#define URLD_BENCHMARK_DEFINE(esc_seq_pct) \ + BENCHMARK_DEFINE_F(UrlDecode, esc_seq_pct) \ + (::benchmark::State & st) { BM_url_decode(st, esc_seq_pct); } \ + BENCHMARK_REGISTER_F(UrlDecode, esc_seq_pct) \ + ->Args({100000000, 10}) \ + ->Args({10000000, 100}) \ + ->Args({1000000, 1000}) \ + ->Unit(benchmark::kMillisecond) \ + ->UseManualTime(); + +URLD_BENCHMARK_DEFINE(10) +URLD_BENCHMARK_DEFINE(50) diff --git a/cpp/benchmarks/synchronization/synchronization.cpp b/cpp/benchmarks/synchronization/synchronization.cpp new file mode 100644 index 0000000..5993bb2 --- /dev/null +++ b/cpp/benchmarks/synchronization/synchronization.cpp @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2019-2023, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "synchronization.hpp" + +#include + +#include +#include + +cuda_event_timer::cuda_event_timer(benchmark::State& state, + bool flush_l2_cache, + rmm::cuda_stream_view stream) + : stream(stream), p_state(&state) +{ + // flush all of L2$ + if (flush_l2_cache) { + int current_device = 0; + CUDF_CUDA_TRY(cudaGetDevice(¤t_device)); + + int l2_cache_bytes = 0; + CUDF_CUDA_TRY(cudaDeviceGetAttribute(&l2_cache_bytes, cudaDevAttrL2CacheSize, current_device)); + + if (l2_cache_bytes > 0) { + int const memset_value = 0; + rmm::device_buffer l2_cache_buffer(l2_cache_bytes, stream); + CUDF_CUDA_TRY( + cudaMemsetAsync(l2_cache_buffer.data(), memset_value, l2_cache_bytes, stream.value())); + } + } + + CUDF_CUDA_TRY(cudaEventCreate(&start)); + CUDF_CUDA_TRY(cudaEventCreate(&stop)); + CUDF_CUDA_TRY(cudaEventRecord(start, stream.value())); +} + +cuda_event_timer::~cuda_event_timer() +{ + CUDF_CUDA_TRY(cudaEventRecord(stop, stream.value())); + CUDF_CUDA_TRY(cudaEventSynchronize(stop)); + + float milliseconds = 0.0f; + CUDF_CUDA_TRY(cudaEventElapsedTime(&milliseconds, start, stop)); + p_state->SetIterationTime(milliseconds / (1000.0f)); + CUDF_CUDA_TRY(cudaEventDestroy(start)); + CUDF_CUDA_TRY(cudaEventDestroy(stop)); +} diff --git a/cpp/benchmarks/synchronization/synchronization.hpp b/cpp/benchmarks/synchronization/synchronization.hpp new file mode 100644 index 0000000..e56d881 --- /dev/null +++ 
b/cpp/benchmarks/synchronization/synchronization.hpp @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file synchronization.hpp + * @brief This is the header file for `cuda_event_timer`. + */ + +/** + * @brief This class serves as a wrapper for using `cudaEvent_t` as the user + * defined timer within the framework of google benchmark + * (https://github.com/google/benchmark). + * + * It is built on top of the idea of Resource acquisition is initialization + * (RAII). In the following we show a minimal example of how to use this class. + + #include + #include + + static void sample_cuda_benchmark(benchmark::State& state) { + + for (auto _ : state){ + + // default stream, could be another stream + rmm::cuda_stream_view stream{cudf::get_default_stream()}; + + // Create (Construct) an object of this class. You HAVE to pass in the + // benchmark::State object you are using. It measures the time from its + // creation to its destruction that is spent on the specified CUDA stream. + // It also clears the L2 cache by cudaMemset'ing a device buffer that is of + // the size of the L2 cache (if flush_l2_cache is set to true and there is + // an L2 cache on the current device). 
+ cuda_event_timer raii(state, true, stream); // flush_l2_cache = true + + // Now perform the operations that is to be benchmarked + sample_kernel<<<1, 256, 0, stream.value()>>>(); // Possibly launching a CUDA kernel + + } + } + + // Register the function as a benchmark. You will need to set the `UseManualTime()` + // flag in order to use the timer embedded in this class. + BENCHMARK(sample_cuda_benchmark)->UseManualTime(); + + + */ + +#pragma once + +// Google Benchmark library +#include + +#include +#include + +#include + +#include + +class cuda_event_timer { + public: + /** + * @brief This c'tor clears the L2$ by cudaMemset'ing a buffer of L2$ size + * and starts the timer. + * + * @param[in,out] state This is the benchmark::State whose timer we are going + * to update. + * @param[in] flush_l2_cache_ whether or not to flush the L2 cache before + * every iteration. + * @param[in] stream_ The CUDA stream we are measuring time on. + */ + cuda_event_timer(benchmark::State& state, + bool flush_l2_cache, + rmm::cuda_stream_view stream = cudf::get_default_stream()); + + // The user must provide a benchmark::State object to set + // the timer so we disable the default c'tor. + cuda_event_timer() = delete; + + // The d'tor stops the timer and performs a synchronization. + // Time of the benchmark::State object provided to the c'tor + // will be set to the value given by `cudaEventElapsedTime`. + ~cuda_event_timer(); + + private: + cudaEvent_t start; + cudaEvent_t stop; + rmm::cuda_stream_view stream; + benchmark::State* p_state; +}; diff --git a/cpp/benchmarks/text/edit_distance.cpp b/cpp/benchmarks/text/edit_distance.cpp new file mode 100644 index 0000000..8a8bd9a --- /dev/null +++ b/cpp/benchmarks/text/edit_distance.cpp @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include + +#include + +#include + +#include + +static void bench_edit_distance(nvbench::state& state) +{ + auto const num_rows = static_cast(state.get_int64("num_rows")); + auto const row_width = static_cast(state.get_int64("row_width")); + + if (static_cast(num_rows) * static_cast(row_width) >= + static_cast(std::numeric_limits::max())) { + state.skip("Skip benchmarks greater than size_type limit"); + } + + data_profile const strings_profile = data_profile_builder().distribution( + cudf::type_id::STRING, distribution_id::NORMAL, 0, row_width); + auto const strings_table = create_random_table( + {cudf::type_id::STRING, cudf::type_id::STRING}, row_count{num_rows}, strings_profile); + cudf::strings_column_view input1(strings_table->view().column(0)); + cudf::strings_column_view input2(strings_table->view().column(1)); + + state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); + + auto chars_size = input1.chars_size() + input2.chars_size(); + state.add_global_memory_reads(chars_size); + // output are integers (one per row) + state.add_global_memory_writes(num_rows); + + state.exec(nvbench::exec_tag::sync, + [&](nvbench::launch& launch) { auto result = nvtext::edit_distance(input1, input2); }); +} + +NVBENCH_BENCH(bench_edit_distance) + .set_name("edit_distance") + .add_int64_axis("num_rows", {1024, 4096, 8192, 16364, 32768, 262144}) + .add_int64_axis("row_width", {8, 16, 32, 64, 128, 256}); diff --git a/cpp/benchmarks/text/hash_ngrams.cpp b/cpp/benchmarks/text/hash_ngrams.cpp new file mode 
100644 index 0000000..5bbd2fc --- /dev/null +++ b/cpp/benchmarks/text/hash_ngrams.cpp @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include + +#include + +#include + +#include + +static void bench_hash_ngrams(nvbench::state& state) +{ + auto const num_rows = static_cast(state.get_int64("num_rows")); + auto const row_width = static_cast(state.get_int64("row_width")); + auto const ngrams = static_cast(state.get_int64("ngrams")); + + if (static_cast(num_rows) * static_cast(row_width) >= + static_cast(std::numeric_limits::max())) { + state.skip("Skip benchmarks greater than size_type limit"); + } + + data_profile const strings_profile = data_profile_builder().distribution( + cudf::type_id::STRING, distribution_id::NORMAL, 0, row_width); + auto const strings_table = + create_random_table({cudf::type_id::STRING}, row_count{num_rows}, strings_profile); + cudf::strings_column_view input(strings_table->view().column(0)); + + state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); + + auto chars_size = input.chars_size(); + state.add_global_memory_reads(chars_size); + // output are hashes: approximate total number of hashes + state.add_global_memory_writes(num_rows * ngrams); + + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + auto result = nvtext::hash_character_ngrams(input, ngrams); + }); +} + 
+NVBENCH_BENCH(bench_hash_ngrams) + .set_name("hash_ngrams") + .add_int64_axis("num_rows", {1024, 4096, 8192, 16364, 32768, 262144}) + .add_int64_axis("row_width", {128, 512, 2048}) + .add_int64_axis("ngrams", {5, 10}); diff --git a/cpp/benchmarks/text/jaccard.cpp b/cpp/benchmarks/text/jaccard.cpp new file mode 100644 index 0000000..70470b8 --- /dev/null +++ b/cpp/benchmarks/text/jaccard.cpp @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +#include + +#include + +#include + +#include + +static void bench_jaccard(nvbench::state& state) +{ + auto const num_rows = static_cast(state.get_int64("num_rows")); + auto const row_width = static_cast(state.get_int64("row_width")); + auto const substring_width = static_cast(state.get_int64("substring_width")); + + if (static_cast(num_rows) * static_cast(row_width) >= + static_cast(std::numeric_limits::max())) { + state.skip("Skip benchmarks greater than size_type limit"); + } + + data_profile const strings_profile = + data_profile_builder() + .distribution(cudf::type_id::STRING, distribution_id::NORMAL, 0, row_width) + .no_validity(); + auto const input_table = create_random_table( + {cudf::type_id::STRING, cudf::type_id::STRING}, row_count{num_rows}, strings_profile); + cudf::strings_column_view input1(input_table->view().column(0)); + cudf::strings_column_view input2(input_table->view().column(1)); + + state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); + + auto chars_size = input1.chars_size() + input2.chars_size(); + state.add_global_memory_reads(chars_size); + state.add_global_memory_writes(num_rows); + + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + auto result = nvtext::jaccard_index(input1, input2, substring_width); + }); +} + +NVBENCH_BENCH(bench_jaccard) + .set_name("jaccard") + .add_int64_axis("num_rows", {1024, 4096, 8192, 16364, 32768, 262144}) + .add_int64_axis("row_width", {128, 512, 2048}) + .add_int64_axis("substring_width", {5, 10}); diff --git a/cpp/benchmarks/text/minhash.cpp b/cpp/benchmarks/text/minhash.cpp new file mode 100644 index 0000000..1b60caa --- /dev/null +++ b/cpp/benchmarks/text/minhash.cpp @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include + +#include + +#include + +#include + +static void bench_minhash(nvbench::state& state) +{ + auto const num_rows = static_cast(state.get_int64("num_rows")); + auto const row_width = static_cast(state.get_int64("row_width")); + auto const hash_width = static_cast(state.get_int64("hash_width")); + auto const seed_count = static_cast(state.get_int64("seed_count")); + auto const base64 = state.get_int64("hash_type") == 64; + + if (static_cast(num_rows) * static_cast(row_width) >= + static_cast(std::numeric_limits::max())) { + state.skip("Skip benchmarks greater than size_type limit"); + } + + data_profile const strings_profile = data_profile_builder().distribution( + cudf::type_id::STRING, distribution_id::NORMAL, 0, row_width); + auto const strings_table = + create_random_table({cudf::type_id::STRING}, row_count{num_rows}, strings_profile); + cudf::strings_column_view input(strings_table->view().column(0)); + + data_profile const seeds_profile = data_profile_builder().null_probability(0).distribution( + cudf::type_to_id(), distribution_id::NORMAL, 0, row_width); + auto const seed_type = base64 ? 
cudf::type_id::UINT64 : cudf::type_id::UINT32; + auto const seeds_table = create_random_table({seed_type}, row_count{seed_count}, seeds_profile); + auto seeds = seeds_table->get_column(0); + seeds.set_null_mask(rmm::device_buffer{}, 0); + + state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); + + auto chars_size = input.chars_size(); + state.add_global_memory_reads(chars_size); + state.add_global_memory_writes(num_rows); // output are hashes + + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + auto result = base64 ? nvtext::minhash64(input, seeds.view(), hash_width) + : nvtext::minhash(input, seeds.view(), hash_width); + }); +} + +NVBENCH_BENCH(bench_minhash) + .set_name("minhash") + .add_int64_axis("num_rows", {1024, 8192, 16364, 131072}) + .add_int64_axis("row_width", {128, 512, 2048}) + .add_int64_axis("hash_width", {5, 10}) + .add_int64_axis("seed_count", {2, 26}) + .add_int64_axis("hash_type", {32, 64}); diff --git a/cpp/benchmarks/text/ngrams.cpp b/cpp/benchmarks/text/ngrams.cpp new file mode 100644 index 0000000..f3fd5cc --- /dev/null +++ b/cpp/benchmarks/text/ngrams.cpp @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include + +#include +#include + +#include + +class TextNGrams : public cudf::benchmark {}; + +enum class ngrams_type { tokens, characters }; + +static void BM_ngrams(benchmark::State& state, ngrams_type nt) +{ + auto const n_rows = static_cast(state.range(0)); + auto const max_str_length = static_cast(state.range(1)); + data_profile const profile = data_profile_builder().distribution( + cudf::type_id::STRING, distribution_id::NORMAL, 0, max_str_length); + auto const column = create_random_column(cudf::type_id::STRING, row_count{n_rows}, profile); + cudf::strings_column_view input(column->view()); + auto const separator = cudf::string_scalar("_"); + + for (auto _ : state) { + cuda_event_timer raii(state, true); + switch (nt) { + case ngrams_type::tokens: nvtext::generate_ngrams(input, 2, separator); break; + case ngrams_type::characters: nvtext::generate_character_ngrams(input); break; + } + } + + state.SetBytesProcessed(state.iterations() * input.chars_size()); +} + +static void generate_bench_args(benchmark::internal::Benchmark* b) +{ + int const min_rows = 1 << 12; + int const max_rows = 1 << 24; + int const row_mult = 8; + int const min_rowlen = 5; + int const max_rowlen = 40; + int const len_mult = 2; + generate_string_bench_args(b, min_rows, max_rows, row_mult, min_rowlen, max_rowlen, len_mult); +} + +#define NVTEXT_BENCHMARK_DEFINE(name) \ + BENCHMARK_DEFINE_F(TextNGrams, name) \ + (::benchmark::State & st) { BM_ngrams(st, ngrams_type::name); } \ + BENCHMARK_REGISTER_F(TextNGrams, name) \ + ->Apply(generate_bench_args) \ + ->UseManualTime() \ + ->Unit(benchmark::kMillisecond); + +NVTEXT_BENCHMARK_DEFINE(tokens) +NVTEXT_BENCHMARK_DEFINE(characters) diff --git a/cpp/benchmarks/text/normalize.cpp b/cpp/benchmarks/text/normalize.cpp new file mode 100644 index 0000000..6878fa4 --- /dev/null +++ b/cpp/benchmarks/text/normalize.cpp @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2021-2023, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include +#include +#include + +#include + +#include + +static void bench_normalize(nvbench::state& state) +{ + auto const num_rows = static_cast(state.get_int64("num_rows")); + auto const row_width = static_cast(state.get_int64("row_width")); + auto const normalize_type = state.get_string("type"); + + if (static_cast(num_rows) * static_cast(row_width) >= + static_cast(std::numeric_limits::max())) { + state.skip("Skip benchmarks greater than size_type limit"); + } + + data_profile const profile = data_profile_builder().distribution( + cudf::type_id::STRING, distribution_id::NORMAL, 0, row_width); + auto const column = create_random_column(cudf::type_id::STRING, row_count{num_rows}, profile); + cudf::strings_column_view input(column->view()); + + state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); + + auto chars_size = input.chars_size(); + state.add_global_memory_reads(chars_size); + state.add_global_memory_writes(chars_size); + + if (normalize_type == "spaces") { + state.exec(nvbench::exec_tag::sync, + [&](nvbench::launch& launch) { auto result = nvtext::normalize_spaces(input); }); + } else { + bool const to_lower = (normalize_type == "to_lower"); + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + auto result = nvtext::normalize_characters(input, to_lower); + }); + } +} + +NVBENCH_BENCH(bench_normalize) + 
.set_name("normalize") + .add_int64_axis("row_width", {32, 64, 128, 256, 512, 1024}) + .add_int64_axis("num_rows", {4096, 32768, 262144, 2097152, 16777216}) + .add_string_axis("type", {"spaces", "characters", "to_lower"}); diff --git a/cpp/benchmarks/text/replace.cpp b/cpp/benchmarks/text/replace.cpp new file mode 100644 index 0000000..257f62a --- /dev/null +++ b/cpp/benchmarks/text/replace.cpp @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +#include + +#include + +#include + +#include + +#include + +static void bench_replace(nvbench::state& state) +{ + auto const num_rows = static_cast(state.get_int64("num_rows")); + auto const row_width = static_cast(state.get_int64("row_width")); + + if (static_cast(num_rows) * static_cast(row_width) >= + static_cast(std::numeric_limits::max())) { + state.skip("Skip benchmarks greater than size_type limit"); + } + + std::vector words{" ", "one ", "two ", "three ", "four ", + "five ", "six ", "sevén ", "eight ", "nine ", + "ten ", "eleven ", "twelve ", "thirteen ", "fourteen ", + "fifteen ", "sixteen ", "seventeen ", "eighteen ", "nineteen "}; + + std::default_random_engine generator; + std::uniform_int_distribution tokens_dist(0, words.size() - 1); + std::string row; // build a row of random tokens + while (static_cast(row.size()) < row_width) + row += words[tokens_dist(generator)]; + + std::uniform_int_distribution position_dist(0, 16); + + auto elements = cudf::detail::make_counting_transform_iterator( + 0, [&](auto idx) { return row.c_str() + position_dist(generator); }); + cudf::test::strings_column_wrapper input(elements, elements + num_rows); + cudf::strings_column_view view(input); + + cudf::test::strings_column_wrapper targets({"one", "two", "sevén", "zero"}); + cudf::test::strings_column_wrapper replacements({"1", "2", "7", "0"}); + + state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); + + auto chars_size = view.chars_size(); + state.add_global_memory_reads(chars_size); + state.add_global_memory_writes(chars_size); + + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + auto result = nvtext::replace_tokens( + view, cudf::strings_column_view(targets), cudf::strings_column_view(replacements)); + }); +} + +NVBENCH_BENCH(bench_replace) + .set_name("replace") + .add_int64_axis("row_width", {32, 64, 128, 256, 512, 1024}) + .add_int64_axis("num_rows", {4096, 32768, 262144, 2097152, 
16777216}); diff --git a/cpp/benchmarks/text/subword.cpp b/cpp/benchmarks/text/subword.cpp new file mode 100644 index 0000000..1dd7322 --- /dev/null +++ b/cpp/benchmarks/text/subword.cpp @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include + +#include +#include + +#include +#include +#include +#include + +static std::string create_hash_vocab_file() +{ + std::string dir_template{std::filesystem::temp_directory_path().string()}; + if (char const* env_p = std::getenv("WORKSPACE")) dir_template = env_p; + std::string hash_file = dir_template + "/hash_vocab.txt"; + // create a fake hashed vocab text file for this test + // this only works with words in the strings in the benchmark code below + std::vector> coefficients(23, {65559, 0}); + std::ofstream outfile(hash_file, std::ofstream::out); + outfile << "1\n0\n" << coefficients.size() << "\n"; + for (auto c : coefficients) + outfile << c.first << " " << c.second << "\n"; + std::vector hash_table(23, 0); + outfile << hash_table.size() << "\n"; + hash_table[0] = 3015668L; + hash_table[1] = 6205475701751155871L; + hash_table[5] = 6358029; + hash_table[16] = 451412625363L; + hash_table[20] = 6206321707968235495L; + for (auto h : hash_table) + outfile << h << "\n"; + outfile << "100\n101\n102\n\n"; + return hash_file; +} + +static void BM_subword_tokenizer(benchmark::State& state) +{ + auto const nrows = 
static_cast(state.range(0)); + std::vector h_strings(nrows, "This is a test "); + cudf::test::strings_column_wrapper strings(h_strings.begin(), h_strings.end()); + std::string hash_file = create_hash_vocab_file(); + std::vector offsets{14}; + uint32_t max_sequence_length = 64; + uint32_t stride = 48; + uint32_t do_truncate = 0; + uint32_t do_lower = 1; + // + auto vocab = nvtext::load_vocabulary_file(hash_file); + for (auto _ : state) { + cuda_event_timer raii(state, true); + auto result = nvtext::subword_tokenize(cudf::strings_column_view{strings}, + *vocab, + max_sequence_length, + stride, + do_lower, + do_truncate); + } +} + +class Subword : public cudf::benchmark {}; + +#define SUBWORD_BM_BENCHMARK_DEFINE(name) \ + BENCHMARK_DEFINE_F(Subword, name)(::benchmark::State & state) { BM_subword_tokenizer(state); } \ + BENCHMARK_REGISTER_F(Subword, name) \ + ->RangeMultiplier(2) \ + ->Range(1 << 10, 1 << 17) \ + ->UseManualTime() \ + ->Unit(benchmark::kMillisecond); + +SUBWORD_BM_BENCHMARK_DEFINE(BM_subword_tokenizer); + +// BENCHMARK_MAIN(); diff --git a/cpp/benchmarks/text/tokenize.cpp b/cpp/benchmarks/text/tokenize.cpp new file mode 100644 index 0000000..b556a84 --- /dev/null +++ b/cpp/benchmarks/text/tokenize.cpp @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include + +#include + +#include +#include +#include + +#include +#include + +#include + +static void bench_tokenize(nvbench::state& state) +{ + auto const num_rows = static_cast(state.get_int64("num_rows")); + auto const row_width = static_cast(state.get_int64("row_width")); + auto const tokenize_type = state.get_string("type"); + + if (static_cast(num_rows) * static_cast(row_width) >= + static_cast(std::numeric_limits::max())) { + state.skip("Skip benchmarks greater than size_type limit"); + } + + data_profile const profile = data_profile_builder().distribution( + cudf::type_id::STRING, distribution_id::NORMAL, 0, row_width); + auto const column = create_random_column(cudf::type_id::STRING, row_count{num_rows}, profile); + cudf::strings_column_view input(column->view()); + + state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); + + auto chars_size = input.chars_size(); + state.add_global_memory_reads(chars_size); + state.add_global_memory_writes(chars_size); + + if (tokenize_type == "whitespace") { + state.exec(nvbench::exec_tag::sync, + [&](nvbench::launch& launch) { auto result = nvtext::tokenize(input); }); + } else if (tokenize_type == "multi") { + cudf::test::strings_column_wrapper delimiters({" ", "+", "-"}); + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + auto result = nvtext::tokenize(input, cudf::strings_column_view(delimiters)); + }); + } else if (tokenize_type == "count") { + state.exec(nvbench::exec_tag::sync, + [&](nvbench::launch& launch) { auto result = nvtext::count_tokens(input); }); + } else if (tokenize_type == "count_multi") { + cudf::test::strings_column_wrapper delimiters({" ", "+", "-"}); + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + auto result = nvtext::count_tokens(input, cudf::strings_column_view(delimiters)); + }); + } else if (tokenize_type == "ngrams") { + auto const delimiter = cudf::string_scalar(""); + auto const separator = 
cudf::string_scalar("_"); + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + auto result = nvtext::ngrams_tokenize(input, 2, delimiter, separator); + }); + } else if (tokenize_type == "characters") { + state.exec(nvbench::exec_tag::sync, + [&](nvbench::launch& launch) { auto result = nvtext::character_tokenize(input); }); + } +} + +NVBENCH_BENCH(bench_tokenize) + .set_name("tokenize") + .add_int64_axis("row_width", {32, 64, 128, 256, 512, 1024}) + .add_int64_axis("num_rows", {4096, 32768, 262144, 2097152, 16777216}) + .add_string_axis("type", {"whitespace", "multi", "count", "count_multi", "ngrams", "characters"}); diff --git a/cpp/benchmarks/transpose/transpose.cpp b/cpp/benchmarks/transpose/transpose.cpp new file mode 100644 index 0000000..2f41bda --- /dev/null +++ b/cpp/benchmarks/transpose/transpose.cpp @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include + +#include +#include +#include + +#include +#include + +static void BM_transpose(benchmark::State& state) +{ + auto count = state.range(0); + auto int_column_generator = + thrust::make_transform_iterator(thrust::counting_iterator(0), [count](int i) { + return cudf::make_numeric_column( + cudf::data_type{cudf::type_id::INT32}, count, cudf::mask_state::ALL_VALID); + }); + + auto input_table = cudf::table(std::vector(int_column_generator, int_column_generator + count)); + auto input = input_table.view(); + + for (auto _ : state) { + cuda_event_timer raii(state, true); + auto output = cudf::transpose(input); + } +} + +class Transpose : public cudf::benchmark {}; + +#define TRANSPOSE_BM_BENCHMARK_DEFINE(name) \ + BENCHMARK_DEFINE_F(Transpose, name)(::benchmark::State & state) { BM_transpose(state); } \ + BENCHMARK_REGISTER_F(Transpose, name) \ + ->RangeMultiplier(4) \ + ->Range(4, 4 << 13) \ + ->UseManualTime() \ + ->Unit(benchmark::kMillisecond); + +TRANSPOSE_BM_BENCHMARK_DEFINE(transpose_simple); diff --git a/cpp/benchmarks/type_dispatcher/type_dispatcher.cu b/cpp/benchmarks/type_dispatcher/type_dispatcher.cu new file mode 100644 index 0000000..3f985cf --- /dev/null +++ b/cpp/benchmarks/type_dispatcher/type_dispatcher.cu @@ -0,0 +1,231 @@ +/* + * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +enum DispatchingType { HOST_DISPATCHING, DEVICE_DISPATCHING, NO_DISPATCHING }; + +enum FunctorType { BANDWIDTH_BOUND, COMPUTE_BOUND }; + +template +struct Functor { + static __device__ NotFloat f(NotFloat x) { return x; } +}; + +template +struct Functor>> { + static __device__ Float f(Float x) + { + if (ft == BANDWIDTH_BOUND) { + return x + static_cast(1) - static_cast(1); + } else { + for (int i = 0; i < 1000; i++) { + x = (x * x + static_cast(1)) - x * x - static_cast(1); + } + return x; + } + } +}; + +constexpr int block_size = 256; + +// This is for NO_DISPATCHING +template +__global__ void no_dispatching_kernel(T** A, cudf::size_type n_rows, cudf::size_type n_cols) +{ + using F = Functor; + cudf::size_type index = blockIdx.x * blockDim.x + threadIdx.x; + while (index < n_rows) { + for (int c = 0; c < n_cols; c++) { + A[c][index] = F::f(A[c][index]); + } + index += blockDim.x * gridDim.x; + } +} + +// This is for HOST_DISPATCHING +template +__global__ void host_dispatching_kernel(cudf::mutable_column_device_view source_column) +{ + using F = Functor; + T* A = source_column.data(); + cudf::size_type index = blockIdx.x * blockDim.x + threadIdx.x; + while (index < source_column.size()) { + A[index] = F::f(A[index]); + index += blockDim.x * gridDim.x; + } +} + +template +struct ColumnHandle { + template ())> + void operator()(cudf::mutable_column_device_view source_column, int work_per_thread) + { + cudf::detail::grid_1d grid_config{source_column.size(), block_size}; + int grid_size = grid_config.num_blocks; + // Launch the kernel. + host_dispatching_kernel<<>>(source_column); + } + + template ())> + void operator()(cudf::mutable_column_device_view source_column, int work_per_thread) + { + CUDF_FAIL("Invalid type to benchmark."); + } +}; + +// The following is for DEVICE_DISPATCHING: +// The dispatching is done on device. 
The loop loops over +// each row (across different columns). Type is dispatched each time +// a column is visited so the total number of dispatching is +// n_rows * n_cols. +template +struct RowHandle { + template ())> + __device__ void operator()(cudf::mutable_column_device_view source, cudf::size_type index) + { + using F = Functor; + source.data()[index] = F::f(source.data()[index]); + } + + template ())> + __device__ void operator()(cudf::mutable_column_device_view source, cudf::size_type index) + { + CUDF_UNREACHABLE("Unsupported type."); + } +}; + +// This is for DEVICE_DISPATCHING +template +__global__ void device_dispatching_kernel(cudf::mutable_table_device_view source) +{ + cudf::size_type const n_rows = source.num_rows(); + cudf::size_type index = threadIdx.x + blockIdx.x * blockDim.x; + + while (index < n_rows) { + for (cudf::size_type i = 0; i < source.num_columns(); i++) { + cudf::type_dispatcher( + source.column(i).type(), RowHandle{}, source.column(i), index); + } + index += blockDim.x * gridDim.x; + } // while +} + +template +void launch_kernel(cudf::mutable_table_view input, T** d_ptr, int work_per_thread) +{ + cudf::size_type const n_rows = input.num_rows(); + cudf::size_type const n_cols = input.num_columns(); + + cudf::detail::grid_1d grid_config{n_rows, block_size}; + int grid_size = grid_config.num_blocks; + + if (dispatching_type == HOST_DISPATCHING) { + // std::vector v_stream(n_cols); + for (int c = 0; c < n_cols; c++) { + auto d_column = cudf::mutable_column_device_view::create(input.column(c)); + cudf::type_dispatcher( + d_column->type(), ColumnHandle{}, *d_column, work_per_thread); + } + } else if (dispatching_type == DEVICE_DISPATCHING) { + auto d_table_view = cudf::mutable_table_device_view::create(input); + auto f = device_dispatching_kernel; + // Launch the kernel + f<<>>(*d_table_view); + } else if (dispatching_type == NO_DISPATCHING) { + auto f = no_dispatching_kernel; + // Launch the kernel + f<<>>(d_ptr, n_rows, n_cols); + } +} 
+ +template +void type_dispatcher_benchmark(::benchmark::State& state) +{ + auto const n_cols = static_cast(state.range(0)); + auto const source_size = static_cast(state.range(1)); + auto const work_per_thread = static_cast(state.range(2)); + + auto init = cudf::make_fixed_width_scalar(static_cast(0)); + + std::vector> source_column_wrappers; + std::vector source_columns; + + for (int i = 0; i < n_cols; ++i) { + source_column_wrappers.push_back(cudf::sequence(source_size, *init)); + source_columns.push_back(*source_column_wrappers[i]); + } + cudf::mutable_table_view source_table{source_columns}; + + // For no dispatching + std::vector h_vec(n_cols); + std::vector h_vec_p(n_cols); + std::transform(h_vec.begin(), h_vec.end(), h_vec_p.begin(), [source_size](auto& col) { + col.resize(source_size * sizeof(TypeParam), cudf::get_default_stream()); + return static_cast(col.data()); + }); + rmm::device_uvector d_vec(n_cols, cudf::get_default_stream()); + + if (dispatching_type == NO_DISPATCHING) { + CUDF_CUDA_TRY( + cudaMemcpy(d_vec.data(), h_vec_p.data(), sizeof(TypeParam*) * n_cols, cudaMemcpyDefault)); + } + + // Warm up + launch_kernel(source_table, d_vec.data(), work_per_thread); + CUDF_CUDA_TRY(cudaDeviceSynchronize()); + + for (auto _ : state) { + cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0 + launch_kernel(source_table, d_vec.data(), work_per_thread); + } + + state.SetBytesProcessed(static_cast(state.iterations()) * source_size * n_cols * 2 * + sizeof(TypeParam)); +} + +class TypeDispatcher : public cudf::benchmark {}; + +#define TBM_BENCHMARK_DEFINE(name, TypeParam, functor_type, dispatching_type) \ + BENCHMARK_DEFINE_F(TypeDispatcher, name)(::benchmark::State & state) \ + { \ + type_dispatcher_benchmark(state); \ + } \ + BENCHMARK_REGISTER_F(TypeDispatcher, name) \ + ->RangeMultiplier(2) \ + ->Ranges({{1, 8}, {1 << 10, 1 << 26}, {1, 1}}) \ + ->UseManualTime(); + +TBM_BENCHMARK_DEFINE(fp64_bandwidth_host, double, BANDWIDTH_BOUND, 
HOST_DISPATCHING); +TBM_BENCHMARK_DEFINE(fp64_bandwidth_device, double, BANDWIDTH_BOUND, DEVICE_DISPATCHING); +TBM_BENCHMARK_DEFINE(fp64_bandwidth_no, double, BANDWIDTH_BOUND, NO_DISPATCHING); +TBM_BENCHMARK_DEFINE(fp64_compute_host, double, COMPUTE_BOUND, HOST_DISPATCHING); +TBM_BENCHMARK_DEFINE(fp64_compute_device, double, COMPUTE_BOUND, DEVICE_DISPATCHING); +TBM_BENCHMARK_DEFINE(fp64_compute_no, double, COMPUTE_BOUND, NO_DISPATCHING); diff --git a/cpp/cmake/Modules/ConfigureCUDA.cmake b/cpp/cmake/Modules/ConfigureCUDA.cmake new file mode 100644 index 0000000..f79e4c3 --- /dev/null +++ b/cpp/cmake/Modules/ConfigureCUDA.cmake @@ -0,0 +1,47 @@ +# ============================================================================= +# Copyright (c) 2018-2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. 
+# ============================================================================= + +if(CMAKE_COMPILER_IS_GNUCXX) + list(APPEND CUDF_CXX_FLAGS -Wall -Werror -Wno-unknown-pragmas -Wno-error=deprecated-declarations) +endif() + +list(APPEND CUDF_CUDA_FLAGS --expt-extended-lambda --expt-relaxed-constexpr) + +# set warnings as errors +if(CUDA_WARNINGS_AS_ERRORS) + list(APPEND CUDF_CUDA_FLAGS -Werror=all-warnings) +else() + list(APPEND CUDF_CUDA_FLAGS -Werror=cross-execution-space-call) +endif() +list(APPEND CUDF_CUDA_FLAGS -Xcompiler=-Wall,-Werror,-Wno-error=deprecated-declarations) + +if(DISABLE_DEPRECATION_WARNINGS) + list(APPEND CUDF_CXX_FLAGS -Wno-deprecated-declarations) + list(APPEND CUDF_CUDA_FLAGS -Xcompiler=-Wno-deprecated-declarations) +endif() + +# make sure we produce smallest binary size +list(APPEND CUDF_CUDA_FLAGS -Xfatbin=-compress-all) + +# Option to enable line info in CUDA device compilation to allow introspection when profiling / +# memchecking +if(CUDA_ENABLE_LINEINFO) + list(APPEND CUDF_CUDA_FLAGS -lineinfo) +endif() + +# Debug options +if(CMAKE_BUILD_TYPE MATCHES Debug) + message(VERBOSE "CUDF: Building with debugging flags") + list(APPEND CUDF_CUDA_FLAGS -Xcompiler=-rdynamic) +endif() diff --git a/cpp/cmake/Modules/FindcuFile.cmake b/cpp/cmake/Modules/FindcuFile.cmake new file mode 100644 index 0000000..1df4f12 --- /dev/null +++ b/cpp/cmake/Modules/FindcuFile.cmake @@ -0,0 +1,120 @@ +# ============================================================================= +# Copyright (c) 2020-2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. 
See the License for the specific language governing permissions and limitations under +# the License. + +#[=======================================================================[.rst: +FindcuFile +---------- + +Find cuFile headers and libraries. + +Imported Targets +^^^^^^^^^^^^^^^^ + +``cufile::cuFile`` + The cuFile library, if found. +``cufile::cuFileRDMA`` + The cuFile RDMA library, if found. + +Result Variables +^^^^^^^^^^^^^^^^ + +This will define the following variables in your project: + +``cuFile_FOUND`` + true if (the requested version of) cuFile is available. +``cuFile_VERSION`` + the version of cuFile. +``cuFile_LIBRARIES`` + the libraries to link against to use cuFile. +``cuFileRDMA_LIBRARIES`` + the libraries to link against to use cuFile RDMA. +``cuFile_INCLUDE_DIRS`` + where to find the cuFile headers. +``cuFile_COMPILE_OPTIONS`` + this should be passed to target_compile_options(), if the + target is not used for linking + +#]=======================================================================] + +# use pkg-config to get the directories and then use these values in the FIND_PATH() and +# FIND_LIBRARY() calls +find_package(PkgConfig QUIET) +pkg_check_modules(PKG_cuFile QUIET cuFile) + +set(cuFile_COMPILE_OPTIONS ${PKG_cuFile_CFLAGS_OTHER}) +set(cuFile_VERSION ${PKG_cuFile_VERSION}) + +# Find the location of the CUDA Toolkit +find_package(CUDAToolkit QUIET) +find_path( + cuFile_INCLUDE_DIR + NAMES cufile.h + HINTS ${PKG_cuFile_INCLUDE_DIRS} ${CUDAToolkit_INCLUDE_DIRS} +) + +find_library( + cuFile_LIBRARY + NAMES cufile + HINTS ${PKG_cuFile_LIBRARY_DIRS} ${CUDAToolkit_LIBRARY_DIR} +) + +find_library( + cuFileRDMA_LIBRARY + NAMES cufile_rdma + HINTS ${PKG_cuFile_LIBRARY_DIRS} ${CUDAToolkit_LIBRARY_DIR} +) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args( + cuFile + FOUND_VAR cuFile_FOUND + REQUIRED_VARS cuFile_LIBRARY cuFileRDMA_LIBRARY cuFile_INCLUDE_DIR + VERSION_VAR cuFile_VERSION +) + +if(cuFile_INCLUDE_DIR AND NOT 
TARGET cufile::cuFile_interface) + add_library(cufile::cuFile_interface INTERFACE IMPORTED GLOBAL) + target_include_directories( + cufile::cuFile_interface INTERFACE "$" + ) + target_compile_options(cufile::cuFile_interface INTERFACE "${cuFile_COMPILE_OPTIONS}") + target_compile_definitions(cufile::cuFile_interface INTERFACE CUFILE_FOUND) +endif() + +if(cuFile_FOUND AND NOT TARGET cufile::cuFile) + add_library(cufile::cuFile UNKNOWN IMPORTED GLOBAL) + set_target_properties( + cufile::cuFile + PROPERTIES IMPORTED_LOCATION "${cuFile_LIBRARY}" + INTERFACE_COMPILE_OPTIONS "${cuFile_COMPILE_OPTIONS}" + INTERFACE_INCLUDE_DIRECTORIES "${cuFile_INCLUDE_DIR}" + ) +endif() + +if(cuFile_FOUND AND NOT TARGET cufile::cuFileRDMA) + add_library(cufile::cuFileRDMA UNKNOWN IMPORTED GLOBAL) + set_target_properties( + cufile::cuFileRDMA + PROPERTIES IMPORTED_LOCATION "${cuFileRDMA_LIBRARY}" + INTERFACE_COMPILE_OPTIONS "${cuFile_COMPILE_OPTIONS}" + INTERFACE_INCLUDE_DIRECTORIES "${cuFile_INCLUDE_DIR}" + ) +endif() + +mark_as_advanced(cuFile_LIBRARY cuFileRDMA_LIBRARY cuFile_INCLUDE_DIR) + +if(cuFile_FOUND) + set(cuFile_LIBRARIES ${cuFile_LIBRARY}) + set(cuFileRDMA_LIBRARIES ${cuFileRDMA_LIBRARY}) + set(cuFile_INCLUDE_DIRS ${cuFile_INCLUDE_DIR}) +endif() diff --git a/cpp/cmake/Modules/JitifyPreprocessKernels.cmake b/cpp/cmake/Modules/JitifyPreprocessKernels.cmake new file mode 100644 index 0000000..df285bd --- /dev/null +++ b/cpp/cmake/Modules/JitifyPreprocessKernels.cmake @@ -0,0 +1,66 @@ +# ============================================================================= +# Copyright (c) 2021-2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. +# ============================================================================= + +# Create `jitify_preprocess` executable +add_executable(jitify_preprocess "${JITIFY_INCLUDE_DIR}/jitify2_preprocess.cpp") + +target_compile_definitions(jitify_preprocess PRIVATE "_FILE_OFFSET_BITS=64") +target_link_libraries(jitify_preprocess CUDA::cudart ${CMAKE_DL_LIBS}) + +# Take a list of files to JIT-compile and run them through jitify_preprocess. +function(jit_preprocess_files) + cmake_parse_arguments(ARG "" "SOURCE_DIRECTORY" "FILES" ${ARGN}) + + foreach(inc IN LISTS libcudacxx_raw_includes) + list(APPEND libcudacxx_includes "-I${inc}") + endforeach() + foreach(ARG_FILE ${ARG_FILES}) + set(ARG_OUTPUT ${CUDF_GENERATED_INCLUDE_DIR}/include/jit_preprocessed_files/${ARG_FILE}.jit.hpp) + get_filename_component(jit_output_directory "${ARG_OUTPUT}" DIRECTORY) + list(APPEND JIT_PREPROCESSED_FILES "${ARG_OUTPUT}") + + # Note: need to pass _FILE_OFFSET_BITS=64 in COMMAND due to a limitation in how conda builds + # glibc + add_custom_command( + OUTPUT ${ARG_OUTPUT} + DEPENDS jitify_preprocess "${ARG_SOURCE_DIRECTORY}/${ARG_FILE}" + WORKING_DIRECTORY ${ARG_SOURCE_DIRECTORY} + VERBATIM + COMMAND ${CMAKE_COMMAND} -E make_directory "${jit_output_directory}" + COMMAND + "${CMAKE_COMMAND}" -E env LD_LIBRARY_PATH=${CUDAToolkit_LIBRARY_DIR} + $ ${ARG_FILE} -o + ${CUDF_GENERATED_INCLUDE_DIR}/include/jit_preprocessed_files -i -m -std=c++17 + -remove-unused-globals -D_FILE_OFFSET_BITS=64 -D__CUDACC_RTC__ -I${CUDF_SOURCE_DIR}/include + -I${CUDF_SOURCE_DIR}/src ${libcudacxx_includes} 
-I${CUDAToolkit_INCLUDE_DIRS} + --no-preinclude-workarounds --no-replace-pragma-once + COMMENT "Custom command to JIT-compile files." + ) + endforeach() + set(JIT_PREPROCESSED_FILES + "${JIT_PREPROCESSED_FILES}" + PARENT_SCOPE + ) +endfunction() + +jit_preprocess_files( + SOURCE_DIRECTORY ${CUDF_SOURCE_DIR}/src FILES binaryop/jit/kernel.cu transform/jit/kernel.cu + rolling/jit/kernel.cu +) + +add_custom_target( + jitify_preprocess_run + DEPENDS ${JIT_PREPROCESSED_FILES} + COMMENT "Target representing jitified files." +) diff --git a/cpp/cmake/config.json b/cpp/cmake/config.json new file mode 100644 index 0000000..a65afe9 --- /dev/null +++ b/cpp/cmake/config.json @@ -0,0 +1,47 @@ +{ + "parse": { + "additional_commands": { + "CPMFindPackage": { + "kwargs": { + "NAME": 1, + "GITHUB_REPOSITORY": "?", + "GIT_TAG": "?", + "VERSION": "?", + "GIT_SHALLOW": "?", + "OPTIONS": "*", + "FIND_PACKAGE_ARGUMENTS": "*" + } + }, + "ConfigureTest": { + "flags": ["TEST_NAME", "TEST_SRC"], + "kwargs": { + "GPUS": 1, + "PERCENT": 1 + } + }, + "ConfigureBench": { + "flags": ["BENCH_NAME", "BENCH_SRC"] + } + } + }, + "format": { + "line_width": 100, + "tab_size": 2, + "command_case": "unchanged", + "max_lines_hwrap": 1, + "max_pargs_hwrap": 999, + "dangle_parens": true + }, + "lint": { + "disabled_codes": ["C0301"], + "function_pattern": "[0-9A-z_]+", + "macro_pattern": "[0-9A-z_]+", + "global_var_pattern": "[A-z][0-9A-z_]+", + "internal_var_pattern": "_[A-z][0-9A-z_]+", + "local_var_pattern": "[A-z][A-z0-9_]+", + "private_var_pattern": "_[0-9A-z_]+", + "public_var_pattern": "[A-z][0-9A-z_]+", + "argument_var_pattern": "[A-z][A-z0-9_]+", + "keyword_pattern": "[A-z][0-9A-z_]+" + } +} diff --git a/cpp/cmake/thirdparty/get_arrow.cmake b/cpp/cmake/thirdparty/get_arrow.cmake new file mode 100644 index 0000000..894dc96 --- /dev/null +++ b/cpp/cmake/thirdparty/get_arrow.cmake @@ -0,0 +1,414 @@ +# ============================================================================= +# Copyright (c) 
2020-2023, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. +# ============================================================================= + +# Finding arrow is far more complex than it should be, and as a result we violate multiple linting +# rules aiming to limit complexity. Since all our other CMake scripts conform to expectations +# without undue difficulty, disabling those rules for just this function is our best approach for +# now. The spacing between this comment, the cmake-lint directives, and the function docstring is +# necessary to prevent cmake-format from trying to combine the lines. + +# cmake-lint: disable=R0912,R0913,R0915 + +include_guard(GLOBAL) + +# Generate a FindArrow module for the case where we need to search for arrow within a pip install +# pyarrow. +function(find_libarrow_in_python_wheel PYARROW_VERSION) + string(REPLACE "." ";" PYARROW_VER_COMPONENTS "${PYARROW_VERSION}") + list(GET PYARROW_VER_COMPONENTS 0 PYARROW_SO_VER) + # The soname for Arrow libraries is constructed using the major version plus "00". Note that, + # although it may seem like it due to Arrow almost exclusively releasing new major versions (i.e. 
+ # `${MINOR_VERSION}${PATCH_VERSION}` is almost always equivalent to "00"), + # the soname is not generated by concatenating the major, minor, and patch versions into a single + # version number soname, just `${MAJOR_VERSION}00` + set(PYARROW_LIB "libarrow.so.${PYARROW_SO_VER}00") + + find_package(Python REQUIRED) + execute_process( + COMMAND "${Python_EXECUTABLE}" -c "import pyarrow; print(pyarrow.get_library_dirs()[0])" + OUTPUT_VARIABLE CUDF_PYARROW_WHEEL_DIR + OUTPUT_STRIP_TRAILING_WHITESPACE + ) + list(APPEND CMAKE_PREFIX_PATH "${CUDF_PYARROW_WHEEL_DIR}") + rapids_find_generate_module( + Arrow NO_CONFIG + VERSION "${PYARROW_VERSION}" + LIBRARY_NAMES "${PYARROW_LIB}" + BUILD_EXPORT_SET cudf-exports + INSTALL_EXPORT_SET cudf-exports + HEADER_NAMES arrow/python/arrow_to_pandas.h + ) + + find_package(Arrow ${PYARROW_VERSION} MODULE REQUIRED GLOBAL) + add_library(arrow_shared ALIAS Arrow::Arrow) + + # When using the libarrow inside a wheel we must build libcudf with the old ABI because pyarrow's + # `libarrow.so` is compiled for manylinux2014 (centos7 toolchain) which uses the old ABI. Note + # that these flags will often be redundant because we build wheels in manylinux containers that + # actually have the old libc++ anyway, but setting them explicitly ensures correct and consistent + # behavior in all other cases such as aarch builds on newer manylinux or testing builds in newer + # containers. Note that tests will not build successfully without also propagating these options + # to builds of GTest. Similarly, benchmarks will not work without updating GBench (and possibly + # NVBench) builds. We are currently ignoring these limitations since we don't anticipate using + # this feature except for building wheels. 
+ target_compile_options( + Arrow::Arrow INTERFACE "$<$:-D_GLIBCXX_USE_CXX11_ABI=0>" + "$<$:-Xcompiler=-D_GLIBCXX_USE_CXX11_ABI=0>" + ) + + rapids_export_package(BUILD Arrow cudf-exports) + rapids_export_package(INSTALL Arrow cudf-exports) + + list(POP_BACK CMAKE_PREFIX_PATH) +endfunction() + +# This function finds arrow and sets any additional necessary environment variables. +function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3 ENABLE_ORC ENABLE_PYTHON + ENABLE_PARQUET +) + + if(USE_LIBARROW_FROM_PYARROW) + # Generate a FindArrow.cmake to find pyarrow's libarrow.so + find_libarrow_in_python_wheel(${VERSION}) + set(ARROW_FOUND + TRUE + PARENT_SCOPE + ) + set(ARROW_LIBRARIES + arrow_shared + PARENT_SCOPE + ) + return() + endif() + + if(BUILD_STATIC) + if(TARGET arrow_static) + set(ARROW_FOUND + TRUE + PARENT_SCOPE + ) + set(ARROW_LIBRARIES + arrow_static + PARENT_SCOPE + ) + return() + endif() + else() + if(TARGET arrow_shared) + set(ARROW_FOUND + TRUE + PARENT_SCOPE + ) + set(ARROW_LIBRARIES + arrow_shared + PARENT_SCOPE + ) + return() + endif() + endif() + + if(NOT ARROW_ARMV8_ARCH) + set(ARROW_ARMV8_ARCH "armv8-a") + endif() + + if(NOT ARROW_SIMD_LEVEL) + set(ARROW_SIMD_LEVEL "NONE") + endif() + + if(BUILD_STATIC) + set(ARROW_BUILD_STATIC ON) + set(ARROW_BUILD_SHARED OFF) + # Turn off CPM using `find_package` so we always download and make sure we get proper static + # library. + set(CPM_DOWNLOAD_Arrow TRUE) + # By default ARROW will try to search for a static version of OpenSSL which is a bad idea given + # that shared linking is advised for critical components like SSL. If a static build is + # requested, we honor ARROW's default of static linking, but users may consider setting + # ARROW_OPENSSL_USE_SHARED even in static builds. 
+ else() + set(ARROW_BUILD_SHARED ON) + set(ARROW_BUILD_STATIC OFF) + # By default ARROW will try to search for a static version of OpenSSL which is a bad idea given + # that shared linking is advised for critical components like SSL + set(ARROW_OPENSSL_USE_SHARED ON) + endif() + + set(ARROW_PYTHON_OPTIONS "") + if(ENABLE_PYTHON) + list(APPEND ARROW_PYTHON_OPTIONS "ARROW_PYTHON ON") + # Arrow's logic to build Boost from source is busted, so we have to get it from the system. + list(APPEND ARROW_PYTHON_OPTIONS "BOOST_SOURCE SYSTEM") + list(APPEND ARROW_PYTHON_OPTIONS "ARROW_DEPENDENCY_SOURCE AUTO") + endif() + + set(ARROW_PARQUET_OPTIONS "") + if(ENABLE_PARQUET) + # Arrow's logic to build Boost from source is busted, so we have to get it from the system. + list(APPEND ARROW_PARQUET_OPTIONS "BOOST_SOURCE SYSTEM") + list(APPEND ARROW_PARQUET_OPTIONS "Thrift_SOURCE BUNDLED") + list(APPEND ARROW_PARQUET_OPTIONS "ARROW_DEPENDENCY_SOURCE AUTO") + endif() + + rapids_cpm_find( + Arrow ${VERSION} + GLOBAL_TARGETS arrow_shared parquet_shared arrow_acero_shared arrow_dataset_shared arrow_static + parquet_static arrow_acero_static arrow_dataset_static + CPM_ARGS + GIT_REPOSITORY https://github.com/apache/arrow.git + GIT_TAG apache-arrow-${VERSION} + GIT_SHALLOW TRUE SOURCE_SUBDIR cpp + OPTIONS "CMAKE_VERBOSE_MAKEFILE ON" + "ARROW_ACERO ON" + "ARROW_IPC ON" + "ARROW_DATASET ON" + "ARROW_WITH_BACKTRACE ON" + "ARROW_CXXFLAGS -w" + "ARROW_JEMALLOC OFF" + "ARROW_S3 ${ENABLE_S3}" + "ARROW_ORC ${ENABLE_ORC}" + # e.g. 
needed by blazingsql-io + ${ARROW_PARQUET_OPTIONS} + "ARROW_PARQUET ${ENABLE_PARQUET}" + "ARROW_FILESYSTEM ON" + ${ARROW_PYTHON_OPTIONS} + # Arrow modifies CMake's GLOBAL RULE_LAUNCH_COMPILE unless this is off + "ARROW_USE_CCACHE OFF" + "ARROW_ARMV8_ARCH ${ARROW_ARMV8_ARCH}" + "ARROW_SIMD_LEVEL ${ARROW_SIMD_LEVEL}" + "ARROW_BUILD_STATIC ${ARROW_BUILD_STATIC}" + "ARROW_BUILD_SHARED ${ARROW_BUILD_SHARED}" + "ARROW_POSITION_INDEPENDENT_CODE ON" + "ARROW_DEPENDENCY_USE_SHARED ${ARROW_BUILD_SHARED}" + "ARROW_BOOST_USE_SHARED ${ARROW_BUILD_SHARED}" + "ARROW_BROTLI_USE_SHARED ${ARROW_BUILD_SHARED}" + "ARROW_GFLAGS_USE_SHARED ${ARROW_BUILD_SHARED}" + "ARROW_GRPC_USE_SHARED ${ARROW_BUILD_SHARED}" + "ARROW_PROTOBUF_USE_SHARED ${ARROW_BUILD_SHARED}" + "ARROW_ZSTD_USE_SHARED ${ARROW_BUILD_SHARED}" + "xsimd_SOURCE AUTO" + ) + + set(ARROW_FOUND + TRUE + PARENT_SCOPE + ) + + if(BUILD_STATIC) + set(ARROW_LIBRARIES arrow_static) + else() + set(ARROW_LIBRARIES arrow_shared) + endif() + + # Arrow_DIR: set if CPM found Arrow on the system/conda/etc. + if(Arrow_DIR) + # This extra find_package is necessary because rapids_cpm_find does not propagate all the + # variables from find_package that we might need. This is especially problematic when + # rapids_cpm_find builds from source. + find_package(Arrow REQUIRED QUIET) + if(ENABLE_PARQUET) + # Setting Parquet_DIR is conditional because parquet may be installed independently of arrow. + if(NOT Parquet_DIR) + # Set this to enable `find_package(Parquet)` + set(Parquet_DIR "${Arrow_DIR}") + endif() + # Set this to enable `find_package(ArrowDataset)`. This will call find_package(ArrowAcero) for + # us + set(ArrowDataset_DIR "${Arrow_DIR}") + find_package(ArrowDataset REQUIRED QUIET) + endif() + # Arrow_ADDED: set if CPM downloaded Arrow from Github + elseif(Arrow_ADDED) + # Copy these files so we can avoid adding paths in Arrow_BINARY_DIR to + # target_include_directories. That defeats ccache. 
+ file(INSTALL "${Arrow_BINARY_DIR}/src/arrow/util/config.h" + DESTINATION "${Arrow_SOURCE_DIR}/cpp/src/arrow/util" + ) + if(ENABLE_PARQUET) + file(INSTALL "${Arrow_BINARY_DIR}/src/parquet/parquet_version.h" + DESTINATION "${Arrow_SOURCE_DIR}/cpp/src/parquet" + ) + endif() + # Arrow populates INTERFACE_INCLUDE_DIRECTORIES for the `arrow_static` and `arrow_shared` + # targets in FindArrow, so for static source-builds, we have to do it after-the-fact. + # + # This only works because we know exactly which components we're using. Don't forget to update + # this list if we add more! + # + foreach(ARROW_LIBRARY ${ARROW_LIBRARIES}) + target_include_directories( + ${ARROW_LIBRARY} + INTERFACE "$" + "$" + "$" + "$" + ) + endforeach() + else() + set(ARROW_FOUND + FALSE + PARENT_SCOPE + ) + message(FATAL_ERROR "CUDF: Arrow library not found or downloaded.") + endif() + + if(Arrow_ADDED) + + set(arrow_code_string + [=[ + if (TARGET cudf::arrow_shared AND (NOT TARGET arrow_shared)) + add_library(arrow_shared ALIAS cudf::arrow_shared) + endif() + if (TARGET cudf::arrow_static AND (NOT TARGET arrow_static)) + add_library(arrow_static ALIAS cudf::arrow_static) + endif() + if (NOT TARGET arrow::flatbuffers) + add_library(arrow::flatbuffers INTERFACE IMPORTED) + endif() + if (NOT TARGET arrow::hadoop) + add_library(arrow::hadoop INTERFACE IMPORTED) + endif() + ]=] + ) + if(ENABLE_PARQUET) + string( + APPEND + arrow_code_string + " + find_package(Boost) + if (NOT TARGET Boost::headers) + add_library(Boost::headers INTERFACE IMPORTED) + endif() + " + ) + endif() + if(NOT TARGET xsimd) + string( + APPEND + arrow_code_string + " + if(NOT TARGET arrow::xsimd) + add_library(arrow::xsimd INTERFACE IMPORTED) + target_include_directories(arrow::xsimd INTERFACE \"${Arrow_BINARY_DIR}/xsimd_ep/src/xsimd_ep-install/include\") + endif() + " + ) + endif() + + rapids_export( + BUILD Arrow + VERSION ${VERSION} + EXPORT_SET arrow_targets + GLOBAL_TARGETS arrow_shared arrow_static + NAMESPACE cudf:: + 
FINAL_CODE_BLOCK arrow_code_string + ) + + if(ENABLE_PARQUET) + + set(arrow_acero_code_string + [=[ + if (TARGET cudf::arrow_acero_shared AND (NOT TARGET arrow_acero_shared)) + add_library(arrow_acero_shared ALIAS cudf::arrow_acero_shared) + endif() + if (TARGET cudf::arrow_acero_static AND (NOT TARGET arrow_acero_static)) + add_library(arrow_acero_static ALIAS cudf::arrow_acero_static) + endif() + ]=] + ) + + rapids_export( + BUILD ArrowAcero + VERSION ${VERSION} + EXPORT_SET arrow_acero_targets + GLOBAL_TARGETS arrow_acero_shared arrow_acero_static + NAMESPACE cudf:: + FINAL_CODE_BLOCK arrow_acero_code_string + ) + + set(arrow_dataset_code_string + [=[ + if (TARGET cudf::arrow_dataset_shared AND (NOT TARGET arrow_dataset_shared)) + add_library(arrow_dataset_shared ALIAS cudf::arrow_dataset_shared) + endif() + if (TARGET cudf::arrow_dataset_static AND (NOT TARGET arrow_dataset_static)) + add_library(arrow_dataset_static ALIAS cudf::arrow_dataset_static) + endif() + ]=] + ) + + rapids_export( + BUILD ArrowDataset + VERSION ${VERSION} + EXPORT_SET arrow_dataset_targets + GLOBAL_TARGETS arrow_dataset_shared arrow_dataset_static + NAMESPACE cudf:: + FINAL_CODE_BLOCK arrow_dataset_code_string + ) + + set(parquet_code_string + [=[ + if (TARGET cudf::parquet_shared AND (NOT TARGET parquet_shared)) + add_library(parquet_shared ALIAS cudf::parquet_shared) + endif() + if (TARGET cudf::parquet_static AND (NOT TARGET parquet_static)) + add_library(parquet_static ALIAS cudf::parquet_static) + endif() + ]=] + ) + + rapids_export( + BUILD Parquet + VERSION ${VERSION} + EXPORT_SET parquet_targets + GLOBAL_TARGETS parquet_shared parquet_static + NAMESPACE cudf:: + FINAL_CODE_BLOCK parquet_code_string + ) + endif() + endif() + # We generate the arrow-configfiles when we built arrow locally, so always do `find_dependency` + rapids_export_package(BUILD Arrow cudf-exports) + rapids_export_package(INSTALL Arrow cudf-exports) + + if(ENABLE_PARQUET) + rapids_export_package(BUILD Parquet 
cudf-exports) + rapids_export_package(BUILD ArrowDataset cudf-exports) + endif() + + include("${rapids-cmake-dir}/export/find_package_root.cmake") + rapids_export_find_package_root(BUILD Arrow [=[${CMAKE_CURRENT_LIST_DIR}]=] cudf-exports) + if(ENABLE_PARQUET) + rapids_export_find_package_root(BUILD Parquet [=[${CMAKE_CURRENT_LIST_DIR}]=] cudf-exports) + rapids_export_find_package_root(BUILD ArrowDataset [=[${CMAKE_CURRENT_LIST_DIR}]=] cudf-exports) + endif() + + set(ARROW_LIBRARIES + "${ARROW_LIBRARIES}" + PARENT_SCOPE + ) +endfunction() + +if(NOT DEFINED CUDF_VERSION_Arrow) + set(CUDF_VERSION_Arrow + # This version must be kept in sync with the libarrow version pinned for builds in + # dependencies.yaml. + 12.0.1 + CACHE STRING "The version of Arrow to find (or build)" + ) +endif() + +find_and_configure_arrow( + ${CUDF_VERSION_Arrow} ${CUDF_USE_ARROW_STATIC} ${CUDF_ENABLE_ARROW_S3} ${CUDF_ENABLE_ARROW_ORC} + ${CUDF_ENABLE_ARROW_PYTHON} ${CUDF_ENABLE_ARROW_PARQUET} +) diff --git a/cpp/cmake/thirdparty/get_cucollections.cmake b/cpp/cmake/thirdparty/get_cucollections.cmake new file mode 100644 index 0000000..9758958 --- /dev/null +++ b/cpp/cmake/thirdparty/get_cucollections.cmake @@ -0,0 +1,25 @@ +# ============================================================================= +# Copyright (c) 2021-2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. 
+# ============================================================================= + +# This function finds cuCollections and performs any additional configuration. +function(find_and_configure_cucollections) + include(${rapids-cmake-dir}/cpm/cuco.cmake) + if(BUILD_SHARED_LIBS) + rapids_cpm_cuco(BUILD_EXPORT_SET cudf-exports) + else() + rapids_cpm_cuco(BUILD_EXPORT_SET cudf-exports INSTALL_EXPORT_SET cudf-exports) + endif() +endfunction() + +find_and_configure_cucollections() diff --git a/cpp/cmake/thirdparty/get_cufile.cmake b/cpp/cmake/thirdparty/get_cufile.cmake new file mode 100644 index 0000000..c0235eb --- /dev/null +++ b/cpp/cmake/thirdparty/get_cufile.cmake @@ -0,0 +1,32 @@ +# ============================================================================= +# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. +# ============================================================================= + +# This function finds cuFile and sets any additional necessary environment variables. 
+function(find_and_configure_cufile) + + list(APPEND CMAKE_MODULE_PATH ${CUDF_SOURCE_DIR}/cmake/Modules) + rapids_find_package(cuFile) + + if(cuFile_FOUND AND NOT BUILD_SHARED_LIBS) + include("${rapids-cmake-dir}/export/find_package_file.cmake") + rapids_export_find_package_file( + BUILD "${CUDF_SOURCE_DIR}/cmake/Modules/FindcuFile.cmake" cudf-exports + ) + rapids_export_find_package_file( + INSTALL "${CUDF_SOURCE_DIR}/cmake/Modules/FindcuFile.cmake" cudf-exports + ) + endif() +endfunction() + +find_and_configure_cufile() diff --git a/cpp/cmake/thirdparty/get_dlpack.cmake b/cpp/cmake/thirdparty/get_dlpack.cmake new file mode 100644 index 0000000..65b5f4f --- /dev/null +++ b/cpp/cmake/thirdparty/get_dlpack.cmake @@ -0,0 +1,41 @@ +# ============================================================================= +# Copyright (c) 2020-2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. +# ============================================================================= + +# This function finds dlpack and sets any additional necessary environment variables. 
+function(find_and_configure_dlpack VERSION) + + include(${rapids-cmake-dir}/find/generate_module.cmake) + rapids_find_generate_module(DLPACK HEADER_NAMES dlpack.h) + + rapids_cpm_find( + dlpack ${VERSION} + GIT_REPOSITORY https://github.com/dmlc/dlpack.git + GIT_TAG v${VERSION} + GIT_SHALLOW TRUE + DOWNLOAD_ONLY TRUE + OPTIONS "BUILD_MOCK OFF" + ) + + if(DEFINED dlpack_SOURCE_DIR) + # otherwise find_package(DLPACK) will set this variable + set(DLPACK_INCLUDE_DIR + "${dlpack_SOURCE_DIR}/include" + PARENT_SCOPE + ) + endif() +endfunction() + +set(CUDF_MIN_VERSION_dlpack 0.5) + +find_and_configure_dlpack(${CUDF_MIN_VERSION_dlpack}) diff --git a/cpp/cmake/thirdparty/get_fmt.cmake b/cpp/cmake/thirdparty/get_fmt.cmake new file mode 100644 index 0000000..083dd1d --- /dev/null +++ b/cpp/cmake/thirdparty/get_fmt.cmake @@ -0,0 +1,22 @@ +# ============================================================================= +# Copyright (c) 2023, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. 
+# ============================================================================= + +# Use CPM to find or clone fmt +function(find_and_configure_fmt) + + include(${rapids-cmake-dir}/cpm/fmt.cmake) + rapids_cpm_fmt(INSTALL_EXPORT_SET cudf-exports BUILD_EXPORT_SET cudf-exports) +endfunction() + +find_and_configure_fmt() diff --git a/cpp/cmake/thirdparty/get_gtest.cmake b/cpp/cmake/thirdparty/get_gtest.cmake new file mode 100644 index 0000000..1363f43 --- /dev/null +++ b/cpp/cmake/thirdparty/get_gtest.cmake @@ -0,0 +1,39 @@ +# ============================================================================= +# Copyright (c) 2021, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. +# ============================================================================= + +# This function finds gtest and sets any additional necessary environment variables. 
+function(find_and_configure_gtest) + include(${rapids-cmake-dir}/cpm/gtest.cmake) + + # Find or install GoogleTest + rapids_cpm_gtest(BUILD_EXPORT_SET cudf-testing-exports INSTALL_EXPORT_SET cudf-testing-exports) + + if(GTest_ADDED) + rapids_export( + BUILD GTest + VERSION ${GTest_VERSION} + EXPORT_SET GTestTargets + GLOBAL_TARGETS gtest gmock gtest_main gmock_main + NAMESPACE GTest:: + ) + + include("${rapids-cmake-dir}/export/find_package_root.cmake") + rapids_export_find_package_root( + BUILD GTest [=[${CMAKE_CURRENT_LIST_DIR}]=] cudf-testing-exports + ) + endif() + +endfunction() + +find_and_configure_gtest() diff --git a/cpp/cmake/thirdparty/get_jitify.cmake b/cpp/cmake/thirdparty/get_jitify.cmake new file mode 100644 index 0000000..d98abdf --- /dev/null +++ b/cpp/cmake/thirdparty/get_jitify.cmake @@ -0,0 +1,32 @@ +# ============================================================================= +# Copyright (c) 2020-2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. +# ============================================================================= + +# Jitify doesn't have a version :/ + +# This function finds Jitify and sets any additional necessary environment variables. 
+function(find_and_configure_jitify) + rapids_cpm_find( + jitify 2.0.0 + GIT_REPOSITORY https://github.com/rapidsai/jitify.git + GIT_TAG jitify2 + GIT_SHALLOW TRUE + DOWNLOAD_ONLY TRUE + ) + set(JITIFY_INCLUDE_DIR + "${jitify_SOURCE_DIR}" + PARENT_SCOPE + ) +endfunction() + +find_and_configure_jitify() diff --git a/cpp/cmake/thirdparty/get_kvikio.cmake b/cpp/cmake/thirdparty/get_kvikio.cmake new file mode 100644 index 0000000..e94e024 --- /dev/null +++ b/cpp/cmake/thirdparty/get_kvikio.cmake @@ -0,0 +1,36 @@ +# ============================================================================= +# Copyright (c) 2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. 
+# ============================================================================= + +# This function finds KvikIO +function(find_and_configure_kvikio VERSION) + + rapids_cpm_find( + KvikIO ${VERSION} + GLOBAL_TARGETS kvikio::kvikio + CPM_ARGS + GIT_REPOSITORY https://github.com/rapidsai/kvikio.git + GIT_TAG branch-${VERSION} + GIT_SHALLOW TRUE SOURCE_SUBDIR cpp + OPTIONS "KvikIO_BUILD_EXAMPLES OFF" + ) + + if(KvikIO_BINARY_DIR) + include("${rapids-cmake-dir}/export/find_package_root.cmake") + rapids_export_find_package_root(BUILD KvikIO "${KvikIO_BINARY_DIR}" cudf-exports) + endif() + +endfunction() + +set(KVIKIO_MIN_VERSION_cudf "${CUDF_VERSION_MAJOR}.${CUDF_VERSION_MINOR}") +find_and_configure_kvikio(${KVIKIO_MIN_VERSION_cudf}) diff --git a/cpp/cmake/thirdparty/get_libcudacxx.cmake b/cpp/cmake/thirdparty/get_libcudacxx.cmake new file mode 100644 index 0000000..0e03352 --- /dev/null +++ b/cpp/cmake/thirdparty/get_libcudacxx.cmake @@ -0,0 +1,37 @@ +# ============================================================================= +# Copyright (c) 2023, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. +# ============================================================================= + +# This function finds libcudacxx and sets any additional necessary environment variables. 
+function(find_and_configure_libcudacxx) + # Make sure we install libcudacxx beside our patched version of thrust + include(GNUInstallDirs) + set(CMAKE_INSTALL_INCLUDEDIR "${CMAKE_INSTALL_INCLUDEDIR}/libcudf") + set(CMAKE_INSTALL_LIBDIR "${CMAKE_INSTALL_INCLUDEDIR}/lib") + + include(${rapids-cmake-dir}/cpm/libcudacxx.cmake) + rapids_cpm_libcudacxx(BUILD_EXPORT_SET cudf-exports INSTALL_EXPORT_SET cudf-exports) + + if(libcudacxx_SOURCE_DIR) + # Store where CMake can find our custom Thrust install + include("${rapids-cmake-dir}/export/find_package_root.cmake") + rapids_export_find_package_root( + INSTALL + libcudacxx + [=[${CMAKE_CURRENT_LIST_DIR}/../../../include/libcudf/lib/rapids/cmake/libcudacxx]=] + cudf-exports + ) + endif() +endfunction() + +find_and_configure_libcudacxx() diff --git a/cpp/cmake/thirdparty/get_nvbench.cmake b/cpp/cmake/thirdparty/get_nvbench.cmake new file mode 100644 index 0000000..f064214 --- /dev/null +++ b/cpp/cmake/thirdparty/get_nvbench.cmake @@ -0,0 +1,28 @@ +# ============================================================================= +# Copyright (c) 2023, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. +# ============================================================================= + +# This function finds nvbench and applies any needed patches. 
+function(find_and_configure_nvbench) + + include(${rapids-cmake-dir}/cpm/nvbench.cmake) + include(${rapids-cmake-dir}/cpm/package_override.cmake) + + set(cudf_patch_dir "${CMAKE_CURRENT_FUNCTION_LIST_DIR}/patches") + rapids_cpm_package_override("${cudf_patch_dir}/nvbench_override.json") + + rapids_cpm_nvbench() + +endfunction() + +find_and_configure_nvbench() diff --git a/cpp/cmake/thirdparty/get_nvcomp.cmake b/cpp/cmake/thirdparty/get_nvcomp.cmake new file mode 100644 index 0000000..41bbf44 --- /dev/null +++ b/cpp/cmake/thirdparty/get_nvcomp.cmake @@ -0,0 +1,31 @@ +# ============================================================================= +# Copyright (c) 2021-2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. +# ============================================================================= + +# This function finds nvcomp and sets any additional necessary environment variables. 
+function(find_and_configure_nvcomp) + + include(${rapids-cmake-dir}/cpm/nvcomp.cmake) + rapids_cpm_nvcomp( + BUILD_EXPORT_SET cudf-exports + INSTALL_EXPORT_SET cudf-exports + USE_PROPRIETARY_BINARY ${CUDF_USE_PROPRIETARY_NVCOMP} + ) + + # Per-thread default stream + if(TARGET nvcomp AND CUDF_USE_PER_THREAD_DEFAULT_STREAM) + target_compile_definitions(nvcomp PRIVATE CUDA_API_PER_THREAD_DEFAULT_STREAM) + endif() +endfunction() + +find_and_configure_nvcomp() diff --git a/cpp/cmake/thirdparty/get_rmm.cmake b/cpp/cmake/thirdparty/get_rmm.cmake new file mode 100644 index 0000000..854bd3d --- /dev/null +++ b/cpp/cmake/thirdparty/get_rmm.cmake @@ -0,0 +1,24 @@ +# ============================================================================= +# Copyright (c) 2020-2021, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. +# ============================================================================= + +# This function finds rmm and sets any additional necessary environment variables. 
+function(find_and_configure_rmm) + include(${rapids-cmake-dir}/cpm/rmm.cmake) + + # Find or install RMM + rapids_cpm_rmm(BUILD_EXPORT_SET cudf-exports INSTALL_EXPORT_SET cudf-exports) + +endfunction() + +find_and_configure_rmm() diff --git a/cpp/cmake/thirdparty/get_spdlog.cmake b/cpp/cmake/thirdparty/get_spdlog.cmake new file mode 100644 index 0000000..fff5b84 --- /dev/null +++ b/cpp/cmake/thirdparty/get_spdlog.cmake @@ -0,0 +1,34 @@ +# ============================================================================= +# Copyright (c) 2023, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. 
+# ============================================================================= + +# Use CPM to find or clone spdlog +function(find_and_configure_spdlog) + + include(${rapids-cmake-dir}/cpm/spdlog.cmake) + rapids_cpm_spdlog(FMT_OPTION "EXTERNAL_FMT_HO" INSTALL_EXPORT_SET cudf-exports) + rapids_export_package(BUILD spdlog cudf-exports) + + if(spdlog_ADDED) + rapids_export( + BUILD spdlog + EXPORT_SET spdlog + GLOBAL_TARGETS spdlog spdlog_header_only + NAMESPACE spdlog:: + ) + include("${rapids-cmake-dir}/export/find_package_root.cmake") + rapids_export_find_package_root(BUILD spdlog [=[${CMAKE_CURRENT_LIST_DIR}]=] cudf-exports) + endif() +endfunction() + +find_and_configure_spdlog() diff --git a/cpp/cmake/thirdparty/get_thrust.cmake b/cpp/cmake/thirdparty/get_thrust.cmake new file mode 100644 index 0000000..39a9de1 --- /dev/null +++ b/cpp/cmake/thirdparty/get_thrust.cmake @@ -0,0 +1,46 @@ +# ============================================================================= +# Copyright (c) 2020-2023, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. +# ============================================================================= + +# This function finds thrust and sets any additional necessary environment variables. 
+function(find_and_configure_thrust) + + include(${rapids-cmake-dir}/cpm/thrust.cmake) + include(${rapids-cmake-dir}/cpm/package_override.cmake) + + set(cudf_patch_dir "${CMAKE_CURRENT_FUNCTION_LIST_DIR}/patches") + rapids_cpm_package_override("${cudf_patch_dir}/thrust_override.json") + + # Make sure we install thrust into the `include/libcudf` subdirectory instead of the default + include(GNUInstallDirs) + set(CMAKE_INSTALL_INCLUDEDIR "${CMAKE_INSTALL_INCLUDEDIR}/libcudf") + set(CMAKE_INSTALL_LIBDIR "${CMAKE_INSTALL_INCLUDEDIR}/lib") + + # Find or install Thrust with our custom set of patches + rapids_cpm_thrust( + NAMESPACE cudf + BUILD_EXPORT_SET cudf-exports + INSTALL_EXPORT_SET cudf-exports + ) + + if(Thrust_SOURCE_DIR) + # Store where CMake can find our custom Thrust install + include("${rapids-cmake-dir}/export/find_package_root.cmake") + rapids_export_find_package_root( + INSTALL Thrust + [=[${CMAKE_CURRENT_LIST_DIR}/../../../include/libcudf/lib/rapids/cmake/thrust]=] cudf-exports + ) + endif() +endfunction() + +find_and_configure_thrust() diff --git a/cpp/cmake/thirdparty/patches/cub_segmented_sort_with_bool_key.diff b/cpp/cmake/thirdparty/patches/cub_segmented_sort_with_bool_key.diff new file mode 100644 index 0000000..7c40fd4 --- /dev/null +++ b/cpp/cmake/thirdparty/patches/cub_segmented_sort_with_bool_key.diff @@ -0,0 +1,14 @@ +diff --git a/dependencies/cub/cub/agent/agent_sub_warp_merge_sort.cuh b/dependencies/cub/cub/agent/agent_sub_warp_merge_sort.cuh +index ad65f2a3..ad45a21e 100644 +--- a/dependencies/cub/cub/agent/agent_sub_warp_merge_sort.cuh ++++ b/dependencies/cub/cub/agent/agent_sub_warp_merge_sort.cuh +@@ -221,7 +221,8 @@ public: + using UnsignedBitsT = typename Traits::UnsignedBits; + UnsignedBitsT default_key_bits = IS_DESCENDING ? Traits::LOWEST_KEY + : Traits::MAX_KEY; +- KeyT oob_default = reinterpret_cast(default_key_bits); ++ KeyT oob_default = std::is_same_v ? 
!IS_DESCENDING ++ : reinterpret_cast(default_key_bits); + + WarpLoadKeysT(storage.load_keys) + .Load(keys_input, keys, segment_size, oob_default); diff --git a/cpp/cmake/thirdparty/patches/nvbench_global_setup.diff b/cpp/cmake/thirdparty/patches/nvbench_global_setup.diff new file mode 100644 index 0000000..04f96f4 --- /dev/null +++ b/cpp/cmake/thirdparty/patches/nvbench_global_setup.diff @@ -0,0 +1,29 @@ +diff --git a/nvbench/main.cuh b/nvbench/main.cuh +index 0ba82d7..cca5273 100644 +--- a/nvbench/main.cuh ++++ b/nvbench/main.cuh +@@ -54,6 +54,16 @@ + // clang-format on + #endif + ++#ifndef NVBENCH_ENVIRONMENT ++namespace nvbench { ++struct no_environment ++{ ++ no_environment(int, char const *const *) {} ++}; ++} ++#define NVBENCH_ENVIRONMENT nvbench::no_environment ++#endif ++ + #define NVBENCH_MAIN_PARSE(argc, argv) \ + nvbench::option_parser parser; \ + parser.parse(argc, argv) +@@ -77,6 +87,7 @@ + printer.set_total_state_count(total_states); \ + \ + printer.set_completed_state_count(0); \ ++ [[maybe_unused]] auto env_state = NVBENCH_ENVIRONMENT(argc, argv); \ + for (auto &bench_ptr : benchmarks) \ + { \ + bench_ptr->set_printer(printer); \ diff --git a/cpp/cmake/thirdparty/patches/nvbench_override.json b/cpp/cmake/thirdparty/patches/nvbench_override.json new file mode 100644 index 0000000..7be8680 --- /dev/null +++ b/cpp/cmake/thirdparty/patches/nvbench_override.json @@ -0,0 +1,19 @@ + +{ + "packages" : { + "nvbench" : { + "patches" : [ + { + "file" : "${current_json_dir}/nvbench_global_setup.diff", + "issue" : "Fix add support for global setup to initialize RMM in nvbench [https://github.com/NVIDIA/nvbench/pull/123]", + "fixed_in" : "" + }, + { + "file" : "nvbench/use_existing_fmt.diff", + "issue" : "Fix add support for using an existing fmt [https://github.com/NVIDIA/nvbench/pull/125]", + "fixed_in" : "" + } + ] + } + } +} diff --git a/cpp/cmake/thirdparty/patches/thrust_disable_64bit_dispatching.diff 
b/cpp/cmake/thirdparty/patches/thrust_disable_64bit_dispatching.diff new file mode 100644 index 0000000..382f7dc --- /dev/null +++ b/cpp/cmake/thirdparty/patches/thrust_disable_64bit_dispatching.diff @@ -0,0 +1,29 @@ +diff --git a/thrust/system/cuda/detail/dispatch.h b/thrust/system/cuda/detail/dispatch.h +index d0e3f94..76774b0 100644 +--- a/thrust/system/cuda/detail/dispatch.h ++++ b/thrust/system/cuda/detail/dispatch.h +@@ -32,9 +32,8 @@ + status = call arguments; \ + } \ + else { \ +- auto THRUST_PP_CAT2(count, _fixed) = static_cast(count); \ +- status = call arguments; \ +- } ++ throw std::runtime_error("THRUST_INDEX_TYPE_DISPATCH 64-bit count is unsupported in libcudf"); \ ++ } + + /** + * Dispatch between 32-bit and 64-bit index based versions of the same algorithm +@@ -52,10 +51,8 @@ + status = call arguments; \ + } \ + else { \ +- auto THRUST_PP_CAT2(count1, _fixed) = static_cast(count1); \ +- auto THRUST_PP_CAT2(count2, _fixed) = static_cast(count2); \ +- status = call arguments; \ +- } ++ throw std::runtime_error("THRUST_DOUBLE_INDEX_TYPE_DISPATCH 64-bit count is unsupported in libcudf"); \ ++ } + /** + * Dispatch between 32-bit and 64-bit index based versions of the same algorithm + * implementation. 
This version allows using different token sequences for callables diff --git a/cpp/cmake/thirdparty/patches/thrust_faster_scan_compile_times.diff b/cpp/cmake/thirdparty/patches/thrust_faster_scan_compile_times.diff new file mode 100644 index 0000000..6bf1658 --- /dev/null +++ b/cpp/cmake/thirdparty/patches/thrust_faster_scan_compile_times.diff @@ -0,0 +1,39 @@ +diff --git a/dependencies/cub/cub/device/dispatch/dispatch_radix_sort.cuh b/dependencies/cub/cub/device/dispatch/dispatch_radix_sort.cuh +index b188c75f..3f36656f 100644 +--- a/dependencies/cub/cub/device/dispatch/dispatch_radix_sort.cuh ++++ b/dependencies/cub/cub/device/dispatch/dispatch_radix_sort.cuh +@@ -736,7 +736,7 @@ struct DeviceRadixSortPolicy + + + /// SM60 (GP100) +- struct Policy600 : ChainedPolicy<600, Policy600, Policy500> ++ struct Policy600 : ChainedPolicy<600, Policy600, Policy600> + { + enum { + PRIMARY_RADIX_BITS = (sizeof(KeyT) > 1) ? 7 : 5, // 6.9B 32b keys/s (Quadro P100) +diff --git a/dependencies/cub/cub/device/dispatch/dispatch_reduce.cuh b/dependencies/cub/cub/device/dispatch/dispatch_reduce.cuh +index e0470ccb..6a0c2ed6 100644 +--- a/dependencies/cub/cub/device/dispatch/dispatch_reduce.cuh ++++ b/dependencies/cub/cub/device/dispatch/dispatch_reduce.cuh +@@ -280,7 +280,7 @@ struct DeviceReducePolicy + }; + + /// SM60 +- struct Policy600 : ChainedPolicy<600, Policy600, Policy350> ++ struct Policy600 : ChainedPolicy<600, Policy600, Policy600> + { + // ReducePolicy (P100: 591 GB/s @ 64M 4B items; 583 GB/s @ 256M 1B items) + typedef AgentReducePolicy< +diff --git a/dependencies/cub/cub/device/dispatch/dispatch_scan.cuh b/dependencies/cub/cub/device/dispatch/dispatch_scan.cuh +index c2d04588..ac2d10e0 100644 +--- a/dependencies/cub/cub/device/dispatch/dispatch_scan.cuh ++++ b/dependencies/cub/cub/device/dispatch/dispatch_scan.cuh +@@ -177,7 +177,7 @@ struct DeviceScanPolicy + }; + + /// SM600 +- struct Policy600 : ChainedPolicy<600, Policy600, Policy520> ++ struct Policy600 : 
ChainedPolicy<600, Policy600, Policy600> + { + typedef AgentScanPolicy< + 128, 15, ///< Threads per block, items per thread diff --git a/cpp/cmake/thirdparty/patches/thrust_faster_sort_compile_times.diff b/cpp/cmake/thirdparty/patches/thrust_faster_sort_compile_times.diff new file mode 100644 index 0000000..864c89d --- /dev/null +++ b/cpp/cmake/thirdparty/patches/thrust_faster_sort_compile_times.diff @@ -0,0 +1,48 @@ +diff --git a/dependencies/cub/cub/block/block_merge_sort.cuh b/dependencies/cub/cub/block/block_merge_sort.cuh +index 4769df36..d86d6342 100644 +--- a/dependencies/cub/cub/block/block_merge_sort.cuh ++++ b/dependencies/cub/cub/block/block_merge_sort.cuh +@@ -91,7 +91,7 @@ __device__ __forceinline__ void SerialMerge(KeyT *keys_shared, + KeyT key1 = keys_shared[keys1_beg]; + KeyT key2 = keys_shared[keys2_beg]; + +-#pragma unroll ++#pragma unroll 1 + for (int item = 0; item < ITEMS_PER_THREAD; ++item) + { + bool p = (keys2_beg < keys2_end) && +@@ -383,7 +383,7 @@ public: + // + KeyT max_key = oob_default; + +- #pragma unroll ++ #pragma unroll 1 + for (int item = 1; item < ITEMS_PER_THREAD; ++item) + { + if (ITEMS_PER_THREAD * linear_tid + item < valid_items) +@@ -407,7 +407,7 @@ public: + // each thread has sorted keys + // merge sort keys in shared memory + // +- #pragma unroll ++ #pragma unroll 1 + for (int target_merged_threads_number = 2; + target_merged_threads_number <= NUM_THREADS; + target_merged_threads_number *= 2) +diff --git a/dependencies/cub/cub/thread/thread_sort.cuh b/dependencies/cub/cub/thread/thread_sort.cuh +index 5d486789..b42fb5f0 100644 +--- a/dependencies/cub/cub/thread/thread_sort.cuh ++++ b/dependencies/cub/cub/thread/thread_sort.cuh +@@ -83,10 +83,10 @@ StableOddEvenSort(KeyT (&keys)[ITEMS_PER_THREAD], + { + constexpr bool KEYS_ONLY = std::is_same::value; + +- #pragma unroll ++ #pragma unroll 1 + for (int i = 0; i < ITEMS_PER_THREAD; ++i) + { +- #pragma unroll ++ #pragma unroll 1 + for (int j = 1 & i; j < ITEMS_PER_THREAD - 1; 
j += 2) + { + if (compare_op(keys[j + 1], keys[j])) diff --git a/cpp/cmake/thirdparty/patches/thrust_override.json b/cpp/cmake/thirdparty/patches/thrust_override.json new file mode 100644 index 0000000..ded2b90 --- /dev/null +++ b/cpp/cmake/thirdparty/patches/thrust_override.json @@ -0,0 +1,39 @@ + +{ + "packages" : { + "Thrust" : { + "patches" : [ + { + "file" : "Thrust/install_rules.diff", + "issue" : "Thrust 1.X installs incorrect files [https://github.com/NVIDIA/thrust/issues/1790]", + "fixed_in" : "2.0.0" + }, + { + "file" : "${current_json_dir}/thrust_transform_iter_with_reduce_by_key.diff", + "issue" : "Support transform_output_iterator as output of reduce by key [https://github.com/NVIDIA/thrust/pull/1805]", + "fixed_in" : "2.1" + }, + { + "file" : "${current_json_dir}/thrust_disable_64bit_dispatching.diff", + "issue" : "Remove 64bit dispatching as not needed by libcudf and results in compiling twice as many kernels [https://github.com/rapidsai/cudf/pull/11437]", + "fixed_in" : "" + }, + { + "file" : "${current_json_dir}/thrust_faster_sort_compile_times.diff", + "issue" : "Improve Thrust sort compile times by not unrolling loops for inlined comparators [https://github.com/rapidsai/cudf/pull/10577]", + "fixed_in" : "" + }, + { + "file" : "${current_json_dir}/thrust_faster_scan_compile_times.diff", + "issue" : "Improve Thrust scan compile times by reducing the number of kernels generated [https://github.com/rapidsai/cudf/pull/8183]", + "fixed_in" : "" + }, + { + "file" : "${current_json_dir}/cub_segmented_sort_with_bool_key.diff", + "issue" : "Fix an error in CUB DeviceSegmentedSort when the keys are bool type [https://github.com/NVIDIA/cub/issues/594]", + "fixed_in" : "2.1" + } + ] + } + } +} diff --git a/cpp/cmake/thirdparty/patches/thrust_transform_iter_with_reduce_by_key.diff b/cpp/cmake/thirdparty/patches/thrust_transform_iter_with_reduce_by_key.diff new file mode 100644 index 0000000..6a56af9 --- /dev/null 
+++ b/cpp/cmake/thirdparty/patches/thrust_transform_iter_with_reduce_by_key.diff @@ -0,0 +1,26 @@ +diff --git a/thrust/iterator/transform_input_output_iterator.h b/thrust/iterator/transform_input_output_iterator.h +index f512a36..a5f725d 100644 +--- a/thrust/iterator/transform_input_output_iterator.h ++++ b/thrust/iterator/transform_input_output_iterator.h +@@ -102,6 +102,8 @@ template + /*! \endcond + */ + ++ transform_input_output_iterator() = default; ++ + /*! This constructor takes as argument a \c Iterator an \c InputFunction and an + * \c OutputFunction and copies them to a new \p transform_input_output_iterator + * +diff --git a/thrust/iterator/transform_output_iterator.h b/thrust/iterator/transform_output_iterator.h +index 66fb46a..4a68cb5 100644 +--- a/thrust/iterator/transform_output_iterator.h ++++ b/thrust/iterator/transform_output_iterator.h +@@ -104,6 +104,8 @@ template + /*! \endcond + */ + ++ transform_output_iterator() = default; ++ + /*! This constructor takes as argument an \c OutputIterator and an \c + * UnaryFunction and copies them to a new \p transform_output_iterator + * diff --git a/cpp/doxygen/Doxyfile b/cpp/doxygen/Doxyfile new file mode 100644 index 0000000..b072d25 --- /dev/null +++ b/cpp/doxygen/Doxyfile @@ -0,0 +1,2590 @@ +# Doxyfile 1.9.1 + +# This file describes the settings to be used by the documentation system +# doxygen (www.doxygen.org) for a project. +# +# All text after a double hash (##) is considered a comment and is placed in +# front of the TAG it is preceding. +# +# All text after a single hash (#) is considered a comment and will be ignored. +# The format is: +# TAG = value [value, ...] +# For lists, items can also be appended using: +# TAG += value [value, ...] +# Values that contain spaces should be placed between quotes (\" \"). 
+ +#--------------------------------------------------------------------------- +# Project related configuration options +#--------------------------------------------------------------------------- + +# This tag specifies the encoding used for all characters in the configuration +# file that follow. The default is UTF-8 which is also the encoding used for all +# text before the first occurrence of this tag. Doxygen uses libiconv (or the +# iconv built into libc) for the transcoding. See +# https://www.gnu.org/software/libiconv/ for the list of possible encodings. +# The default value is: UTF-8. + +DOXYFILE_ENCODING = UTF-8 + +# The PROJECT_NAME tag is a single word (or a sequence of words surrounded by +# double-quotes, unless you are using Doxywizard) that should identify the +# project for which the documentation is generated. This name is used in the +# title of most generated pages and in a few other places. +# The default value is: My Project. + +PROJECT_NAME = libcudf + +# The PROJECT_NUMBER tag can be used to enter a project or revision number. This +# could be handy for archiving the generated documentation or if some version +# control system is used. + +PROJECT_NUMBER = 23.10.00 + +# Using the PROJECT_BRIEF tag one can provide an optional one line description +# for a project that appears at the top of each page and should give viewer a +# quick idea about the purpose of the project. Keep the description short. + +PROJECT_BRIEF = + +# With the PROJECT_LOGO tag one can specify a logo or an icon that is included +# in the documentation. The maximum height of the logo should not exceed 55 +# pixels and the maximum width should not exceed 200 pixels. Doxygen will copy +# the logo to the output directory. + +PROJECT_LOGO = + +# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) path +# into which the generated documentation will be written. If a relative path is +# entered, it will be relative to the location where doxygen was started. 
If +# left blank the current directory will be used. + +OUTPUT_DIRECTORY = + +# If the CREATE_SUBDIRS tag is set to YES then doxygen will create 4096 sub- +# directories (in 2 levels) under the output directory of each output format and +# will distribute the generated files over these directories. Enabling this +# option can be useful when feeding doxygen a huge amount of source files, where +# putting all generated files in the same directory would otherwise cause +# performance problems for the file system. +# The default value is: NO. + +CREATE_SUBDIRS = NO + +# If the ALLOW_UNICODE_NAMES tag is set to YES, doxygen will allow non-ASCII +# characters to appear in the names of generated files. If set to NO, non-ASCII +# characters will be escaped, for example _xE3_x81_x84 will be used for Unicode +# U+3044. +# The default value is: NO. + +ALLOW_UNICODE_NAMES = NO + +# The OUTPUT_LANGUAGE tag is used to specify the language in which all +# documentation generated by doxygen is written. Doxygen will use this +# information to generate all constant output in the proper language. +# Possible values are: Afrikaans, Arabic, Armenian, Brazilian, Catalan, Chinese, +# Chinese-Traditional, Croatian, Czech, Danish, Dutch, English (United States), +# Esperanto, Farsi (Persian), Finnish, French, German, Greek, Hungarian, +# Indonesian, Italian, Japanese, Japanese-en (Japanese with English messages), +# Korean, Korean-en (Korean with English messages), Latvian, Lithuanian, +# Macedonian, Norwegian, Persian (Farsi), Polish, Portuguese, Romanian, Russian, +# Serbian, Serbian-Cyrillic, Slovak, Slovene, Spanish, Swedish, Turkish, +# Ukrainian and Vietnamese. +# The default value is: English. + +OUTPUT_LANGUAGE = English + +# The OUTPUT_TEXT_DIRECTION tag is used to specify the direction in which all +# documentation generated by doxygen is written. Doxygen will use this +# information to generate all generated output in the proper direction.
+# Possible values are: None, LTR, RTL and Context. +# The default value is: None. + +OUTPUT_TEXT_DIRECTION = None + +# If the BRIEF_MEMBER_DESC tag is set to YES, doxygen will include brief member +# descriptions after the members that are listed in the file and class +# documentation (similar to Javadoc). Set to NO to disable this. +# The default value is: YES. + +BRIEF_MEMBER_DESC = YES + +# If the REPEAT_BRIEF tag is set to YES, doxygen will prepend the brief +# description of a member or function before the detailed description +# +# Note: If both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the +# brief descriptions will be completely suppressed. +# The default value is: YES. + +REPEAT_BRIEF = YES + +# This tag implements a quasi-intelligent brief description abbreviator that is +# used to form the text in various listings. Each string in this list, if found +# as the leading text of the brief description, will be stripped from the text +# and the result, after processing the whole list, is used as the annotated +# text. Otherwise, the brief description is used as-is. If left blank, the +# following values are used ($name is automatically replaced with the name of +# the entity):The $name class, The $name widget, The $name file, is, provides, +# specifies, contains, represents, a, an and the. + +ABBREVIATE_BRIEF = + +# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then +# doxygen will generate a detailed section even if there is only a brief +# description. +# The default value is: NO. + +ALWAYS_DETAILED_SEC = NO + +# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all +# inherited members of a class in the documentation of that class as if those +# members were ordinary class members. Constructors, destructors and assignment +# operators of the base classes will not be shown. +# The default value is: NO. 
+ +INLINE_INHERITED_MEMB = NO + +# If the FULL_PATH_NAMES tag is set to YES, doxygen will prepend the full path +# before file names in the file list and in the header files. If set to NO the +# shortest path that makes the file name unique will be used +# The default value is: YES. + +FULL_PATH_NAMES = NO + +# The STRIP_FROM_PATH tag can be used to strip a user-defined part of the path. +# Stripping is only done if one of the specified strings matches the left-hand +# part of the path. The tag can be used to show relative paths in the file list. +# If left blank the directory from which doxygen is run is used as the path to +# strip. +# +# Note that you can specify absolute paths here, but also relative paths, which +# will be relative from the directory where doxygen is started. +# This tag requires that the tag FULL_PATH_NAMES is set to YES. + +STRIP_FROM_PATH = + +# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of the +# path mentioned in the documentation of a class, which tells the reader which +# header file to include in order to use a class. If left blank only the name of +# the header file containing the class definition is used. Otherwise one should +# specify the list of include paths that are normally passed to the compiler +# using the -I flag. + +STRIP_FROM_INC_PATH = + +# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter (but +# less readable) file names. This can be useful if your file system doesn't +# support long names like on DOS, Mac, or CD-ROM. +# The default value is: NO. + +SHORT_NAMES = NO + +# If the JAVADOC_AUTOBRIEF tag is set to YES then doxygen will interpret the +# first line (until the first dot) of a Javadoc-style comment as the brief +# description. If set to NO, the Javadoc-style will behave just like regular Qt- +# style comments (thus requiring an explicit @brief command for a brief +# description.) +# The default value is: NO.
+ +JAVADOC_AUTOBRIEF = NO + +# If the JAVADOC_BANNER tag is set to YES then doxygen will interpret a line +# such as +# /*************** +# as being the beginning of a Javadoc-style comment "banner". If set to NO, the +# Javadoc-style will behave just like regular comments and it will not be +# interpreted by doxygen. +# The default value is: NO. + +JAVADOC_BANNER = NO + +# If the QT_AUTOBRIEF tag is set to YES then doxygen will interpret the first +# line (until the first dot) of a Qt-style comment as the brief description. If +# set to NO, the Qt-style will behave just like regular Qt-style comments (thus +# requiring an explicit \brief command for a brief description.) +# The default value is: NO. + +QT_AUTOBRIEF = NO + +# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make doxygen treat a +# multi-line C++ special comment block (i.e. a block of //! or /// comments) as +# a brief description. This used to be the default behavior. The new default is +# to treat a multi-line C++ comment block as a detailed description. Set this +# tag to YES if you prefer the old behavior instead. +# +# Note that setting this tag to YES also means that rational rose comments are +# not recognized any more. +# The default value is: NO. + +MULTILINE_CPP_IS_BRIEF = NO + +# By default Python docstrings are displayed as preformatted text and doxygen's +# special commands cannot be used. By setting PYTHON_DOCSTRING to NO the +# doxygen's special commands can be used and the contents of the docstring +# documentation blocks is shown as doxygen documentation. +# The default value is: YES. + +PYTHON_DOCSTRING = YES + +# If the INHERIT_DOCS tag is set to YES then an undocumented member inherits the +# documentation from any documented member that it re-implements. +# The default value is: YES. + +INHERIT_DOCS = YES + +# If the SEPARATE_MEMBER_PAGES tag is set to YES then doxygen will produce a new +# page for each member. 
If set to NO, the documentation of a member will be part +# of the file/class/namespace that contains it. +# The default value is: NO. + +SEPARATE_MEMBER_PAGES = NO + +# The TAB_SIZE tag can be used to set the number of spaces in a tab. Doxygen +# uses this value to replace tabs by spaces in code fragments. +# Minimum value: 1, maximum value: 16, default value: 4. + +TAB_SIZE = 4 + +# This tag can be used to specify a number of aliases that act as commands in +# the documentation. An alias has the form: +# name=value +# For example adding +# "sideeffect=@par Side Effects:\n" +# will allow you to put the command \sideeffect (or @sideeffect) in the +# documentation, which will result in a user-defined paragraph with heading +# "Side Effects:". You can put \n's in the value part of an alias to insert +# newlines (in the resulting output). You can put ^^ in the value part of an +# alias to insert a newline as if a physical newline was in the original file. +# When you need a literal { or } or , in the value part of an alias you have to +# escape them by means of a backslash (\), this can lead to conflicts with the +# commands \{ and \} for these it is advised to use the version @{ and @} or use +# a double escape (\\{ and \\}) + +ALIASES = + +# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources +# only. Doxygen will then generate output that is more tailored for C. For +# instance, some of the names that are used will be different. The list of all +# members will be omitted, etc. +# The default value is: NO. + +OPTIMIZE_OUTPUT_FOR_C = NO + +# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java or +# Python sources only. Doxygen will then generate output that is more tailored +# for that language. For instance, namespaces will be presented as packages, +# qualified scopes will look different, etc. +# The default value is: NO. 
+ +OPTIMIZE_OUTPUT_JAVA = NO + +# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran +# sources. Doxygen will then generate output that is tailored for Fortran. +# The default value is: NO. + +OPTIMIZE_FOR_FORTRAN = NO + +# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL +# sources. Doxygen will then generate output that is tailored for VHDL. +# The default value is: NO. + +OPTIMIZE_OUTPUT_VHDL = NO + +# Set the OPTIMIZE_OUTPUT_SLICE tag to YES if your project consists of Slice +# sources only. Doxygen will then generate output that is more tailored for that +# language. For instance, namespaces will be presented as modules, types will be +# separated into more groups, etc. +# The default value is: NO. + +OPTIMIZE_OUTPUT_SLICE = NO + +# Doxygen selects the parser to use depending on the extension of the files it +# parses. With this tag you can assign which parser to use for a given +# extension. Doxygen has a built-in mapping, but you can override or extend it +# using this tag. The format is ext=language, where ext is a file extension, and +# language is one of the parsers supported by doxygen: IDL, Java, JavaScript, +# Csharp (C#), C, C++, D, PHP, md (Markdown), Objective-C, Python, Slice, VHDL, +# Fortran (fixed format Fortran: FortranFixed, free formatted Fortran: +# FortranFree, unknown formatted Fortran: Fortran. In the latter case the parser +# tries to guess whether the code is fixed or free formatted code, this is the +# default for Fortran type files). For instance to make doxygen treat .inc files +# as Fortran files (default is PHP), and .f files as C (default is Fortran), +# use: inc=Fortran f=C. +# +# Note: For files without extension you can use no_extension as a placeholder. +# +# Note that for custom extensions you also need to set FILE_PATTERNS otherwise +# the files are not read by doxygen. When specifying no_extension you should add +# * to the FILE_PATTERNS.
+# +# Note see also the list of default file extension mappings. + +EXTENSION_MAPPING = cu=C++ \ + cuh=C++ + +# If the MARKDOWN_SUPPORT tag is enabled then doxygen pre-processes all comments +# according to the Markdown format, which allows for more readable +# documentation. See https://daringfireball.net/projects/markdown/ for details. +# The output of markdown processing is further processed by doxygen, so you can +# mix doxygen, HTML, and XML commands with Markdown formatting. Disable only in +# case of backward compatibilities issues. +# The default value is: YES. + +MARKDOWN_SUPPORT = YES + +# When the TOC_INCLUDE_HEADINGS tag is set to a non-zero value, all headings up +# to that level are automatically included in the table of contents, even if +# they do not have an id attribute. +# Note: This feature currently applies only to Markdown headings. +# Minimum value: 0, maximum value: 99, default value: 5. +# This tag requires that the tag MARKDOWN_SUPPORT is set to YES. + +TOC_INCLUDE_HEADINGS = 5 + +# When enabled doxygen tries to link words that correspond to documented +# classes, or namespaces to their corresponding documentation. Such a link can +# be prevented in individual cases by putting a % sign in front of the word or +# globally by setting AUTOLINK_SUPPORT to NO. +# The default value is: YES. + +AUTOLINK_SUPPORT = YES + +# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want +# to include (a tag file for) the STL sources as input, then you should set this +# tag to YES in order to let doxygen match function declarations and +# definitions whose arguments contain STL classes (e.g. func(std::string); +# versus func(std::string) {}). This also makes the inheritance and collaboration +# diagrams that involve STL classes more complete and accurate. +# The default value is: NO. + +BUILTIN_STL_SUPPORT = NO + +# If you use Microsoft's C++/CLI language, you should set this option to YES to +# enable parsing support.
+# The default value is: NO. + +CPP_CLI_SUPPORT = NO + +# Set the SIP_SUPPORT tag to YES if your project consists of sip (see: +# https://www.riverbankcomputing.com/software/sip/intro) sources only. Doxygen +# will parse them like normal C++ but will assume all classes use public instead +# of private inheritance when no explicit protection keyword is present. +# The default value is: NO. + +SIP_SUPPORT = NO + +# For Microsoft's IDL there are propget and propput attributes to indicate +# getter and setter methods for a property. Setting this option to YES will make +# doxygen replace the get and set methods by a property in the documentation. +# This will only work if the methods are indeed getting or setting a simple +# type. If this is not the case, or you want to show the methods anyway, you +# should set this option to NO. +# The default value is: YES. + +IDL_PROPERTY_SUPPORT = YES + +# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC +# tag is set to YES then doxygen will reuse the documentation of the first +# member in the group (if any) for the other members of the group. By default +# all members of a group must be documented explicitly. +# The default value is: NO. + +DISTRIBUTE_GROUP_DOC = NO + +# If one adds a struct or class to a group and this option is enabled, then also +# any nested class or struct is added to the same group. By default this option +# is disabled and one has to add nested compounds explicitly via \ingroup. +# The default value is: NO. + +GROUP_NESTED_COMPOUNDS = NO + +# Set the SUBGROUPING tag to YES to allow class member groups of the same type +# (for instance a group of public functions) to be put as a subgroup of that +# type (e.g. under the Public Functions section). Set it to NO to prevent +# subgrouping. Alternatively, this can be done per class using the +# \nosubgrouping command. +# The default value is: YES.
+ +SUBGROUPING = YES + +# When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and unions +# are shown inside the group in which they are included (e.g. using \ingroup) +# instead of on a separate page (for HTML and Man pages) or section (for LaTeX +# and RTF). +# +# Note that this feature does not work in combination with +# SEPARATE_MEMBER_PAGES. +# The default value is: NO. + +INLINE_GROUPED_CLASSES = NO + +# When the INLINE_SIMPLE_STRUCTS tag is set to YES, structs, classes, and unions +# with only public data fields or simple typedef fields will be shown inline in +# the documentation of the scope in which they are defined (i.e. file, +# namespace, or group documentation), provided this scope is documented. If set +# to NO, structs, classes, and unions are shown on a separate page (for HTML and +# Man pages) or section (for LaTeX and RTF). +# The default value is: NO. + +INLINE_SIMPLE_STRUCTS = NO + +# When TYPEDEF_HIDES_STRUCT tag is enabled, a typedef of a struct, union, or +# enum is documented as struct, union, or enum with the name of the typedef. So +# typedef struct TypeS {} TypeT, will appear in the documentation as a struct +# with name TypeT. When disabled the typedef will appear as a member of a file, +# namespace, or class. And the struct will be named TypeS. This can typically be +# useful for C code in case the coding convention dictates that all compound +# types are typedef'ed and only the typedef is referenced, never the tag name. +# The default value is: NO. + +TYPEDEF_HIDES_STRUCT = NO + +# The size of the symbol lookup cache can be set using LOOKUP_CACHE_SIZE. This +# cache is used to resolve symbols given their name and scope. Since this can be +# an expensive process and often the same symbol appears multiple times in the +# code, doxygen keeps a cache of pre-resolved symbols. If the cache is too small +# doxygen will become slower. If the cache is too large, memory is wasted. 
The +# cache size is given by this formula: 2^(16+LOOKUP_CACHE_SIZE). The valid range +# is 0..9, the default is 0, corresponding to a cache size of 2^16=65536 +# symbols. At the end of a run doxygen will report the cache usage and suggest +# the optimal cache size from a speed point of view. +# Minimum value: 0, maximum value: 9, default value: 0. + +LOOKUP_CACHE_SIZE = 0 + +# The NUM_PROC_THREADS specifies the number of threads doxygen is allowed to use +# during processing. When set to 0 doxygen will base this on the number of +# cores available in the system. You can set it explicitly to a value larger +# than 0 to get more control over the balance between CPU load and processing +# speed. At this moment only the input processing can be done using multiple +# threads. Since this is still an experimental feature the default is set to 1, +# which effectively disables parallel processing. Please report any issues you +# encounter. Generating dot graphs in parallel is controlled by the +# DOT_NUM_THREADS setting. +# Minimum value: 0, maximum value: 32, default value: 1. + +NUM_PROC_THREADS = 1 + +#--------------------------------------------------------------------------- +# Build related configuration options +#--------------------------------------------------------------------------- + +# If the EXTRACT_ALL tag is set to YES, doxygen will assume all entities in +# documentation are documented, even if no documentation was available. Private +# class members and static file members will be hidden unless the +# EXTRACT_PRIVATE respectively EXTRACT_STATIC tags are set to YES. +# Note: This will also disable the warnings about undocumented members that are +# normally produced when WARNINGS is set to YES. +# The default value is: NO. + +EXTRACT_ALL = NO + +# If the EXTRACT_PRIVATE tag is set to YES, all private members of a class will +# be included in the documentation. +# The default value is: NO.
+ +EXTRACT_PRIVATE = NO + +# If the EXTRACT_PRIV_VIRTUAL tag is set to YES, documented private virtual +# methods of a class will be included in the documentation. +# The default value is: NO. + +EXTRACT_PRIV_VIRTUAL = NO + +# If the EXTRACT_PACKAGE tag is set to YES, all members with package or internal +# scope will be included in the documentation. +# The default value is: NO. + +EXTRACT_PACKAGE = NO + +# If the EXTRACT_STATIC tag is set to YES, all static members of a file will be +# included in the documentation. +# The default value is: NO. + +EXTRACT_STATIC = NO + +# If the EXTRACT_LOCAL_CLASSES tag is set to YES, classes (and structs) defined +# locally in source files will be included in the documentation. If set to NO, +# only classes defined in header files are included. Does not have any effect +# for Java sources. +# The default value is: YES. + +EXTRACT_LOCAL_CLASSES = YES + +# This flag is only useful for Objective-C code. If set to YES, local methods, +# which are defined in the implementation section but not in the interface are +# included in the documentation. If set to NO, only methods in the interface are +# included. +# The default value is: NO. + +EXTRACT_LOCAL_METHODS = NO + +# If this flag is set to YES, the members of anonymous namespaces will be +# extracted and appear in the documentation as a namespace called +# 'anonymous_namespace{file}', where file will be replaced with the base name of +# the file that contains the anonymous namespace. By default anonymous namespace +# are hidden. +# The default value is: NO. + +EXTRACT_ANON_NSPACES = NO + +# If this flag is set to YES, the name of an unnamed parameter in a declaration +# will be determined by the corresponding definition. By default unnamed +# parameters remain unnamed in the output. +# The default value is: YES. + +RESOLVE_UNNAMED_PARAMS = YES + +# If the HIDE_UNDOC_MEMBERS tag is set to YES, doxygen will hide all +# undocumented members inside documented classes or files. 
If set to NO these +# members will be included in the various overviews, but no documentation +# section is generated. This option has no effect if EXTRACT_ALL is enabled. +# The default value is: NO. + +HIDE_UNDOC_MEMBERS = NO + +# If the HIDE_UNDOC_CLASSES tag is set to YES, doxygen will hide all +# undocumented classes that are normally visible in the class hierarchy. If set +# to NO, these classes will be included in the various overviews. This option +# has no effect if EXTRACT_ALL is enabled. +# The default value is: NO. + +HIDE_UNDOC_CLASSES = NO + +# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, doxygen will hide all friend +# declarations. If set to NO, these declarations will be included in the +# documentation. +# The default value is: NO. + +HIDE_FRIEND_COMPOUNDS = NO + +# If the HIDE_IN_BODY_DOCS tag is set to YES, doxygen will hide any +# documentation blocks found inside the body of a function. If set to NO, these +# blocks will be appended to the function's detailed documentation block. +# The default value is: NO. + +HIDE_IN_BODY_DOCS = NO + +# The INTERNAL_DOCS tag determines if documentation that is typed after a +# \internal command is included. If the tag is set to NO then the documentation +# will be excluded. Set it to YES to include the internal documentation. +# The default value is: NO. + +INTERNAL_DOCS = NO + +# With the correct setting of option CASE_SENSE_NAMES doxygen will better be +# able to match the capabilities of the underlying filesystem. In case the +# filesystem is case sensitive (i.e. it supports files in the same directory +# whose names only differ in casing), the option must be set to YES to properly +# deal with such files in case they appear in the input. 
For filesystems that +# are not case sensitive the option should be set to NO to properly deal with +# output files written for symbols that only differ in casing, such as for two +# classes, one named CLASS and the other named Class, and to also support +# references to files without having to specify the exact matching casing. On +# Windows (including Cygwin) and MacOS, users should typically set this option +# to NO, whereas on Linux or other Unix flavors it should typically be set to +# YES. +# The default value is: system dependent. + +CASE_SENSE_NAMES = YES + +# If the HIDE_SCOPE_NAMES tag is set to NO then doxygen will show members with +# their full class and namespace scopes in the documentation. If set to YES, the +# scope will be hidden. +# The default value is: NO. + +HIDE_SCOPE_NAMES = NO + +# If the HIDE_COMPOUND_REFERENCE tag is set to NO (default) then doxygen will +# append additional text to a page's title, such as Class Reference. If set to +# YES the compound reference will be hidden. +# The default value is: NO. + +HIDE_COMPOUND_REFERENCE= NO + +# If the SHOW_INCLUDE_FILES tag is set to YES then doxygen will put a list of +# the files that are included by a file in the documentation of that file. +# The default value is: YES. + +SHOW_INCLUDE_FILES = YES + +# If the SHOW_GROUPED_MEMB_INC tag is set to YES then Doxygen will add for each +# grouped member an include statement to the documentation, telling the reader +# which file to include in order to use the member. +# The default value is: NO. + +SHOW_GROUPED_MEMB_INC = NO + +# If the FORCE_LOCAL_INCLUDES tag is set to YES then doxygen will list include +# files with double quotes in the documentation rather than with sharp brackets. +# The default value is: NO. + +FORCE_LOCAL_INCLUDES = NO + +# If the INLINE_INFO tag is set to YES then a tag [inline] is inserted in the +# documentation for inline members. +# The default value is: YES.
+ +INLINE_INFO = YES + +# If the SORT_MEMBER_DOCS tag is set to YES then doxygen will sort the +# (detailed) documentation of file and class members alphabetically by member +# name. If set to NO, the members will appear in declaration order. +# The default value is: YES. + +SORT_MEMBER_DOCS = YES + +# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the brief +# descriptions of file, namespace and class members alphabetically by member +# name. If set to NO, the members will appear in declaration order. Note that +# this will also influence the order of the classes in the class list. +# The default value is: NO. + +SORT_BRIEF_DOCS = NO + +# If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen will sort the +# (brief and detailed) documentation of class members so that constructors and +# destructors are listed first. If set to NO the constructors will appear in the +# respective orders defined by SORT_BRIEF_DOCS and SORT_MEMBER_DOCS. +# Note: If SORT_BRIEF_DOCS is set to NO this option is ignored for sorting brief +# member documentation. +# Note: If SORT_MEMBER_DOCS is set to NO this option is ignored for sorting +# detailed member documentation. +# The default value is: NO. + +SORT_MEMBERS_CTORS_1ST = NO + +# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the hierarchy +# of group names into alphabetical order. If set to NO the group names will +# appear in their defined order. +# The default value is: NO. + +SORT_GROUP_NAMES = NO + +# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be sorted by +# fully-qualified names, including namespaces. If set to NO, the class list will +# be sorted only by class name, not including the namespace part. +# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. +# Note: This option applies only to the class list, not to the alphabetical +# list. +# The default value is: NO. 
+ +SORT_BY_SCOPE_NAME = NO + +# If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to do proper +# type resolution of all parameters of a function it will reject a match between +# the prototype and the implementation of a member function even if there is +# only one candidate or it is obvious which candidate to choose by doing a +# simple string match. By disabling STRICT_PROTO_MATCHING doxygen will still +# accept a match between prototype and implementation in such cases. +# The default value is: NO. + +STRICT_PROTO_MATCHING = NO + +# The GENERATE_TODOLIST tag can be used to enable (YES) or disable (NO) the todo +# list. This list is created by putting \todo commands in the documentation. +# The default value is: YES. + +GENERATE_TODOLIST = YES + +# The GENERATE_TESTLIST tag can be used to enable (YES) or disable (NO) the test +# list. This list is created by putting \test commands in the documentation. +# The default value is: YES. + +GENERATE_TESTLIST = YES + +# The GENERATE_BUGLIST tag can be used to enable (YES) or disable (NO) the bug +# list. This list is created by putting \bug commands in the documentation. +# The default value is: YES. + +GENERATE_BUGLIST = YES + +# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or disable (NO) +# the deprecated list. This list is created by putting \deprecated commands in +# the documentation. +# The default value is: YES. + +GENERATE_DEPRECATEDLIST= YES + +# The ENABLED_SECTIONS tag can be used to enable conditional documentation +# sections, marked by \if ... \endif and \cond +# ... \endcond blocks. + +ENABLED_SECTIONS = + +# The MAX_INITIALIZER_LINES tag determines the maximum number of lines that the +# initial value of a variable or macro / define can have for it to appear in the +# documentation. If the initializer consists of more lines than specified here +# it will be hidden. Use a value of 0 to hide initializers completely. 
The +# appearance of the value of individual variables and macros / defines can be +# controlled using \showinitializer or \hideinitializer command in the +# documentation regardless of this setting. +# Minimum value: 0, maximum value: 10000, default value: 30. + +MAX_INITIALIZER_LINES = 30 + +# Set the SHOW_USED_FILES tag to NO to disable the list of files generated at +# the bottom of the documentation of classes and structs. If set to YES, the +# list will mention the files that were used to generate the documentation. +# The default value is: YES. + +SHOW_USED_FILES = YES + +# Set the SHOW_FILES tag to NO to disable the generation of the Files page. This +# will remove the Files entry from the Quick Index and from the Folder Tree View +# (if specified). +# The default value is: YES. + +SHOW_FILES = YES + +# Set the SHOW_NAMESPACES tag to NO to disable the generation of the Namespaces +# page. This will remove the Namespaces entry from the Quick Index and from the +# Folder Tree View (if specified). +# The default value is: YES. + +SHOW_NAMESPACES = YES + +# The FILE_VERSION_FILTER tag can be used to specify a program or script that +# doxygen should invoke to get the current version for each file (typically from +# the version control system). Doxygen will invoke the program by executing (via +# popen()) the command command input-file, where command is the value of the +# FILE_VERSION_FILTER tag, and input-file is the name of an input file provided +# by doxygen. Whatever the program writes to standard output is used as the file +# version. For an example see the documentation. + +FILE_VERSION_FILTER = + +# The LAYOUT_FILE tag can be used to specify a layout file which will be parsed +# by doxygen. The layout file controls the global structure of the generated +# output files in an output format independent way. To create the layout file +# that represents doxygen's defaults, run doxygen with the -l option. 
You can +# optionally specify a file name after the option, if omitted DoxygenLayout.xml +# will be used as the name of the layout file. +# +# Note that if you run doxygen from a directory containing a file called +# DoxygenLayout.xml, doxygen will parse it automatically even if the LAYOUT_FILE +# tag is left empty. + +LAYOUT_FILE = DoxygenLayout.xml + +# The CITE_BIB_FILES tag can be used to specify one or more bib files containing +# the reference definitions. This must be a list of .bib files. The .bib +# extension is automatically appended if omitted. This requires the bibtex tool +# to be installed. See also https://en.wikipedia.org/wiki/BibTeX for more info. +# For LaTeX the style of the bibliography can be controlled using +# LATEX_BIB_STYLE. To use this feature you need bibtex and perl available in the +# search path. See also \cite for info how to create references. + +CITE_BIB_FILES = + +#--------------------------------------------------------------------------- +# Configuration options related to warning and progress messages +#--------------------------------------------------------------------------- + +# The QUIET tag can be used to turn on/off the messages that are generated to +# standard output by doxygen. If QUIET is set to YES this implies that the +# messages are off. +# The default value is: NO. + +QUIET = NO + +# The WARNINGS tag can be used to turn on/off the warning messages that are +# generated to standard error (stderr) by doxygen. If WARNINGS is set to YES +# this implies that the warnings are on. +# +# Tip: Turn warnings on while writing the documentation. +# The default value is: YES. + +WARNINGS = YES + +# If the WARN_IF_UNDOCUMENTED tag is set to YES then doxygen will generate +# warnings for undocumented members. If EXTRACT_ALL is set to YES then this flag +# will automatically be disabled. +# The default value is: YES. 
+ +WARN_IF_UNDOCUMENTED = YES + +# If the WARN_IF_DOC_ERROR tag is set to YES, doxygen will generate warnings for +# potential errors in the documentation, such as not documenting some parameters +# in a documented function, or documenting parameters that don't exist or using +# markup commands wrongly. +# The default value is: YES. + +WARN_IF_DOC_ERROR = YES + +# This WARN_NO_PARAMDOC option can be enabled to get warnings for functions that +# are documented, but have no documentation for their parameters or return +# value. If set to NO, doxygen will only warn about wrong or incomplete +# parameter documentation, but not about the absence of documentation. If +# EXTRACT_ALL is set to YES then this flag will automatically be disabled. +# The default value is: NO. + +WARN_NO_PARAMDOC = YES + +# If the WARN_AS_ERROR tag is set to YES then doxygen will immediately stop when +# a warning is encountered. If the WARN_AS_ERROR tag is set to FAIL_ON_WARNINGS +# then doxygen will continue running as if WARN_AS_ERROR tag is set to NO, but +# at the end of the doxygen process doxygen will return with a non-zero status. +# Possible values are: NO, YES and FAIL_ON_WARNINGS. +# The default value is: NO. + +WARN_AS_ERROR = NO + +# The WARN_FORMAT tag determines the format of the warning messages that doxygen +# can produce. The string should contain the $file, $line, and $text tags, which +# will be replaced by the file and line number from which the warning originated +# and the warning text. Optionally the format may contain $version, which will +# be replaced by the version of the file (if it could be obtained via +# FILE_VERSION_FILTER) +# The default value is: $file:$line: $text. + +WARN_FORMAT = "$file:$line: $text" + +# The WARN_LOGFILE tag can be used to specify a file to which warning and error +# messages should be written. If left blank the output is written to standard +# error (stderr). 
+ +WARN_LOGFILE = + +#--------------------------------------------------------------------------- +# Configuration options related to the input files +#--------------------------------------------------------------------------- + +# The INPUT tag is used to specify the files and/or directories that contain +# documented source files. You may enter file names like myfile.cpp or +# directories like /usr/src/myproject. Separate the files or directories with +# spaces. See also FILE_PATTERNS and EXTENSION_MAPPING +# Note: If this tag is empty the current directory is searched. + +INPUT = main_page.md \ + regex.md \ + unicode.md \ + developer_guide/BENCHMARKING.md \ + developer_guide/DOCUMENTATION.md \ + developer_guide/DEVELOPER_GUIDE.md \ + developer_guide/TESTING.md \ + ../include \ + ../include/cudf_test/column_wrapper.hpp \ + ../include/cudf_test/column_utilities.hpp \ + ../include/cudf_test/iterator_utilities.hpp \ + ../include/cudf_test/table_utilities.hpp \ + ../include/cudf_test/type_lists.hpp \ + ../include/cudf_test/type_list_utilities.hpp \ + ../libcudf_kafka/include + +# This tag can be used to specify the character encoding of the source files +# that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses +# libiconv (or the iconv built into libc) for the transcoding. See the libiconv +# documentation (see: +# https://www.gnu.org/software/libiconv/) for the list of possible encodings. +# The default value is: UTF-8. + +INPUT_ENCODING = UTF-8 + +# If the value of the INPUT tag contains directories, you can use the +# FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp and +# *.h) to filter out the source-files in the directories. +# +# Note that for custom extensions or not directly supported extensions you also +# need to set EXTENSION_MAPPING for the extension otherwise the files are not +# read by doxygen. 
+# +# Note the list of default checked file patterns might differ from the list of +# default file extension mappings. +# +# If left blank the following patterns are tested:*.c, *.cc, *.cxx, *.cpp, +# *.c++, *.java, *.ii, *.ixx, *.ipp, *.i++, *.inl, *.idl, *.ddl, *.odl, *.h, +# *.hh, *.hxx, *.hpp, *.h++, *.cs, *.d, *.php, *.php4, *.php5, *.phtml, *.inc, +# *.m, *.markdown, *.md, *.mm, *.dox (to be provided as doxygen C comment), +# *.py, *.pyw, *.f90, *.f95, *.f03, *.f08, *.f18, *.f, *.for, *.vhd, *.vhdl, +# *.ucf, *.qsf and *.ice. + +FILE_PATTERNS = *.cpp \ + *.hpp \ + *.h \ + *.c \ + *.cu \ + *.cuh + +# The RECURSIVE tag can be used to specify whether or not subdirectories should +# be searched for input files as well. +# The default value is: NO. + +RECURSIVE = YES + +# The EXCLUDE tag can be used to specify files and/or directories that should be +# excluded from the INPUT source files. This way you can easily exclude a +# subdirectory from a directory tree whose root is specified with the INPUT tag. +# +# Note that relative paths are relative to the directory from which doxygen is +# run. + +EXCLUDE = + +# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or +# directories that are symbolic links (a Unix file system feature) are excluded +# from the input. +# The default value is: NO. + +EXCLUDE_SYMLINKS = NO + +# If the value of the INPUT tag contains directories, you can use the +# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude +# certain files from those directories. +# +# Note that the wildcards are matched against the file with absolute path, so to +# exclude all test directories for example use the pattern */test/* + +EXCLUDE_PATTERNS = */nvtx/* \ + */detail/* \ + */cudf_test/* + +# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names +# (namespaces, classes, functions, etc.) that should be excluded from the +# output. 
The symbol name can be a fully qualified name, a word, or if the
+# wildcard * is used, a substring. Examples: ANamespace, AClass,
+# AClass::ANamespace, ANamespace::*Test
+#
+# Note that the wildcards are matched against the file with absolute path, so to
+# exclude all test directories use the pattern */test/*
+
+EXCLUDE_SYMBOLS = org::apache \
+ *_impl \
+ *Impl
+
+# The EXAMPLE_PATH tag can be used to specify one or more files or directories
+# that contain example code fragments that are included (see the \include
+# command).
+
+EXAMPLE_PATH =
+
+# If the value of the EXAMPLE_PATH tag contains directories, you can use the
+# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp and
+# *.h) to filter out the source-files in the directories. If left blank all
+# files are included.
+
+EXAMPLE_PATTERNS =
+
+# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be
+# searched for input files to be used with the \include or \dontinclude commands
+# irrespective of the value of the RECURSIVE tag.
+# The default value is: NO.
+
+EXAMPLE_RECURSIVE = NO
+
+# The IMAGE_PATH tag can be used to specify one or more files or directories
+# that contain images that are to be included in the documentation (see the
+# \image command).
+
+IMAGE_PATH =
+
+# The INPUT_FILTER tag can be used to specify a program that doxygen should
+# invoke to filter for each input file. Doxygen will invoke the filter program
+# by executing (via popen()) the command:
+#
+# <filter> <input-file>
+#
+# where <filter> is the value of the INPUT_FILTER tag, and <input-file> is the
+# name of an input file. Doxygen will then use the output that the filter
+# program writes to standard output. If FILTER_PATTERNS is specified, this tag
+# will be ignored.
+#
+# Note that the filter must not add or remove lines; it is applied before the
+# code is scanned, but not when the output code is generated. If lines are added
+# or removed, the anchors will not be placed correctly.
+# +# Note that for custom extensions or not directly supported extensions you also +# need to set EXTENSION_MAPPING for the extension otherwise the files are not +# properly processed by doxygen. + +INPUT_FILTER = + +# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern +# basis. Doxygen will compare the file name with each pattern and apply the +# filter if there is a match. The filters are a list of the form: pattern=filter +# (like *.cpp=my_cpp_filter). See INPUT_FILTER for further information on how +# filters are used. If the FILTER_PATTERNS tag is empty or if none of the +# patterns match the file name, INPUT_FILTER is applied. +# +# Note that for custom extensions or not directly supported extensions you also +# need to set EXTENSION_MAPPING for the extension otherwise the files are not +# properly processed by doxygen. + +FILTER_PATTERNS = *.md=./modify_fences.sh + +# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using +# INPUT_FILTER) will also be used to filter the input files that are used for +# producing the source files to browse (i.e. when SOURCE_BROWSER is set to YES). +# The default value is: NO. + +FILTER_SOURCE_FILES = NO + +# The FILTER_SOURCE_PATTERNS tag can be used to specify source filters per file +# pattern. A pattern will override the setting for FILTER_PATTERN (if any) and +# it is also possible to disable source filtering for a specific pattern using +# *.ext= (so without naming a filter). +# This tag requires that the tag FILTER_SOURCE_FILES is set to YES. + +FILTER_SOURCE_PATTERNS = + +# If the USE_MDFILE_AS_MAINPAGE tag refers to the name of a markdown file that +# is part of the input, its contents will be placed on the main page +# (index.html). This can be useful if you have a project on for instance GitHub +# and want to reuse the introduction page also for the doxygen output. 
+ +USE_MDFILE_AS_MAINPAGE = main_page.md + +#--------------------------------------------------------------------------- +# Configuration options related to source browsing +#--------------------------------------------------------------------------- + +# If the SOURCE_BROWSER tag is set to YES then a list of source files will be +# generated. Documented entities will be cross-referenced with these sources. +# +# Note: To get rid of all source code in the generated output, make sure that +# also VERBATIM_HEADERS is set to NO. +# The default value is: NO. + +SOURCE_BROWSER = YES + +# Setting the INLINE_SOURCES tag to YES will include the body of functions, +# classes and enums directly into the documentation. +# The default value is: NO. + +INLINE_SOURCES = NO + +# Setting the STRIP_CODE_COMMENTS tag to YES will instruct doxygen to hide any +# special comment blocks from generated source code fragments. Normal C, C++ and +# Fortran comments will always remain visible. +# The default value is: YES. + +STRIP_CODE_COMMENTS = YES + +# If the REFERENCED_BY_RELATION tag is set to YES then for each documented +# entity all documented functions referencing it will be listed. +# The default value is: NO. + +REFERENCED_BY_RELATION = NO + +# If the REFERENCES_RELATION tag is set to YES then for each documented function +# all documented entities called/used by that function will be listed. +# The default value is: NO. + +REFERENCES_RELATION = NO + +# If the REFERENCES_LINK_SOURCE tag is set to YES and SOURCE_BROWSER tag is set +# to YES then the hyperlinks from functions in REFERENCES_RELATION and +# REFERENCED_BY_RELATION lists will link to the source code. Otherwise they will +# link to the documentation. +# The default value is: YES. 
+ +REFERENCES_LINK_SOURCE = YES + +# If SOURCE_TOOLTIPS is enabled (the default) then hovering a hyperlink in the +# source code will show a tooltip with additional information such as prototype, +# brief description and links to the definition and documentation. Since this +# will make the HTML file larger and loading of large files a bit slower, you +# can opt to disable this feature. +# The default value is: YES. +# This tag requires that the tag SOURCE_BROWSER is set to YES. + +SOURCE_TOOLTIPS = YES + +# If the USE_HTAGS tag is set to YES then the references to source code will +# point to the HTML generated by the htags(1) tool instead of doxygen built-in +# source browser. The htags tool is part of GNU's global source tagging system +# (see https://www.gnu.org/software/global/global.html). You will need version +# 4.8.6 or higher. +# +# To use it do the following: +# - Install the latest version of global +# - Enable SOURCE_BROWSER and USE_HTAGS in the configuration file +# - Make sure the INPUT points to the root of the source tree +# - Run doxygen as normal +# +# Doxygen will invoke htags (and that will in turn invoke gtags), so these +# tools must be available from the command line (i.e. in the search path). +# +# The result: instead of the source browser generated by doxygen, the links to +# source code will now point to the output of htags. +# The default value is: NO. +# This tag requires that the tag SOURCE_BROWSER is set to YES. + +USE_HTAGS = NO + +# If the VERBATIM_HEADERS tag is set the YES then doxygen will generate a +# verbatim copy of the header file for each class for which an include is +# specified. Set to NO to disable this. +# See also: Section \class. +# The default value is: YES. 
+ +VERBATIM_HEADERS = YES + +#--------------------------------------------------------------------------- +# Configuration options related to the alphabetical class index +#--------------------------------------------------------------------------- + +# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index of all +# compounds will be generated. Enable this if the project contains a lot of +# classes, structs, unions or interfaces. +# The default value is: YES. + +ALPHABETICAL_INDEX = YES + +# In case all classes in a project start with a common prefix, all classes will +# be put under the same header in the alphabetical index. The IGNORE_PREFIX tag +# can be used to specify a prefix (or a list of prefixes) that should be ignored +# while generating the index headers. +# This tag requires that the tag ALPHABETICAL_INDEX is set to YES. + +IGNORE_PREFIX = + +#--------------------------------------------------------------------------- +# Configuration options related to the HTML output +#--------------------------------------------------------------------------- + +# If the GENERATE_HTML tag is set to YES, doxygen will generate HTML output +# The default value is: YES. + +GENERATE_HTML = YES + +# The HTML_OUTPUT tag is used to specify where the HTML docs will be put. If a +# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of +# it. +# The default directory is: html. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_OUTPUT = html + +# The HTML_FILE_EXTENSION tag can be used to specify the file extension for each +# generated HTML page (for example: .htm, .php, .asp). +# The default value is: .html. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_FILE_EXTENSION = .html + +# The HTML_HEADER tag can be used to specify a user-defined HTML header file for +# each generated HTML page. If the tag is left blank doxygen will generate a +# standard header. 
+# +# To get valid HTML the header file that includes any scripts and style sheets +# that doxygen needs, which is dependent on the configuration options used (e.g. +# the setting GENERATE_TREEVIEW). It is highly recommended to start with a +# default header using +# doxygen -w html new_header.html new_footer.html new_stylesheet.css +# YourConfigFile +# and then modify the file new_header.html. See also section "Doxygen usage" +# for information on how to generate the default header that doxygen normally +# uses. +# Note: The header is subject to change so you typically have to regenerate the +# default header when upgrading to a newer version of doxygen. For a description +# of the possible markers and block names see the documentation. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_HEADER = header.html + +# The HTML_FOOTER tag can be used to specify a user-defined HTML footer for each +# generated HTML page. If the tag is left blank doxygen will generate a standard +# footer. See HTML_HEADER for more information on how to generate a default +# footer and what special commands can be used inside the footer. See also +# section "Doxygen usage" for information on how to generate the default footer +# that doxygen normally uses. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_FOOTER = + +# The HTML_STYLESHEET tag can be used to specify a user-defined cascading style +# sheet that is used by each HTML page. It can be used to fine-tune the look of +# the HTML output. If left blank doxygen will generate a default style sheet. +# See also section "Doxygen usage" for information on how to generate the style +# sheet that doxygen normally uses. +# Note: It is recommended to use HTML_EXTRA_STYLESHEET instead of this tag, as +# it is more robust and this tag (HTML_STYLESHEET) will in the future become +# obsolete. +# This tag requires that the tag GENERATE_HTML is set to YES. 
+ +HTML_STYLESHEET = + +# The HTML_EXTRA_STYLESHEET tag can be used to specify additional user-defined +# cascading style sheets that are included after the standard style sheets +# created by doxygen. Using this option one can overrule certain style aspects. +# This is preferred over using HTML_STYLESHEET since it does not replace the +# standard style sheet and is therefore more robust against future updates. +# Doxygen will copy the style sheet files to the output directory. +# Note: The order of the extra style sheet files is of importance (e.g. the last +# style sheet in the list overrules the setting of the previous ones in the +# list). For an example see the documentation. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_EXTRA_STYLESHEET = + +# The HTML_EXTRA_FILES tag can be used to specify one or more extra images or +# other source files which should be copied to the HTML output directory. Note +# that these files will be copied to the base HTML output directory. Use the +# $relpath^ marker in the HTML_HEADER and/or HTML_FOOTER files to load these +# files. In the HTML_STYLESHEET file, use the file name only. Also note that the +# files will be copied as-is; there are no commands or markers available. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_EXTRA_FILES = + +# The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. Doxygen +# will adjust the colors in the style sheet and background images according to +# this color. Hue is specified as an angle on a colorwheel, see +# https://en.wikipedia.org/wiki/Hue for more information. For instance the value +# 0 represents red, 60 is yellow, 120 is green, 180 is cyan, 240 is blue, 300 +# purple, and 360 is red again. +# Minimum value: 0, maximum value: 359, default value: 220. +# This tag requires that the tag GENERATE_HTML is set to YES. 
+ +HTML_COLORSTYLE_HUE = 266 + +# The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of the colors +# in the HTML output. For a value of 0 the output will use grayscales only. A +# value of 255 will produce the most vivid colors. +# Minimum value: 0, maximum value: 255, default value: 100. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_COLORSTYLE_SAT = 255 + +# The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to the +# luminance component of the colors in the HTML output. Values below 100 +# gradually make the output lighter, whereas values above 100 make the output +# darker. The value divided by 100 is the actual gamma applied, so 80 represents +# a gamma of 0.8, The value 220 represents a gamma of 2.2, and 100 does not +# change the gamma. +# Minimum value: 40, maximum value: 240, default value: 80. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_COLORSTYLE_GAMMA = 52 + +# If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML +# page will contain the date and time when the page was generated. Setting this +# to YES can help to show when doxygen was last run and thus if the +# documentation is up to date. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_TIMESTAMP = NO + +# If the HTML_DYNAMIC_MENUS tag is set to YES then the generated HTML +# documentation will contain a main index with vertical navigation menus that +# are dynamically created via JavaScript. If disabled, the navigation index will +# consists of multiple levels of tabs that are statically embedded in every HTML +# page. Disable this option to support browsers that do not have JavaScript, +# like the Qt help browser. +# The default value is: YES. +# This tag requires that the tag GENERATE_HTML is set to YES. 
+ +HTML_DYNAMIC_MENUS = YES + +# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML +# documentation will contain sections that can be hidden and shown after the +# page has loaded. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_DYNAMIC_SECTIONS = NO + +# With HTML_INDEX_NUM_ENTRIES one can control the preferred number of entries +# shown in the various tree structured indices initially; the user can expand +# and collapse entries dynamically later on. Doxygen will expand the tree to +# such a level that at most the specified number of entries are visible (unless +# a fully collapsed tree already exceeds this amount). So setting the number of +# entries 1 will produce a full collapsed tree by default. 0 is a special value +# representing an infinite number of entries and will result in a full expanded +# tree by default. +# Minimum value: 0, maximum value: 9999, default value: 100. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_INDEX_NUM_ENTRIES = 100 + +# If the GENERATE_DOCSET tag is set to YES, additional index files will be +# generated that can be used as input for Apple's Xcode 3 integrated development +# environment (see: +# https://developer.apple.com/xcode/), introduced with OSX 10.5 (Leopard). To +# create a documentation set, doxygen will generate a Makefile in the HTML +# output directory. Running make will produce the docset in that directory and +# running make install will install the docset in +# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find it at +# startup. See https://developer.apple.com/library/archive/featuredarticles/Doxy +# genXcode/_index.html for more information. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +GENERATE_DOCSET = NO + +# This tag determines the name of the docset feed. 
A documentation feed provides +# an umbrella under which multiple documentation sets from a single provider +# (such as a company or product suite) can be grouped. +# The default value is: Doxygen generated docs. +# This tag requires that the tag GENERATE_DOCSET is set to YES. + +DOCSET_FEEDNAME = "Doxygen generated docs" + +# This tag specifies a string that should uniquely identify the documentation +# set bundle. This should be a reverse domain-name style string, e.g. +# com.mycompany.MyDocSet. Doxygen will append .docset to the name. +# The default value is: org.doxygen.Project. +# This tag requires that the tag GENERATE_DOCSET is set to YES. + +DOCSET_BUNDLE_ID = org.doxygen.Project + +# The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely identify +# the documentation publisher. This should be a reverse domain-name style +# string, e.g. com.mycompany.MyDocSet.documentation. +# The default value is: org.doxygen.Publisher. +# This tag requires that the tag GENERATE_DOCSET is set to YES. + +DOCSET_PUBLISHER_ID = org.doxygen.Publisher + +# The DOCSET_PUBLISHER_NAME tag identifies the documentation publisher. +# The default value is: Publisher. +# This tag requires that the tag GENERATE_DOCSET is set to YES. + +DOCSET_PUBLISHER_NAME = Publisher + +# If the GENERATE_HTMLHELP tag is set to YES then doxygen generates three +# additional HTML index files: index.hhp, index.hhc, and index.hhk. The +# index.hhp is a project file that can be read by Microsoft's HTML Help Workshop +# (see: +# https://www.microsoft.com/en-us/download/details.aspx?id=21138) on Windows. +# +# The HTML Help Workshop contains a compiler that can convert all HTML output +# generated by doxygen into a single compiled HTML file (.chm). Compiled HTML +# files are now used as the Windows 98 help format, and will replace the old +# Windows help format (.hlp) on all Windows platforms in the future. 
Compressed +# HTML files also contain an index, a table of contents, and you can search for +# words in the documentation. The HTML workshop also contains a viewer for +# compressed HTML files. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +GENERATE_HTMLHELP = NO + +# The CHM_FILE tag can be used to specify the file name of the resulting .chm +# file. You can add a path in front of the file if the result should not be +# written to the html output directory. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +CHM_FILE = + +# The HHC_LOCATION tag can be used to specify the location (absolute path +# including file name) of the HTML help compiler (hhc.exe). If non-empty, +# doxygen will try to run the HTML help compiler on the generated index.hhp. +# The file has to be specified with full path. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +HHC_LOCATION = + +# The GENERATE_CHI flag controls if a separate .chi index file is generated +# (YES) or that it should be included in the main .chm file (NO). +# The default value is: NO. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +GENERATE_CHI = NO + +# The CHM_INDEX_ENCODING is used to encode HtmlHelp index (hhk), content (hhc) +# and project file content. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +CHM_INDEX_ENCODING = + +# The BINARY_TOC flag controls whether a binary table of contents is generated +# (YES) or a normal table of contents (NO) in the .chm file. Furthermore it +# enables the Previous and Next buttons. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +BINARY_TOC = NO + +# The TOC_EXPAND flag can be set to YES to add extra items for group members to +# the table of contents of the HTML help documentation and to the tree view. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. 
+ +TOC_EXPAND = NO + +# If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and +# QHP_VIRTUAL_FOLDER are set, an additional index file will be generated that +# can be used as input for Qt's qhelpgenerator to generate a Qt Compressed Help +# (.qch) of the generated HTML documentation. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +GENERATE_QHP = NO + +# If the QHG_LOCATION tag is specified, the QCH_FILE tag can be used to specify +# the file name of the resulting .qch file. The path specified is relative to +# the HTML output folder. +# This tag requires that the tag GENERATE_QHP is set to YES. + +QCH_FILE = + +# The QHP_NAMESPACE tag specifies the namespace to use when generating Qt Help +# Project output. For more information please see Qt Help Project / Namespace +# (see: +# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#namespace). +# The default value is: org.doxygen.Project. +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHP_NAMESPACE = org.doxygen.Project + +# The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating Qt +# Help Project output. For more information please see Qt Help Project / Virtual +# Folders (see: +# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#virtual-folders). +# The default value is: doc. +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHP_VIRTUAL_FOLDER = doc + +# If the QHP_CUST_FILTER_NAME tag is set, it specifies the name of a custom +# filter to add. For more information please see Qt Help Project / Custom +# Filters (see: +# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom-filters). +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHP_CUST_FILTER_NAME = + +# The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the +# custom filter to add. 
For more information please see Qt Help Project / Custom +# Filters (see: +# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom-filters). +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHP_CUST_FILTER_ATTRS = + +# The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this +# project's filter section matches. Qt Help Project / Filter Attributes (see: +# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#filter-attributes). +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHP_SECT_FILTER_ATTRS = + +# The QHG_LOCATION tag can be used to specify the location (absolute path +# including file name) of Qt's qhelpgenerator. If non-empty doxygen will try to +# run qhelpgenerator on the generated .qhp file. +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHG_LOCATION = + +# If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files will be +# generated, together with the HTML files, they form an Eclipse help plugin. To +# install this plugin and make it available under the help contents menu in +# Eclipse, the contents of the directory containing the HTML and XML files needs +# to be copied into the plugins directory of eclipse. The name of the directory +# within the plugins directory should be the same as the ECLIPSE_DOC_ID value. +# After copying Eclipse needs to be restarted before the help appears. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +GENERATE_ECLIPSEHELP = NO + +# A unique identifier for the Eclipse help plugin. When installing the plugin +# the directory name containing the HTML and XML files should also have this +# name. Each documentation set should have its own identifier. +# The default value is: org.doxygen.Project. +# This tag requires that the tag GENERATE_ECLIPSEHELP is set to YES. 
+ +ECLIPSE_DOC_ID = org.doxygen.Project + +# If you want full control over the layout of the generated HTML pages it might +# be necessary to disable the index and replace it with your own. The +# DISABLE_INDEX tag can be used to turn on/off the condensed index (tabs) at top +# of each HTML page. A value of NO enables the index and the value YES disables +# it. Since the tabs in the index contain the same information as the navigation +# tree, you can set this option to YES if you also set GENERATE_TREEVIEW to YES. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +DISABLE_INDEX = NO + +# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index +# structure should be generated to display hierarchical information. If the tag +# value is set to YES, a side panel will be generated containing a tree-like +# index structure (just like the one that is generated for HTML Help). For this +# to work a browser that supports JavaScript, DHTML, CSS and frames is required +# (i.e. any modern browser). Windows users are probably better off using the +# HTML help feature. Via custom style sheets (see HTML_EXTRA_STYLESHEET) one can +# further fine-tune the look of the index. As an example, the default style +# sheet generated by doxygen has an example that shows how to put an image at +# the root of the tree instead of the PROJECT_NAME. Since the tree basically has +# the same information as the tab index, you could consider setting +# DISABLE_INDEX to YES when enabling this option. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +GENERATE_TREEVIEW = NO + +# The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values that +# doxygen will group on one line in the generated HTML documentation. +# +# Note that a value of 0 will completely suppress the enum values from appearing +# in the overview section. +# Minimum value: 0, maximum value: 20, default value: 4. 
+# This tag requires that the tag GENERATE_HTML is set to YES. + +ENUM_VALUES_PER_LINE = 4 + +# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be used +# to set the initial width (in pixels) of the frame in which the tree is shown. +# Minimum value: 0, maximum value: 1500, default value: 250. +# This tag requires that the tag GENERATE_HTML is set to YES. + +TREEVIEW_WIDTH = 250 + +# If the EXT_LINKS_IN_WINDOW option is set to YES, doxygen will open links to +# external symbols imported via tag files in a separate window. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +EXT_LINKS_IN_WINDOW = NO + +# If the HTML_FORMULA_FORMAT option is set to svg, doxygen will use the pdf2svg +# tool (see https://github.com/dawbarton/pdf2svg) or inkscape (see +# https://inkscape.org) to generate formulas as SVG images instead of PNGs for +# the HTML output. These images will generally look nicer at scaled resolutions. +# Possible values are: png (the default) and svg (looks nicer but requires the +# pdf2svg or inkscape tool). +# The default value is: png. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_FORMULA_FORMAT = png + +# Use this tag to change the font size of LaTeX formulas included as images in +# the HTML documentation. When you change the font size after a successful +# doxygen run you need to manually remove any form_*.png images from the HTML +# output directory to force them to be regenerated. +# Minimum value: 8, maximum value: 50, default value: 10. +# This tag requires that the tag GENERATE_HTML is set to YES. + +FORMULA_FONTSIZE = 10 + +# Use the FORMULA_TRANSPARENT tag to determine whether or not the images +# generated for formulas are transparent PNGs. Transparent PNGs are not +# supported properly for IE 6.0, but are supported on all modern browsers. 
+# +# Note that when changing this option you need to delete any form_*.png files in +# the HTML output directory before the changes have effect. +# The default value is: YES. +# This tag requires that the tag GENERATE_HTML is set to YES. + +FORMULA_TRANSPARENT = YES + +# The FORMULA_MACROFILE can contain LaTeX \newcommand and \renewcommand commands +# to create new LaTeX commands to be used in formulas as building blocks. See +# the section "Including formulas" for details. + +FORMULA_MACROFILE = + +# Enable the USE_MATHJAX option to render LaTeX formulas using MathJax (see +# https://www.mathjax.org) which uses client side JavaScript for the rendering +# instead of using pre-rendered bitmaps. Use this if you do not have LaTeX +# installed or if you want the formulas to look prettier in the HTML output. When +# enabled you may also need to install MathJax separately and configure the path +# to it using the MATHJAX_RELPATH option. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +USE_MATHJAX = NO + +# When MathJax is enabled you can set the default output format to be used for +# the MathJax output. See the MathJax site (see: +# http://docs.mathjax.org/en/v2.7-latest/output.html) for more details. +# Possible values are: HTML-CSS (which is slower, but has the best +# compatibility), NativeMML (i.e. MathML) and SVG. +# The default value is: HTML-CSS. +# This tag requires that the tag USE_MATHJAX is set to YES. + +MATHJAX_FORMAT = HTML-CSS + +# When MathJax is enabled you need to specify the location relative to the HTML +# output directory using the MATHJAX_RELPATH option. The destination directory +# should contain the MathJax.js script. For instance, if the mathjax directory +# is located at the same level as the HTML output directory, then +# MATHJAX_RELPATH should be ../mathjax. The default value points to the MathJax +# Content Delivery Network so you can quickly see the result without installing +# MathJax. 
However, it is strongly recommended to install a local copy of +# MathJax from https://www.mathjax.org before deployment. +# The default value is: https://cdn.jsdelivr.net/npm/mathjax@2. +# This tag requires that the tag USE_MATHJAX is set to YES. + +MATHJAX_RELPATH = https://cdn.jsdelivr.net/npm/mathjax@2 + +# The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax +# extension names that should be enabled during MathJax rendering. For example +# MATHJAX_EXTENSIONS = TeX/AMSmath TeX/AMSsymbols +# This tag requires that the tag USE_MATHJAX is set to YES. + +MATHJAX_EXTENSIONS = + +# The MATHJAX_CODEFILE tag can be used to specify a file with javascript pieces +# of code that will be used on startup of the MathJax code. See the MathJax site +# (see: +# http://docs.mathjax.org/en/v2.7-latest/output.html) for more details. For an +# example see the documentation. +# This tag requires that the tag USE_MATHJAX is set to YES. + +MATHJAX_CODEFILE = + +# When the SEARCHENGINE tag is enabled doxygen will generate a search box for +# the HTML output. The underlying search engine uses javascript and DHTML and +# should work on any modern browser. Note that when using HTML help +# (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets (GENERATE_DOCSET) +# there is already a search function so this one should typically be disabled. +# For large projects the javascript based search engine can be slow, then +# enabling SERVER_BASED_SEARCH may provide a better solution. It is possible to +# search using the keyboard; to jump to the search box use <access key> + S +# (what the <access key> is depends on the OS and browser, but it is typically +# <CTRL>, <ALT>/