Skip to content

Commit

Permalink
Merge branch 'branch-24.04' into verify-copyright
Browse files Browse the repository at this point in the history
  • Loading branch information
KyleFromNVIDIA committed Mar 4, 2024
2 parents c640477 + 903dcac commit bf5950c
Show file tree
Hide file tree
Showing 90 changed files with 1,402 additions and 671 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ cuDF can be installed with conda (via [miniconda](https://docs.conda.io/projects

```bash
conda install -c rapidsai -c conda-forge -c nvidia \
cudf=24.04 python=3.10 cuda-version=11.8
cudf=24.04 python=3.11 cuda-version=12.2
```

We also provide [nightly Conda packages](https://anaconda.org/rapidsai-nightly) built from the HEAD
Expand Down
11 changes: 5 additions & 6 deletions conda/environments/all_cuda-118_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ channels:
- rapidsai
- rapidsai-nightly
- dask/label/dev
- pytorch
- conda-forge
- nvidia
dependencies:
Expand Down Expand Up @@ -59,7 +58,7 @@ dependencies:
- ninja
- notebook
- numba>=0.57
- numpy>=1.21
- numpy>=1.23
- numpydoc
- nvcc_linux-64=11.8
- nvcomp==3.0.6
Expand All @@ -79,8 +78,8 @@ dependencies:
- pytest-xdist
- pytest<8
- python-confluent-kafka>=1.9.0,<1.10.0a0
- python>=3.9,<3.11
- pytorch<1.12.0
- python>=3.9,<3.12
- pytorch>=2.1.0
- rapids-dask-dependency==24.4.*
- rich
- rmm==24.4.*
Expand All @@ -96,8 +95,8 @@ dependencies:
- sphinxcontrib-websupport
- streamz
- sysroot_linux-64==2.17
- tokenizers==0.13.1
- transformers==4.24.0
- tokenizers==0.15.2
- transformers==4.38.1
- typing_extensions>=4.0.0
- zlib>=1.2.13
- pip:
Expand Down
11 changes: 5 additions & 6 deletions conda/environments/all_cuda-122_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ channels:
- rapidsai
- rapidsai-nightly
- dask/label/dev
- pytorch
- conda-forge
- nvidia
dependencies:
Expand Down Expand Up @@ -58,7 +57,7 @@ dependencies:
- ninja
- notebook
- numba>=0.57
- numpy>=1.21
- numpy>=1.23
- numpydoc
- nvcomp==3.0.6
- nvtx>=0.2.1
Expand All @@ -77,8 +76,8 @@ dependencies:
- pytest-xdist
- pytest<8
- python-confluent-kafka>=1.9.0,<1.10.0a0
- python>=3.9,<3.11
- pytorch<1.12.0
- python>=3.9,<3.12
- pytorch>=2.1.0
- rapids-dask-dependency==24.4.*
- rich
- rmm==24.4.*
Expand All @@ -94,8 +93,8 @@ dependencies:
- sphinxcontrib-websupport
- streamz
- sysroot_linux-64==2.17
- tokenizers==0.13.1
- transformers==4.24.0
- tokenizers==0.15.2
- transformers==4.38.1
- typing_extensions>=4.0.0
- zlib>=1.2.13
- pip:
Expand Down
3 changes: 2 additions & 1 deletion conda/recipes/cudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ requirements:
- scikit-build-core >=0.7.0
- setuptools
- dlpack >=0.5,<0.6.0a0
- numpy 1.23
- pyarrow ==14.0.2.*
- libcudf ={{ version }}
- rmm ={{ minor_version }}
Expand All @@ -83,7 +84,7 @@ requirements:
- pandas >=2.0,<2.2.2dev0
- cupy >=12.0.0
- numba >=0.57
- numpy >=1.21
- {{ pin_compatible('numpy', max_pin='x') }}
- {{ pin_compatible('pyarrow', max_pin='x') }}
- libcudf ={{ version }}
- {{ pin_compatible('rmm', max_pin='x.x') }}
Expand Down
7 changes: 6 additions & 1 deletion cpp/benchmarks/groupby/group_max.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022-2023, NVIDIA CORPORATION.
* Copyright (c) 2022-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -15,6 +15,7 @@
*/

#include <benchmarks/common/generate_input.hpp>
#include <benchmarks/fixture/benchmark_fixture.hpp>

#include <cudf/groupby.hpp>

Expand Down Expand Up @@ -50,9 +51,13 @@ void bench_groupby_max(nvbench::state& state, nvbench::type_list<Type>)
requests[0].values = vals->view();
requests[0].aggregations.push_back(cudf::make_max_aggregation<cudf::groupby_aggregation>());

auto const mem_stats_logger = cudf::memory_stats_logger();
state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
state.exec(nvbench::exec_tag::sync,
[&](nvbench::launch& launch) { auto const result = gb_obj.aggregate(requests); });

state.add_buffer_size(
mem_stats_logger.peak_memory_usage(), "peak_memory_usage", "peak_memory_usage");
}

NVBENCH_BENCH_TYPES(bench_groupby_max,
Expand Down
9 changes: 7 additions & 2 deletions cpp/benchmarks/groupby/group_struct_keys.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022-2023, NVIDIA CORPORATION.
* Copyright (c) 2022-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -15,6 +15,7 @@
*/

#include <benchmarks/common/generate_input.hpp>
#include <benchmarks/fixture/benchmark_fixture.hpp>

#include <cudf_test/column_wrapper.hpp>

Expand Down Expand Up @@ -80,11 +81,15 @@ void bench_groupby_struct_keys(nvbench::state& state)
requests[0].aggregations.push_back(cudf::make_min_aggregation<cudf::groupby_aggregation>());

// Set up nvbench default stream
auto stream = cudf::get_default_stream();
auto const mem_stats_logger = cudf::memory_stats_logger();
auto stream = cudf::get_default_stream();
state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value()));

state.exec(nvbench::exec_tag::sync,
[&](nvbench::launch& launch) { auto const result = gb_obj.aggregate(requests); });

state.add_buffer_size(
mem_stats_logger.peak_memory_usage(), "peak_memory_usage", "peak_memory_usage");
}

NVBENCH_BENCH(bench_groupby_struct_keys)
Expand Down
3 changes: 1 addition & 2 deletions cpp/benchmarks/json/json.cu
Original file line number Diff line number Diff line change
Expand Up @@ -179,8 +179,7 @@ auto build_json_string_column(int desired_bytes, int num_rows)
desired_bytes, num_rows, {*d_books, *d_bicycles}, *d_book_pct, *d_misc_order, *d_store_order};
auto [offsets, chars] = cudf::strings::detail::make_strings_children(
jb, num_rows, cudf::get_default_stream(), rmm::mr::get_current_device_resource());
return cudf::make_strings_column(
num_rows, std::move(offsets), std::move(chars->release().data.release()[0]), 0, {});
return cudf::make_strings_column(num_rows, std::move(offsets), chars.release(), 0, {});
}

void BM_case(benchmark::State& state, std::string query_arg)
Expand Down
5 changes: 5 additions & 0 deletions cpp/include/cudf/detail/cuco_helpers.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,16 @@

#pragma once

#include <cudf/types.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/mr/device/polymorphic_allocator.hpp>

namespace cudf::detail {

/// Sentinel value for `cudf::size_type`
static cudf::size_type constexpr CUDF_SIZE_TYPE_SENTINEL = -1;

/// Default load factor for cuco data structures
static double constexpr CUCO_DESIRED_LOAD_FACTOR = 0.5;

Expand Down
12 changes: 11 additions & 1 deletion cpp/include/cudf/detail/transform.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
* Copyright (c) 2019-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -100,5 +100,15 @@ std::unique_ptr<column> row_bit_count(table_view const& t,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr);

/**
* @copydoc cudf::segmented_row_bit_count
*
* Detail-layer declaration: takes the same `t`, `segment_length`, and `mr` arguments as the
* public API, plus an explicit `stream`.
*
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
std::unique_ptr<column> segmented_row_bit_count(table_view const& t,
size_type segment_length,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr);

} // namespace detail
} // namespace cudf
11 changes: 5 additions & 6 deletions cpp/include/cudf/strings/detail/strings_children.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ namespace strings {
namespace detail {

/**
* @brief Creates child offsets and chars columns by applying the template function that
* @brief Creates child offsets and chars data by applying the template function that
* can be used for computing the output size of each string as well as create the output
*
* @throws std::overflow_error if the output strings column exceeds the column size limit
Expand All @@ -49,7 +49,7 @@ namespace detail {
* @param strings_count Number of strings.
* @param stream CUDA stream used for device memory operations and kernel launches.
* @param mr Device memory resource used to allocate the returned columns' device memory.
* @return offsets child column and chars child column for a strings column
* @return Offsets child column and chars data for a strings column
*/
template <typename SizeAndExecuteFunction>
auto make_strings_children(SizeAndExecuteFunction size_and_exec_fn,
Expand Down Expand Up @@ -84,18 +84,17 @@ auto make_strings_children(SizeAndExecuteFunction size_and_exec_fn,
std::overflow_error);

// Now build the chars data buffer
std::unique_ptr<column> chars_column =
create_chars_child_column(static_cast<size_type>(bytes), stream, mr);
rmm::device_uvector<char> chars(bytes, stream, mr);

// Execute the function fn again to fill the chars data.
// Note that if the output chars data has zero size, the function fn should not be called to
// avoid accidentally overwriting the offsets.
if (bytes > 0) {
size_and_exec_fn.d_chars = chars_column->mutable_view().template data<char>();
size_and_exec_fn.d_chars = chars.data();
for_each_fn(size_and_exec_fn);
}

return std::pair(std::move(offsets_column), std::move(chars_column));
return std::pair(std::move(offsets_column), std::move(chars));
}

/**
Expand Down
13 changes: 6 additions & 7 deletions cpp/include/cudf/table/row_operators.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
* Copyright (c) 2019-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -20,19 +20,16 @@
#include <cudf/detail/utilities/assert.cuh>
#include <cudf/hashing/detail/hash_functions.cuh>
#include <cudf/hashing/detail/hashing.hpp>
#include <cudf/sorting.hpp>
#include <cudf/table/table_device_view.cuh>
#include <cudf/utilities/traits.hpp>
#include <cudf/utilities/type_dispatcher.hpp>

#include <cuda/std/limits>
#include <thrust/equal.h>
#include <thrust/execution_policy.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/swap.h>
#include <thrust/transform_reduce.h>

#include <limits>

namespace cudf {

/**
Expand Down Expand Up @@ -470,7 +467,9 @@ class element_hasher {
template <typename T, CUDF_ENABLE_IF(column_device_view::has_element_accessor<T>())>
__device__ hash_value_type operator()(column_device_view col, size_type row_index) const
{
if (has_nulls && col.is_null(row_index)) { return std::numeric_limits<hash_value_type>::max(); }
if (has_nulls && col.is_null(row_index)) {
return cuda::std::numeric_limits<hash_value_type>::max();
}
return hash_function<T>{}(col.element<T>(row_index));
}

Expand Down Expand Up @@ -554,7 +553,7 @@ class element_hasher_with_seed {

private:
uint32_t _seed{DEFAULT_HASH_SEED};
hash_value_type _null_hash{std::numeric_limits<hash_value_type>::max()};
hash_value_type _null_hash{cuda::std::numeric_limits<hash_value_type>::max()};
Nullate _has_nulls;
};

Expand Down
25 changes: 24 additions & 1 deletion cpp/include/cudf/transform.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
* Copyright (c) 2019-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -224,5 +224,28 @@ std::unique_ptr<column> row_bit_count(
table_view const& t,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Returns an approximate cumulative size in bits of all columns in the `table_view` for
* each segment of rows.
*
* This is similar to counting bit size per row for the input table in `cudf::row_bit_count`,
* except that row sizes are accumulated by segments.
*
* Currently, only fixed-length segments are supported. If the number of rows in the input table
* is not divisible by `segment_length`, the last segment is shorter than the others (e.g., 10
* rows with a `segment_length` of 4 form segments of 4, 4, and 2 rows).
*
* @throw std::invalid_argument if the input `segment_length` is non-positive or larger than the
* number of rows in the input table.
*
* @param t The table view to perform the computation on
* @param segment_length The number of rows in each segment for which the total size is computed
* @param mr Device memory resource used to allocate the returned column's device memory
* @return A 32-bit integer column containing the bit counts for each segment of rows
*/
std::unique_ptr<column> segmented_row_bit_count(
table_view const& t,
size_type segment_length,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/** @} */ // end of group
} // namespace cudf
43 changes: 3 additions & 40 deletions cpp/include/cudf_test/column_utilities.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -194,23 +194,7 @@ std::pair<thrust::host_vector<T>, std::vector<bitmask_type>> to_host(column_view
* `column_view`'s data, and second is the column's bitmask.
*/
template <typename T, std::enable_if_t<cudf::is_fixed_point<T>()>* = nullptr>
std::pair<thrust::host_vector<T>, std::vector<bitmask_type>> to_host(column_view c)
{
using namespace numeric;
using Rep = typename T::rep;

auto host_rep_types = thrust::host_vector<Rep>(c.size());

CUDF_CUDA_TRY(
cudaMemcpy(host_rep_types.data(), c.begin<Rep>(), c.size() * sizeof(Rep), cudaMemcpyDefault));

auto to_fp = [&](Rep val) { return T{scaled_integer<Rep>{val, scale_type{c.type().scale()}}}; };
auto begin = thrust::make_transform_iterator(std::cbegin(host_rep_types), to_fp);
auto const host_fixed_points = thrust::host_vector<T>(begin, begin + c.size());

return {host_fixed_points, bitmask_to_host(c)};
}
//! @endcond
std::pair<thrust::host_vector<T>, std::vector<bitmask_type>> to_host(column_view c);

/**
* @brief Copies the data and bitmask of a `column_view` of strings
Expand All @@ -223,29 +207,8 @@ std::pair<thrust::host_vector<T>, std::vector<bitmask_type>> to_host(column_view
* and second is the column's bitmask.
*/
template <>
inline std::pair<thrust::host_vector<std::string>, std::vector<bitmask_type>> to_host(column_view c)
{
thrust::host_vector<std::string> host_data(c.size());
auto stream = cudf::get_default_stream();
if (c.size() > c.null_count()) {
auto const scv = strings_column_view(c);
auto const h_chars = cudf::detail::make_std_vector_sync<char>(
cudf::device_span<char const>(scv.chars_begin(stream), scv.chars_size(stream)), stream);
auto const h_offsets = cudf::detail::make_std_vector_sync(
cudf::device_span<cudf::size_type const>(scv.offsets().data<cudf::size_type>() + scv.offset(),
scv.size() + 1),
stream);

// build std::string vector from chars and offsets
std::transform(
std::begin(h_offsets),
std::end(h_offsets) - 1,
std::begin(h_offsets) + 1,
host_data.begin(),
[&](auto start, auto end) { return std::string(h_chars.data() + start, end - start); });
}
return {std::move(host_data), bitmask_to_host(c)};
}
std::pair<thrust::host_vector<std::string>, std::vector<bitmask_type>> to_host(column_view c);
//! @endcond

} // namespace cudf::test

Expand Down
Loading

0 comments on commit bf5950c

Please sign in to comment.