Skip to content

Commit

Permalink
Merge branch 'branch-24.04' into fea-py-chunker-orc-write-stripe
Browse files Browse the repository at this point in the history
  • Loading branch information
vuule authored Jan 19, 2024
2 parents b92fb95 + 5f02306 commit 1b9221f
Show file tree
Hide file tree
Showing 91 changed files with 508 additions and 534 deletions.
22 changes: 22 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,10 @@ print(tips_df.groupby("size").tip_percentage.mean())
- [libcudf (C++/CUDA) documentation](https://docs.rapids.ai/api/libcudf/stable/)
- [RAPIDS Community](https://rapids.ai/learn-more/#get-involved): Get help, contribute, and collaborate.

See the [RAPIDS install page](https://docs.rapids.ai/install) for
the most up-to-date information and commands for installing cuDF
and other RAPIDS packages.

## Installation

### CUDA/GPU requirements
Expand All @@ -64,6 +68,24 @@ print(tips_df.groupby("size").tip_percentage.mean())
* NVIDIA driver 450.80.02+
* Volta architecture or better (Compute Capability >=7.0)

### Pip

cuDF can be installed via `pip` from the NVIDIA Python Package Index.
Be sure to select the appropriate cuDF package depending
on the major version of CUDA available in your environment:

For CUDA 11.x:

```bash
pip install --extra-index-url=https://pypi.nvidia.com cudf-cu11
```

For CUDA 12.x:

```bash
pip install --extra-index-url=https://pypi.nvidia.com cudf-cu12
```

### Conda

cuDF can be installed with conda (via [miniconda](https://docs.conda.io/projects/miniconda/en/latest/) or the full [Anaconda distribution](https://www.anaconda.com/download)) from the `rapidsai` channel:
Expand Down
4 changes: 2 additions & 2 deletions ci/build_docs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,8 @@ popd

rapids-logger "Build Python docs"
pushd docs/cudf
make dirhtml O="-j 4"
make text O="-j 4"
make dirhtml
make text
mkdir -p "${RAPIDS_DOCS_DIR}/cudf/"{html,txt}
mv build/dirhtml/* "${RAPIDS_DOCS_DIR}/cudf/html"
mv build/text/* "${RAPIDS_DOCS_DIR}/cudf/txt"
Expand Down
2 changes: 1 addition & 1 deletion ci/build_wheel.sh
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ fi
if [[ $PACKAGE_CUDA_SUFFIX == "-cu12" ]]; then
sed -i "s/cuda-python[<=>\.,0-9a]*/cuda-python>=12.0,<13.0a0/g" ${pyproject_file}
sed -i "s/cupy-cuda11x/cupy-cuda12x/g" ${pyproject_file}
sed -i "/ptxcompiler/d" ${pyproject_file}
sed -i "s/ptxcompiler/pynvjitlink/g" ${pyproject_file}
sed -i "/cubinlinker/d" ${pyproject_file}
fi

Expand Down
2 changes: 1 addition & 1 deletion conda/environments/all_cuda-118_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ dependencies:
- pydata-sphinx-theme!=0.14.2
- pytest
- pytest-benchmark
- pytest-cases<3.8.2
- pytest-cases>=3.8.2
- pytest-cov
- pytest-xdist
- python-confluent-kafka>=1.9.0,<1.10.0a0
Expand Down
3 changes: 2 additions & 1 deletion conda/environments/all_cuda-120_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -70,9 +70,10 @@ dependencies:
- protobuf>=4.21,<5
- pyarrow==14.0.1.*
- pydata-sphinx-theme!=0.14.2
- pynvjitlink
- pytest
- pytest-benchmark
- pytest-cases<3.8.2
- pytest-cases>=3.8.2
- pytest-cov
- pytest-xdist
- python-confluent-kafka>=1.9.0,<1.10.0a0
Expand Down
3 changes: 2 additions & 1 deletion conda/recipes/cudf/meta.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2018-2023, NVIDIA CORPORATION.
# Copyright (c) 2018-2024, NVIDIA CORPORATION.

{% set version = environ['RAPIDS_PACKAGE_VERSION'].lstrip('v') %}
{% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %}
Expand Down Expand Up @@ -98,6 +98,7 @@ requirements:
# xref: https://github.com/rapidsai/cudf/issues/12822
- cuda-nvrtc
- cuda-python >=12.0,<13.0a0
- pynvjitlink
{% endif %}
- {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }}
- nvtx >=0.2.1
Expand Down
4 changes: 2 additions & 2 deletions cpp/benchmarks/common/generate_input.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
* Copyright (c) 2020-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -540,7 +540,7 @@ std::unique_ptr<cudf::column> create_random_utf8_string_column(data_profile cons
return cudf::make_strings_column(
num_rows,
std::make_unique<cudf::column>(std::move(offsets), rmm::device_buffer{}, 0),
std::make_unique<cudf::column>(std::move(chars), rmm::device_buffer{}, 0),
chars.release(),
null_count,
profile.get_null_probability().has_value() ? std::move(result_bitmask) : rmm::device_buffer{});
}
Expand Down
4 changes: 2 additions & 2 deletions cpp/benchmarks/json/json.cu
Original file line number Diff line number Diff line change
Expand Up @@ -177,10 +177,10 @@ auto build_json_string_column(int desired_bytes, int num_rows)
auto d_store_order = cudf::column_device_view::create(float_2bool_columns->get_column(2));
json_benchmark_row_builder jb{
desired_bytes, num_rows, {*d_books, *d_bicycles}, *d_book_pct, *d_misc_order, *d_store_order};
auto children = cudf::strings::detail::make_strings_children(
auto [offsets, chars] = cudf::strings::detail::make_strings_children(
jb, num_rows, cudf::get_default_stream(), rmm::mr::get_current_device_resource());
return cudf::make_strings_column(
num_rows, std::move(children.first), std::move(children.second), 0, {});
num_rows, std::move(offsets), std::move(chars->release().data.release()[0]), 0, {});
}

void BM_case(benchmark::State& state, std::string query_arg)
Expand Down
4 changes: 2 additions & 2 deletions cpp/include/cudf/strings/detail/copy_if_else.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
* Copyright (c) 2019-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -109,7 +109,7 @@ std::unique_ptr<cudf::column> copy_if_else(StringIterLeft lhs_begin,

return make_strings_column(strings_count,
std::move(offsets_column),
std::move(chars_column),
std::move(chars_column->release().data.release()[0]),
null_count,
std::move(null_mask));
}
Expand Down
4 changes: 2 additions & 2 deletions cpp/include/cudf/strings/detail/copy_range.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
* Copyright (c) 2019-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -205,7 +205,7 @@ std::unique_ptr<column> copy_range(SourceValueIterator source_value_begin,

return make_strings_column(target.size(),
std::move(p_offsets_column),
std::move(p_chars_column),
std::move(p_chars_column->release().data.release()[0]),
null_count,
std::move(null_mask));
}
Expand Down
2 changes: 1 addition & 1 deletion cpp/include/cudf/strings/detail/gather.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -321,7 +321,7 @@ std::unique_ptr<cudf::column> gather(strings_column_view const& strings,
return make_strings_column(output_count,
std::move(out_offsets_column),
std::move(out_chars_column),
std::move(out_chars_column->release().data.release()[0]),
0, // caller sets these
rmm::device_buffer{});
}
Expand Down
14 changes: 5 additions & 9 deletions cpp/include/cudf/strings/detail/merge.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
* Copyright (c) 2019-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -89,9 +89,8 @@ std::unique_ptr<column> merge(strings_column_view const& lhs,
auto d_offsets = offsets_column->view().template data<int32_t>();

// create the chars column
auto chars_column = strings::detail::create_chars_child_column(bytes, stream, mr);
// merge the strings
auto d_chars = chars_column->mutable_view().template data<char>();
rmm::device_uvector<char> chars(bytes, stream, mr);
auto d_chars = chars.data();
thrust::for_each_n(rmm::exec_policy(stream),
thrust::make_counting_iterator<size_type>(0),
strings_count,
Expand All @@ -103,11 +102,8 @@ std::unique_ptr<column> merge(strings_column_view const& lhs,
memcpy(d_chars + d_offsets[idx], d_str.data(), d_str.size_bytes());
});

return make_strings_column(strings_count,
std::move(offsets_column),
std::move(chars_column),
null_count,
std::move(null_mask));
return make_strings_column(
strings_count, std::move(offsets_column), chars.release(), null_count, std::move(null_mask));
}

} // namespace detail
Expand Down
9 changes: 4 additions & 5 deletions cpp/include/cudf/strings/detail/strings_column_factories.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ std::unique_ptr<column> make_strings_column(IndexPairIterator begin,

return make_strings_column(strings_count,
std::move(offsets_column),
std::move(chars_column),
std::move(chars_column->release().data.release()[0]),
null_count,
std::move(null_mask));
}
Expand Down Expand Up @@ -187,13 +187,12 @@ std::unique_ptr<column> make_strings_column(CharIterator chars_begin,
[] __device__(auto offset) { return static_cast<int32_t>(offset); }));

// build chars column
auto chars_column = strings::detail::create_chars_child_column(bytes, stream, mr);
auto chars_view = chars_column->mutable_view();
thrust::copy(rmm::exec_policy(stream), chars_begin, chars_end, chars_view.data<char>());
rmm::device_uvector<char> chars_data(bytes, stream, mr);
thrust::copy(rmm::exec_policy(stream), chars_begin, chars_end, chars_data.begin());

return make_strings_column(strings_count,
std::move(offsets_column),
std::move(chars_column),
chars_data.release(),
null_count,
std::move(null_mask));
}
Expand Down
32 changes: 17 additions & 15 deletions cpp/include/cudf_test/column_wrapper.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
* Copyright (c) 2019-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -757,20 +757,21 @@ class strings_column_wrapper : public detail::column_wrapper {
strings_column_wrapper(StringsIterator begin, StringsIterator end) : column_wrapper{}
{
size_type num_strings = std::distance(begin, end);
if (num_strings == 0) {
wrapped = cudf::make_empty_column(cudf::type_id::STRING);
return;
}
auto all_valid = thrust::make_constant_iterator(true);
auto [chars, offsets] = detail::make_chars_and_offsets(begin, end, all_valid);
auto d_chars = std::make_unique<cudf::column>(
cudf::detail::make_device_uvector_sync(
chars, cudf::test::get_default_stream(), rmm::mr::get_current_device_resource()),
rmm::device_buffer{},
0);
auto d_chars = cudf::detail::make_device_uvector_async(
chars, cudf::test::get_default_stream(), rmm::mr::get_current_device_resource());
auto d_offsets = std::make_unique<cudf::column>(
cudf::detail::make_device_uvector_sync(
offsets, cudf::test::get_default_stream(), rmm::mr::get_current_device_resource()),
rmm::device_buffer{},
0);
wrapped =
cudf::make_strings_column(num_strings, std::move(d_offsets), std::move(d_chars), 0, {});
cudf::make_strings_column(num_strings, std::move(d_offsets), d_chars.release(), 0, {});
}

/**
Expand Down Expand Up @@ -805,23 +806,24 @@ class strings_column_wrapper : public detail::column_wrapper {
strings_column_wrapper(StringsIterator begin, StringsIterator end, ValidityIterator v)
: column_wrapper{}
{
size_type num_strings = std::distance(begin, end);
size_type num_strings = std::distance(begin, end);
if (num_strings == 0) {
wrapped = cudf::make_empty_column(cudf::type_id::STRING);
return;
}
auto [chars, offsets] = detail::make_chars_and_offsets(begin, end, v);
auto [null_mask, null_count] = detail::make_null_mask_vector(v, v + num_strings);
auto d_chars = std::make_unique<cudf::column>(
cudf::detail::make_device_uvector_sync(
chars, cudf::test::get_default_stream(), rmm::mr::get_current_device_resource()),
rmm::device_buffer{},
0);
auto d_chars = cudf::detail::make_device_uvector_async(
chars, cudf::test::get_default_stream(), rmm::mr::get_current_device_resource());
auto d_offsets = std::make_unique<cudf::column>(
cudf::detail::make_device_uvector_sync(
cudf::detail::make_device_uvector_async(
offsets, cudf::test::get_default_stream(), rmm::mr::get_current_device_resource()),
rmm::device_buffer{},
0);
auto d_bitmask = cudf::detail::make_device_uvector_sync(
null_mask, cudf::test::get_default_stream(), rmm::mr::get_current_device_resource());
wrapped = cudf::make_strings_column(
num_strings, std::move(d_offsets), std::move(d_chars), null_count, d_bitmask.release());
num_strings, std::move(d_offsets), d_chars.release(), null_count, d_bitmask.release());
}

/**
Expand Down
10 changes: 4 additions & 6 deletions cpp/src/hash/md5_hash.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
* Copyright (c) 2019-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -333,9 +333,8 @@ std::unique_ptr<column> md5(table_view const& input,
auto [offsets_column, bytes] =
cudf::detail::make_offsets_child_column(begin, begin + input.num_rows(), stream, mr);

auto chars_column = strings::detail::create_chars_child_column(bytes, stream, mr);
auto chars_view = chars_column->mutable_view();
auto d_chars = chars_view.data<char>();
rmm::device_uvector<char> chars(bytes, stream, mr);
auto d_chars = chars.data();

auto const device_input = table_device_view::create(input, stream);

Expand Down Expand Up @@ -366,8 +365,7 @@ std::unique_ptr<column> md5(table_view const& input,
}
});

return make_strings_column(
input.num_rows(), std::move(offsets_column), std::move(chars_column), 0, {});
return make_strings_column(input.num_rows(), std::move(offsets_column), chars.release(), 0, {});
}

} // namespace detail
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/interop/from_arrow.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
* Copyright (c) 2020-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -290,7 +290,7 @@ std::unique_ptr<column> dispatch_to_cudf_column::operator()<cudf::string_view>(
auto const num_rows = offsets_column->size() - 1;
auto out_col = make_strings_column(num_rows,
std::move(offsets_column),
std::move(chars_column),
std::move(chars_column->release().data.release()[0]),
array.null_count(),
std::move(*get_mask_buffer(array, stream, mr)));

Expand Down
4 changes: 2 additions & 2 deletions cpp/src/io/csv/durations.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
* Copyright (c) 2020-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -202,7 +202,7 @@ struct dispatch_from_durations_fn {
//
return make_strings_column(strings_count,
std::move(offsets_column),
std::move(chars_column),
std::move(chars_column->release().data.release()[0]),
durations.null_count(),
std::move(null_mask));
}
Expand Down
7 changes: 4 additions & 3 deletions cpp/src/io/csv/writer_impl.cu
Original file line number Diff line number Diff line change
Expand Up @@ -181,11 +181,12 @@ struct column_to_strings_fn {

auto d_column = column_device_view::create(column_v, stream_);
escape_strings_fn fn{*d_column, delimiter.value(stream_)};
auto children = cudf::strings::detail::make_strings_children(fn, column_v.size(), stream_, mr_);
auto [offsets_column, chars_column] =
cudf::strings::detail::make_strings_children(fn, column_v.size(), stream_, mr_);

return make_strings_column(column_v.size(),
std::move(children.first),
std::move(children.second),
std::move(offsets_column),
std::move(chars_column->release().data.release()[0]),
column_v.null_count(),
cudf::detail::copy_bitmask(column_v, stream_, mr_));
}
Expand Down
Loading

0 comments on commit 1b9221f

Please sign in to comment.