Skip to content

Commit

Permalink
Merge branch 'branch-22.12' into udf-string-class
Browse files Browse the repository at this point in the history
  • Loading branch information
davidwendt committed Nov 1, 2022
2 parents cadcf79 + d236779 commit b3a43b8
Show file tree
Hide file tree
Showing 41 changed files with 731 additions and 397 deletions.
29 changes: 9 additions & 20 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -65,32 +65,21 @@ Please see the [Demo Docker Repository](https://hub.docker.com/r/rapidsai/rapids

cuDF can be installed with conda ([miniconda](https://conda.io/miniconda.html), or the full [Anaconda distribution](https://www.anaconda.com/download)) from the `rapidsai` channel:

For `cudf version == 22.06` :
```bash
# for CUDA 11.0
conda install -c rapidsai -c nvidia -c numba -c conda-forge \
cudf=22.06 python=3.9 cudatoolkit=11.0

# or, for CUDA 11.2
conda install -c rapidsai -c nvidia -c numba -c conda-forge \
cudf=22.06 python=3.9 cudatoolkit=11.2

# for CUDA 11.5
conda install -c rapidsai -c conda-forge -c nvidia \
cudf=22.10 python=3.9 cudatoolkit=11.5
# for CUDA 11.2
conda install -c rapidsai -c conda-forge -c nvidia \
cudf=22.10 python=3.9 cudatoolkit=11.2
```

For the nightly version of `cudf` :
```bash
# for CUDA 11.0
conda install -c rapidsai-nightly -c nvidia -c numba -c conda-forge \
cudf python=3.9 cudatoolkit=11.0

# or, for CUDA 11.2
conda install -c rapidsai-nightly -c nvidia -c numba -c conda-forge \
cudf python=3.9 cudatoolkit=11.2
```
We also provide [nightly Conda packages](https://anaconda.org/rapidsai-nightly) built from the HEAD
of our latest development branch.

Note: cuDF is supported only on Linux, and with Python versions 3.8 and later.

See the [Get RAPIDS version picker](https://rapids.ai/start.html) for more OS and version info.
See the [Get RAPIDS version picker](https://rapids.ai/start.html) for more OS and version info.

## Build/Install from Source
See build [instructions](CONTRIBUTING.md#setting-up-your-build-environment).
Expand Down
4 changes: 2 additions & 2 deletions conda/environments/cudf_dev_cuda11.5.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@
name: cudf_dev
channels:
- rapidsai
- nvidia
- rapidsai-nightly
- dask/label/dev
- conda-forge
- nvidia
dependencies:
- c-compiler
- cxx-compiler
Expand Down Expand Up @@ -38,7 +38,7 @@ dependencies:
- ipython
- pandoc<=2.0.0
- cudatoolkit=11.5
- cuda-python>=11.5,<11.7.1
- cuda-python>=11.7.1,<12.0
- pip
- doxygen=1.8.20
- typing_extensions
Expand Down
2 changes: 1 addition & 1 deletion conda/recipes/cudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ requirements:
- packaging
- cachetools
- cubinlinker # [linux64] # CUDA enhanced compatibility.
- cuda-python >=11.5,<11.7.1
- cuda-python >=11.7.1,<12.0
test: # [linux64]
requires: # [linux64]
- cudatoolkit {{ cuda_version }}.* # [linux64]
Expand Down
2 changes: 1 addition & 1 deletion conda/recipes/strings_udf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ requirements:
- numba >=0.54
- libcudf ={{ version }}
- cudf ={{ version }}
- cudatoolkit ={{ cuda_version }}
- cudatoolkit {{ cuda_version }}.*
run:
- python
- typing_extensions
Expand Down
3 changes: 2 additions & 1 deletion cpp/include/cudf/detail/scatter.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,8 @@ struct column_scatterer_impl<dictionary32> {
// first combine keys so both dictionaries have the same set
auto target_matched = dictionary::detail::add_keys(target, source.keys(), stream, mr);
auto const target_view = dictionary_column_view(target_matched->view());
auto source_matched = dictionary::detail::set_keys(source, target_view.keys(), stream);
auto source_matched = dictionary::detail::set_keys(
source, target_view.keys(), stream, rmm::mr::get_current_device_resource());
auto const source_view = dictionary_column_view(source_matched->view());

// now build the new indices by doing a scatter on just the matched indices
Expand Down
7 changes: 3 additions & 4 deletions cpp/include/cudf/dictionary/detail/concatenate.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,9 @@ namespace detail {
* @param mr Device memory resource used to allocate the returned column's device memory.
* @return New column with concatenated results.
*/
std::unique_ptr<column> concatenate(
host_span<column_view const> columns,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
std::unique_ptr<column> concatenate(host_span<column_view const> columns,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr);

} // namespace detail
} // namespace dictionary
Expand Down
16 changes: 7 additions & 9 deletions cpp/include/cudf/dictionary/detail/encode.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,11 +51,10 @@ namespace detail {
* @param mr Device memory resource used to allocate the returned column's device memory.
* @return Returns a dictionary column.
*/
std::unique_ptr<column> encode(
column_view const& column,
data_type indices_type = data_type{type_id::UINT32},
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
std::unique_ptr<column> encode(column_view const& column,
data_type indices_type,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr);

/**
* @brief Create a column by gathering the keys from the provided
Expand All @@ -72,10 +71,9 @@ std::unique_ptr<column> encode(
* @param mr Device memory resource used to allocate the returned column's device memory.
* @return New column with type matching the dictionary_column's keys.
*/
std::unique_ptr<column> decode(
dictionary_column_view const& dictionary_column,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
std::unique_ptr<column> decode(dictionary_column_view const& dictionary_column,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr);

/**
* @brief Return minimal integer type for the given number of elements.
Expand Down
18 changes: 8 additions & 10 deletions cpp/include/cudf/dictionary/detail/replace.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,10 @@ namespace detail {
* @param mr Device memory resource used to allocate the returned column's device memory.
* @return New dictionary column with null rows replaced.
*/
std::unique_ptr<column> replace_nulls(
dictionary_column_view const& input,
dictionary_column_view const& replacement,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
std::unique_ptr<column> replace_nulls(dictionary_column_view const& input,
dictionary_column_view const& replacement,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr);

/**
* @brief Create a new dictionary column by replacing nulls with a
Expand All @@ -57,11 +56,10 @@ std::unique_ptr<column> replace_nulls(
* @param mr Device memory resource used to allocate the returned column's device memory.
* @return New dictionary column with null rows replaced.
*/
std::unique_ptr<column> replace_nulls(
dictionary_column_view const& input,
scalar const& replacement,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
std::unique_ptr<column> replace_nulls(dictionary_column_view const& input,
scalar const& replacement,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr);

} // namespace detail
} // namespace dictionary
Expand Down
18 changes: 8 additions & 10 deletions cpp/include/cudf/dictionary/detail/search.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,10 @@ namespace detail {
*
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
std::unique_ptr<scalar> get_index(
dictionary_column_view const& dictionary,
scalar const& key,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
std::unique_ptr<scalar> get_index(dictionary_column_view const& dictionary,
scalar const& key,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr);

/**
* @brief Get the index for a key if it were added to the given dictionary.
Expand All @@ -56,11 +55,10 @@ std::unique_ptr<scalar> get_index(
* @param mr Device memory resource used to allocate the returned column's device memory.
* @return Numeric scalar index value of the key within the dictionary
*/
std::unique_ptr<scalar> get_insert_index(
dictionary_column_view const& dictionary,
scalar const& key,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
std::unique_ptr<scalar> get_insert_index(dictionary_column_view const& dictionary,
scalar const& key,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr);

} // namespace detail
} // namespace dictionary
Expand Down
38 changes: 17 additions & 21 deletions cpp/include/cudf/dictionary/detail/update_keys.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,46 +32,42 @@ namespace detail {
*
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
std::unique_ptr<column> add_keys(
dictionary_column_view const& dictionary_column,
column_view const& new_keys,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
std::unique_ptr<column> add_keys(dictionary_column_view const& dictionary_column,
column_view const& new_keys,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr);

/**
* @copydoc cudf::dictionary::remove_keys(dictionary_column_view const&,column_view
* const&,rmm::mr::device_memory_resource*)
*
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
std::unique_ptr<column> remove_keys(
dictionary_column_view const& dictionary_column,
column_view const& keys_to_remove,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
std::unique_ptr<column> remove_keys(dictionary_column_view const& dictionary_column,
column_view const& keys_to_remove,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr);

/**
* @copydoc cudf::dictionary::remove_unused_keys(dictionary_column_view
* const&,rmm::mr::device_memory_resource*)
*
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
std::unique_ptr<column> remove_unused_keys(
dictionary_column_view const& dictionary_column,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
std::unique_ptr<column> remove_unused_keys(dictionary_column_view const& dictionary_column,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr);

/**
* @copydoc cudf::dictionary::set_keys(dictionary_column_view
* const&,rmm::mr::device_memory_resource*)
*
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
std::unique_ptr<column> set_keys(
dictionary_column_view const& dictionary_column,
column_view const& keys,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
std::unique_ptr<column> set_keys(dictionary_column_view const& dictionary_column,
column_view const& keys,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr);

/**
* @copydoc
Expand All @@ -82,7 +78,7 @@ std::unique_ptr<column> set_keys(
std::vector<std::unique_ptr<column>> match_dictionaries(
cudf::host_span<dictionary_column_view const> input,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
rmm::mr::device_memory_resource* mr);

/**
* @brief Create new dictionaries that have keys merged from dictionary columns
Expand All @@ -106,7 +102,7 @@ std::vector<std::unique_ptr<column>> match_dictionaries(
std::pair<std::vector<std::unique_ptr<column>>, std::vector<table_view>> match_dictionaries(
std::vector<table_view> tables,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
rmm::mr::device_memory_resource* mr);

} // namespace detail
} // namespace dictionary
Expand Down
38 changes: 16 additions & 22 deletions cpp/include/nvtext/detail/tokenize.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,10 @@ namespace detail {
* @param mr Device memory resource used to allocate the returned column's device memory.
* @return New strings column of tokens.
*/
std::unique_ptr<cudf::column> tokenize(
cudf::strings_column_view const& strings,
cudf::string_scalar const& delimiter = cudf::string_scalar{""},
// Move before delimiter?
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
std::unique_ptr<cudf::column> tokenize(cudf::strings_column_view const& strings,
cudf::string_scalar const& delimiter,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr);

/**
* @copydoc nvtext::tokenize(strings_column_view const&,strings_column_view
Expand All @@ -52,11 +50,10 @@ std::unique_ptr<cudf::column> tokenize(
* @param mr Device memory resource used to allocate the returned column's device memory.
* @return New strings column of tokens.
*/
std::unique_ptr<cudf::column> tokenize(
cudf::strings_column_view const& strings,
cudf::strings_column_view const& delimiters,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
std::unique_ptr<cudf::column> tokenize(cudf::strings_column_view const& strings,
cudf::strings_column_view const& delimiters,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr);

/**
* @copydoc nvtext::count_tokens(strings_column_view const&, string_scalar
Expand All @@ -69,12 +66,10 @@ std::unique_ptr<cudf::column> tokenize(
* @param mr Device memory resource used to allocate the returned column's device memory.
* @return New INT32 column of token counts.
*/
std::unique_ptr<cudf::column> count_tokens(
cudf::strings_column_view const& strings,
cudf::string_scalar const& delimiter = cudf::string_scalar{""},
// Move before delimiter?
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
std::unique_ptr<cudf::column> count_tokens(cudf::strings_column_view const& strings,
cudf::string_scalar const& delimiter,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr);

/**
* @copydoc nvtext::count_tokens(strings_column_view const&,strings_column_view
Expand All @@ -86,11 +81,10 @@ std::unique_ptr<cudf::column> count_tokens(
* @param mr Device memory resource used to allocate the returned column's device memory.
* @return New INT32 column of token counts.
*/
std::unique_ptr<cudf::column> count_tokens(
cudf::strings_column_view const& strings,
cudf::strings_column_view const& delimiters,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
std::unique_ptr<cudf::column> count_tokens(cudf::strings_column_view const& strings,
cudf::strings_column_view const& delimiters,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr);

} // namespace detail
} // namespace nvtext
3 changes: 2 additions & 1 deletion cpp/src/copying/copy_range.cu
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,8 @@ std::unique_ptr<cudf::column> out_of_place_copy_range_dispatch::operator()<cudf:
auto target_matched =
cudf::dictionary::detail::add_keys(dict_target, dict_source.keys(), stream, mr);
auto const target_view = cudf::dictionary_column_view(target_matched->view());
auto source_matched = cudf::dictionary::detail::set_keys(dict_source, target_view.keys(), stream);
auto source_matched = cudf::dictionary::detail::set_keys(
dict_source, target_view.keys(), stream, rmm::mr::get_current_device_resource());
auto const source_view = cudf::dictionary_column_view(source_matched->view());

// build the new indices by calling in_place_copy_range on just the indices
Expand Down
5 changes: 3 additions & 2 deletions cpp/src/copying/scatter.cu
Original file line number Diff line number Diff line change
Expand Up @@ -184,8 +184,9 @@ struct column_scalar_scatterer_impl<dictionary32, MapIterator> {
stream,
mr);
auto dict_view = dictionary_column_view(dict_target->view());
auto scalar_index = dictionary::detail::get_index(dict_view, source.get(), stream);
auto scalar_iter = thrust::make_permutation_iterator(
auto scalar_index = dictionary::detail::get_index(
dict_view, source.get(), stream, rmm::mr::get_current_device_resource());
auto scalar_iter = thrust::make_permutation_iterator(
indexalator_factory::make_input_iterator(*scalar_index), thrust::make_constant_iterator(0));
auto new_indices = std::make_unique<column>(dict_view.get_indices_annotated(), stream, mr);
auto target_iter = indexalator_factory::make_output_iterator(new_indices->mutable_view());
Expand Down
9 changes: 4 additions & 5 deletions cpp/src/dictionary/add_keys.cu
Original file line number Diff line number Diff line change
Expand Up @@ -44,11 +44,10 @@ namespace detail {
* d2 is now {[a, b, c, d, e, f], [5, 0, 3, 1, 2, 2, 2, 5, 0]}
* ```
*/
std::unique_ptr<column> add_keys(
dictionary_column_view const& dictionary_column,
column_view const& new_keys,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
std::unique_ptr<column> add_keys(dictionary_column_view const& dictionary_column,
column_view const& new_keys,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_EXPECTS(!new_keys.has_nulls(), "Keys must not have nulls");
auto old_keys = dictionary_column.keys(); // [a,b,c,d,f]
Expand Down
Loading

0 comments on commit b3a43b8

Please sign in to comment.