Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/branch-22.04' into 10049
Browse files Browse the repository at this point in the history
  • Loading branch information
galipremsagar committed Mar 15, 2022
2 parents 08c64a5 + 4596244 commit f671c6d
Show file tree
Hide file tree
Showing 15 changed files with 906 additions and 205 deletions.
2 changes: 2 additions & 0 deletions conda/recipes/libcudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,9 @@ test:
- test -f $PREFIX/include/cudf/labeling/label_bins.hpp
- test -f $PREFIX/include/cudf/lists/detail/combine.hpp
- test -f $PREFIX/include/cudf/lists/detail/concatenate.hpp
- test -f $PREFIX/include/cudf/lists/detail/contains.hpp
- test -f $PREFIX/include/cudf/lists/detail/copying.hpp
- test -f $PREFIX/include/cudf/lists/detail/extract.hpp
- test -f $PREFIX/include/cudf/lists/lists_column_factories.hpp
- test -f $PREFIX/include/cudf/lists/detail/drop_list_duplicates.hpp
- test -f $PREFIX/include/cudf/lists/detail/interleave_columns.hpp
Expand Down
78 changes: 78 additions & 0 deletions cpp/include/cudf/lists/detail/contains.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once

#include <cudf/lists/contains.hpp>
#include <cudf/lists/lists_column_view.hpp>

namespace cudf {
namespace lists {
namespace detail {

/**
* @copydoc cudf::lists::index_of(cudf::lists_column_view const&,
* cudf::scalar const&,
* duplicate_find_option,
* rmm::mr::device_memory_resource*)
* @param stream CUDA stream used for device memory operations and kernel launches.
*
* @note Detail-namespace overload for a single scalar `search_key`. It takes the arguments
* listed in the signature referenced above plus an explicit `stream`. When `mr` is omitted it
* defaults to `rmm::mr::get_current_device_resource()`; `stream` has no default and must always
* be supplied.
*/
std::unique_ptr<column> index_of(
cudf::lists_column_view const& lists,
cudf::scalar const& search_key,
cudf::lists::duplicate_find_option find_option,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @copydoc cudf::lists::index_of(cudf::lists_column_view const&,
* cudf::column_view const&,
* duplicate_find_option,
* rmm::mr::device_memory_resource*)
* @param stream CUDA stream used for device memory operations and kernel launches.
*
* @note Detail-namespace overload taking a column of `search_keys` instead of a single scalar.
* It takes the arguments listed in the signature referenced above plus an explicit `stream`.
* When `mr` is omitted it defaults to `rmm::mr::get_current_device_resource()`; `stream` has no
* default and must always be supplied.
*/
std::unique_ptr<column> index_of(
cudf::lists_column_view const& lists,
cudf::column_view const& search_keys,
cudf::lists::duplicate_find_option find_option,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @copydoc cudf::lists::contains(cudf::lists_column_view const&,
* cudf::scalar const&,
* rmm::mr::device_memory_resource*)
* @param stream CUDA stream used for device memory operations and kernel launches.
*
* @note Detail-namespace overload for a single scalar `search_key`. It takes the arguments
* listed in the signature referenced above plus an explicit `stream`. When `mr` is omitted it
* defaults to `rmm::mr::get_current_device_resource()`; `stream` has no default and must always
* be supplied.
*/
std::unique_ptr<column> contains(
cudf::lists_column_view const& lists,
cudf::scalar const& search_key,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @copydoc cudf::lists::contains(cudf::lists_column_view const&,
* cudf::column_view const&,
* rmm::mr::device_memory_resource*)
* @param stream CUDA stream used for device memory operations and kernel launches.
*
* @note Detail-namespace overload taking a column of `search_keys` instead of a single scalar.
* It takes the arguments listed in the signature referenced above plus an explicit `stream`.
* When `mr` is omitted it defaults to `rmm::mr::get_current_device_resource()`; `stream` has no
* default and must always be supplied.
*/
std::unique_ptr<column> contains(
cudf::lists_column_view const& lists,
cudf::column_view const& search_keys,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
} // namespace detail
} // namespace lists
} // namespace cudf
49 changes: 49 additions & 0 deletions cpp/include/cudf/lists/detail/extract.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once

#include <cudf/lists/extract.hpp>
#include <cudf/lists/lists_column_view.hpp>

namespace cudf {
namespace lists {
namespace detail {

/**
* @copydoc cudf::lists::extract_list_element(lists_column_view, size_type,
* rmm::mr::device_memory_resource*)
* @param stream CUDA stream used for device memory operations and kernel launches.
*
* @note Detail-namespace overload that applies one scalar `index` to the lists column. It takes
* the arguments listed in the signature referenced above plus an explicit `stream`. When `mr` is
* omitted it defaults to `rmm::mr::get_current_device_resource()`; `stream` has no default and
* must always be supplied.
*/
std::unique_ptr<column> extract_list_element(
lists_column_view lists_column,
size_type const index,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @copydoc cudf::lists::extract_list_element(lists_column_view, column_view const&,
* rmm::mr::device_memory_resource*)
* @param stream CUDA stream used for device memory operations and kernel launches.
*
* @note Detail-namespace overload taking a column of `indices` instead of a single scalar index.
* It takes the arguments listed in the signature referenced above plus an explicit `stream`.
* When `mr` is omitted it defaults to `rmm::mr::get_current_device_resource()`; `stream` has no
* default and must always be supplied.
*/
std::unique_ptr<column> extract_list_element(
lists_column_view lists_column,
column_view const& indices,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

} // namespace detail
} // namespace lists
} // namespace cudf
61 changes: 30 additions & 31 deletions cpp/src/lists/contains.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
* Copyright (c) 2021-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -18,6 +18,7 @@
#include <cudf/detail/iterator.cuh>
#include <cudf/detail/valid_if.cuh>
#include <cudf/lists/contains.hpp>
#include <cudf/lists/detail/contains.hpp>
#include <cudf/lists/list_device_view.cuh>
#include <cudf/lists/lists_column_device_view.cuh>
#include <cudf/lists/lists_column_view.hpp>
Expand Down Expand Up @@ -251,18 +252,17 @@ std::unique_ptr<column> to_contains(std::unique_ptr<column>&& key_positions,

namespace detail {
/**
* @copydoc cudf::lists::index_of(cudf::lists_column_view const&,
* cudf::scalar const&,
* duplicate_find_option,
* rmm::mr::device_memory_resource*)
* @param stream CUDA stream used for device memory operations and kernel launches.
* @copydoc cudf::lists::detail::index_of(cudf::lists_column_view const&,
* cudf::scalar const&,
* duplicate_find_option,
* rmm::cuda_stream_view,
* rmm::mr::device_memory_resource*)
*/
std::unique_ptr<column> index_of(
cudf::lists_column_view const& lists,
cudf::scalar const& search_key,
duplicate_find_option find_option,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
std::unique_ptr<column> index_of(cudf::lists_column_view const& lists,
cudf::scalar const& search_key,
duplicate_find_option find_option,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
return search_key.is_valid(stream)
? cudf::type_dispatcher(search_key.type(),
Expand All @@ -282,18 +282,17 @@ std::unique_ptr<column> index_of(
}

/**
* @copydoc cudf::lists::index_of(cudf::lists_column_view const&,
* cudf::column_view const&,
* duplicate_find_option,
* rmm::mr::device_memory_resource*)
* @param stream CUDA stream used for device memory operations and kernel launches.
* @copydoc cudf::lists::detail::index_of(cudf::lists_column_view const&,
* cudf::column_view const&,
* duplicate_find_option,
* rmm::cuda_stream_view,
* rmm::mr::device_memory_resource*)
*/
std::unique_ptr<column> index_of(
cudf::lists_column_view const& lists,
cudf::column_view const& search_keys,
duplicate_find_option find_option,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
std::unique_ptr<column> index_of(cudf::lists_column_view const& lists,
cudf::column_view const& search_keys,
duplicate_find_option find_option,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_EXPECTS(search_keys.size() == lists.size(),
"Number of search keys must match list column size.");
Expand All @@ -316,10 +315,10 @@ std::unique_ptr<column> index_of(
}

/**
* @copydoc cudf::lists::contains(cudf::lists_column_view const&,
* cudf::scalar const&,
* rmm::mr::device_memory_resource*)
* @param stream CUDA stream used for device memory operations and kernel launches.
* @copydoc cudf::lists::detail::contains(cudf::lists_column_view const&,
* cudf::scalar const&,
* rmm::cuda_stream_view,
* rmm::mr::device_memory_resource*)
*/
std::unique_ptr<column> contains(cudf::lists_column_view const& lists,
cudf::scalar const& search_key,
Expand All @@ -331,10 +330,10 @@ std::unique_ptr<column> contains(cudf::lists_column_view const& lists,
}

/**
* @copydoc cudf::lists::contains(cudf::lists_column_view const&,
* cudf::column_view const&,
* rmm::mr::device_memory_resource*)
* @param stream CUDA stream used for device memory operations and kernel launches.
* @copydoc cudf::lists::detail::contains(cudf::lists_column_view const&,
* cudf::column_view const&,
* rmm::cuda_stream_view,
* rmm::mr::device_memory_resource*)
*/
std::unique_ptr<column> contains(cudf::lists_column_view const& lists,
cudf::column_view const& search_keys,
Expand Down
35 changes: 28 additions & 7 deletions cpp/src/lists/extract.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2021, NVIDIA CORPORATION.
* Copyright (c) 2020-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -18,6 +18,7 @@
#include <cudf/copying.hpp>
#include <cudf/detail/iterator.cuh>
#include <cudf/detail/sequence.hpp>
#include <cudf/lists/detail/extract.hpp>
#include <cudf/lists/detail/gather.cuh>
#include <cudf/lists/extract.hpp>
#include <cudf/scalar/scalar_factories.hpp>
Expand Down Expand Up @@ -107,10 +108,10 @@ std::unique_ptr<cudf::column> make_index_offsets(size_type num_lists, rmm::cuda_
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
template <typename index_t>
std::unique_ptr<column> extract_list_element(lists_column_view lists_column,
index_t const& index,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
std::unique_ptr<column> extract_list_element_impl(lists_column_view lists_column,
index_t const& index,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
auto const num_lists = lists_column.size();
if (num_lists == 0) { return empty_like(lists_column.child()); }
Expand All @@ -135,6 +136,26 @@ std::unique_ptr<column> extract_list_element(lists_column_view lists_column,
return std::move(extracted_lists->release().children[lists_column_view::child_column_index]);
}

/**
 * @copydoc cudf::lists::extract_list_element
 * @param stream CUDA stream used for device memory operations and kernel launches.
 *
 * Scalar-index entry point: forwards all arguments unchanged to the shared
 * `extract_list_element_impl` template.
 */
std::unique_ptr<column> extract_list_element(lists_column_view lists_column,
                                             size_type const index,
                                             rmm::cuda_stream_view stream,
                                             rmm::mr::device_memory_resource* mr)
{
  // Bind the result to a named local first; it is returned by value (moved/elided).
  auto extracted = detail::extract_list_element_impl(lists_column, index, stream, mr);
  return extracted;
}

/**
 * @brief Stream-accepting overload of `extract_list_element` taking a column of indices.
 *
 * Forwards all arguments unchanged to the shared `extract_list_element_impl` template.
 *
 * NOTE(review): no check that `indices` and `lists_column` have the same number of rows is
 * made here; the public wrapper in this file performs that `CUDF_EXPECTS` before delegating.
 * Confirm that direct callers of this detail overload validate sizes themselves.
 *
 * @param lists_column Lists column to extract elements from.
 * @param indices Column of indices passed through to the implementation.
 * @param stream CUDA stream used for device memory operations and kernel launches.
 * @param mr Device memory resource passed through to the implementation.
 * @return The column produced by `extract_list_element_impl`.
 */
std::unique_ptr<column> extract_list_element(lists_column_view lists_column,
                                             column_view const& indices,
                                             rmm::cuda_stream_view stream,
                                             rmm::mr::device_memory_resource* mr)
{
  // Bind the result to a named local first; it is returned by value (moved/elided).
  auto extracted = detail::extract_list_element_impl(lists_column, indices, stream, mr);
  return extracted;
}

} // namespace detail

/**
Expand All @@ -146,7 +167,7 @@ std::unique_ptr<column> extract_list_element(lists_column_view const& lists_colu
size_type index,
rmm::mr::device_memory_resource* mr)
{
return detail::extract_list_element(lists_column, index, rmm::cuda_stream_default, mr);
return detail::extract_list_element_impl(lists_column, index, rmm::cuda_stream_default, mr);
}

/**
Expand All @@ -160,7 +181,7 @@ std::unique_ptr<column> extract_list_element(lists_column_view const& lists_colu
{
CUDF_EXPECTS(indices.size() == lists_column.size(),
"Index column must have as many elements as lists column.");
return detail::extract_list_element(lists_column, indices, rmm::cuda_stream_default, mr);
return detail::extract_list_element_impl(lists_column, indices, rmm::cuda_stream_default, mr);
}

} // namespace lists
Expand Down
11 changes: 11 additions & 0 deletions java/src/main/java/ai/rapids/cudf/ColumnVector.java
Original file line number Diff line number Diff line change
Expand Up @@ -1206,6 +1206,17 @@ public static ColumnVector emptyStructs(HostColumnVector.DataType dataType, long
}
}

/**
 * Create a new BOOL8 vector from the given boolean values.
 * Each value is stored as a single byte: 1 for {@code true}, 0 for {@code false}.
 * @param values the booleans to place in the vector, in order
 * @return a new vector with one row per input value
 */
public static ColumnVector fromBooleans(boolean... values) {
  final int rowCount = values.length;
  final byte[] encoded = new byte[rowCount];
  int next = 0;
  for (boolean flag : values) {
    encoded[next++] = (byte) (flag ? 1 : 0);
  }
  return build(DType.BOOL8, rowCount, (builder) -> builder.appendArray(encoded));
}

/**
* Create a new vector from the given values.
*/
Expand Down
27 changes: 16 additions & 11 deletions java/src/main/java/ai/rapids/cudf/ColumnView.java
Original file line number Diff line number Diff line change
Expand Up @@ -3244,17 +3244,23 @@ public final ColumnVector urlEncode() throws CudfException {
return new ColumnVector(urlEncode(getNativeView()));
}

/** For a column of type List<Struct<String, String>> and a passed in String key, return a string column
* for all the values in the struct that match the key, null otherwise.
* @param key the String scalar to lookup in the column
* @return a string column of values or nulls based on the lookup result
/**
 * Assert that the given type may be used as a map lookup key.
 * EMPTY, LIST and STRUCT key types are rejected; every other type is accepted.
 * The check is a Java assertion, so it only runs when assertions are enabled.
 * @param keyType the type of the lookup key
 */
private static void assertIsSupportedMapKeyType(DType keyType) {
  boolean isSupportedKeyType =
      !keyType.equals(DType.EMPTY) && !keyType.equals(DType.LIST) && !keyType.equals(DType.STRUCT);
  // The message lists every rejected type, including EMPTY, so a failure is not misleading.
  assert isSupportedKeyType : "Map lookup by EMPTY, LIST and STRUCT keys is not supported.";
}

/**
 * Given a column of type List<Struct<X, Y>> and a key of type X, return a column of type Y,
 * where each row in the output column is the Y value corresponding to the X key.
 * If the key is not found, the corresponding output value is null.
 * Preconditions are checked with Java assertions, so they only run when assertions are enabled.
 * @param key the scalar key to lookup in the column; must not be null, and its type must not
 *            be EMPTY, LIST or STRUCT
 * @return a column of values or nulls based on the lookup result
 */
public final ColumnVector getMapValue(Scalar key) {
  assert type.equals(DType.LIST) : "column type must be a LIST";
  assert key != null : "Lookup key may not be null";
  // Keys are no longer restricted to STRING; only the unsupported key types are rejected.
  assertIsSupportedMapKeyType(key.getType());
  return new ColumnVector(mapLookup(getNativeView(), key.getScalarHandle()));
}

Expand All @@ -3266,9 +3272,8 @@ public final ColumnVector getMapValue(Scalar key) {
*/
public final ColumnVector getMapKeyExistence(Scalar key) {
  // Preconditions are Java assertions, so they only run when assertions are enabled.
  assert type.equals(DType.LIST) : "column type must be a LIST";
  assert key != null : "Lookup key may not be null";
  // Keys are no longer restricted to STRING; only the unsupported key types are rejected.
  assertIsSupportedMapKeyType(key.getType());
  return new ColumnVector(mapContains(getNativeView(), key.getScalarHandle()));
}

Expand Down
4 changes: 2 additions & 2 deletions java/src/main/java/ai/rapids/cudf/GroupByAggregation.java
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,7 @@ public static GroupByAggregation collectList(NullPolicy nullPolicy) {
}

/**
* Collect the values into a set. All null values will be excluded, and all nan values are regarded as
* Collect the values into a set. All null values will be excluded, and all NaN values are regarded as
* unique instances.
*/
public static GroupByAggregation collectSet() {
Expand All @@ -270,7 +270,7 @@ public static GroupByAggregation mergeLists() {
}

/**
* Merge the partial sets produced by multiple CollectSetAggregations. Each null/nan value will be regarded as
* Merge the partial sets produced by multiple CollectSetAggregations. Each null/NaN value will be regarded as
* a unique instance.
*/
public static GroupByAggregation mergeSets() {
Expand Down
Loading

0 comments on commit f671c6d

Please sign in to comment.