Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement maps_column_view abstraction over LIST<STRUCT<K,V>> #10380

Merged
merged 26 commits into from
Mar 14, 2022
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
0702b6d
First attempt at restoring maps_column_view.
mythrocks Feb 27, 2022
fe5b88b
Detail headers added, for lists functions.
mythrocks Mar 1, 2022
04a8c9a
More tests for empties, nulls, slices, etc.
mythrocks Mar 1, 2022
ef8e97b
Formatting.
mythrocks Mar 1, 2022
be700ed
Doxygen for maps_column_view.
mythrocks Mar 1, 2022
7110ad0
Update Java bindings for mapLookup.
mythrocks Mar 3, 2022
9c0742c
Updated libcudf/meta.yaml for new detail header files.
mythrocks Mar 3, 2022
2752d9a
Updated formatting.
mythrocks Mar 3, 2022
1713718
Updated JNI bindings, tests, for integral types.
mythrocks Mar 8, 2022
736b16a
Added contains() to maps_column_view.
mythrocks Mar 8, 2022
62a1cb5
Update JNI bindings for mapContains.
mythrocks Mar 8, 2022
ddc1eed
Formatting.
mythrocks Mar 8, 2022
3014496
Updated JNI tests for MapKeyExistence.
mythrocks Mar 8, 2022
3b39271
Updated CUDF test for maps_column_view::contains().
mythrocks Mar 9, 2022
ed0bd0a
Moved maps_column_view to JNI.
mythrocks Mar 9, 2022
598ac17
Formatting, again.
mythrocks Mar 9, 2022
1910009
Updated copyright dates.
mythrocks Mar 9, 2022
cf722ff
Review comments:
mythrocks Mar 10, 2022
b67d5b0
Changed null offset to use numeric_limit::min(), not max().
mythrocks Mar 10, 2022
2febaaf
Added default for device_memory_resource in detail::extract().
mythrocks Mar 11, 2022
7ea5dbb
Return const references from keys(), values().
mythrocks Mar 14, 2022
631f0f8
Merge remote-tracking branch 'origin/branch-22.04' into maps-column-view
mythrocks Mar 14, 2022
d38648d
clang-format for java/ CMakeLists.txt.
mythrocks Mar 14, 2022
6138074
Alphabetical listing of source files.
mythrocks Mar 14, 2022
5c2c6b6
Revert "Alphabetical listing of source files."
mythrocks Mar 14, 2022
f69cf7f
Revert "clang-format for java/ CMakeLists.txt."
mythrocks Mar 14, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -352,6 +352,7 @@ add_library(
src/lists/lists_column_view.cu
src/lists/segmented_sort.cu
src/lists/sequences.cu
src/maps/maps_column_view.cu
src/merge/merge.cu
src/partitioning/partitioning.cu
src/partitioning/round_robin.cu
Expand Down
55 changes: 55 additions & 0 deletions cpp/include/cudf/lists/detail/contains.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once

#include <cudf/lists/contains.hpp>
#include <cudf/lists/lists_column_view.hpp>

namespace cudf {
namespace lists {
namespace detail {

/**
* @brief Detail-namespace overload of `index_of` for a single scalar search key,
* taking an explicit CUDA stream in addition to the public API's parameters.
*
* @copydoc cudf::lists::index_of(cudf::lists_column_view const&,
* cudf::scalar const&,
* duplicate_find_option,
* rmm::mr::device_memory_resource*)
* @param stream CUDA stream used for device memory operations and kernel launches.
*
* Note: `mr` defaults to `rmm::mr::get_current_device_resource()`; `stream` has no default
* and must always be supplied by the caller.
*/
std::unique_ptr<column> index_of(
cudf::lists_column_view const& lists,
cudf::scalar const& search_key,
cudf::lists::duplicate_find_option find_option,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Detail-namespace overload of `index_of` for a column of search keys,
* taking an explicit CUDA stream in addition to the public API's parameters.
*
* @copydoc cudf::lists::index_of(cudf::lists_column_view const&,
* cudf::column_view const&,
* duplicate_find_option,
* rmm::mr::device_memory_resource*)
* @param stream CUDA stream used for device memory operations and kernel launches.
*
* Note: `mr` defaults to `rmm::mr::get_current_device_resource()`; `stream` has no default
* and must always be supplied by the caller.
*/
std::unique_ptr<column> index_of(
cudf::lists_column_view const& lists,
cudf::column_view const& search_keys,
cudf::lists::duplicate_find_option find_option,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

} // namespace detail
} // namespace lists
} // namespace cudf
47 changes: 47 additions & 0 deletions cpp/include/cudf/lists/detail/extract.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once

#include <cudf/lists/extract.hpp>
#include <cudf/lists/lists_column_view.hpp>

namespace cudf {
namespace lists {
namespace detail {

/**
 * @brief Detail-namespace overload of `extract_list_element` for a single scalar index,
 * taking an explicit CUDA stream in addition to the public API's parameters.
 *
 * @copydoc cudf::lists::extract_list_element(lists_column_view, size_type,
 *                                            rmm::mr::device_memory_resource*)
 * @param stream CUDA stream used for device memory operations and kernel launches.
 *
 * `mr` defaults to the current device resource, matching the other `cudf::lists::detail`
 * declarations (e.g. `detail::index_of`), so callers only need to supply the stream.
 */
std::unique_ptr<column> extract_list_element(
  lists_column_view lists_column,
  size_type const index,
  rmm::cuda_stream_view stream,
  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
mythrocks marked this conversation as resolved.
Show resolved Hide resolved

/**
 * @brief Detail-namespace overload of `extract_list_element` for a column of per-row indices,
 * taking an explicit CUDA stream in addition to the public API's parameters.
 *
 * @copydoc cudf::lists::extract_list_element(lists_column_view, column_view const&,
 *                                            rmm::mr::device_memory_resource*)
 * @param stream CUDA stream used for device memory operations and kernel launches.
 *
 * `mr` defaults to the current device resource, matching the other `cudf::lists::detail`
 * declarations (e.g. `detail::index_of`), so callers only need to supply the stream.
 */
std::unique_ptr<column> extract_list_element(
  lists_column_view lists_column,
  column_view const& indices,
  rmm::cuda_stream_view stream,
  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

} // namespace detail
} // namespace lists
} // namespace cudf
109 changes: 109 additions & 0 deletions cpp/include/cudf/maps/maps_column_view.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once

#include <cudf/column/column.hpp>
#include <cudf/column/column_view.hpp>
#include <cudf/lists/lists_column_view.hpp>
#include <cudf/scalar/scalar.hpp>

#include <rmm/cuda_stream_view.hpp>

namespace cudf {

/**
* @brief Given a column-view of LIST<STRUCT<K,V>>, an instance of this class
* provides an abstraction of a column of maps.
*
* Each list row is treated as a map of key->value, with possibly repeated keys.
* The list may be looked up by a scalar key, or by a column of keys, to
* retrieve the corresponding value.
*/
class maps_column_view {
public:
/**
* @brief Constructs a maps view over a lists column view.
*
* @param lists_of_structs Lists column view to be interpreted as a column of maps;
* per the class description, expected to be LIST<STRUCT<K,V>>.
* @param stream CUDA stream handed to the constructor. How it is used is determined
* by the out-of-line definition, which is not part of this header.
*/
maps_column_view(lists_column_view const& lists_of_structs,
rmm::cuda_stream_view stream = rmm::cuda_stream_default);

// Rule of 5: copy/move construction, copy/move assignment and destruction are all defaulted.
maps_column_view(maps_column_view const& maps_view) = default;
maps_column_view(maps_column_view&& maps_view) = default;
maps_column_view& operator=(maps_column_view const&) = default;
maps_column_view& operator=(maps_column_view&&) = default;
~maps_column_view() = default;

/**
* @brief Returns number of map rows in the column.
*
* This is reported as the size of the keys list column.
*/
size_type size() const { return keys_.size(); }

/**
* @brief Getter for keys as a list column.
*
* Note: Keys are not deduped. Repeated keys are returned in order.
* The view is returned by value (a copy of the stored `lists_column_view`).
*/
lists_column_view keys() const { return keys_; }

/**
* @brief Getter for values as a list column.
*
* Note: Values for repeated keys are not dropped.
* The view is returned by value (a copy of the stored `lists_column_view`).
*/
lists_column_view values() const { return values_; }

/**
* @brief Map lookup by a column of keys.
*
* The lookup column must have as many rows as the map column,
* and must match the key-type of the map.
* A column of values is returned, with the same number of rows as the map column.
* If a key is repeated in a map row, the value corresponding to the last matching
* key is returned.
* If a lookup key is null or not found, the corresponding value is null.
*
* @param keys Column of keys to be looked up in each corresponding map row.
* @param stream CUDA stream used for device memory operations and kernel launches.
* @param mr Device memory resource used to allocate the returned column's device memory.
* @return std::unique_ptr<column> Column of values corresponding to the lookup keys.
*/
std::unique_ptr<column> get_values_for(
column_view const& keys,
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) const;

/**
* @brief Map lookup by a scalar key.
*
* The type of the lookup scalar must match the key-type of the map.
* A column of values is returned, with the same number of rows as the map column.
* If a key is repeated in a map row, the value corresponding to the last matching
* key is returned.
* If the lookup key is null or not found, the corresponding value is null.
*
* @param key Scalar key to be looked up in every map row.
* @param stream CUDA stream used for device memory operations and kernel launches.
* @param mr Device memory resource used to allocate the returned column's device memory.
* @return std::unique_ptr<column> Column of values corresponding to the lookup key.
*/
std::unique_ptr<column> get_values_for(
scalar const& key,
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) const;

private:
// List-column views handed back by keys() and values() respectively.
lists_column_view keys_, values_;
};

} // namespace cudf
43 changes: 21 additions & 22 deletions cpp/src/lists/contains.cu
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include <cudf/detail/iterator.cuh>
#include <cudf/detail/valid_if.cuh>
#include <cudf/lists/contains.hpp>
#include <cudf/lists/detail/contains.hpp>
#include <cudf/lists/list_device_view.cuh>
#include <cudf/lists/lists_column_device_view.cuh>
#include <cudf/lists/lists_column_view.hpp>
Expand Down Expand Up @@ -251,18 +252,17 @@ std::unique_ptr<column> to_contains(std::unique_ptr<column>&& key_positions,

namespace detail {
/**
* @copydoc cudf::lists::index_of(cudf::lists_column_view const&,
* cudf::scalar const&,
* duplicate_find_option,
* rmm::mr::device_memory_resource*)
* @param stream CUDA stream used for device memory operations and kernel launches.
* @copydoc cudf::lists::detail::index_of(cudf::lists_column_view const&,
* cudf::scalar const&,
* duplicate_find_option,
* rmm::cuda_stream_view,
* rmm::mr::device_memory_resource*)
*/
std::unique_ptr<column> index_of(
cudf::lists_column_view const& lists,
cudf::scalar const& search_key,
duplicate_find_option find_option,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
std::unique_ptr<column> index_of(cudf::lists_column_view const& lists,
cudf::scalar const& search_key,
duplicate_find_option find_option,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
return search_key.is_valid(stream)
? cudf::type_dispatcher(search_key.type(),
Expand All @@ -282,18 +282,17 @@ std::unique_ptr<column> index_of(
}

/**
* @copydoc cudf::lists::index_of(cudf::lists_column_view const&,
* cudf::column_view const&,
* duplicate_find_option,
* rmm::mr::device_memory_resource*)
* @param stream CUDA stream used for device memory operations and kernel launches.
* @copydoc cudf::lists::detail::index_of(cudf::lists_column_view const&,
* cudf::column_view const&,
* duplicate_find_option,
* rmm::cuda_stream_view,
* rmm::mr::device_memory_resource*)
*/
std::unique_ptr<column> index_of(
cudf::lists_column_view const& lists,
cudf::column_view const& search_keys,
duplicate_find_option find_option,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
std::unique_ptr<column> index_of(cudf::lists_column_view const& lists,
cudf::column_view const& search_keys,
duplicate_find_option find_option,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_EXPECTS(search_keys.size() == lists.size(),
"Number of search keys must match list column size.");
Expand Down
33 changes: 27 additions & 6 deletions cpp/src/lists/extract.cu
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include <cudf/copying.hpp>
#include <cudf/detail/iterator.cuh>
#include <cudf/detail/sequence.hpp>
#include <cudf/lists/detail/extract.hpp>
#include <cudf/lists/detail/gather.cuh>
#include <cudf/lists/extract.hpp>
#include <cudf/scalar/scalar_factories.hpp>
Expand Down Expand Up @@ -107,10 +108,10 @@ std::unique_ptr<cudf::column> make_index_offsets(size_type num_lists, rmm::cuda_
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
template <typename index_t>
std::unique_ptr<column> extract_list_element(lists_column_view lists_column,
index_t const& index,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
std::unique_ptr<column> extract_list_element_impl(lists_column_view lists_column,
index_t const& index,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
auto const num_lists = lists_column.size();
if (num_lists == 0) { return empty_like(lists_column.child()); }
Expand All @@ -135,6 +136,26 @@ std::unique_ptr<column> extract_list_element(lists_column_view lists_column,
return std::move(extracted_lists->release().children[lists_column_view::child_column_index]);
}

/**
* @brief Non-template entry point for extracting by a single scalar index.
*
* Forwards its arguments unchanged to `extract_list_element_impl`.
*
* @copydoc cudf::lists::extract_list_element
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
std::unique_ptr<column> extract_list_element(lists_column_view lists_column,
size_type const index,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
return detail::extract_list_element_impl(lists_column, index, stream, mr);
}

/**
* @brief Non-template entry point for extracting by a column of indices.
*
* Forwards its arguments unchanged to `extract_list_element_impl`. This wrapper performs
* no validation of its own (e.g. of `indices.size()`) before forwarding.
*
* @param lists_column Lists column to extract elements from.
* @param indices Column of indices passed through to the implementation.
* @param stream CUDA stream used for device memory operations and kernel launches.
* @param mr Device memory resource passed through to the implementation.
* @return The column produced by `extract_list_element_impl`.
*/
std::unique_ptr<column> extract_list_element(lists_column_view lists_column,
column_view const& indices,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
return detail::extract_list_element_impl(lists_column, indices, stream, mr);
}

} // namespace detail

/**
Expand All @@ -146,7 +167,7 @@ std::unique_ptr<column> extract_list_element(lists_column_view const& lists_colu
size_type index,
rmm::mr::device_memory_resource* mr)
{
return detail::extract_list_element(lists_column, index, rmm::cuda_stream_default, mr);
return detail::extract_list_element_impl(lists_column, index, rmm::cuda_stream_default, mr);
}

/**
Expand All @@ -160,7 +181,7 @@ std::unique_ptr<column> extract_list_element(lists_column_view const& lists_colu
{
CUDF_EXPECTS(indices.size() == lists_column.size(),
"Index column must have as many elements as lists column.");
return detail::extract_list_element(lists_column, indices, rmm::cuda_stream_default, mr);
return detail::extract_list_element_impl(lists_column, indices, rmm::cuda_stream_default, mr);
}

} // namespace lists
Expand Down
Loading