Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[REVIEW] Define and implement new search APIs #3229

Merged
merged 23 commits into from
Nov 19, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
9b517ab
search code with broken tester - waiting for column_wrapper
ChuckHastings Oct 17, 2019
26dd539
Merge remote-tracking branch 'jake/fea-ext-new-column-wrapper' into f…
ChuckHastings Oct 17, 2019
661dda4
Merge branch 'branch-0.11' into fea_move_search_part2
ChuckHastings Oct 18, 2019
47b8624
Merge branch 'branch-0.11' into fea_move_search_part2
ChuckHastings Oct 25, 2019
a5f9cbc
allow access to column 0 in the table device view
ChuckHastings Oct 28, 2019
37abb6e
move bit_is_set to be host callable for use in testing
ChuckHastings Oct 28, 2019
9bffd6b
implement all of the upper_bound and lower_bound tests for search
ChuckHastings Oct 28, 2019
fc83cf9
Merge branch 'branch-0.11' into fea_move_search_part2
ChuckHastings Oct 28, 2019
ecf0610
documentation changes from PR
ChuckHastings Oct 28, 2019
34a19c1
Merge remote-tracking branch 'devavret/fea-cudf-scalar' into fea_move…
ChuckHastings Oct 28, 2019
6f30761
Merge branch 'branch-0.11' into fea_move_search_part2
ChuckHastings Nov 1, 2019
94c1044
Merge remote-tracking branch 'devavret/fea-cudf-scalar' into fea_move…
ChuckHastings Nov 1, 2019
c5fe14c
Merge branch 'branch-0.11' into fea_move_search_part2
ChuckHastings Nov 4, 2019
e14d573
use new fill mechanism to populate column from scalar
ChuckHastings Nov 5, 2019
077d113
Merge branch 'branch-0.11' into fea_move_search_part2
ChuckHastings Nov 7, 2019
67d3289
Update search to use latest from branch-0.11
ChuckHastings Nov 7, 2019
ab5d958
update changelog
ChuckHastings Nov 7, 2019
9912a6a
add string support and string tests
ChuckHastings Nov 13, 2019
fddd2ff
Merge branch 'branch-0.11' into fea_move_search_part2
ChuckHastings Nov 13, 2019
69d426d
remove some dead and useless code
ChuckHastings Nov 14, 2019
4e8ac4f
clean up some code in search_test, revert column_utilities to elimina…
ChuckHastings Nov 14, 2019
105d33b
Apply suggestions from code review
ChuckHastings Nov 18, 2019
d06b092
make last changes compile
ChuckHastings Nov 18, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
- PR #3278 Add `to_host` utility to copy `column_view` to host
- PR #3087 Add new cudf::experimental bool8 wrapper
- PR #3219 Construct column from column_view
- PR #3229 Define and implement new search APIs
- PR #3308 java add API for memory usage callbacks
- PR #2691 Row-wise reduction and scan operations via CuPy
- PR #3291 Add normalize_nans_and_zeros
Expand Down
1 change: 1 addition & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -475,6 +475,7 @@ add_library(cudf
src/filling/legacy/repeat.cu
src/filling/legacy/tile.cu
src/search/legacy/search.cu
src/search/search.cu
src/column/column.cu
src/column/column_view.cpp
src/column/column_device_view.cu
Expand Down
7 changes: 6 additions & 1 deletion cpp/include/cudf/scalar/scalar.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -131,13 +131,18 @@ class fixed_width_scalar : public scalar {
*
* @param stream The CUDA stream to do the operation in
*/
// Const-qualified so the value can also be read through a const scalar.
// A separate non-const overload with an identical body is unnecessary:
// a const member function is callable on non-const objects as well.
T value(cudaStream_t stream = 0) const { return _data.value(stream); }

/**
* @brief Returns a raw pointer to the value in device memory
*
* Non-const overload: the returned pointer is a `T*`.
*/
T* data() { return _data.data(); }

/**
* @brief Returns a raw pointer-to-const to the value in device memory
*
* Const overload: callable on a const scalar, the returned pointer is a
* `T const*`.
*/
T const* data() const { return _data.data(); }

protected:
rmm::device_scalar<T> _data{}; ///< device memory containing the value

Expand Down
135 changes: 135 additions & 0 deletions cpp/include/cudf/search.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
/*
* Copyright (c) 2019, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <cudf/types.hpp>
#include <cudf/column/column.hpp>
#include <cudf/scalar/scalar.hpp>
#include <cudf/table/table.hpp>

#include <vector>

namespace cudf {
namespace experimental {

/**---------------------------------------------------------------------------*
* @brief Find smallest indices in a sorted table where values should be
* inserted to maintain order
*
* For each row v in @p values, find the first index in @p t where
* inserting the row will maintain the sort order of @p t
*
* Example:
*
* Single column:
*   idx        0   1   2   3   4
*   column = { 10, 20, 20, 30, 50 }
*   values = { 20 }
*   result = { 1 }
*
* Multi Column:
*   idx        0    1    2    3    4
*   t      = {{  10,  20,  20,  20,  20 },
*             { 5.0,  .5,  .5,  .7,  .7 },
*             {  90,  77,  78,  61,  61 }}
*   values = {{ 20 },
*             { .7 },
*             { 61 }}
*   result = { 3 }
*
* In the multi column example rows 3 and 4 of @p t equal the searched row,
* so the first valid insertion point is index 3.
*
* @param t Table to search
* @param values Find insert locations for these values
* @param column_order Vector of column sort order
* @param null_precedence Vector of null_order enum values
* @param mr Device memory resource to use for device memory allocation
* @return std::unique_ptr<column> A non-nullable column of cudf::size_type elements
* containing the insertion points.
*---------------------------------------------------------------------------**/
std::unique_ptr<column> lower_bound(table_view const& t,
table_view const& values,
std::vector<order> const& column_order,
std::vector<null_order> const& null_precedence,
rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource());

/**---------------------------------------------------------------------------*
* @brief Find largest indices in a sorted table where values should be
* inserted to maintain order
*
* For each row v in @p values, find the last index in @p t where
* inserting the row will maintain the sort order of @p t
*
* Example:
*
* Single Column:
*   idx        0   1   2   3   4
*   column = { 10, 20, 20, 30, 50 }
*   values = { 20 }
*   result = { 3 }
*
* Multi Column:
*   idx        0    1    2    3    4
*   t      = {{  10,  20,  20,  20,  20 },
*             { 5.0,  .5,  .5,  .7,  .7 },
*             {  90,  77,  78,  61,  61 }}
*   values = {{ 20 },
*             { .7 },
*             { 61 }}
*   result = { 5 }
*
* In the multi column example rows 3 and 4 of @p t equal the searched row,
* so the last valid insertion point is index 5 (one past the final row).
*
* @param t Table to search
* @param values Find insert locations for these values
* @param column_order Vector of column sort order
* @param null_precedence Vector of null_order enum values
* @param mr Device memory resource to use for device memory allocation
* @return std::unique_ptr<column> A non-nullable column of cudf::size_type elements
* containing the insertion points.
*---------------------------------------------------------------------------**/
std::unique_ptr<column> upper_bound(table_view const& t,
table_view const& values,
std::vector<order> const& column_order,
std::vector<null_order> const& null_precedence,
rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource());

/**---------------------------------------------------------------------------*
* @brief Find if the `value` is present in the `col`
*
* @throws cudf::logic_error
* If `col.type() != value.type()`
*
* Example:
*
* Single Column:
*   idx      0   1   2   3   4
*   col  = { 10, 20, 20, 30, 50 }
* Scalar:
*   value  = { 20 }
*   result = true
*
* @param col A column object
* @param value A scalar value to search for in `col`
* @param mr Device memory resource to use for device memory allocation
*
* @return bool true if `value` is found in `col`, else false.
*---------------------------------------------------------------------------**/
bool contains(column_view const& col, scalar const& value,
rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource());

} // namespace experimental
} // namespace cudf


6 changes: 3 additions & 3 deletions cpp/include/cudf/table/table_device_view.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -43,13 +43,13 @@ class table_device_view_base {

/**
 * @brief Returns a const reference to the column at the specified index.
 *
 * @param column_index Index of the desired column; asserted to satisfy
 * `0 <= column_index < _num_columns`
 * @return Const reference to `_columns[column_index]`
 */
__device__ ColumnDeviceView const& column(size_type column_index) const
    noexcept {
  // Index 0 is a valid column, so the lower bound check must be inclusive.
  assert(column_index >= 0);
  assert(column_index < _num_columns);
  return _columns[column_index];
}

/**
 * @brief Returns a mutable reference to the column at the specified index.
 *
 * @param column_index Index of the desired column; asserted to satisfy
 * `0 <= column_index < _num_columns`
 * @return Reference to `_columns[column_index]`
 */
__device__ ColumnDeviceView& column(size_type column_index) noexcept {
  // Index 0 is a valid column, so the lower bound check must be inclusive.
  assert(column_index >= 0);
  assert(column_index < _num_columns);
  return _columns[column_index];
}
Expand Down Expand Up @@ -104,4 +104,4 @@ class mutable_table_device_view
mutable_table_view>(source_view,
stream) {}
};
} // namespace cudf
} // namespace cudf
Loading