Skip to content

Commit

Permalink
Merge pull request #10 from rapidsai/branch-0.10
Browse files Browse the repository at this point in the history
New set of changes
  • Loading branch information
rgsl888prabhu authored Sep 5, 2019
2 parents 1416252 + 0579fda commit 504eca6
Show file tree
Hide file tree
Showing 8 changed files with 222 additions and 15 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
- PR #2653 Add Java bindings for rolling window operations
- PR #2674 Add __contains__ for Index/Series/Column
- PR #2722 Add Java bindings for NVTX ranges
- PR #2724 Add libcudf support for __contains__

## Improvements

Expand Down Expand Up @@ -47,6 +48,7 @@
- PR #2698 Return RangeIndex from contiguous slice of RangeIndex
- PR #2672 Fix null and integer handling in round
- PR #2725 Fix Jitify issue with running on Turing using CUDA version < 10
- PR #2731 Fix building of benchmarks


# cuDF 0.9.0 (Date TBD)
Expand Down
7 changes: 2 additions & 5 deletions cpp/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -33,21 +33,18 @@ include_directories("${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}"
"${ARROW_INCLUDE_DIR}"
"${FLATBUFFERS_INCLUDE_DIR}"
"${RMM_INCLUDE}"
"${NVSTRINGS_INCLUDE}"
"${CMAKE_CURRENT_SOURCE_DIR}")

###################################################################################################
# - library paths ---------------------------------------------------------------------------------

link_directories("${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}" # CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES is an undocumented/unsupported variable containing the link directories for nvcc
"${CMAKE_BINARY_DIR}/lib"
"${CMAKE_BINARY_DIR}"
"${FLATBUFFERS_LIBRARY_DIR}"
"${GTEST_LIBRARY_DIR}"
"${GBENCH_LIBRARY_DIR}"
"${RMM_LIBRARY}"
"${NVSTRINGS_LIBRARY}"
"${NVCATEGORY_LIBRARY}"
"${NVTEXT_LIBRARY}")
"${RMM_LIBRARY}")

###################################################################################################
### test sources ##################################################################################
Expand Down
22 changes: 22 additions & 0 deletions cpp/include/cudf/search.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -99,5 +99,27 @@ gdf_column upper_bound(table const& t,
std::vector<bool> const& desc_flags,
bool nulls_as_largest = true);

/**---------------------------------------------------------------------------*
 * @brief Checks whether `value` is present in `column`. The dtype of `value`
 * and `column` must match.
 *
 * An empty `column` never contains `value`. If `value` is not valid (null),
 * the result is whether `column` has nulls.
 *
 * @throws cudf::logic_error
 * If the dtypes of `column` and `value` do not match
 *
 * @example:
 *
 *  Single Column:
 *      idx      0   1   2   3   4
 *   column = { 10, 20, 20, 30, 50 }
 *  Scalar:
 *   value = { 20 }
 *   result = true
 *
 * @param column A gdf column
 * @param value A scalar value to search for in `column`
 *
 * @return bool True if `value` is found in `column`, false otherwise.
 *---------------------------------------------------------------------------**/
bool contains(gdf_column const& column, gdf_scalar const& value);
} // namespace cudf

61 changes: 61 additions & 0 deletions cpp/src/search/search.cu
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include <table/legacy/device_table_row_operators.cuh>
#include <cudf/utilities/legacy/wrapper_types.hpp>
#include <utilities/column_utils.hpp>
#include <io/utilities/wrapper_utils.hpp>

#include <cudf/search.hpp>
#include <cudf/copying.hpp>
Expand Down Expand Up @@ -111,6 +112,62 @@ gdf_column search_ordered(table const& t,
return result;
}

/**
 * @brief Unary predicate over row indices that wraps a
 * `row_equality_comparator<nullable>` built from two device tables.
 *
 * Calling the predicate with index `i` evaluates `compare(i, 0)`; presumably
 * this tests row `i` of `t` against row 0 of `val` (confirm against
 * `row_equality_comparator`).
 *
 * @tparam nullable Forwarded to `row_equality_comparator`
 */
template <bool nullable = true>
struct compare_with_value {
  // Comparator that does the actual work; constructed once on the host.
  row_equality_comparator<nullable> compare;

  compare_with_value(device_table t,
                     device_table val,
                     bool nulls_are_equal = true)
    : compare(t, val, nulls_are_equal)
  {
  }

  // The second index is fixed at 0: `val` is only ever addressed at its first row.
  __device__ bool operator()(gdf_index_type i)
  {
    return compare(i, 0);
  }
};

/**
 * @brief Implementation of `cudf::contains`: reports whether `value` occurs
 * in `column`.
 *
 * @throws cudf::logic_error (via CUDF_EXPECTS) if the dtypes differ
 *
 * @param column Column to search
 * @param value  Scalar to look for
 * @param stream CUDA stream used for the allocation, the copy and the search
 *
 * @return false for an empty column; `cudf::has_nulls(column)` when `value`
 * is not valid; otherwise whether any row of `column` compares equal to
 * `value`.
 */
bool contains(gdf_column const& column,
              gdf_scalar const& value,
              cudaStream_t stream = 0)
{
  CUDF_EXPECTS(column.dtype == value.dtype, "DTYPE mismatch");

  // Nothing to search through
  if (0 == column.size) { return false; }

  // An invalid scalar is answered purely from the column's null state
  if (!value.is_valid) { return cudf::has_nulls(column); }

  // Stage the scalar's bytes in a one-element column so that it can be fed
  // to the row comparator like any other column.
  auto const value_bytes = cudf::size_of(value.dtype);
  gdf_column_wrapper scalar_column(1, value.dtype, gdf_dtype_extra_info{}, "");
  RMM_TRY(RMM_ALLOC(&scalar_column.get()->data, value_bytes, stream));
  CUDA_TRY(cudaMemcpyAsync(scalar_column.get()->data,
                           (void*) &value.data,
                           value_bytes,
                           cudaMemcpyHostToDevice,
                           stream));

  // device_table::create takes an array of mutable column pointers
  gdf_column* column_ptr = const_cast<gdf_column*>(&column);
  gdf_column* scalar_ptr = scalar_column.get();

  // Single-column device tables for the searched column and the scalar
  auto haystack = device_table::create(1, &column_ptr, stream);
  auto needle   = device_table::create(1, &scalar_ptr, stream);

  auto const first_row = thrust::make_counting_iterator(0);
  auto const last_row  = first_row + column.size;

  // Pick the comparator instantiation according to whether the column has nulls
  if (cudf::has_nulls(column)) {
    return thrust::any_of(rmm::exec_policy(stream)->on(stream),
                          first_row, last_row,
                          compare_with_value<true>(*haystack, *needle, true));
  }

  return thrust::any_of(rmm::exec_policy(stream)->on(stream),
                        first_row, last_row,
                        compare_with_value<false>(*haystack, *needle, true));
}
} // namespace detail

gdf_column lower_bound(table const& t,
Expand All @@ -129,4 +186,8 @@ gdf_column upper_bound(table const& t,
return detail::search_ordered(t, values, false, desc_flags, nulls_as_largest);
}

// Public entry point: forwards to detail::contains, leaving the stream
// argument at its default.
bool contains(gdf_column const& column, gdf_scalar const& value)
{
  bool const found = detail::contains(column, value);
  return found;
}
} // namespace cudf
108 changes: 108 additions & 0 deletions cpp/tests/search/search_test.cu
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,11 @@

#include <tests/utilities/cudf_test_fixtures.h>
#include <tests/utilities/column_wrapper.cuh>
#include <tests/utilities/scalar_wrapper.cuh>
#include <cudf/search.hpp>

using cudf::test::column_wrapper;
using cudf::test::scalar_wrapper;

class SearchTest : public GdfTest {};

Expand Down Expand Up @@ -687,3 +689,109 @@ TEST_F(SearchTest, table__find_last__nulls_as_largest)
<< "Expected:" << expect.to_str();
}

// A value that appears in the column must be reported as found.
TEST_F(SearchTest, contains_true)
{
  using element_type = int64_t;

  column_wrapper<element_type> column {0, 1, 17, 19, 23, 29, 71};
  scalar_wrapper<element_type> value {23};

  bool const expect = true;
  bool const result = cudf::contains(column.get()[0], value.get()[0]);

  ASSERT_EQ(result, expect);
}

// A value that does not appear in the column must be reported as missing.
TEST_F(SearchTest, contains_false)
{
  using element_type = int64_t;

  column_wrapper<element_type> column {0, 1, 17, 19, 23, 29, 71};
  scalar_wrapper<element_type> value {24};

  bool const expect = false;
  bool const result = cudf::contains(column.get()[0], value.get()[0]);

  ASSERT_EQ(result, expect);
}

// The scalar is built with `false` as its second argument (presumably the
// validity flag -- confirm against scalar_wrapper); searching for it in this
// column is expected to yield false even though 23 is among the data.
TEST_F(SearchTest, contains_empty_value)
{
  using element_type = int64_t;

  column_wrapper<element_type> column {0, 1, 17, 19, 23, 29, 71};
  scalar_wrapper<element_type> value (23, false);

  bool const expect = false;
  bool const result = cudf::contains(column.get()[0], value.get()[0]);

  ASSERT_EQ(result, expect);
}

// Searching a column constructed with no elements must yield false.
TEST_F(SearchTest, contains_empty_column)
{
  using element_type = int64_t;

  column_wrapper<element_type> column {};
  scalar_wrapper<element_type> value {24};

  bool const expect = false;
  bool const result = cudf::contains(column.get()[0], value.get()[0]);

  ASSERT_EQ(result, expect);
}

// Column with a validity pattern in which the row holding 23 is marked 1:
// the value is expected to be found.
TEST_F(SearchTest, contains_nullable_column_true)
{
  using element_type = int64_t;

  std::vector<element_type>   column_data   { 0, 1, 17, 19, 23, 29, 71};
  std::vector<gdf_valid_type> column_valids { 0, 0, 1, 1, 1, 1, 1 };

  scalar_wrapper<element_type> value {23};

  // Per-row validity is supplied through a callback that indexes column_valids
  column_wrapper<element_type> column (
    column_data,
    [&]( gdf_index_type row ) { return column_valids[row]; });

  bool const expect = true;
  bool const result = cudf::contains(column.get()[0], value.get()[0]);

  ASSERT_EQ(result, expect);
}

// Column with a validity pattern in which the row holding 23 is marked 0:
// the value is expected NOT to be found.
TEST_F(SearchTest, contains_nullable_column_false)
{
  using element_type = int64_t;

  std::vector<element_type>   column_data   { 0, 1, 17, 19, 23, 29, 71};
  std::vector<gdf_valid_type> column_valids { 0, 0, 1, 1, 0, 1, 1};

  scalar_wrapper<element_type> value {23};

  // Per-row validity is supplied through a callback that indexes column_valids
  column_wrapper<element_type> column (
    column_data,
    [&]( gdf_index_type row ) { return column_valids[row]; });

  bool const expect = false;
  bool const result = cudf::contains(column.get()[0], value.get()[0]);

  ASSERT_EQ(result, expect);
}
5 changes: 5 additions & 0 deletions python/cudf/cudf/_lib/includes/search.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,8 @@ cdef extern from "cudf/search.hpp" namespace "cudf" nogil:
const cudf_table& values,
vector[bool] c_desc_flags,
) except +

# Binding for `contains` from "cudf/search.hpp" (namespace cudf).
# `except +` makes Cython translate a C++ exception thrown by the call into a
# Python exception.
cdef bool contains(
const gdf_column& t,
const gdf_scalar& value
) except +
21 changes: 21 additions & 0 deletions python/cudf/cudf/_lib/search.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -55,3 +55,24 @@ def search_sorted(column, values, side):
free_table(c_values)

return gdf_column_to_column(&c_out_col)


def contains (column, item):
    """Check whether ``column`` contains the value ``item``.

    Parameters
    ----------
    column : NumericalColumn
        Column to search in
    item :
        value to be searched

    Returns
    -------
    bool
        False when ``column`` is empty or ``item`` is None; otherwise the
        result of the libcudf ``contains`` call.
    """
    # Short-circuit the cases that need no device work
    if len(column) == 0:
        return False
    if item is None:
        return False

    cdef gdf_column* c_col = column_view_from_column(column)
    cdef gdf_scalar* c_item = gdf_scalar_from_scalar(item)

    # NOTE(review): c_col and c_item are never released in this function.
    # If the two helpers above heap-allocate their results, this leaks on
    # every call -- confirm and free them if so.
    return cpp_search.contains(c_col[0], c_item[0])
11 changes: 1 addition & 10 deletions python/cudf/cudf/core/column/numerical.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,16 +53,7 @@ def __contains__(self, item):
return False
except Exception:
return False
# Issue with cudautils with bool array, always returns True.
if self.data.mem.dtype == np.bool:
return (
cudautils.find_first(
self.data.mem.view("int8"), item.view("int8")
)
!= -1
)
else:
return cudautils.find_first(self.data.mem, item) != -1
return libcudf.search.contains(self, item)

def replace(self, **kwargs):
if "data" in kwargs and "dtype" not in kwargs:
Expand Down

0 comments on commit 504eca6

Please sign in to comment.