Skip to content

Commit

Permalink
Merge branch 'branch-0.13' of https://github.com/galipremsagar/cudf i…
Browse files Browse the repository at this point in the history
…nto branch-0.13
  • Loading branch information
galipremsagar committed Mar 17, 2020
2 parents 0279efd + 5037bf4 commit ba27ff3
Show file tree
Hide file tree
Showing 55 changed files with 1,341 additions and 710 deletions.
13 changes: 13 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,13 @@
- PR #4316 Add Java and JNI bindings for substring expression
- PR #4314 Add Java and JNI bindings for string contains
- PR #4461 Port nvstrings Miscellaneous functions to cuDF Python/Cython
- PR #4495 Port nvtext to cuDF Python/Cython
- PR #4503 Port binaryop.pyx to libcudf++ API
- PR #4499 Adding changes to handle include `keep_index` and `RangeIndex`
- PR #4533 Import `tlz` for optional `cytoolz` support
- PR #4493 Skip legacy testing in CI
- PR #4524 Updating `__setitem__` for DataFrame to use scalar scatter
- PR #4534 Disable deprecation warnings as errors.
- PR #4506 Check for multi-dimensional data in column/Series creation

## Bug Fixes
Expand Down Expand Up @@ -210,6 +216,7 @@
- PR #4137 Update Java for mutating fill and rolling window changes
- PR #4184 Add missing except+ to Cython bindings
- PR #4141 Fix NVStrings test_convert failure in 10.2 build
- PR #4156 Make fill/copy_range no-op on empty columns
- PR #4158 Fix merge issue with empty table return if one of the two tables is empty
- PR #4162 Properly handle no index metadata generation for to_parquet
- PR #4175 Fix `__sizeof__` calculation in `StringColumn`
Expand Down Expand Up @@ -248,6 +255,7 @@
- PR #4358 Fix strings::concat where narep is an empty string
- PR #4369 Fix race condition in gpuinflate
- PR #4390 Disable ScatterValid and ScatterNull legacy tests
- PR #4398 Fixes the failure in groupby in MIN/MAX on strings when some groups are empty
- PR #4406 Fix sorted merge issue with null values and ascending=False
- PR #4445 Fix string issue for parquet reader and support `keep_index` for `scatter_to_tables`
- PR #4423 Tighten up Dask serialization checks
Expand All @@ -260,6 +268,11 @@
- PR #4474 Fix to not materialize RangeIndex in copy_categories
- PR #4496 Skip tests which require 2+ GPUs
- PR #4494 Update Java memory event handler for new RMM resource API
- PR #4505 Fix 0 length buffers during serialization
- PR #4482 Fix `.str.rsplit`, `.str.split`, `.str.find`, `.str.rfind`, `.str.index`, `.str.rindex` and enable related tests
- PR #4513 Backport scalar virtual destructor fix
- PR #4519 Remove `n` validation for `nlargest` & `nsmallest` and add negative support for `n`
- PR #4526 Fix index slicing issue for index in case of an empty dataframe


# cuDF 0.12.0 (04 Feb 2020)
Expand Down
4 changes: 2 additions & 2 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ set(CMAKE_CUDA_STANDARD 14)
set(CMAKE_CUDA_STANDARD_REQUIRED ON)

if(CMAKE_COMPILER_IS_GNUCXX)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror -Wno-error=deprecated-declarations")

# Suppress parentheses warning which causes gmock to fail
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler -Wno-parentheses")
Expand Down Expand Up @@ -110,7 +110,7 @@ set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-extended-lambda --expt-relaxed-

# set warnings as errors
# TODO: remove `no-maybe-uninitialized` used to suppress warnings in rmm::exec_policy
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Werror cross-execution-space-call -Xcompiler -Wall,-Werror")
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Werror cross-execution-space-call -Xcompiler -Wall,-Werror,-Wno-error=deprecated-declarations")

# Option to enable line info in CUDA device compilation to allow introspection when profiling / memchecking
option(CMAKE_CUDA_LINEINFO "Enable the -lineinfo option for nvcc (useful for cuda-memcheck / profiler" OFF)
Expand Down
2 changes: 1 addition & 1 deletion cpp/include/cudf/binaryop.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ namespace experimental {
/**
* @brief Types of binary operations that can be performed on data.
*/
enum class binary_operator {
enum class binary_operator : int32_t {
ADD, ///< operator +
SUB, ///< operator -
MUL, ///< operator *
Expand Down
10 changes: 4 additions & 6 deletions cpp/include/cudf/copying.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -243,9 +243,8 @@ std::unique_ptr<table> empty_like(table_view const& input_table);
* variable width types).
* @throws `cudf::logic_error` for invalid range (if
* @p source_begin > @p source_end, @p source_begin < 0,
* @p source_begin >= @p source.size(), @p source_end > @p source.size(),
* @p target_begin < 0, target_begin >= @p target.size(), or
* @p target_begin + (@p source_end - @p source_begin) > @p target.size()).
* @p source_end > @p source.size(), @p target_begin < 0,
* or @p target_begin + (@p source_end - @p source_begin) > @p target.size()).
* @throws `cudf::logic_error` if @p target and @p source have different types.
* @throws `cudf::logic_error` if @p source has null values and @p target is not
* nullable.
Expand Down Expand Up @@ -278,9 +277,8 @@ void copy_range_in_place(column_view const& source,
*
* @throws `cudf::logic_error` for invalid range (if
* @p source_begin > @p source_end, @p source_begin < 0,
* @p source_begin >= @p source.size(), @p source_end > @p source.size(),
* @p target_begin < 0, target_begin >= @p target.size(), or
* @p target_begin + (@p source_end - @p source_begin) > @p target.size()).
* @p source_end > @p source.size(), @p target_begin < 0,
* or @p target_begin + (@p source_end - @p source_begin) > @p target.size()).
* @throws `cudf::logic_error` if @p target and @p source have different types.
*
* @param source The column to copy from inside the range.
Expand Down
2 changes: 1 addition & 1 deletion cpp/include/cudf/detail/replace.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ std::unique_ptr<column> replace_nulls(column_view const& input,
* @returns Copy of `input` with null values replaced by `replacement`.
*/
std::unique_ptr<column> replace_nulls(column_view const& input,
scalar const* replacement,
scalar const& replacement,
rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource(),
cudaStream_t stream = 0);

Expand Down
6 changes: 2 additions & 4 deletions cpp/include/cudf/filling.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,7 @@ namespace experimental {
* @throws `cudf::logic_error` if memory reallocation is required (e.g. for
* variable width types).
* @throws `cudf::logic_error` for invalid range (if @p begin < 0,
* @p begin > @p end, @p begin >= @p destination.size(), or
* @p end > @p destination.size()).
* @p begin > @p end, or @p end > @p destination.size()).
* @throws `cudf::logic_error` if @p destination and @p value have different
* types.
* @throws `cudf::logic_error` if @p value is invalid but @p destination is not
Expand All @@ -62,8 +61,7 @@ void fill_in_place(mutable_column_view& destination, size_type begin,
* indicated by the indices [@p begin, @p end) were overwritten by @p value.
*
* @throws `cudf::logic_error` for invalid range (if @p begin < 0,
* @p begin > @p end, @p begin >= @p destination.size(), or
* @p end > @p destination.size()).
* @p begin > @p end, or @p end > @p destination.size()).
* @throws `cudf::logic_error` if @p destination and @p value have different
* types.
*
Expand Down
2 changes: 1 addition & 1 deletion cpp/include/cudf/scalar/scalar.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ namespace cudf {
*/
class scalar {
public:
~scalar() = default;
virtual ~scalar() = default;
scalar(scalar&& other) = default;
scalar(scalar const& other) = default;
scalar& operator=(scalar const& other) = delete;
Expand Down
3 changes: 1 addition & 2 deletions cpp/src/column/column_view.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,7 @@ size_type column_view_base::null_count() const {
}

size_type column_view_base::null_count(size_type begin, size_type end) const {
CUDF_EXPECTS((begin <= end) && (begin >= 0) && (begin < size()) &&
(end <= size()),
CUDF_EXPECTS((begin >= 0) && (end <= size()) && (begin <= end),
"Range is out of bounds.");
return (null_count() == 0) ?
0 : cudf::count_unset_bits(null_mask(), offset() + begin, offset() + end);
Expand Down
28 changes: 9 additions & 19 deletions cpp/src/copying/copy_range.cu
Original file line number Diff line number Diff line change
Expand Up @@ -163,16 +163,11 @@ void copy_range_in_place(column_view const& source, mutable_column_view& target,
cudaStream_t stream) {
CUDF_EXPECTS(cudf::is_fixed_width(target.type()) == true,
"In-place copy_range does not support variable-sized types.");
CUDF_EXPECTS((source_begin <= source_end) &&
(source_begin >= 0) &&
(source_begin < source.size()) &&
(source_end <= source.size()) &&
(target_begin >= 0) &&
(target_begin < target.size()) &&
(target_begin + (source_end - source_begin) <=
target.size()) &&
// overflow
(target_begin + (source_end - source_begin) >= target_begin),
CUDF_EXPECTS((source_begin >= 0) &&
(source_end <= source.size()) &&
(source_begin <= source_end) &&
(target_begin >= 0) &&
(target_begin <= target.size() - (source_end - source_begin)),
"Range is out of bounds.");
CUDF_EXPECTS(target.type() == source.type(), "Data type mismatch.");
CUDF_EXPECTS((target.nullable() == true) || (source.has_nulls() == false),
Expand All @@ -193,15 +188,10 @@ std::unique_ptr<column> copy_range(column_view const& source,
rmm::mr::device_memory_resource* mr,
cudaStream_t stream) {
CUDF_EXPECTS((source_begin >= 0) &&
(source_begin <= source_end) &&
(source_begin < source.size()) &&
(source_end <= source.size()) &&
(target_begin >= 0) &&
(target_begin < target.size()) &&
(target_begin + (source_end - source_begin) <=
target.size()) &&
// overflow
(target_begin + (source_end - source_begin) >= target_begin),
(source_end <= source.size()) &&
(source_begin <= source_end) &&
(target_begin >= 0) &&
(target_begin <= target.size() - (source_end - source_begin)),
"Range is out of bounds.");
CUDF_EXPECTS(target.type() == source.type(), "Data type mismatch.");

Expand Down
10 changes: 4 additions & 6 deletions cpp/src/filling/fill.cu
Original file line number Diff line number Diff line change
Expand Up @@ -132,9 +132,8 @@ void fill_in_place(mutable_column_view& destination,
CUDF_EXPECTS(cudf::is_fixed_width(destination.type()) == true,
"In-place fill does not support variable-sized types.");
CUDF_EXPECTS((begin >= 0) &&
(begin <= end) &&
(begin < destination.size()) &&
(end <= destination.size()),
(end <= destination.size()) &&
(begin <= end),
"Range is out of bounds.");
CUDF_EXPECTS((destination.nullable() == true) || (value.is_valid() == true),
"destination should be nullable or value should be non-null.");
Expand All @@ -157,9 +156,8 @@ std::unique_ptr<column> fill(column_view const& input,
rmm::mr::device_memory_resource* mr,
cudaStream_t stream) {
CUDF_EXPECTS((begin >= 0) &&
(begin <= end) &&
(begin < input.size()) &&
(end <= input.size()),
(end <= input.size()) &&
(begin <= end),
"Range is out of bounds.");
CUDF_EXPECTS(input.type() == value.type(), "Data type mismatch.");

Expand Down
18 changes: 14 additions & 4 deletions cpp/src/groupby/hash/groupby.cu
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,12 @@
#include <cudf/column/column.hpp>
#include <cudf/column/column_view.hpp>
#include <cudf/column/column_factories.hpp>
#include <cudf/scalar/scalar.hpp>
#include <cudf/groupby.hpp>
#include <cudf/detail/groupby.hpp>
#include <cudf/detail/gather.cuh>
#include <cudf/detail/gather.hpp>
#include <cudf/detail/replace.hpp>
#include <cudf/table/table.hpp>
#include <cudf/table/table_view.hpp>
#include <cudf/table/table_device_view.cuh>
Expand Down Expand Up @@ -165,10 +167,18 @@ void sparse_to_dense_results(
auto transformed_result =
[&col, to_dense_agg_result, mr, stream]
(auto const& agg_kind) {
auto tranformed_agg = std::make_unique<aggregation>(agg_kind);
auto argmax_result = to_dense_agg_result(tranformed_agg);
auto transformed_result = experimental::detail::gather(
table_view({col}), *argmax_result, false, false, false, mr, stream);
auto transformed_agg = std::make_unique<aggregation>(agg_kind);
auto arg_result = to_dense_agg_result(transformed_agg);
// We make a view of ARG(MIN/MAX) result without a null mask and gather
// using this map. The values in data buffer of ARG(MIN/MAX) result
// corresponding to null values were initialized to ARG(MIN/MAX)_SENTINEL
// which is an out of bounds index value (-1) and causes the gathered
// value to be null.
column_view null_removed_map(data_type(type_to_id<size_type>()),
arg_result->size(),
static_cast<void const*>(arg_result->view().template data<size_type>()));
auto transformed_result = experimental::detail::gather(table_view({col}),
null_removed_map, false, arg_result->nullable(), false, mr, stream);
return std::move(transformed_result->release()[0]);
};

Expand Down
17 changes: 15 additions & 2 deletions cpp/tests/copying/copy_range_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -422,6 +422,7 @@ TEST_F(CopyRangeErrorTestFixture, InvalidRange)
thrust::make_counting_iterator(0) + size);

cudf::mutable_column_view target_view{target};
cudf::column_view source_view{source};

// empty_range == no-op, this is valid
EXPECT_NO_THROW(cudf::experimental::copy_range_in_place(
Expand All @@ -447,10 +448,10 @@ TEST_F(CopyRangeErrorTestFixture, InvalidRange)

// source_begin > source.size()
EXPECT_THROW(cudf::experimental::copy_range_in_place(
source, target_view, 100, 100, 0),
source, target_view, 101, 100, 0),
cudf::logic_error);
EXPECT_THROW(auto p_ret = cudf::experimental::copy_range(
source, target, 100, 100, 0),
source, target, 101, 100, 0),
cudf::logic_error);

// source_end > source.size()
Expand Down Expand Up @@ -484,6 +485,18 @@ TEST_F(CopyRangeErrorTestFixture, InvalidRange)
EXPECT_THROW(auto p_ret = cudf::experimental::copy_range(
source, target, 50, 100, 80),
cudf::logic_error);

// Empty column
target = cudf::test::fixed_width_column_wrapper<int32_t>{};
source = cudf::test::fixed_width_column_wrapper<int32_t>{};
target_view = target;
source_view = source;

// empty column == no-op, this is valid
EXPECT_NO_THROW(cudf::experimental::copy_range_in_place(
source_view, target_view, 0, source_view.size(), 0));
EXPECT_NO_THROW(auto p_ret = cudf::experimental::copy_range(
source_view, target, 0, source_view.size(), 0));
}

TEST_F(CopyRangeErrorTestFixture, DTypeMismatch)
Expand Down
18 changes: 14 additions & 4 deletions cpp/tests/filling/fill_tests.cu
Original file line number Diff line number Diff line change
Expand Up @@ -352,10 +352,10 @@ TEST_F(FillErrorTestFixture, InvalidRange)
*p_val),
cudf::logic_error);

// out_begin >= destination.size()
EXPECT_THROW(cudf::experimental::fill_in_place(destination_view, 100, 100, *p_val),
cudf::logic_error);
EXPECT_THROW(auto p_ret = cudf::experimental::fill(destination, 100, 100,
// out_begin > destination.size()
EXPECT_THROW(cudf::experimental::fill_in_place(destination_view, 101, 100, *p_val),
cudf::logic_error);
EXPECT_THROW(auto p_ret = cudf::experimental::fill(destination, 101, 100,
*p_val),
cudf::logic_error);

Expand All @@ -365,6 +365,16 @@ TEST_F(FillErrorTestFixture, InvalidRange)
EXPECT_THROW(auto p_ret = cudf::experimental::fill(destination, 99, 101,
*p_val),
cudf::logic_error);

// Empty Column
destination = cudf::test::fixed_width_column_wrapper<int32_t>{};
destination_view = destination;

// empty column, this is valid
EXPECT_NO_THROW(cudf::experimental::fill_in_place(destination_view, 0,
destination_view.size(), *p_val));
EXPECT_NO_THROW(auto p_ret = cudf::experimental::fill(destination, 0,
destination_view.size(), *p_val));
}

TEST_F(FillErrorTestFixture, DTypeMismatch)
Expand Down
14 changes: 14 additions & 0 deletions cpp/tests/groupby/sort/group_max_test.cu
Original file line number Diff line number Diff line change
Expand Up @@ -133,5 +133,19 @@ TEST_F(groupby_max_string_test, basic)
test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg));
}

// MAX over strings for a single key group whose values are all built with the
// all_null() validity helper (presumably marking every row null; the helper is
// defined outside this view). The expected output is one key with one value
// that is likewise built with all_null().
TEST_F(groupby_max_string_test, zero_valid_values)
{
    fixed_width_column_wrapper<int32_t> group_keys{1, 1, 1};
    strings_column_wrapper null_vals({"año", "bit", "₹1"}, all_null());

    fixed_width_column_wrapper<int32_t> want_keys{1};
    strings_column_wrapper want_vals({""}, all_null());

    auto max_agg = cudf::experimental::make_max_aggregation();
    test_single_agg(group_keys, null_vals, want_keys, want_vals,
                    std::move(max_agg));
}

} // namespace test
} // namespace cudf
14 changes: 14 additions & 0 deletions cpp/tests/groupby/sort/group_min_test.cu
Original file line number Diff line number Diff line change
Expand Up @@ -133,5 +133,19 @@ TEST_F(groupby_min_string_test, basic)
test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg));
}

// MIN over strings for a single key group whose values are all built with the
// all_null() validity helper (presumably marking every row null; the helper is
// defined outside this view). The expected output is one key with one value
// that is likewise built with all_null().
TEST_F(groupby_min_string_test, zero_valid_values)
{
    fixed_width_column_wrapper<int32_t> group_keys{1, 1, 1};
    strings_column_wrapper null_vals({"año", "bit", "₹1"}, all_null());

    fixed_width_column_wrapper<int32_t> want_keys{1};
    strings_column_wrapper want_vals({""}, all_null());

    auto min_agg = cudf::experimental::make_min_aggregation();
    test_single_agg(group_keys, null_vals, want_keys, want_vals,
                    std::move(min_agg));
}

} // namespace test
} // namespace cudf
1 change: 0 additions & 1 deletion python/cudf/cudf/_lib/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

from . import (
avro,
binops,
concat,
copying,
csv,
Expand Down
Loading

0 comments on commit ba27ff3

Please sign in to comment.